1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Provides an assembler that assembles to basic block instruction lists.
16 :
17 : #ifndef SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_ASSEMBLER_H_
18 : #define SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_ASSEMBLER_H_
19 :
20 : #include "syzygy/block_graph/basic_block.h"
21 : #include "syzygy/core/assembler.h"
22 :
23 : namespace block_graph {
24 : // Makes core::ValueSize usable unqualified inside the block_graph namespace.
25 : using core::ValueSize;
26 :
27 : // Forward declarations; both classes are defined later in this header.
28 : class BasicBlockAssembler;
29 : class Operand;
30 :
31 : // Declares a BasicBlockReference-like class that has no type or size
32 : // information. The size information is stored in the Operand or Value housing
33 : // the untyped reference, and the type is inferred from the instruction being
34 : // assembled.
35 : class UntypedReference {
36 : public:
37 : typedef BlockGraph::Block Block;
38 : typedef BlockGraph::Offset Offset;
39 :
40 : // Default constructor. Creates an invalid reference (IsValid() is false).
41 E : UntypedReference()
42 : : basic_block_(NULL), block_(NULL), offset_(0), base_(0) {
43 E : }
44 :
45 : // Copy constructor. Copies all four fields of @p other.
46 : // @param other The reference to be copied.
47 : UntypedReference(const UntypedReference& other)
48 : : basic_block_(other.basic_block_), block_(other.block_),
49 E : offset_(other.offset_), base_(other.base_) {
50 E : }
51 :
52 : // Constructor from a basic block reference. Copies its target, offset and base.
53 : // @param bb_ref The reference to be copied; it must have a non-NULL target.
54 : explicit UntypedReference(const BasicBlockReference& bb_ref)
55 : : basic_block_(bb_ref.basic_block()), block_(bb_ref.block()),
56 E : offset_(bb_ref.offset()), base_(bb_ref.base()) {
57 E : DCHECK(block_ != NULL || basic_block_ != NULL);
58 E : }
59 :
60 : // Constructs a reference to a basic block, with offset and base set to 0.
61 : // @param basic_block The basic block to be referred to. Must not be NULL.
62 E : explicit UntypedReference(BasicBlock* basic_block)
63 : : basic_block_(basic_block), block_(NULL), offset_(0), base_(0) {
64 E : DCHECK(basic_block != NULL);
65 E : }
66 :
67 : // Constructs a reference to a block.
68 : // @param block The block to be referred to. Must not be NULL.
69 : // @param offset The offset from the start of the block actually being
70 : // pointed to.
71 : // @param base The offset from the start of the block semantically being
72 : // referred to.
73 E : UntypedReference(Block* block, Offset offset, Offset base)
74 : : basic_block_(NULL), block_(block), offset_(offset), base_(base) {
75 E : DCHECK(block != NULL);
76 E : }
77 :
78 : // @name Accessors.
79 : // @{
80 E : BasicBlock* basic_block() const { return basic_block_; }
81 E : Block* block() const { return block_; }
82 E : Offset offset() const { return offset_; }
83 E : Offset base() const { return base_; }
84 : // @}
85 :
86 : // @returns true if this reference has a non-NULL block or basic block.
87 E : bool IsValid() const { return block_ != NULL || basic_block_ != NULL; }
88 :
89 : // @returns the type of the object being referred to.
90 E : BasicBlockReference::ReferredType referred_type() const {
91 E : if (block_ != NULL)  // The block is tested first, so it takes precedence.
92 E : return BasicBlockReference::REFERRED_TYPE_BLOCK;
93 E : if (basic_block_ != NULL)
94 E : return BasicBlockReference::REFERRED_TYPE_BASIC_BLOCK;
95 E : return BasicBlockReference::REFERRED_TYPE_UNKNOWN;  // Neither target is set.
96 E : }
97 :
98 : // Comparison operator. Compares all four fields.
99 : // @returns true if this reference is the same as the @p other.
100 E : bool operator==(const UntypedReference& other) const {
101 : return basic_block_ == other.basic_block_ &&
102 : block_ == other.block_ &&
103 : offset_ == other.offset_ &&
104 E : base_ == other.base_;
105 E : }
106 :
107 : private:
108 : BasicBlock* basic_block_;  // The basic block referred to, or NULL.
109 : Block* block_;  // The block referred to, or NULL.
110 : Offset offset_;  // Offset from the start of the block actually pointed to.
111 : Offset base_;  // Offset from the start of the block semantically referred to.
112 : };
113 : // Pairs a core::ValueImpl (value and size) with an UntypedReference.
114 : class Value {
115 : public:
116 : typedef BlockGraph::Block Block;
117 : typedef BlockGraph::Offset Offset;
118 : typedef core::ValueImpl ValueImpl;
119 : typedef core::ValueSize ValueSize;
120 :
121 : // Default construction.
122 : Value();
123 :
124 : // Constructs an 8- or 32-bit value, depending on the minimum number of bits
125 : // required to represent the Value. If the value can be encoded using 8-bits
126 : // to have the same representation under sign extension, then an 8-bit Value
127 : // will be created; otherwise, a 32-bit absolute Value will be created.
128 : // @param value The value to be stored.
129 : explicit Value(uint32 value);
130 :
131 : // Constructs an absolute value having a specific bit width.
132 : // @param value The value to be stored.
133 : // @param size The size of the value.
134 : Value(uint32 value, ValueSize size);
135 :
136 : // Constructs a 32-bit direct reference to the basic block @p bb.
137 : // @param bb The basic block to be referred to.
138 : // @note This is fine even for jmps (which may be encoded using 8-bit
139 : // references) as the BB layout algorithm will use the shortest jmp
140 : // possible.
141 : explicit Value(BasicBlock* bb);
142 :
143 : // Constructs a 32-bit direct reference to @p block at the given @p offset.
144 : // @param block The block to be referred to.
145 : // @param offset The offset to be referred to, both semantically and
146 : // literally. The base and offset of the reference will be set to this.
147 : // @note This is fine even for jmps (which may be encoded using 8-bit
148 : // references) as the BB layout algorithm will use the shortest jmp
149 : // possible.
150 : Value(Block* block, Offset offset);
151 :
152 : // Constructs a 32-bit reference to @p block at the given @p offset and
153 : // @p base.
154 : // @param block The block to be referred to.
155 : // @param offset The offset to be literally referred to.
156 : // @param base The offset to be semantically referred to. This must be
157 : // within the data of @p block.
158 : Value(Block* block, Offset offset, Offset base);
159 :
160 : // Full constructor.
161 : // @param value The value to be stored.
162 : // @param size The size of the value.
163 : // @param ref The untyped reference backing this value. The reference must
164 : // be valid.
165 : Value(uint32 value, ValueSize size, const UntypedReference& ref);
166 :
167 : // Copy constructor.
168 : // @param other The value to be copied.
169 : Value(const Value& other);
170 :
171 : // Destructor.
172 : ~Value();
173 :
174 : // Assignment operator. NOTE(review): returns const Value&, not Value&.
175 : const Value& operator=(const Value& other);
176 :
177 : // @name Accessors.
178 : // @{
179 E : uint32 value() const { return value_.value(); }  // Forwarded from value_.
180 E : ValueSize size() const { return value_.size(); }  // Forwarded from value_.
181 E : const UntypedReference& reference() const { return reference_; }
182 : // @}
183 :
184 : // Comparison operator; only declared here, there is no inline definition.
185 : bool operator==(const Value& rhs) const;
186 :
187 : private:
188 : // Private constructor, used by Operand::displacement() to build its result.
189 : Value(const UntypedReference& ref, const core::ValueImpl& value);
190 :
191 : friend class BasicBlockAssembler;
192 : friend class Operand;
193 :
194 : UntypedReference reference_;  // Returned by reference().
195 : ValueImpl value_;  // Backs value() and size().
196 : };
197 :
198 : // Displacements and immediates behave near-identically, but are semantically
199 : // slightly different. Both names are plain aliases of Value.
200 : typedef Value Immediate;
201 : typedef Value Displacement;
202 :
203 : // An operand implies indirection to memory through one of the myriad
204 : // modes supported by IA32.
205 : class Operand {
206 : public:
207 : // A register-indirect mode: [base].
208 : explicit Operand(const core::Register32& base);
209 :
210 : // A register-indirect with displacement mode: [base + displ].
211 : Operand(const core::Register32& base, const Displacement& displ);
212 :
213 : // A displacement-only mode: [displ].
214 : explicit Operand(const Displacement& displ);
215 :
216 : // The full [base + index * scale + displ32] mode.
217 : // @note esp cannot be used as an index register.
218 : Operand(const core::Register32& base,
219 : const core::Register32& index,
220 : core::ScaleFactor scale,
221 : const Displacement& displ);
222 :
223 : // The full [base + index * scale] mode.
224 : // @note esp cannot be used as an index register.
225 : Operand(const core::Register32& base,
226 : const core::Register32& index,
227 : core::ScaleFactor scale);
228 :
229 : // The [index * scale + displ32] mode.
230 : // @note esp cannot be used as an index register.
231 : Operand(const core::Register32& index,
232 : core::ScaleFactor scale,
233 : const Displacement& displ);
234 :
235 : // Copy constructor. @param o The operand to be copied.
236 : Operand(const Operand& o);
237 :
238 : // Destructor.
239 : ~Operand();
240 :
241 : // Assignment operator. NOTE(review): returns const Operand&, not Operand&.
242 : const Operand& operator=(const Operand& other);
243 :
244 : // @name Accessors.
245 : // @{
246 E : const core::RegisterId base() const { return operand_.base(); }  // NOTE(review): const on a by-value return, likely redundant; confirm.
247 E : const core::RegisterId index() const { return operand_.index(); }
248 E : core::ScaleFactor scale() const { return operand_.scale(); }
249 E : Displacement displacement() const {
250 E : return Displacement(reference_, operand_.displacement());  // Uses Value's private constructor.
251 E : }
252 : // @}
253 :
254 : private:
255 : friend class BasicBlockAssembler;
256 :
257 : UntypedReference reference_;  // Reference attached to the displacement returned by displacement().
258 : core::OperandImpl operand_;  // Backs base(), index(), scale() and the raw displacement.
259 : };
260 : // An assembler whose instructions are inserted into a basic block instruction list.
261 : class BasicBlockAssembler {
262 : public:
263 : typedef BlockGraph::Block::SourceRange SourceRange;
264 : typedef BasicBlock::Instructions Instructions;
265 : typedef core::Register8 Register8;
266 : typedef core::Register16 Register16;
267 : typedef core::Register32 Register32;
268 : typedef core::ConditionCode ConditionCode;
269 :
270 : // Constructs a basic block assembler that inserts new instructions
271 : // into @p *list at @p where.
272 : BasicBlockAssembler(const Instructions::iterator& where,
273 : Instructions *list);
274 :
275 : // Constructs a basic block assembler that inserts new instructions into
276 : // @p *list at @p where, assuming a starting address of @p location.
277 : BasicBlockAssembler(uint32 location,
278 : const Instructions::iterator& where,
279 : Instructions *list);
280 :
281 : // @returns The source range injected into created instructions.
282 E : SourceRange source_range() const { return serializer_.source_range(); }
283 :
284 : // Sets the SourceRange injected repeatedly into each instruction created via
285 : // the assembler. This should be used with care because it causes the OMAP
286 : // information to no longer be a 1:1 mapping, and may confuse some debuggers.
287 : // @param source_range The source range to set on each created instruction.
288 E : void set_source_range(const SourceRange& source_range) {
289 E : serializer_.set_source_range(source_range);
290 E : }
291 :
292 : // Emits one or more NOP instructions, their total length being @p size
293 : // bytes.
294 : // @param size The number of bytes of NOPs to generate.
295 : // @note For a generated NOP sequence of optimal performance it is best to
296 : // call nop once rather than successively (i.e. the NOP sequence generated
297 : // by nop(x) nop(y) may perform worse than that generated by nop(x + y)).
298 : void nop(size_t size);
299 :
300 : // @name Call instructions.
301 : // @{
302 : void call(const Immediate& dst);
303 : void call(const Operand& dst);
304 : // @}
305 :
306 : // @name Jmp instructions.
307 : // @{
308 : void jmp(const Immediate& dst);
309 : void jmp(const Operand& dst);
310 : // @}
311 :
312 : // @name Conditional branch instruction.
313 : // @{
314 : void j(ConditionCode code, const Immediate& dst);
315 : // @}
316 :
317 : // @name Manipulation of flags.
318 : // @{
319 : void pushfd();
320 : void popfd();
321 : void lahf();
322 : void sahf();
323 : void set(ConditionCode code, const Register32& dst);
324 : // @}
325 :
326 : // @name Arithmetic operations.
327 : // @{
328 : void test(const Register8& dst, const Register8& src);
329 : void test(const Register8& dst, const Immediate& src);
330 :
331 : void test(const Register32& dst, const Register32& src);
332 : void test(const Register32& dst, const Operand& src);
333 : void test(const Operand& dst, const Register32& src);
334 : void test(const Register32& dst, const Immediate& src);
335 : void test(const Operand& dst, const Immediate& src);
336 :
337 : void cmp(const Register8& dst, const Register8& src);
338 : void cmp(const Register8& dst, const Immediate& src);
339 :
340 : void cmp(const Register32& dst, const Register32& src);
341 : void cmp(const Register32& dst, const Operand& src);
342 : void cmp(const Operand& dst, const Register32& src);
343 : void cmp(const Register32& dst, const Immediate& src);
344 : void cmp(const Operand& dst, const Immediate& src);
345 :
346 : void add(const Register8& dst, const Register8& src);
347 : void add(const Register8& dst, const Immediate& src);
348 :
349 : void add(const Register32& dst, const Register32& src);
350 : void add(const Register32& dst, const Operand& src);
351 : void add(const Operand& dst, const Register32& src);
352 : void add(const Register32& dst, const Immediate& src);
353 : void add(const Operand& dst, const Immediate& src);
354 :
355 : void sub(const Register8& dst, const Register8& src);
356 : void sub(const Register8& dst, const Immediate& src);
357 :
358 : void sub(const Register32& dst, const Register32& src);
359 : void sub(const Register32& dst, const Operand& src);
360 : void sub(const Operand& dst, const Register32& src);
361 : void sub(const Register32& dst, const Immediate& src);
362 : void sub(const Operand& dst, const Immediate& src);
363 : // @}
364 :
365 : // @name Shifting operations.
366 : // @{
367 : void shl(const Register32& dst, const Immediate& src);
368 : void shr(const Register32& dst, const Immediate& src);
369 : // @}
370 :
371 : // @name Byte mov varieties.
372 : // @{
373 : void mov_b(const Operand& dst, const Immediate& src);
374 : void movzx_b(const Register32& dst, const Operand& src);
375 : // @}
376 :
377 : // @name Double-word mov varieties.
378 : // @{
379 : void mov(const Register32& dst, const Register32& src);
380 : void mov(const Register32& dst, const Operand& src);
381 : void mov(const Operand& dst, const Register32& src);
382 : void mov(const Register32& dst, const Immediate& src);
383 : void mov(const Operand& dst, const Immediate& src);
384 : void mov_fs(const Register32& dst, const Operand& src);
385 : void mov_fs(const Operand& dst, const Register32& src);
386 : // @}
387 :
388 : // @name Load effective address.
389 : void lea(const Register32& dst, const Operand& src);
390 :
391 : // @name Stack manipulation.
392 : // @{
393 : void push(const Register32& src);
394 : void push(const Immediate& src);
395 : void push(const Operand& src);
396 :
397 : void pop(const Register32& dst);
398 : void pop(const Operand& dst);
399 : // @}
400 :
401 : // @name Ret instructions.
402 : // @{
403 : void ret();
404 : void ret(uint16 n);
405 : // @}
406 :
407 : // Exchange contents of two registers.
408 : // @param dst The destination register.
409 : // @param src The source register.
410 : // @note Exchanges involving eax generate shorter byte code.
411 : void xchg(const Register32& dst, const Register32& src);
412 : void xchg(const Register16& dst, const Register16& src);
413 : void xchg(const Register8& dst, const Register8& src);
414 :
415 : private:
416 : typedef BlockGraph::ReferenceType ReferenceType;
417 : // Serializer holding the insertion point, source range and reference infos.
418 : class BasicBlockSerializer
419 : : public core::AssemblerImpl::InstructionSerializer {
420 : public:
421 : BasicBlockSerializer(const Instructions::iterator& where,
422 : Instructions* list);
423 : // Overrides the InstructionSerializer interface method (marked OVERRIDE).
424 : virtual void AppendInstruction(uint32 location,
425 : const uint8* bytes,
426 : size_t num_bytes,
427 : const size_t *ref_locations,
428 : const void* const* refs,
429 : size_t num_refs) OVERRIDE;
430 :
431 E : SourceRange source_range() const { return source_range_; }
432 E : void set_source_range(const SourceRange& source_range) {
433 E : source_range_ = source_range;
434 E : }
435 :
436 : // Pushes back a reference type to be associated with an untyped reference.
437 : // @param type The type of the reference.
438 : // @param size The size of the reference, as a ValueSize.
439 : void PushReferenceInfo(ReferenceType type, core::ValueSize size);
440 :
441 : private:
442 : struct ReferenceInfo {
443 : BlockGraph::ReferenceType type;
444 : size_t size; // In bytes.
445 : };
446 :
447 : Instructions::iterator where_;
448 : Instructions* list_;
449 :
450 : // Source range set to instructions appended by this serializer.
451 : SourceRange source_range_;
452 :
453 : // The reference types and sizes associated with references in the
454 : // instruction parameters. These are provided to the serializer out of
455 : // band (not via Operand/Immediate/Value) by the implementations of the
456 : // various instructions. They allow the corresponding UntypedReferences to
457 : // be completed.
458 : ReferenceInfo ref_infos_[2];  // Room for at most two reference infos.
459 : size_t num_ref_infos_;  // Presumably the number of ref_infos_ entries in use; confirm.
460 : };
461 :
462 : // @name Utility functions for pushing/validating reference info.
463 : // @{
464 : void PushMandatoryReferenceInfo(ReferenceType type, const Immediate& imm);
465 : void PushOptionalReferenceInfo(ReferenceType type, const Immediate& imm);
466 : void PushOptionalReferenceInfo(ReferenceType type, const Operand& op);
467 : void CheckReferenceSize(core::ValueSize size, const Immediate& imm) const;
468 : void CheckReferenceSize(core::ValueSize size, const Operand& op) const;
469 : // @}
470 :
471 : BasicBlockSerializer serializer_;  // Backs source_range() and set_source_range().
472 : core::AssemblerImpl asm_;  // NOTE(review): presumably emits through serializer_; confirm.
473 : };
474 :
475 : } // namespace block_graph
476 :
477 : #endif // SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_ASSEMBLER_H_
|