1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/core/disassembler_util.h"
16 :
17 : #include "base/logging.h"
18 : #include "base/strings/stringprintf.h"
19 : #include "mnemonics.h" // NOLINT
20 :
21 : namespace core {
22 :
23 : namespace {
24 :
25 : // Return the size of a 3-byte VEX encoded instruction.
26 : //
27 : // The layout of these instructions is as follows, starting with a byte with
28 : // value 0xC4:
29 : // - First byte:
30 : // +---+---+---+---+---+---+---+---+
31 : // | 1 1 0 0 0 1 0 0 |
32 : // +---+---+---+---+---+---+---+---+
33 : // - Second byte:
34 : // +---+---+---+---+---+---+---+---+
35 : // |~R |~X |~B | map_select |
36 : // +---+---+---+---+---+---+---+---+
37 : // - Third byte:
38 : // +---+---+---+---+---+---+---+---+
39 : // |W/E| ~vvvv | L | pp |
40 : // +---+---+---+---+---+---+---+---+
41 : // - Fourth byte: The opcode for this instruction.
42 : //
43 : // |map_select| Indicates the opcode map that should be used for this
44 : // instruction.
45 : //
46 : // See http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix
47 : // for more details.
48 E : size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) {
49 E : DCHECK_EQ(0xC4, ci->code[0]);
50 : // Switch case based on the opcode map used by this instruction.
51 E : switch (ci->code[1] & 0x1F) {
52 : case 0x01: {
53 i : switch (ci->code[3]) {
54 i : case 0x1D: return 5; // vpermd
55 : default: break;
56 : }
57 i : break;
58 : }
59 : case 0x02: {
60 E : switch (ci->code[3]) {
61 E : case 0x36: return 5; // vpermd
62 E : case 0x5A: return 6; // vbroadcasti128
63 E : case 0x78: return 5; // vpbroadcastb
64 : default: break;
65 : }
66 i : break;
67 : }
68 : case 0x03: {
69 E : switch (ci->code[3]) {
70 E : case 0x00: return 6; // vpermq
71 E : case 0x38: return 7; // vinserti128
72 E : case 0x39: return 6; // vextracti128
73 : default: break;
74 : }
75 : break;
76 : }
77 : default:
78 : break;
79 : }
80 i : return 0;
81 E : }
82 :
83 : // Handle improperly decoded instructions. Returns true if an instruction was
84 : // handled, false otherwise. If this returns false then none of the output
85 : // parameters will have been changed.
86 : bool HandleBadDecode(_CodeInfo* ci,
87 : _DInst result[],
88 : unsigned int max_instructions,
89 : unsigned int* used_instructions_count,
90 E : _DecodeResult* ret) {
91 E : DCHECK_NE(reinterpret_cast<_CodeInfo*>(NULL), ci);
92 E : DCHECK_LE(1u, max_instructions);
93 E : DCHECK_NE(reinterpret_cast<unsigned int*>(NULL), used_instructions_count);
94 E : DCHECK_NE(reinterpret_cast<_DecodeResult*>(NULL), ret);
95 :
96 E : size_t size = 0;
97 :
98 E : if (ci->code[0] == 0xC4)
99 E : size = Get3ByteVexEncodedInstructionSize(ci);
100 :
101 E : if (size == 0)
102 i : return false;
103 :
104 : // We set the bare minimum properties that are required for any
105 : // subsequent processing that we perform.
106 :
107 E : *used_instructions_count = 1;
108 :
109 E : ::memset(result, 0, sizeof(result[0]));
110 E : result[0].addr = ci->codeOffset;
111 E : result[0].size = size;
112 :
113 E : DCHECK_EQ(FC_NONE, META_GET_FC(result[0].meta));
114 E : DCHECK_EQ(O_NONE, result[0].ops[0].type);
115 E : DCHECK_EQ(O_NONE, result[0].ops[1].type);
116 E : DCHECK_EQ(O_NONE, result[0].ops[2].type);
117 E : DCHECK_EQ(O_NONE, result[0].ops[3].type);
118 :
119 E : *ret = DECRES_SUCCESS;
120 :
121 E : return true;
122 E : }
123 :
124 : } // namespace
125 :
126 : _DecodeResult DistormDecompose(_CodeInfo* ci,
127 : _DInst result[],
128 : unsigned int max_instructions,
129 E : unsigned int* used_instructions_count) {
130 : _DecodeResult ret =
131 E : distorm_decompose(ci, result, max_instructions, used_instructions_count);
132 :
133 : // Distorm @229 has a bug where it has problems decoding some AVX
134 : // instructions. The encoding is described in detail here:
135 : // http://en.wikipedia.org/wiki/VEX_prefix
136 : // An issue has been filed here:
137 : // https://code.google.com/p/distorm/issues/detail?id=77
138 : // This is a workaround until the bug is fixed. We only care about the case
139 : // where decoding failed.
140 E : if (ret != DECRES_SUCCESS && *used_instructions_count == 0) {
141 : if (HandleBadDecode(ci, result, max_instructions, used_instructions_count,
142 E : &ret)) {
143 E : return ret;
144 : }
145 : }
146 E : return ret;
147 E : }
148 :
149 : bool DecodeOneInstruction(
150 E : uint32 address, const uint8* buffer, size_t length, _DInst* instruction) {
151 E : DCHECK(buffer != NULL);
152 E : DCHECK(instruction != NULL);
153 :
154 E : _CodeInfo code = {};
155 E : code.dt = Decode32Bits;
156 E : code.features = DF_NONE;
157 E : code.codeOffset = address;
158 E : code.codeLen = length;
159 E : code.code = buffer;
160 :
161 E : unsigned int decoded = 0;
162 E : ::memset(instruction, 0, sizeof(*instruction));
163 E : _DecodeResult result = DistormDecompose(&code, instruction, 1, &decoded);
164 :
165 E : if (result != DECRES_MEMORYERR && result != DECRES_SUCCESS)
166 i : return false;
167 :
168 : // It's possible for the decode to fail as having decoded a single partially
169 : // valid instruction (ie: valid prefix of an instruction, waiting on more
170 : // data), in which case it will return MEMORYERR (wants more data) and a
171 : // decoded length of zero.
172 E : if (decoded == 0)
173 i : return false;
174 :
175 E : DCHECK_GE(length, instruction->size);
176 E : DCHECK_LT(0, instruction->size);
177 :
178 E : return true;
179 E : }
180 :
181 : bool DecodeOneInstruction(
182 E : const uint8* buffer, size_t length, _DInst* instruction) {
183 E : DCHECK(buffer != NULL);
184 E : DCHECK(instruction != NULL);
185 E : if (!DecodeOneInstruction(0x10000000, buffer, length, instruction))
186 i : return false;
187 E : return true;
188 E : }
189 :
190 : bool InstructionToString(
191 : const _DInst& instruction,
192 : const uint8_t* data,
193 : int code_length,
194 E : std::string* buffer) {
195 E : DCHECK(data != NULL);
196 E : DCHECK(buffer != NULL);
197 :
198 E : _CodeInfo code = {};
199 E : code.codeOffset = 0;
200 E : code.code = data;
201 E : code.codeLen = code_length;
202 E : code.dt = Decode32Bits;
203 E : _DecodedInst decoded = {};
204 E : _DInst dinst = instruction;
205 :
206 E : dinst.addr = 0;
207 E : distorm_format64(&code, &dinst, &decoded);
208 :
209 : *buffer = base::StringPrintf("%-14s %s %s",
210 : decoded.instructionHex.p,
211 : decoded.mnemonic.p,
212 E : decoded.operands.p);
213 E : return true;
214 E : }
215 :
216 E : bool IsNop(const _DInst& instruction) {
217 E : switch (instruction.opcode) {
218 : default:
219 : // Only the sequences recognized below qualify as NOP instructions.
220 E : return false;
221 :
222 : case I_XCHG:
223 : // This handles the 1 bytes NOP sequence.
224 : // 1-byte: xchg eax, eax.
225 : return instruction.ops[0].type == O_REG &&
226 : instruction.ops[0].index == RM_AX &&
227 : instruction.ops[1].type == O_REG &&
228 E : instruction.ops[1].index == RM_AX;
229 :
230 : case I_NOP:
231 : // This handles the 2, 4, 5, 7, 8 and 9 byte NOP sequences.
232 : // 2-byte: 66 NOP
233 : // 4-byte: NOP DWORD PTR [EAX + 0] (8-bit displacement)
234 : // 5-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (8-bit displacement)
235 : // 7-byte: NOP DWORD PTR [EAX + 0] (32-bit displacement)
236 : // 8-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
237 : // 9-byte: NOP WORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
238 E : return true;
239 :
240 : case I_LEA:
241 : // This handles the 3 and 6 byte NOP sequences.
242 : // 3-byte: LEA REG, 0 (REG) (8-bit displacement)
243 : // 6-byte: LEA REG, 0 (REG) (32-bit displacement)
244 : return instruction.ops[0].type == O_REG &&
245 : instruction.ops[1].type == O_SMEM &&
246 : instruction.ops[0].index == instruction.ops[1].index &&
247 E : instruction.disp == 0;
248 :
249 : case I_MOV:
250 : // Not documented in the Intel manuals, but we see "mov reg, reg" a lot.
251 : return instruction.ops[0].type == O_REG &&
252 : instruction.ops[1].type == O_REG &&
253 E : instruction.ops[0].index == instruction.ops[1].index;
254 : }
255 E : }
256 :
257 E : bool IsCall(const _DInst& instruction) {
258 E : return META_GET_FC(instruction.meta) == FC_CALL;
259 E : }
260 :
261 E : bool IsReturn(const _DInst& instruction) {
262 E : return META_GET_FC(instruction.meta) == FC_RET;
263 E : }
264 :
265 E : bool IsSystemCall(const _DInst& instruction) {
266 E : return META_GET_FC(instruction.meta) == FC_SYS;
267 E : }
268 :
269 E : bool IsConditionalBranch(const _DInst& instruction) {
270 E : return META_GET_FC(instruction.meta) == FC_CND_BRANCH;
271 E : }
272 :
273 E : bool IsUnconditionalBranch(const _DInst& instruction) {
274 E : return META_GET_FC(instruction.meta) == FC_UNC_BRANCH;
275 E : }
276 :
277 E : bool IsBranch(const _DInst& instruction) {
278 E : return IsConditionalBranch(instruction) || IsUnconditionalBranch(instruction);
279 E : }
280 :
281 E : bool HasPcRelativeOperand(const _DInst& instruction, int operand_index) {
282 E : DCHECK_LE(0, operand_index);
283 E : DCHECK_LT(operand_index, static_cast<int>(arraysize(instruction.ops)));
284 E : return instruction.ops[operand_index].type == O_PC;
285 E : }
286 :
287 E : bool IsControlFlow(const _DInst& instruction) {
288 : // For the purposes of Syzygy we include all of the control flow altering
289 : // instruction EXCEPT for call as true control flow.
290 : return IsBranch(instruction) ||
291 : IsReturn(instruction) ||
292 E : IsSystemCall(instruction);
293 E : }
294 :
295 E : bool IsImplicitControlFlow(const _DInst& instruction) {
296 : // Control flow jumps implicitly out of the block for RET and SYS
297 E : if (IsReturn(instruction) || IsSystemCall(instruction))
298 E : return true;
299 :
300 : // Control flow is implicit for non PC-relative jumps (i.e., explicit
301 : // branches where the target is computed, stored in a register, stored
302 : // in a memory location, or otherwise indirect).
303 : if (IsUnconditionalBranch(instruction) &&
304 E : !HasPcRelativeOperand(instruction, 0)) {
305 E : return true;
306 : }
307 :
308 : // Otherwise it's not implicit control flow.
309 E : return false;
310 E : }
311 :
312 E : bool IsInterrupt(const _DInst& instruction) {
313 E : return META_GET_FC(instruction.meta) == FC_INT;
314 E : }
315 :
316 E : bool IsDebugInterrupt(const _DInst& instruction) {
317 : return IsInterrupt(instruction) && instruction.size == 1 &&
318 E : instruction.opcode == I_INT_3;
319 E : }
320 :
321 E : _RegisterType GetRegisterType(const Register& reg) {
322 E : return GetRegisterType(reg.id());
323 E : }
324 :
325 E : _RegisterType GetRegisterType(RegisterId reg_id) {
326 : static const _RegisterType kRegisterTypesById[assm::kRegisterMax] = {
327 : R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH, // 8-bit.
328 : R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI, // 16-bit.
329 : R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI // 32-bit.
330 : };
331 E : DCHECK_LE(assm::kRegisterMin, reg_id);
332 E : DCHECK_GT(assm::kRegisterMax, reg_id);
333 E : return kRegisterTypesById[reg_id];
334 E : }
335 :
336 E : RegisterId GetRegisterId(uint32 distorm_reg_type) {
337 E : switch (distorm_reg_type) {
338 : // 8-bit registers.
339 E : case R_AL: return assm::kRegisterAl;
340 i : case R_CL: return assm::kRegisterCl;
341 i : case R_DL: return assm::kRegisterDl;
342 i : case R_BL: return assm::kRegisterBl;
343 i : case R_AH: return assm::kRegisterAh;
344 i : case R_CH: return assm::kRegisterCh;
345 i : case R_DH: return assm::kRegisterDh;
346 E : case R_BH: return assm::kRegisterBh;
347 :
348 : // 16-bit registers.
349 i : case R_AX: return assm::kRegisterAx;
350 E : case R_CX: return assm::kRegisterCx;
351 i : case R_DX: return assm::kRegisterDx;
352 i : case R_BX: return assm::kRegisterBx;
353 E : case R_SP: return assm::kRegisterSp;
354 i : case R_BP: return assm::kRegisterBp;
355 i : case R_SI: return assm::kRegisterSi;
356 i : case R_DI: return assm::kRegisterDi;
357 :
358 : // 32-bit registers.
359 E : case R_EAX: return assm::kRegisterEax;
360 E : case R_ECX: return assm::kRegisterEcx;
361 E : case R_EDX: return assm::kRegisterEdx;
362 E : case R_EBX: return assm::kRegisterEbx;
363 E : case R_ESP: return assm::kRegisterEsp;
364 E : case R_EBP: return assm::kRegisterEbp;
365 E : case R_ESI: return assm::kRegisterEsi;
366 E : case R_EDI: return assm::kRegisterEdi;
367 :
368 i : default: return assm::kRegisterNone;
369 : }
370 E : }
371 :
372 E : const Register& GetRegister(uint32 distorm_reg_type) {
373 E : return Register::Get(GetRegisterId(distorm_reg_type));
374 E : }
375 :
376 : } // namespace core
|