1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/core/disassembler_util.h"
16 :
17 : #include "base/logging.h"
18 : #include "base/strings/stringprintf.h"
19 : #include "mnemonics.h" // NOLINT
20 :
21 : namespace core {
22 :
23 : namespace {
24 :
25 : // Return the size of a 3-byte VEX encoded instruction.
26 : //
27 : // The layout of these instructions is as follows, starting with a byte with
28 : // value 0xC4:
29 : // - First byte:
30 : // +---+---+---+---+---+---+---+---+
31 : // | 1 1 0 0 0 1 0 0 |
32 : // +---+---+---+---+---+---+---+---+
33 : // - Second byte:
34 : // +---+---+---+---+---+---+---+---+
35 : // |~R |~X |~B | map_select |
36 : // +---+---+---+---+---+---+---+---+
37 : // - Third byte:
38 : // +---+---+---+---+---+---+---+---+
39 : // |W/E| ~vvvv | L | pp |
40 : // +---+---+---+---+---+---+---+---+
41 : // - Fourth byte: The opcode for this instruction.
42 : //
43 : // |map_select| Indicates the opcode map that should be used for this
44 : // instruction.
45 : //
46 : // See http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix
47 : // for more details.
48 E : size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) {
49 E : DCHECK_EQ(0xC4, ci->code[0]);
50 : // Switch case based on the opcode map used by this instruction.
51 E : switch (ci->code[1] & 0x1F) {
52 : case 0x01: {
53 i : switch (ci->code[3]) {
54 i : case 0x1D: return 5; // vpermd
55 : default: break;
56 : }
57 i : break;
58 : }
59 : case 0x02: {
60 E : switch (ci->code[3]) {
61 E : case 0x13: return 5; // vcvtps2ps
62 E : case 0x36: return 5; // vpermd
63 E : case 0x5A: return 6; // vbroadcasti128
64 E : case 0x78: return 5; // vpbroadcastb
65 : default: break;
66 : }
67 i : break;
68 : }
69 : case 0x03: {
70 E : switch (ci->code[3]) {
71 E : case 0x00: return 6; // vpermq
72 E : case 0x1D: return 6; // vcvtps2ph
73 E : case 0x38: return 7; // vinserti128
74 E : case 0x39: return 6; // vextracti128
75 : default: break;
76 : }
77 : break;
78 : }
79 : default:
80 : break;
81 : }
82 i : return 0;
83 E : }
84 :
85 : // Handle improperly decoded instructions. Returns true if an instruction was
86 : // handled, false otherwise. If this returns false then none of the output
87 : // parameters will have been changed.
88 : bool HandleBadDecode(_CodeInfo* ci,
89 : _DInst result[],
90 : unsigned int max_instructions,
91 : unsigned int* used_instructions_count,
92 E : _DecodeResult* ret) {
93 E : DCHECK_NE(reinterpret_cast<_CodeInfo*>(NULL), ci);
94 E : DCHECK_LE(1u, max_instructions);
95 E : DCHECK_NE(reinterpret_cast<unsigned int*>(NULL), used_instructions_count);
96 E : DCHECK_NE(reinterpret_cast<_DecodeResult*>(NULL), ret);
97 :
98 E : size_t size = 0;
99 :
100 E : if (ci->code[0] == 0xC4)
101 E : size = Get3ByteVexEncodedInstructionSize(ci);
102 :
103 E : if (size == 0)
104 E : return false;
105 :
106 : // We set the bare minimum properties that are required for any
107 : // subsequent processing that we perform.
108 :
109 E : *used_instructions_count = 1;
110 :
111 E : ::memset(result, 0, sizeof(result[0]));
112 E : result[0].addr = ci->codeOffset;
113 E : result[0].size = static_cast<uint8_t>(size);
114 :
115 E : DCHECK_EQ(FC_NONE, META_GET_FC(result[0].meta));
116 E : DCHECK_EQ(O_NONE, result[0].ops[0].type);
117 E : DCHECK_EQ(O_NONE, result[0].ops[1].type);
118 E : DCHECK_EQ(O_NONE, result[0].ops[2].type);
119 E : DCHECK_EQ(O_NONE, result[0].ops[3].type);
120 :
121 E : *ret = DECRES_SUCCESS;
122 :
123 E : return true;
124 E : }
125 :
126 : } // namespace
127 :
128 : _DecodeResult DistormDecompose(_CodeInfo* ci,
129 : _DInst result[],
130 : unsigned int max_instructions,
131 E : unsigned int* used_instructions_count) {
132 : _DecodeResult ret =
133 E : distorm_decompose(ci, result, max_instructions, used_instructions_count);
134 :
135 : // Distorm @ac277fb has a bug where it has problems decoding some AVX
136 : // instructions. The encoding is described in detail here:
137 : // http://en.wikipedia.org/wiki/VEX_prefix
138 : // An issue has been filed here:
139 : // https://code.google.com/p/distorm/issues/detail?id=77
140 : // This is a workaround until the bug is fixed. We only care about the case
141 : // where decoding failed.
142 E : if (ret != DECRES_SUCCESS && *used_instructions_count == 0) {
143 E : if (HandleBadDecode(ci, result, max_instructions, used_instructions_count,
144 : &ret)) {
145 E : return ret;
146 : }
147 : }
148 :
149 E : for (unsigned int i = 0; i < *used_instructions_count; ++i) {
150 E : switch (result[i].opcode) {
151 : // Distorm @ac277fb has a bug where the access size for I_FXRSTOR and
152 : // I_FXSAVE destination operand is 0 instead of 64. I've filed
153 : // https://github.com/gdabah/distorm/issues/96 to have this fixed.
154 : // In the meantime this is a workaround to have the correct operand size.
155 : case I_FXRSTOR:
156 : case I_FXSAVE:
157 E : DCHECK_EQ(0U, result[i].ops[0].size);
158 E : result[i].ops[0].size = 64;
159 : break;
160 : default:
161 : break;
162 : }
163 E : }
164 :
165 E : return ret;
166 E : }
167 :
168 : bool DecodeOneInstruction(uint32_t address,
169 : const uint8_t* buffer,
170 : size_t length,
171 E : _DInst* instruction) {
172 E : DCHECK(buffer != NULL);
173 E : DCHECK(instruction != NULL);
174 :
175 E : _CodeInfo code = {};
176 E : code.dt = Decode32Bits;
177 E : code.features = DF_NONE;
178 E : code.codeOffset = address;
179 E : code.codeLen = length;
180 E : code.code = buffer;
181 :
182 E : unsigned int decoded = 0;
183 E : ::memset(instruction, 0, sizeof(*instruction));
184 E : _DecodeResult result = DistormDecompose(&code, instruction, 1, &decoded);
185 :
186 E : if (result != DECRES_MEMORYERR && result != DECRES_SUCCESS)
187 i : return false;
188 :
189 : // It's possible for the decode to fail as having decoded a single partially
190 : // valid instruction (ie: valid prefix of an instruction, waiting on more
191 : // data), in which case it will return MEMORYERR (wants more data) and a
192 : // decoded length of zero.
193 E : if (decoded == 0)
194 E : return false;
195 :
196 E : DCHECK_GE(length, instruction->size);
197 E : DCHECK_LT(0, instruction->size);
198 :
199 E : return true;
200 E : }
201 :
202 : bool DecodeOneInstruction(const uint8_t* buffer,
203 : size_t length,
204 E : _DInst* instruction) {
205 E : DCHECK(buffer != NULL);
206 E : DCHECK(instruction != NULL);
207 E : if (!DecodeOneInstruction(0x10000000, buffer, length, instruction))
208 E : return false;
209 E : return true;
210 E : }
211 :
212 : bool InstructionToString(
213 : const _DInst& instruction,
214 : const uint8_t* data,
215 : int code_length,
216 E : std::string* buffer) {
217 E : DCHECK(data != NULL);
218 E : DCHECK(buffer != NULL);
219 :
220 E : _CodeInfo code = {};
221 E : code.codeOffset = 0;
222 E : code.code = data;
223 E : code.codeLen = code_length;
224 E : code.dt = Decode32Bits;
225 E : _DecodedInst decoded = {};
226 E : _DInst dinst = instruction;
227 :
228 E : dinst.addr = 0;
229 E : distorm_format64(&code, &dinst, &decoded);
230 :
231 E : *buffer = base::StringPrintf("%-14s %s %s",
232 : decoded.instructionHex.p,
233 : decoded.mnemonic.p,
234 : decoded.operands.p);
235 E : return true;
236 E : }
237 :
238 E : bool IsNop(const _DInst& instruction) {
239 E : switch (instruction.opcode) {
240 : default:
241 : // Only the sequences recognized below qualify as NOP instructions.
242 E : return false;
243 :
244 : case I_XCHG:
245 : // This handles the 1 bytes NOP sequence.
246 : // 1-byte: xchg eax, eax.
247 E : return instruction.ops[0].type == O_REG &&
248 : instruction.ops[0].index == RM_AX &&
249 : instruction.ops[1].type == O_REG &&
250 : instruction.ops[1].index == RM_AX;
251 :
252 : case I_NOP:
253 : // This handles the 2, 4, 5, 7, 8 and 9 byte NOP sequences.
254 : // 2-byte: 66 NOP
255 : // 4-byte: NOP DWORD PTR [EAX + 0] (8-bit displacement)
256 : // 5-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (8-bit displacement)
257 : // 7-byte: NOP DWORD PTR [EAX + 0] (32-bit displacement)
258 : // 8-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
259 : // 9-byte: NOP WORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
260 E : return true;
261 :
262 : case I_LEA:
263 : // This handles the 3 and 6 byte NOP sequences.
264 : // 3-byte: LEA REG, 0 (REG) (8-bit displacement)
265 : // 6-byte: LEA REG, 0 (REG) (32-bit displacement)
266 E : return instruction.ops[0].type == O_REG &&
267 : instruction.ops[1].type == O_SMEM &&
268 : instruction.ops[0].index == instruction.ops[1].index &&
269 : instruction.disp == 0;
270 :
271 : case I_MOV:
272 : // Not documented in the Intel manuals, but we see "mov reg, reg" a lot.
273 E : return instruction.ops[0].type == O_REG &&
274 : instruction.ops[1].type == O_REG &&
275 : instruction.ops[0].index == instruction.ops[1].index;
276 : }
277 E : }
278 :
279 E : bool IsCall(const _DInst& instruction) {
280 E : return META_GET_FC(instruction.meta) == FC_CALL;
281 E : }
282 :
283 E : bool IsReturn(const _DInst& instruction) {
284 E : return META_GET_FC(instruction.meta) == FC_RET;
285 E : }
286 :
287 E : bool IsSystemCall(const _DInst& instruction) {
288 E : return META_GET_FC(instruction.meta) == FC_SYS;
289 E : }
290 :
291 E : bool IsConditionalBranch(const _DInst& instruction) {
292 E : return META_GET_FC(instruction.meta) == FC_CND_BRANCH;
293 E : }
294 :
295 E : bool IsUnconditionalBranch(const _DInst& instruction) {
296 E : return META_GET_FC(instruction.meta) == FC_UNC_BRANCH;
297 E : }
298 :
299 E : bool IsBranch(const _DInst& instruction) {
300 E : return IsConditionalBranch(instruction) || IsUnconditionalBranch(instruction);
301 E : }
302 :
303 E : bool HasPcRelativeOperand(const _DInst& instruction, int operand_index) {
304 E : DCHECK_LE(0, operand_index);
305 E : DCHECK_LT(operand_index, static_cast<int>(arraysize(instruction.ops)));
306 E : return instruction.ops[operand_index].type == O_PC;
307 E : }
308 :
309 E : bool IsControlFlow(const _DInst& instruction) {
310 : // For the purposes of Syzygy we include all of the control flow altering
311 : // instruction EXCEPT for call as true control flow.
312 E : return IsBranch(instruction) ||
313 : IsReturn(instruction) ||
314 : IsSystemCall(instruction);
315 E : }
316 :
317 E : bool IsImplicitControlFlow(const _DInst& instruction) {
318 : // Control flow jumps implicitly out of the block for RET and SYS
319 E : if (IsReturn(instruction) || IsSystemCall(instruction))
320 E : return true;
321 :
322 : // Control flow is implicit for non PC-relative jumps (i.e., explicit
323 : // branches where the target is computed, stored in a register, stored
324 : // in a memory location, or otherwise indirect).
325 E : if (IsUnconditionalBranch(instruction) &&
326 : !HasPcRelativeOperand(instruction, 0)) {
327 E : return true;
328 : }
329 :
330 : // Otherwise it's not implicit control flow.
331 E : return false;
332 E : }
333 :
334 E : bool IsInterrupt(const _DInst& instruction) {
335 E : return META_GET_FC(instruction.meta) == FC_INT;
336 E : }
337 :
338 E : bool IsDebugInterrupt(const _DInst& instruction) {
339 E : return IsInterrupt(instruction) && instruction.size == 1 &&
340 : instruction.opcode == I_INT_3;
341 E : }
342 :
343 E : _RegisterType GetRegisterType(const Register& reg) {
344 E : return GetRegisterType(reg.id());
345 E : }
346 :
347 E : _RegisterType GetRegisterType(RegisterId reg_id) {
348 : static const _RegisterType kRegisterTypesById[assm::kRegisterMax] = {
349 : R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH, // 8-bit.
350 : R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI, // 16-bit.
351 : R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI // 32-bit.
352 : };
353 E : DCHECK_LE(assm::kRegisterMin, reg_id);
354 E : DCHECK_GT(assm::kRegisterMax, reg_id);
355 E : return kRegisterTypesById[reg_id];
356 E : }
357 :
358 E : RegisterId GetRegisterId(uint32_t distorm_reg_type) {
359 E : switch (distorm_reg_type) {
360 : // 8-bit registers.
361 E : case R_AL: return assm::kRegisterAl;
362 i : case R_CL: return assm::kRegisterCl;
363 i : case R_DL: return assm::kRegisterDl;
364 i : case R_BL: return assm::kRegisterBl;
365 i : case R_AH: return assm::kRegisterAh;
366 i : case R_CH: return assm::kRegisterCh;
367 i : case R_DH: return assm::kRegisterDh;
368 E : case R_BH: return assm::kRegisterBh;
369 :
370 : // 16-bit registers.
371 i : case R_AX: return assm::kRegisterAx;
372 E : case R_CX: return assm::kRegisterCx;
373 i : case R_DX: return assm::kRegisterDx;
374 i : case R_BX: return assm::kRegisterBx;
375 E : case R_SP: return assm::kRegisterSp;
376 i : case R_BP: return assm::kRegisterBp;
377 i : case R_SI: return assm::kRegisterSi;
378 i : case R_DI: return assm::kRegisterDi;
379 :
380 : // 32-bit registers.
381 E : case R_EAX: return assm::kRegisterEax;
382 E : case R_ECX: return assm::kRegisterEcx;
383 E : case R_EDX: return assm::kRegisterEdx;
384 E : case R_EBX: return assm::kRegisterEbx;
385 E : case R_ESP: return assm::kRegisterEsp;
386 E : case R_EBP: return assm::kRegisterEbp;
387 E : case R_ESI: return assm::kRegisterEsi;
388 E : case R_EDI: return assm::kRegisterEdi;
389 :
390 i : default: return assm::kRegisterNone;
391 : }
392 E : }
393 :
394 E : const Register& GetRegister(uint32_t distorm_reg_type) {
395 E : return Register::Get(GetRegisterId(distorm_reg_type));
396 E : }
397 :
398 : } // namespace core
|