1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/core/disassembler_util.h"
16 :
17 : #include "base/logging.h"
18 : #include "base/stringprintf.h"
19 : #include "mnemonics.h" // NOLINT
20 :
21 : namespace core {
22 :
23 : namespace {
24 :
25 : // Handle improperly decoded instructions. Returns true if an instruction was
26 : // handled, false otherwise. If this returns false then none of the output
27 : // parameters will have been changed.
28 : bool HandleBadDecode(_CodeInfo* ci,
29 : _DInst result[],
30 : unsigned int max_instructions,
31 : unsigned int* used_instructions_count,
32 E : _DecodeResult* ret) {
33 E : DCHECK_NE(reinterpret_cast<_CodeInfo*>(NULL), ci);
34 E : DCHECK_LE(1u, max_instructions);
35 E : DCHECK_NE(reinterpret_cast<unsigned int*>(NULL), used_instructions_count);
36 E : DCHECK_NE(reinterpret_cast<_DecodeResult*>(NULL), ret);
37 :
38 E : size_t size = 0;
39 :
40 : // 3-byte VEX encoded instructions.
41 E : if (ci->code[0] == 0xC4) {
42 : // vpermq
43 E : if (ci->code[1] == 0xE3 && ci->code[2] == 0xFD) {
44 E : size = 6;
45 E : } else if (ci->code[1] == 0xE2 && ci->code[2] == 0x4D) {
46 : // vpermd
47 E : size = 5;
48 : }
49 : }
50 :
51 E : if (size == 0)
52 i : return false;
53 :
54 : // We set the bare minimum properties that are required for any
55 : // subsequent processing that we perform.
56 :
57 E : *used_instructions_count = 1;
58 :
59 E : ::memset(result, 0, sizeof(result[0]));
60 E : result[0].addr = ci->codeOffset;
61 E : result[0].size = size;
62 :
63 E : DCHECK_EQ(FC_NONE, META_GET_FC(result[0].meta));
64 E : DCHECK_EQ(O_NONE, result[0].ops[0].type);
65 E : DCHECK_EQ(O_NONE, result[0].ops[1].type);
66 E : DCHECK_EQ(O_NONE, result[0].ops[2].type);
67 E : DCHECK_EQ(O_NONE, result[0].ops[3].type);
68 :
69 E : *ret = DECRES_SUCCESS;
70 :
71 E : return true;
72 E : }
73 :
74 : } // namespace
75 :
76 : _DecodeResult DistormDecompose(_CodeInfo* ci,
77 : _DInst result[],
78 : unsigned int max_instructions,
79 E : unsigned int* used_instructions_count) {
80 : _DecodeResult ret =
81 E : distorm_decompose(ci, result, max_instructions, used_instructions_count);
82 :
83 : // Distorm @229 has a bug where it has problems decoding some AVX
84 : // instructions. The encoding is described in detail here:
85 : // http://en.wikipedia.org/wiki/VEX_prefix
86 : // An issue has been filed here:
87 : // https://code.google.com/p/distorm/issues/detail?id=77
88 : // This is a workaround until the bug is fixed. We only care about the case
89 : // where decoding failed.
90 E : if (ret != DECRES_SUCCESS && *used_instructions_count == 0) {
91 : if (HandleBadDecode(ci, result, max_instructions, used_instructions_count,
92 E : &ret)) {
93 E : return ret;
94 : }
95 : }
96 :
97 E : for (unsigned int i = 0; i < *used_instructions_count; ++i) {
98 : // Distorm @229 has a bug where the access size for I_FNSTCW and I_FLDCW
99 : // destination operand is 0 instead of 16. I've filed issue
100 : // http://code.google.com/p/distorm/issues/detail?id=58 to have this fixed.
101 : // In the meantime this is a workaround to have the correct operand size.
102 E : switch (result[i].opcode) {
103 : case I_FNSTCW:
104 : case I_FLDCW:
105 : // If result[i].ops[0].size is not zero that means that distorm has been
106 : // fixed and that this workaround is not needed anymore.
107 E : DCHECK(result[i].ops[0].size == 0);
108 E : result[i].ops[0].size = 16;
109 E : break;
110 : case I_FST:
111 : case I_FSTP:
112 : case I_FIST:
113 : case I_FISTP:
114 : // Distorm @229 has a bug, the flag do no reflect the memory store.
115 : // https://code.google.com/p/distorm/issues/detail?id=70
116 : // If FLAG_DST_WR is set that means that distorm has been fixed.
117 E : DCHECK_EQ(0, result[i].flags & FLAG_DST_WR);
118 E : result[i].flags |= FLAG_DST_WR;
119 : break;
120 : default:
121 : break;
122 : }
123 E : }
124 E : return ret;
125 E : }
126 :
127 : bool DecodeOneInstruction(
128 E : uint32 address, const uint8* buffer, size_t length, _DInst* instruction) {
129 E : DCHECK(buffer != NULL);
130 E : DCHECK(instruction != NULL);
131 :
132 E : _CodeInfo code = {};
133 E : code.dt = Decode32Bits;
134 E : code.features = DF_NONE;
135 E : code.codeOffset = address;
136 E : code.codeLen = length;
137 E : code.code = buffer;
138 :
139 E : unsigned int decoded = 0;
140 E : ::memset(instruction, 0, sizeof(*instruction));
141 E : _DecodeResult result = DistormDecompose(&code, instruction, 1, &decoded);
142 :
143 E : if (result != DECRES_MEMORYERR && result != DECRES_SUCCESS)
144 i : return false;
145 :
146 : // It's possible for the decode to fail as having decoded a single partially
147 : // valid instruction (ie: valid prefix of an instruction, waiting on more
148 : // data), in which case it will return MEMORYERR (wants more data) and a
149 : // decoded length of zero.
150 E : if (decoded == 0)
151 i : return false;
152 :
153 E : DCHECK_GE(length, instruction->size);
154 E : DCHECK_LT(0, instruction->size);
155 :
156 E : return true;
157 E : }
158 :
159 : bool DecodeOneInstruction(
160 E : const uint8* buffer, size_t length, _DInst* instruction) {
161 E : DCHECK(buffer != NULL);
162 E : DCHECK(instruction != NULL);
163 E : if (!DecodeOneInstruction(0x10000000, buffer, length, instruction))
164 i : return false;
165 E : return true;
166 E : }
167 :
168 : bool InstructionToString(
169 : const _DInst& instruction,
170 : const uint8_t* data,
171 : int code_length,
172 E : std::string* buffer) {
173 E : DCHECK(data != NULL);
174 E : DCHECK(buffer != NULL);
175 :
176 E : _CodeInfo code = {};
177 E : code.codeOffset = 0;
178 E : code.code = data;
179 E : code.codeLen = code_length;
180 E : code.dt = Decode32Bits;
181 E : _DecodedInst decoded = {};
182 E : _DInst dinst = instruction;
183 :
184 E : dinst.addr = 0;
185 E : distorm_format64(&code, &dinst, &decoded);
186 :
187 : *buffer = base::StringPrintf("%-14s %s %s",
188 : decoded.instructionHex.p,
189 : decoded.mnemonic.p,
190 E : decoded.operands.p);
191 E : return true;
192 E : }
193 :
194 E : bool IsNop(const _DInst& instruction) {
195 E : switch (instruction.opcode) {
196 : default:
197 : // Only the sequences recognized below qualify as NOP instructions.
198 E : return false;
199 :
200 : case I_XCHG:
201 : // This handles the 1 bytes NOP sequence.
202 : // 1-byte: xchg eax, eax.
203 : return instruction.ops[0].type == O_REG &&
204 : instruction.ops[0].index == RM_AX &&
205 : instruction.ops[1].type == O_REG &&
206 i : instruction.ops[1].index == RM_AX;
207 :
208 : case I_NOP:
209 : // This handles the 2, 4, 5, 7, 8 and 9 byte NOP sequences.
210 : // 2-byte: 66 NOP
211 : // 4-byte: NOP DWORD PTR [EAX + 0] (8-bit displacement)
212 : // 5-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (8-bit displacement)
213 : // 7-byte: NOP DWORD PTR [EAX + 0] (32-bit displacement)
214 : // 8-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
215 : // 9-byte: NOP WORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
216 E : return true;
217 :
218 : case I_LEA:
219 : // This handles the 3 and 6 byte NOP sequences.
220 : // 3-byte: LEA REG, 0 (REG) (8-bit displacement)
221 : // 6-byte: LEA REG, 0 (REG) (32-bit displacement)
222 : return instruction.ops[0].type == O_REG &&
223 : instruction.ops[1].type == O_SMEM &&
224 : instruction.ops[0].index == instruction.ops[1].index &&
225 E : instruction.disp == 0;
226 :
227 : case I_MOV:
228 : // Not documented in the Intel manuals, but we see "mov reg, reg" a lot.
229 : return instruction.ops[0].type == O_REG &&
230 : instruction.ops[1].type == O_REG &&
231 E : instruction.ops[0].index == instruction.ops[1].index;
232 : }
233 E : }
234 :
235 E : bool IsCall(const _DInst& instruction) {
236 E : return META_GET_FC(instruction.meta) == FC_CALL;
237 E : }
238 :
239 E : bool IsReturn(const _DInst& instruction) {
240 E : return META_GET_FC(instruction.meta) == FC_RET;
241 E : }
242 :
243 E : bool IsSystemCall(const _DInst& instruction) {
244 E : return META_GET_FC(instruction.meta) == FC_SYS;
245 E : }
246 :
247 E : bool IsConditionalBranch(const _DInst& instruction) {
248 E : return META_GET_FC(instruction.meta) == FC_CND_BRANCH;
249 E : }
250 :
251 E : bool IsUnconditionalBranch(const _DInst& instruction) {
252 E : return META_GET_FC(instruction.meta) == FC_UNC_BRANCH;
253 E : }
254 :
255 E : bool IsBranch(const _DInst& instruction) {
256 E : return IsConditionalBranch(instruction) || IsUnconditionalBranch(instruction);
257 E : }
258 :
259 E : bool HasPcRelativeOperand(const _DInst& instruction, int operand_index) {
260 E : DCHECK_LE(0, operand_index);
261 E : DCHECK_LT(operand_index, static_cast<int>(arraysize(instruction.ops)));
262 E : return instruction.ops[operand_index].type == O_PC;
263 E : }
264 :
265 E : bool IsControlFlow(const _DInst& instruction) {
266 : // For the purposes of Syzygy we include all of the control flow altering
267 : // instruction EXCEPT for call as true control flow.
268 : return IsBranch(instruction) ||
269 : IsReturn(instruction) ||
270 E : IsSystemCall(instruction);
271 E : }
272 :
273 E : bool IsImplicitControlFlow(const _DInst& instruction) {
274 : // Control flow jumps implicitly out of the block for RET and SYS
275 E : if (IsReturn(instruction) || IsSystemCall(instruction))
276 E : return true;
277 :
278 : // Control flow is implicit for non PC-relative jumps (i.e., explicit
279 : // branches where the target is computed, stored in a register, stored
280 : // in a memory location, or otherwise indirect).
281 : if (IsUnconditionalBranch(instruction) &&
282 E : !HasPcRelativeOperand(instruction, 0)) {
283 E : return true;
284 : }
285 :
286 : // Otherwise it's not implicit control flow.
287 E : return false;
288 E : }
289 :
290 E : bool IsInterrupt(const _DInst& instruction) {
291 E : return META_GET_FC(instruction.meta) == FC_INT;
292 E : }
293 :
294 E : bool IsDebugInterrupt(const _DInst& instruction) {
295 : return IsInterrupt(instruction) && instruction.size == 1 &&
296 E : instruction.opcode == I_INT_3;
297 E : }
298 :
299 E : _RegisterType GetRegisterType(const Register& reg) {
300 E : return GetRegisterType(reg.id());
301 E : }
302 :
303 E : _RegisterType GetRegisterType(RegisterId reg_id) {
304 : static const _RegisterType kRegisterTypesById[kRegisterMax] = {
305 : R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH, // 8-bit.
306 : R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI, // 16-bit.
307 : R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI // 32-bit.
308 : };
309 E : DCHECK_LE(kRegisterMin, reg_id);
310 E : DCHECK_GT(kRegisterMax, reg_id);
311 E : return kRegisterTypesById[reg_id];
312 E : }
313 :
314 E : RegisterId GetRegisterId(uint32 distorm_reg_type) {
315 E : switch (distorm_reg_type) {
316 : // 8-bit registers.
317 E : case R_AL: return kRegisterAl;
318 i : case R_CL: return kRegisterCl;
319 i : case R_DL: return kRegisterDl;
320 i : case R_BL: return kRegisterBl;
321 i : case R_AH: return kRegisterAh;
322 i : case R_CH: return kRegisterCh;
323 i : case R_DH: return kRegisterDh;
324 E : case R_BH: return kRegisterBh;
325 :
326 : // 16-bit registers.
327 i : case R_AX: return kRegisterAx;
328 E : case R_CX: return kRegisterCx;
329 i : case R_DX: return kRegisterDx;
330 i : case R_BX: return kRegisterBx;
331 E : case R_SP: return kRegisterSp;
332 i : case R_BP: return kRegisterBp;
333 i : case R_SI: return kRegisterSi;
334 i : case R_DI: return kRegisterDi;
335 :
336 : // 32-bit registers.
337 E : case R_EAX: return kRegisterEax;
338 E : case R_ECX: return kRegisterEcx;
339 E : case R_EDX: return kRegisterEdx;
340 E : case R_EBX: return kRegisterEbx;
341 E : case R_ESP: return kRegisterEsp;
342 E : case R_EBP: return kRegisterEbp;
343 E : case R_ESI: return kRegisterEsi;
344 E : case R_EDI: return kRegisterEdi;
345 :
346 i : default: return kRegisterNone;
347 : }
348 E : }
349 :
350 E : const Register& GetRegister(uint32 distorm_reg_type) {
351 E : return Register::Get(GetRegisterId(distorm_reg_type));
352 E : }
353 :
354 : } // namespace core
|