1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/core/disassembler_util.h"
16 :
17 : #include "base/logging.h"
18 : #include "base/stringprintf.h"
19 : #include "mnemonics.h" // NOLINT
20 :
21 : namespace core {
22 :
23 : _DecodeResult DistormDecompose(_CodeInfo* ci,
24 : _DInst result[],
25 : unsigned int max_instructions,
26 E : unsigned int* used_instructions_count) {
27 : _DecodeResult ret =
28 E : distorm_decompose(ci, result, max_instructions, used_instructions_count);
29 :
30 E : for (unsigned int i = 0; i < *used_instructions_count; ++i) {
31 : // Distorm @229 has a bug where the access size for I_FNSTCW and I_FLDCW
32 : // destination operand is 0 instead of 16. I've filed issue
33 : // http://code.google.com/p/distorm/issues/detail?id=58 to have this fixed.
34 : // In the meantime this is a workaround to have the correct operand size.
35 E : switch (result[i].opcode) {
36 : case I_FNSTCW:
37 : case I_FLDCW:
38 : // If result[i].ops[0].size is not zero that means that distorm has been
39 : // fixed and that this workaround is not needed anymore.
40 E : DCHECK(result[i].ops[0].size == 0);
41 E : result[i].ops[0].size = 16;
42 E : break;
43 : case I_FST:
44 : case I_FSTP:
45 : case I_FIST:
46 : case I_FISTP:
47 : // Distorm @229 has a bug, the flag do no reflect the memory store.
48 : // https://code.google.com/p/distorm/issues/detail?id=70
49 : // If FLAG_DST_WR is set that means that distorm has been fixed.
50 E : DCHECK_EQ(0, result[i].flags & FLAG_DST_WR);
51 E : result[i].flags |= FLAG_DST_WR;
52 : break;
53 : default:
54 : break;
55 : }
56 E : }
57 E : return ret;
58 E : }
59 :
60 : bool DecodeOneInstruction(
61 E : uint32 address, const uint8* buffer, size_t length, _DInst* instruction) {
62 E : DCHECK(buffer != NULL);
63 E : DCHECK(instruction != NULL);
64 :
65 E : _CodeInfo code = {};
66 E : code.dt = Decode32Bits;
67 E : code.features = DF_NONE;
68 E : code.codeOffset = address;
69 E : code.codeLen = length;
70 E : code.code = buffer;
71 :
72 E : unsigned int decoded = 0;
73 E : ::memset(instruction, 0, sizeof(*instruction));
74 E : _DecodeResult result = DistormDecompose(&code, instruction, 1, &decoded);
75 :
76 E : if (result != DECRES_MEMORYERR && result != DECRES_SUCCESS)
77 i : return false;
78 :
79 E : DCHECK_EQ(1u, decoded);
80 E : DCHECK_GE(length, instruction->size);
81 E : DCHECK_LT(0, instruction->size);
82 :
83 E : return true;
84 E : }
85 :
86 : bool DecodeOneInstruction(
87 E : const uint8* buffer, size_t length, _DInst* instruction) {
88 E : DCHECK(buffer != NULL);
89 E : DCHECK(instruction != NULL);
90 E : if (!DecodeOneInstruction(0x10000000, buffer, length, instruction))
91 i : return false;
92 E : return true;
93 E : }
94 :
95 : bool InstructionToString(
96 : const _DInst& instruction,
97 : const uint8_t* data,
98 : int code_length,
99 E : std::string* buffer) {
100 E : DCHECK(data != NULL);
101 E : DCHECK(buffer != NULL);
102 :
103 E : _CodeInfo code = {};
104 E : code.codeOffset = 0;
105 E : code.code = data;
106 E : code.codeLen = code_length;
107 E : code.dt = Decode32Bits;
108 E : _DecodedInst decoded = {};
109 E : _DInst dinst = instruction;
110 :
111 E : dinst.addr = 0;
112 E : distorm_format64(&code, &dinst, &decoded);
113 :
114 : *buffer = base::StringPrintf("%-14s %s %s",
115 : decoded.instructionHex.p,
116 : decoded.mnemonic.p,
117 E : decoded.operands.p);
118 E : return true;
119 E : }
120 :
121 E : bool IsNop(const _DInst& instruction) {
122 E : switch (instruction.opcode) {
123 : default:
124 : // Only the sequences recognized below qualify as NOP instructions.
125 E : return false;
126 :
127 : case I_XCHG:
128 : // This handles the 1 bytes NOP sequence.
129 : // 1-byte: xchg eax, eax.
130 : return instruction.ops[0].type == O_REG &&
131 : instruction.ops[0].index == RM_AX &&
132 : instruction.ops[1].type == O_REG &&
133 i : instruction.ops[1].index == RM_AX;
134 :
135 : case I_NOP:
136 : // This handles the 2, 4, 5, 7, 8 and 9 byte NOP sequences.
137 : // 2-byte: 66 NOP
138 : // 4-byte: NOP DWORD PTR [EAX + 0] (8-bit displacement)
139 : // 5-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (8-bit displacement)
140 : // 7-byte: NOP DWORD PTR [EAX + 0] (32-bit displacement)
141 : // 8-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
142 : // 9-byte: NOP WORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
143 E : return true;
144 :
145 : case I_LEA:
146 : // This handles the 3 and 6 byte NOP sequences.
147 : // 3-byte: LEA REG, 0 (REG) (8-bit displacement)
148 : // 6-byte: LEA REG, 0 (REG) (32-bit displacement)
149 : return instruction.ops[0].type == O_REG &&
150 : instruction.ops[1].type == O_SMEM &&
151 : instruction.ops[0].index == instruction.ops[1].index &&
152 E : instruction.disp == 0;
153 :
154 : case I_MOV:
155 : // Not documented in the Intel manuals, but we see "mov reg, reg" a lot.
156 : return instruction.ops[0].type == O_REG &&
157 : instruction.ops[1].type == O_REG &&
158 E : instruction.ops[0].index == instruction.ops[1].index;
159 : }
160 E : }
161 :
162 E : bool IsCall(const _DInst& instruction) {
163 E : return META_GET_FC(instruction.meta) == FC_CALL;
164 E : }
165 :
166 E : bool IsReturn(const _DInst& instruction) {
167 E : return META_GET_FC(instruction.meta) == FC_RET;
168 E : }
169 :
170 E : bool IsSystemCall(const _DInst& instruction) {
171 E : return META_GET_FC(instruction.meta) == FC_SYS;
172 E : }
173 :
174 E : bool IsConditionalBranch(const _DInst& instruction) {
175 E : return META_GET_FC(instruction.meta) == FC_CND_BRANCH;
176 E : }
177 :
178 E : bool IsUnconditionalBranch(const _DInst& instruction) {
179 E : return META_GET_FC(instruction.meta) == FC_UNC_BRANCH;
180 E : }
181 :
182 E : bool IsBranch(const _DInst& instruction) {
183 E : return IsConditionalBranch(instruction) || IsUnconditionalBranch(instruction);
184 E : }
185 :
186 E : bool HasPcRelativeOperand(const _DInst& instruction, int operand_index) {
187 E : DCHECK_LE(0, operand_index);
188 E : DCHECK_LT(operand_index, static_cast<int>(arraysize(instruction.ops)));
189 E : return instruction.ops[operand_index].type == O_PC;
190 E : }
191 :
192 E : bool IsControlFlow(const _DInst& instruction) {
193 : // For the purposes of Syzygy we include all of the control flow altering
194 : // instruction EXCEPT for call as true control flow.
195 : return IsBranch(instruction) ||
196 : IsReturn(instruction) ||
197 E : IsSystemCall(instruction);
198 E : }
199 :
200 E : bool IsImplicitControlFlow(const _DInst& instruction) {
201 : // Control flow jumps implicitly out of the block for RET and SYS
202 E : if (IsReturn(instruction) || IsSystemCall(instruction))
203 E : return true;
204 :
205 : // Control flow is implicit for non PC-relative jumps (i.e., explicit
206 : // branches where the target is computed, stored in a register, stored
207 : // in a memory location, or otherwise indirect).
208 : if (IsUnconditionalBranch(instruction) &&
209 E : !HasPcRelativeOperand(instruction, 0)) {
210 E : return true;
211 : }
212 :
213 : // Otherwise it's not implicit control flow.
214 E : return false;
215 E : }
216 :
217 E : bool IsInterrupt(const _DInst& instruction) {
218 E : return META_GET_FC(instruction.meta) == FC_INT;
219 E : }
220 :
221 E : bool IsDebugInterrupt(const _DInst& instruction) {
222 : return IsInterrupt(instruction) && instruction.size == 1 &&
223 E : instruction.opcode == I_INT_3;
224 E : }
225 :
226 : } // namespace core
|