1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/core/disassembler_util.h"
16 :
17 : #include "base/logging.h"
18 : #include "base/stringprintf.h"
19 : #include "mnemonics.h" // NOLINT
20 :
21 : namespace core {
22 :
23 : _DecodeResult DistormDecompose(_CodeInfo* ci,
24 : _DInst result[],
25 : unsigned int max_instructions,
26 E : unsigned int* used_instructions_count) {
27 : _DecodeResult ret =
28 E : distorm_decompose(ci, result, max_instructions, used_instructions_count);
29 :
30 E : for (unsigned int i = 0; i < *used_instructions_count; ++i) {
31 : // Distorm @229 has a bug where the access size for I_FNSTCW and I_FLDCW
32 : // destination operand is 0 instead of 16. I've filed issue
33 : // http://code.google.com/p/distorm/issues/detail?id=58 to have this fixed.
34 : // In the meantime this is a workaround to have the correct operand size.
35 E : switch (result[i].opcode) {
36 : case I_FNSTCW:
37 : case I_FLDCW:
38 : // If result[i].ops[0].size is not zero that means that distorm has been
39 : // fixed and that this workaround is not needed anymore.
40 E : DCHECK(result[i].ops[0].size == 0);
41 E : result[i].ops[0].size = 16;
42 : break;
43 : default:
44 : break;
45 : }
46 E : }
47 E : return ret;
48 E : }
49 :
50 : bool DecodeOneInstruction(
51 E : uint32 address, const uint8* buffer, size_t length, _DInst* instruction) {
52 E : DCHECK(buffer != NULL);
53 E : DCHECK(instruction != NULL);
54 :
55 E : _CodeInfo code = {};
56 E : code.dt = Decode32Bits;
57 E : code.features = DF_NONE;
58 E : code.codeOffset = address;
59 E : code.codeLen = length;
60 E : code.code = buffer;
61 :
62 E : unsigned int decoded = 0;
63 E : ::memset(instruction, 0, sizeof(instruction));
64 E : _DecodeResult result = DistormDecompose(&code, instruction, 1, &decoded);
65 :
66 E : if (result != DECRES_MEMORYERR && result != DECRES_SUCCESS)
67 i : return false;
68 :
69 E : DCHECK_EQ(1u, decoded);
70 E : DCHECK_GE(length, instruction->size);
71 E : DCHECK_LT(0, instruction->size);
72 :
73 E : return true;
74 E : }
75 :
76 : bool DecodeOneInstruction(
77 E : const uint8* buffer, size_t length, _DInst* instruction) {
78 E : DCHECK(buffer != NULL);
79 E : DCHECK(instruction != NULL);
80 E : if (!DecodeOneInstruction(0x10000000, buffer, length, instruction))
81 i : return false;
82 E : return true;
83 E : }
84 :
85 : bool InstructionToString(
86 : const _DInst& instruction,
87 : const uint8_t* data,
88 : int code_length,
89 E : std::string* buffer) {
90 E : DCHECK(data != NULL);
91 E : DCHECK(buffer != NULL);
92 :
93 E : _CodeInfo code = {};
94 E : code.codeOffset = 0;
95 E : code.code = data;
96 E : code.codeLen = code_length;
97 E : code.dt = Decode32Bits;
98 E : _DecodedInst decoded = {};
99 E : _DInst dinst = instruction;
100 :
101 E : dinst.addr = 0;
102 E : distorm_format64(&code, &dinst, &decoded);
103 :
104 : *buffer = base::StringPrintf("%-14s %s %s",
105 : decoded.instructionHex.p,
106 : decoded.mnemonic.p,
107 E : decoded.operands.p);
108 E : return true;
109 E : }
110 :
111 E : bool IsNop(const _DInst& instruction) {
112 E : switch (instruction.opcode) {
113 : default:
114 : // Only the sequences recognized below qualify as NOP instructions.
115 E : return false;
116 :
117 : case I_XCHG:
118 : // This handles the 1 bytes NOP sequence.
119 : // 1-byte: xchg eax, eax.
120 : return instruction.ops[0].type == O_REG &&
121 : instruction.ops[0].index == RM_AX &&
122 : instruction.ops[1].type == O_REG &&
123 i : instruction.ops[1].index == RM_AX;
124 :
125 : case I_NOP:
126 : // This handles the 2, 4, 5, 7, 8 and 9 byte NOP sequences.
127 : // 2-byte: 66 NOP
128 : // 4-byte: NOP DWORD PTR [EAX + 0] (8-bit displacement)
129 : // 5-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (8-bit displacement)
130 : // 7-byte: NOP DWORD PTR [EAX + 0] (32-bit displacement)
131 : // 8-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
132 : // 9-byte: NOP WORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
133 E : return true;
134 :
135 : case I_LEA:
136 : // This handles the 3 and 6 byte NOP sequences.
137 : // 3-byte: LEA REG, 0 (REG) (8-bit displacement)
138 : // 6-byte: LEA REG, 0 (REG) (32-bit displacement)
139 : return instruction.ops[0].type == O_REG &&
140 : instruction.ops[1].type == O_SMEM &&
141 : instruction.ops[0].index == instruction.ops[1].index &&
142 E : instruction.disp == 0;
143 :
144 : case I_MOV:
145 : // Not documented in the Intel manuals, but we see "mov reg, reg" a lot.
146 : return instruction.ops[0].type == O_REG &&
147 : instruction.ops[1].type == O_REG &&
148 E : instruction.ops[0].index == instruction.ops[1].index;
149 : }
150 E : }
151 :
152 E : bool IsCall(const _DInst& instruction) {
153 E : return META_GET_FC(instruction.meta) == FC_CALL;
154 E : }
155 :
156 E : bool IsReturn(const _DInst& instruction) {
157 E : return META_GET_FC(instruction.meta) == FC_RET;
158 E : }
159 :
160 E : bool IsSystemCall(const _DInst& instruction) {
161 E : return META_GET_FC(instruction.meta) == FC_SYS;
162 E : }
163 :
164 E : bool IsConditionalBranch(const _DInst& instruction) {
165 E : return META_GET_FC(instruction.meta) == FC_CND_BRANCH;
166 E : }
167 :
168 E : bool IsUnconditionalBranch(const _DInst& instruction) {
169 E : return META_GET_FC(instruction.meta) == FC_UNC_BRANCH;
170 E : }
171 :
172 E : bool IsBranch(const _DInst& instruction) {
173 E : return IsConditionalBranch(instruction) || IsUnconditionalBranch(instruction);
174 E : }
175 :
176 E : bool HasPcRelativeOperand(const _DInst& instruction, int operand_index) {
177 E : DCHECK_LE(0, operand_index);
178 E : DCHECK_LT(operand_index, static_cast<int>(arraysize(instruction.ops)));
179 E : return instruction.ops[operand_index].type == O_PC;
180 E : }
181 :
182 E : bool IsControlFlow(const _DInst& instruction) {
183 : // For the purposes of Syzygy we include all of the control flow altering
184 : // instruction EXCEPT for call as true control flow.
185 : return IsBranch(instruction) ||
186 : IsReturn(instruction) ||
187 E : IsSystemCall(instruction);
188 E : }
189 :
190 E : bool IsImplicitControlFlow(const _DInst& instruction) {
191 : // Control flow jumps implicitly out of the block for RET and SYS
192 E : if (IsReturn(instruction) || IsSystemCall(instruction))
193 E : return true;
194 :
195 : // Control flow is implicit for non PC-relative jumps (i.e., explicit
196 : // branches where the target is computed, stored in a register, stored
197 : // in a memory location, or otherwise indirect).
198 : if (IsUnconditionalBranch(instruction) &&
199 E : !HasPcRelativeOperand(instruction, 0)) {
200 E : return true;
201 : }
202 :
203 : // Otherwise it's not implicit control flow.
204 E : return false;
205 E : }
206 :
207 E : bool IsInterrupt(const _DInst& instruction) {
208 E : return META_GET_FC(instruction.meta) == FC_INT;
209 E : }
210 :
211 E : bool IsDebugInterrupt(const _DInst& instruction) {
212 : return IsInterrupt(instruction) && instruction.size == 1 &&
213 E : instruction.opcode == I_INT_3;
214 E : }
215 :
216 : } // namespace core
|