1 : // Copyright 2012 Google Inc.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of disassembler.
16 : #include "syzygy/core/disassembler.h"
17 :
18 : #include "base/logging.h"
19 : #include "base/stringprintf.h"
20 :
21 : namespace core {
22 :
23 : Disassembler::Disassembler(const uint8* code,
24 : size_t code_size,
25 : AbsoluteAddress code_addr,
26 : const InstructionCallback& on_instruction)
27 : : code_(code),
28 : code_size_(code_size),
29 : code_addr_(code_addr),
30 : on_instruction_(on_instruction),
31 E : disassembled_bytes_(0) {
32 E : }
33 :
34 : Disassembler::Disassembler(const uint8* code,
35 : size_t code_size,
36 : AbsoluteAddress code_addr,
37 : const AddressSet& entry_points,
38 : const InstructionCallback& on_instruction)
39 : : code_(code),
40 : code_size_(code_size),
41 : code_addr_(code_addr),
42 : on_instruction_(on_instruction),
43 E : disassembled_bytes_(0) {
44 :
45 E : AddressSet::const_iterator it = entry_points.begin();
46 E : for (; it != entry_points.end(); ++it)
47 E : Unvisited(*it);
48 E : }
49 :
50 E : Disassembler::~Disassembler() {
51 E : }
52 :
53 : Disassembler::CallbackDirective Disassembler::OnInstruction(
54 E : AbsoluteAddress addr, const _DInst& inst) {
55 E : return kDirectiveContinue;
56 E : }
57 :
58 : Disassembler::CallbackDirective Disassembler::OnBranchInstruction(
59 E : AbsoluteAddress addr, const _DInst& inst, AbsoluteAddress dest) {
60 E : return kDirectiveContinue;
61 E : }
62 :
63 : Disassembler::CallbackDirective Disassembler::OnStartInstructionRun(
64 E : AbsoluteAddress start_address) {
65 E : return kDirectiveContinue;
66 E : }
67 :
68 : Disassembler::CallbackDirective Disassembler::OnEndInstructionRun(
69 E : AbsoluteAddress addr, const _DInst& inst, ControlFlowFlag control_flow) {
70 E : return kDirectiveContinue;
71 E : }
72 :
73 E : Disassembler::CallbackDirective Disassembler::OnDisassemblyComplete() {
74 E : return kDirectiveContinue;
75 E : }
76 :
77 E : Disassembler::WalkResult Disassembler::Walk() {
78 : // Initialize our disassembly state.
79 E : _CodeInfo code = {};
80 E : code.dt = Decode32Bits;
81 E : code.features = DF_NONE;
82 :
83 : // This is to keep track of whether we cover the entire function.
84 E : bool incomplete_branches = false;
85 :
86 E : while (!unvisited_.empty()) {
87 E : AddressSet::iterator it = unvisited_.begin();
88 E : AbsoluteAddress addr(*it);
89 E : unvisited_.erase(it);
90 :
91 : // Unvisited addresses must be within the code block we're currently
92 : // disassembling.
93 E : DCHECK_LE(code_addr_, addr);
94 E : DCHECK_GT(code_addr_ + code_size_, addr);
95 :
96 : // Notify of the beginning of a new instruction run.
97 E : if (OnStartInstructionRun(addr) == kDirectiveAbort)
98 i : return kWalkError;
99 :
100 : // This continues disassembly along a contiguous instruction run until we
101 : // run out of code, jump somewhere else, or are requested to terminate the
102 : // path by the OnInstruction callback. We call notification methods to
103 : // notify of the start of a run, the end of a run and when branch
104 : // instructions with computable destination addresses are hit.
105 E : bool terminate = false;
106 E : ControlFlowFlag control_flow = kControlFlowTerminates;
107 E : _DInst inst = {};
108 E : for (; addr != AbsoluteAddress(0) && !terminate; addr += inst.size) {
109 E : code.codeOffset = addr.value();
110 E : code.codeLen = code_size_ - (addr - code_addr_);
111 E : code.code = code_ + (addr - code_addr_);
112 E : if (code.codeLen == 0)
113 i : break;
114 :
115 E : bool conditional_branch_handled = false;
116 :
117 E : unsigned int decoded = 0;
118 E : _DecodeResult result = distorm_decompose(&code, &inst, 1, &decoded);
119 :
120 E : if (decoded == 0) {
121 i : LOG(ERROR) << "Unable to decode instruction at " << addr << ".";
122 :
123 : // Dump the next few bytes. The longest X86 instruction possible is 15
124 : // bytes according to distorm.
125 i : int max_bytes = code.codeLen;
126 i : if (max_bytes > 15)
127 i : max_bytes = 15;
128 i : std::string dump;
129 i : for (int i = 0; i < max_bytes; ++i) {
130 i : dump += base::StringPrintf(" 0x%02X", code.code[i]);
131 i : }
132 i : LOG(ERROR) << ".text =" << dump
133 : << (max_bytes < code.codeLen ? " ..." : ".");
134 i : return kWalkError;
135 : }
136 :
137 E : CHECK_EQ(1U, decoded);
138 E : CHECK(result == DECRES_MEMORYERR || result == DECRES_SUCCESS);
139 :
140 : // Try to visit this instruction.
141 E : VisitedSpace::Range range(addr, inst.size);
142 E : if (!visited_.Insert(range, 0)) {
143 : // If the collision is a repeat of a previously disassembled
144 : // instruction at a different offset then something went wrong.
145 E : if (!visited_.ContainsExactly(range)) {
146 i : LOG(ERROR) << "Two disassembled instructions overlap.";
147 i : return kWalkError;
148 : }
149 E : break;
150 : }
151 :
152 : // Tally the code bytes we just disassembled.
153 E : disassembled_bytes_ += inst.size;
154 :
155 : // Invoke the callback and terminate if need be.
156 E : switch (NotifyOnInstruction(addr, inst)) {
157 : case kDirectiveTerminateWalk:
158 E : return kWalkTerminated;
159 :
160 : case kDirectiveAbort:
161 i : return kWalkError;
162 :
163 : case kDirectiveTerminatePath:
164 E : terminate = true;
165 :
166 : default:
167 : break;
168 : }
169 :
170 E : uint8 fc = META_GET_FC(inst.meta);
171 E : switch (fc) {
172 : case FC_NONE:
173 : case FC_CALL:
174 : case FC_CMOV:
175 : // Do nothing with these flow control types.
176 E : break;
177 :
178 : case FC_RET:
179 : // It's a RET instruction, we're done with this branch.
180 E : terminate = true;
181 E : break;
182 :
183 : case FC_SYS:
184 i : incomplete_branches = true;
185 i : terminate = true;
186 i : NOTREACHED() << "Unexpected SYS* instruction encountered";
187 i : break;
188 :
189 : case FC_CND_BRANCH:
190 : // Conditional branch, schedule a visit to the branch-not-taken
191 : // basic block.
192 E : Unvisited(addr + inst.size);
193 : // And fall through to visit branch target.
194 :
195 : case FC_UNC_BRANCH: {
196 E : terminate = true; // The basic block ends here.
197 E : AbsoluteAddress dest;
198 E : switch (inst.ops[0].type) {
199 : case O_REG:
200 : case O_MEM:
201 : // Computed branch, we can't chase this.
202 E : break;
203 :
204 : case O_SMEM:
205 : // Branch to a register, can't chase this.
206 i : break;
207 :
208 : case O_DISP:
209 : // Indirect address, this may be e.g. a jump to an import.
210 : // TODO(siggi): validate that this is so.
211 E : DCHECK_EQ(32, inst.ops[0].size);
212 E : break;
213 :
214 : case O_PC:
215 : // PC relative address.
216 E : dest = addr + static_cast<size_t>(inst.size + inst.imm.addr);
217 E : conditional_branch_handled = true;
218 E : break;
219 :
220 : default:
221 i : NOTREACHED() << "Unexpected branch destination type";
222 : break;
223 : }
224 :
225 : // Make sure to visit the branch destination.
226 E : if (dest != AbsoluteAddress(0)) {
227 E : if (IsInBlock(dest))
228 E : Unvisited(dest);
229 : }
230 :
231 : // Notify of a newly-discovered branch destination.
232 E : if (OnBranchInstruction(addr, inst, dest) == kDirectiveAbort)
233 i : return kWalkError;
234 :
235 E : if (dest == AbsoluteAddress(0)) {
236 : // We couldn't compute the destination, if not handled,
237 : // we may have incomplete coverage for the function.
238 : incomplete_branches =
239 E : incomplete_branches || !conditional_branch_handled;
240 : }
241 : }
242 E : break;
243 :
244 : case FC_INT:
245 : // We encounter int3 inline in functions sometimes.
246 E : break;
247 :
248 : default:
249 i : NOTREACHED() << "Unexpected instruction type encountered";
250 i : terminate = true;
251 : break;
252 : }
253 :
254 : // If the next instruction is flagged as a disassembly start point, we
255 : // should end this run of instructions (basic-block) and let it be picked
256 : // up on the next iteration.
257 E : if (unvisited_.count(addr + inst.size) != 0 && !terminate) {
258 E : control_flow = kControlFlowContinues;
259 E : terminate = true;
260 : }
261 E : }
262 :
263 : // Notify that we are terminating an instruction run. Note that we have to
264 : // back up the address by the last instruction size.
265 : if (OnEndInstructionRun(addr - inst.size,
266 : inst,
267 E : control_flow) == kDirectiveAbort)
268 i : return kWalkError;
269 E : }
270 :
271 : // Notify when we've completed disassembly.
272 E : if (OnDisassemblyComplete() == kDirectiveAbort)
273 i : return kWalkError;
274 :
275 : // If we covered every byte in the function, we don't
276 : // care that we didn't chase all computed branches.
277 E : if (incomplete_branches && disassembled_bytes_ == code_size_)
278 E : return kWalkSuccess;
279 :
280 : // Otherwise we return success only in case of no computed branches.
281 E : return incomplete_branches ? kWalkIncomplete : kWalkSuccess;
282 E : }
283 :
284 E : bool Disassembler::Unvisited(AbsoluteAddress addr) {
285 E : DCHECK(IsInBlock(addr));
286 :
287 E : if (visited_.Intersects(addr))
288 E : return false;
289 :
290 E : return unvisited_.insert(addr).second;
291 E : }
292 :
293 : Disassembler::CallbackDirective Disassembler::NotifyOnInstruction(
294 E : AbsoluteAddress addr, const _DInst& inst) {
295 : // Invoke our local callback.
296 E : CallbackDirective directive = OnInstruction(addr, inst);
297 :
298 : // Invoke the external callback if we're not already aborted.
299 E : if (directive == kDirectiveContinue && !on_instruction_.is_null())
300 E : directive = on_instruction_.Run(*this, inst);
301 :
302 E : return directive;
303 E : }
304 :
305 E : bool Disassembler::IsInBlock(AbsoluteAddress addr) const {
306 : return addr >= code_addr_ &&
307 E : static_cast<size_t>(addr - code_addr_) + 1 <= code_size_;
308 E : }
309 :
310 : } // namespace core
|