1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of disassembler.
16 : #include "syzygy/core/disassembler.h"
17 :
18 : #include "base/logging.h"
19 : #include "base/stringprintf.h"
20 : #include "syzygy/core/disassembler_util.h"
21 :
22 : namespace core {
23 :
24 : Disassembler::Disassembler(const uint8* code,
25 : size_t code_size,
26 : AbsoluteAddress code_addr,
27 : const InstructionCallback& on_instruction)
28 : : code_(code),
29 : code_size_(code_size),
30 : code_addr_(code_addr),
31 : on_instruction_(on_instruction),
32 E : disassembled_bytes_(0) {
33 E : }
34 :
35 : Disassembler::Disassembler(const uint8* code,
36 : size_t code_size,
37 : AbsoluteAddress code_addr,
38 : const AddressSet& entry_points,
39 : const InstructionCallback& on_instruction)
40 : : code_(code),
41 : code_size_(code_size),
42 : code_addr_(code_addr),
43 : on_instruction_(on_instruction),
44 E : disassembled_bytes_(0) {
45 :
46 E : AddressSet::const_iterator it = entry_points.begin();
47 E : for (; it != entry_points.end(); ++it)
48 E : Unvisited(*it);
49 E : }
50 :
51 E : Disassembler::~Disassembler() {
52 E : }
53 :
54 : Disassembler::CallbackDirective Disassembler::OnInstruction(
55 E : AbsoluteAddress addr, const _DInst& inst) {
56 E : return kDirectiveContinue;
57 E : }
58 :
59 : Disassembler::CallbackDirective Disassembler::OnBranchInstruction(
60 E : AbsoluteAddress addr, const _DInst& inst, AbsoluteAddress dest) {
61 E : return kDirectiveContinue;
62 E : }
63 :
64 : Disassembler::CallbackDirective Disassembler::OnStartInstructionRun(
65 E : AbsoluteAddress start_address) {
66 E : return kDirectiveContinue;
67 E : }
68 :
69 : Disassembler::CallbackDirective Disassembler::OnEndInstructionRun(
70 E : AbsoluteAddress addr, const _DInst& inst, ControlFlowFlag control_flow) {
71 E : return kDirectiveContinue;
72 E : }
73 :
74 E : Disassembler::CallbackDirective Disassembler::OnDisassemblyComplete() {
75 E : return kDirectiveContinue;
76 E : }
77 :
78 E : Disassembler::WalkResult Disassembler::Walk() {
79 : // Initialize our disassembly state.
80 E : _CodeInfo code = {};
81 E : code.dt = Decode32Bits;
82 E : code.features = DF_NONE;
83 :
84 : // This is to keep track of whether we cover the entire function.
85 E : bool incomplete_branches = false;
86 :
87 E : while (!unvisited_.empty()) {
88 E : AddressSet::iterator it = unvisited_.begin();
89 E : AbsoluteAddress addr(*it);
90 E : unvisited_.erase(it);
91 :
92 : // Unvisited addresses must be within the code block we're currently
93 : // disassembling.
94 E : DCHECK_LE(code_addr_, addr);
95 E : DCHECK_GT(code_addr_ + code_size_, addr);
96 :
97 : // Notify of the beginning of a new instruction run.
98 E : if (OnStartInstructionRun(addr) == kDirectiveAbort)
99 i : return kWalkError;
100 :
101 : // This continues disassembly along a contiguous instruction run until we
102 : // run out of code, jump somewhere else, or are requested to terminate the
103 : // path by the OnInstruction callback. We call notification methods to
104 : // notify of the start of a run, the end of a run and when branch
105 : // instructions with computable destination addresses are hit.
106 E : bool terminate = false;
107 E : ControlFlowFlag control_flow = kControlFlowTerminates;
108 E : _DInst inst = {};
109 E : for (; addr != AbsoluteAddress(0) && !terminate; addr += inst.size) {
110 E : code.codeOffset = addr.value();
111 E : code.codeLen = code_size_ - (addr - code_addr_);
112 E : code.code = code_ + (addr - code_addr_);
113 E : if (code.codeLen == 0)
114 E : break;
115 :
116 E : bool conditional_branch_handled = false;
117 :
118 E : unsigned int decoded = 0;
119 E : _DecodeResult result = DistormDecompose(&code, &inst, 1, &decoded);
120 :
121 E : if (decoded == 0) {
122 i : LOG(ERROR) << "Unable to decode instruction at " << addr << ".";
123 :
124 : // Dump the next few bytes. The longest X86 instruction possible is 15
125 : // bytes according to distorm.
126 i : int max_bytes = code.codeLen;
127 i : if (max_bytes > 15)
128 i : max_bytes = 15;
129 i : std::string dump;
130 i : for (int i = 0; i < max_bytes; ++i) {
131 i : dump += base::StringPrintf(" 0x%02X", code.code[i]);
132 i : }
133 i : LOG(ERROR) << ".text =" << dump
134 : << (max_bytes < code.codeLen ? " ..." : ".");
135 i : return kWalkError;
136 : }
137 :
138 E : CHECK_EQ(1U, decoded);
139 E : CHECK(result == DECRES_MEMORYERR || result == DECRES_SUCCESS);
140 :
141 : // Try to visit this instruction.
142 E : VisitedSpace::Range range(addr, inst.size);
143 E : if (!visited_.Insert(range, 0)) {
144 : // If the collision is a repeat of a previously disassembled
145 : // instruction at a different offset then something went wrong.
146 E : if (!visited_.ContainsExactly(range)) {
147 i : LOG(ERROR) << "Two disassembled instructions overlap.";
148 i : return kWalkError;
149 : }
150 E : break;
151 : }
152 :
153 : // Tally the code bytes we just disassembled.
154 E : disassembled_bytes_ += inst.size;
155 :
156 : // Invoke the callback and terminate if need be.
157 E : switch (NotifyOnInstruction(addr, inst)) {
158 : case kDirectiveTerminateWalk:
159 E : return kWalkTerminated;
160 :
161 : case kDirectiveAbort:
162 i : return kWalkError;
163 :
164 : case kDirectiveTerminatePath:
165 E : terminate = true;
166 :
167 : default:
168 : break;
169 : }
170 :
171 E : uint8 fc = META_GET_FC(inst.meta);
172 E : switch (fc) {
173 : case FC_NONE:
174 : case FC_CALL:
175 : case FC_CMOV:
176 : // Do nothing with these flow control types.
177 E : break;
178 :
179 : case FC_RET:
180 : // It's a RET instruction, we're done with this branch.
181 E : terminate = true;
182 E : break;
183 :
184 : case FC_SYS:
185 i : incomplete_branches = true;
186 i : terminate = true;
187 i : NOTREACHED() << "Unexpected SYS* instruction encountered";
188 i : break;
189 :
190 : case FC_CND_BRANCH:
191 : // Conditional branch, schedule a visit to the branch-not-taken
192 : // basic block.
193 E : Unvisited(addr + inst.size);
194 : // And fall through to visit branch target.
195 :
196 : case FC_UNC_BRANCH: {
197 E : terminate = true; // The basic block ends here.
198 E : AbsoluteAddress dest;
199 E : switch (inst.ops[0].type) {
200 : case O_REG:
201 : case O_MEM:
202 : // Computed branch, we can't chase this.
203 E : break;
204 :
205 : case O_SMEM:
206 : // Branch to a register, can't chase this.
207 i : break;
208 :
209 : case O_DISP:
210 : // Indirect address, this may be e.g. a jump to an import.
211 : // TODO(siggi): validate that this is so.
212 E : DCHECK_EQ(32, inst.ops[0].size);
213 E : break;
214 :
215 : case O_PC:
216 : // PC relative address.
217 E : dest = addr + static_cast<size_t>(inst.size + inst.imm.addr);
218 E : conditional_branch_handled = true;
219 E : break;
220 :
221 : default:
222 i : NOTREACHED() << "Unexpected branch destination type";
223 : break;
224 : }
225 :
226 : // Make sure to visit the branch destination.
227 E : if (dest != AbsoluteAddress(0)) {
228 E : if (IsInBlock(dest))
229 E : Unvisited(dest);
230 : }
231 :
232 : // Notify of a newly-discovered branch destination.
233 E : if (OnBranchInstruction(addr, inst, dest) == kDirectiveAbort)
234 i : return kWalkError;
235 :
236 E : if (dest == AbsoluteAddress(0)) {
237 : // We couldn't compute the destination, if not handled,
238 : // we may have incomplete coverage for the function.
239 : incomplete_branches =
240 E : incomplete_branches || !conditional_branch_handled;
241 : }
242 : }
243 E : break;
244 :
245 : case FC_INT:
246 : // We encounter int3 inline in functions sometimes.
247 E : break;
248 :
249 : default:
250 i : NOTREACHED() << "Unexpected instruction type encountered";
251 i : terminate = true;
252 : break;
253 : }
254 :
255 : // If the next instruction is flagged as a disassembly start point, we
256 : // should end this run of instructions (basic-block) and let it be picked
257 : // up on the next iteration.
258 E : if (unvisited_.count(addr + inst.size) != 0 && !terminate) {
259 E : control_flow = kControlFlowContinues;
260 E : terminate = true;
261 : }
262 E : }
263 :
264 : // Notify that we are terminating an instruction run. Note that we have to
265 : // back up the address by the last instruction size.
266 : if (OnEndInstructionRun(addr - inst.size,
267 : inst,
268 E : control_flow) == kDirectiveAbort)
269 i : return kWalkError;
270 E : }
271 :
272 : // Notify when we've completed disassembly.
273 E : if (OnDisassemblyComplete() == kDirectiveAbort)
274 i : return kWalkError;
275 :
276 : // If we covered every byte in the function, we don't
277 : // care that we didn't chase all computed branches.
278 E : if (incomplete_branches && disassembled_bytes_ == code_size_)
279 E : return kWalkSuccess;
280 :
281 : // Otherwise we return success only in case of no computed branches.
282 E : return incomplete_branches ? kWalkIncomplete : kWalkSuccess;
283 E : }
284 :
285 E : bool Disassembler::Unvisited(AbsoluteAddress addr) {
286 E : DCHECK(IsInBlock(addr));
287 :
288 E : if (visited_.Intersects(addr))
289 E : return false;
290 :
291 E : return unvisited_.insert(addr).second;
292 E : }
293 :
294 : Disassembler::CallbackDirective Disassembler::NotifyOnInstruction(
295 E : AbsoluteAddress addr, const _DInst& inst) {
296 : // Invoke our local callback.
297 E : CallbackDirective directive = OnInstruction(addr, inst);
298 :
299 : // Invoke the external callback if we're not already aborted.
300 E : if (directive == kDirectiveContinue && !on_instruction_.is_null())
301 E : directive = on_instruction_.Run(*this, inst);
302 :
303 E : return directive;
304 E : }
305 :
306 E : bool Disassembler::IsInBlock(AbsoluteAddress addr) const {
307 : return addr >= code_addr_ &&
308 E : static_cast<size_t>(addr - code_addr_) + 1 <= code_size_;
309 E : }
310 :
311 : } // namespace core
|