1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of disassembler.
16 : #include "syzygy/core/disassembler.h"
17 :
18 : #include "base/logging.h"
19 : #include "base/strings/stringprintf.h"
20 : #include "syzygy/core/disassembler_util.h"
21 :
22 : namespace core {
23 :
24 : Disassembler::Disassembler(const uint8* code,
25 : size_t code_size,
26 : AbsoluteAddress code_addr,
27 : const InstructionCallback& on_instruction)
28 : : code_(code),
29 : code_size_(code_size),
30 : code_addr_(code_addr),
31 : on_instruction_(on_instruction),
32 E : disassembled_bytes_(0) {
33 E : }
34 :
35 : Disassembler::Disassembler(const uint8* code,
36 : size_t code_size,
37 : AbsoluteAddress code_addr,
38 : const AddressSet& entry_points,
39 : const InstructionCallback& on_instruction)
40 : : code_(code),
41 : code_size_(code_size),
42 : code_addr_(code_addr),
43 : on_instruction_(on_instruction),
44 : disassembled_bytes_(0) {
45 :
46 : AddressSet::const_iterator it = entry_points.begin();
47 : for (; it != entry_points.end(); ++it)
48 : Unvisited(*it);
49 : }
50 :
51 E : Disassembler::~Disassembler() {
52 E : }
53 :
54 : Disassembler::CallbackDirective Disassembler::OnInstruction(
55 E : AbsoluteAddress addr, const _DInst& inst) {
56 E : return kDirectiveContinue;
57 E : }
58 :
59 : Disassembler::CallbackDirective Disassembler::OnBranchInstruction(
60 E : AbsoluteAddress addr, const _DInst& inst, AbsoluteAddress dest) {
61 E : return kDirectiveContinue;
62 E : }
63 :
64 : Disassembler::CallbackDirective Disassembler::OnStartInstructionRun(
65 E : AbsoluteAddress start_address) {
66 E : return kDirectiveContinue;
67 E : }
68 :
69 : Disassembler::CallbackDirective Disassembler::OnEndInstructionRun(
70 E : AbsoluteAddress addr, const _DInst& inst, ControlFlowFlag control_flow) {
71 E : return kDirectiveContinue;
72 E : }
73 :
74 E : Disassembler::CallbackDirective Disassembler::OnDisassemblyComplete() {
75 E : return kDirectiveContinue;
76 E : }
77 :
78 E : Disassembler::WalkResult Disassembler::Walk() {
79 : // Initialize our disassembly state.
80 E : _CodeInfo code = {};
81 E : code.dt = Decode32Bits;
82 E : code.features = DF_NONE;
83 :
84 : // This is to keep track of whether we cover the entire function.
85 E : bool incomplete_branches = false;
86 :
87 E : while (!unvisited_.empty()) {
88 E : AddressSet::iterator it = unvisited_.begin();
89 E : AbsoluteAddress addr(*it);
90 E : unvisited_.erase(it);
91 :
92 : // Unvisited addresses must be within the code block we're currently
93 : // disassembling.
94 E : DCHECK_LE(code_addr_, addr);
95 E : DCHECK_GT(code_addr_ + code_size_, addr);
96 :
97 : // Notify of the beginning of a new instruction run.
98 E : if (OnStartInstructionRun(addr) == kDirectiveAbort)
99 i : return kWalkError;
100 :
101 : // This continues disassembly along a contiguous instruction run until we
102 : // run out of code, jump somewhere else, or are requested to terminate the
103 : // path by the OnInstruction callback. We call notification methods to
104 : // notify of the start of a run, the end of a run and when branch
105 : // instructions with computable destination addresses are hit.
106 E : bool terminate = false;
107 E : ControlFlowFlag control_flow = kControlFlowTerminates;
108 E : _DInst inst = {};
109 E : for (; addr != AbsoluteAddress(0) && !terminate; addr += inst.size) {
110 E : code.codeOffset = addr.value();
111 E : code.codeLen = code_size_ - (addr - code_addr_);
112 E : code.code = code_ + (addr - code_addr_);
113 E : if (code.codeLen == 0)
114 i : break;
115 :
116 E : bool conditional_branch_handled = false;
117 :
118 E : unsigned int decoded = 0;
119 E : _DecodeResult result = DistormDecompose(&code, &inst, 1, &decoded);
120 :
121 E : if (decoded == 0) {
122 i : LOG(ERROR) << "Unable to decode instruction at " << addr << ".";
123 :
124 : // Dump the next few bytes. The longest X86 instruction possible is 15
125 : // bytes according to distorm.
126 i : int max_bytes = code.codeLen;
127 i : if (max_bytes > 15)
128 i : max_bytes = 15;
129 i : std::string dump;
130 i : for (int i = 0; i < max_bytes; ++i) {
131 i : dump += base::StringPrintf(" 0x%02X", code.code[i]);
132 i : }
133 i : LOG(ERROR) << ".text =" << dump
134 : << (max_bytes < code.codeLen ? " ..." : ".");
135 i : return kWalkError;
136 : }
137 :
138 E : CHECK_EQ(1U, decoded);
139 E : CHECK(result == DECRES_MEMORYERR || result == DECRES_SUCCESS);
140 :
141 : // Try to visit this instruction.
142 E : VisitedSpace::Range range(addr, inst.size);
143 E : if (!visited_.Insert(range, 0)) {
144 : // If the collision is a repeat of a previously disassembled
145 : // instruction at a different offset then something went wrong.
146 i : if (!visited_.ContainsExactly(range)) {
147 i : LOG(ERROR) << "Two disassembled instructions overlap.";
148 i : return kWalkError;
149 : }
150 i : break;
151 : }
152 :
153 : // Tally the code bytes we just disassembled.
154 E : disassembled_bytes_ += inst.size;
155 :
156 : // Invoke the callback and terminate if need be.
157 E : switch (NotifyOnInstruction(addr, inst)) {
158 : case kDirectiveTerminateWalk:
159 E : return kWalkTerminated;
160 :
161 : case kDirectiveAbort:
162 i : return kWalkError;
163 :
164 : case kDirectiveTerminatePath:
165 E : terminate = true;
166 : break;
167 : }
168 :
169 E : uint8 fc = META_GET_FC(inst.meta);
170 E : switch (fc) {
171 : case FC_NONE:
172 : case FC_CALL:
173 : case FC_CMOV:
174 : // Do nothing with these flow control types.
175 E : break;
176 :
177 : case FC_RET:
178 : // It's a RET instruction, we're done with this branch.
179 E : terminate = true;
180 E : break;
181 :
182 : case FC_SYS:
183 i : incomplete_branches = true;
184 i : terminate = true;
185 i : NOTREACHED() << "Unexpected SYS* instruction encountered";
186 i : break;
187 :
188 : case FC_CND_BRANCH:
189 : // Conditional branch, schedule a visit to the branch-not-taken
190 : // basic block.
191 E : Unvisited(addr + inst.size);
192 : // And fall through to visit branch target.
193 :
194 : case FC_UNC_BRANCH: {
195 E : terminate = true; // The basic block ends here.
196 E : AbsoluteAddress dest;
197 E : switch (inst.ops[0].type) {
198 : case O_REG:
199 : case O_MEM:
200 : // Computed branch, we can't chase this.
201 i : break;
202 :
203 : case O_SMEM:
204 : // Branch to a register, can't chase this.
205 i : break;
206 :
207 : case O_DISP:
208 : // Indirect address, this may be e.g. a jump to an import.
209 : // TODO(siggi): validate that this is so.
210 i : DCHECK_EQ(32, inst.ops[0].size);
211 i : break;
212 :
213 : case O_PC:
214 : // PC relative address.
215 E : dest = addr + static_cast<size_t>(inst.size + inst.imm.addr);
216 E : conditional_branch_handled = true;
217 E : break;
218 :
219 : default:
220 i : NOTREACHED() << "Unexpected branch destination type";
221 : break;
222 : }
223 :
224 : // Make sure to visit the branch destination.
225 E : if (dest != AbsoluteAddress(0)) {
226 E : if (IsInBlock(dest))
227 E : Unvisited(dest);
228 : }
229 :
230 : // Notify of a newly-discovered branch destination.
231 E : if (OnBranchInstruction(addr, inst, dest) == kDirectiveAbort)
232 i : return kWalkError;
233 :
234 E : if (dest == AbsoluteAddress(0)) {
235 : // We couldn't compute the destination, if not handled,
236 : // we may have incomplete coverage for the function.
237 : incomplete_branches =
238 i : incomplete_branches || !conditional_branch_handled;
239 : }
240 : }
241 E : break;
242 :
243 : case FC_INT:
244 : // We encounter int3 inline in functions sometimes.
245 i : break;
246 :
247 : default:
248 i : NOTREACHED() << "Unexpected instruction type encountered";
249 i : terminate = true;
250 : break;
251 : }
252 :
253 : // If the next instruction is flagged as a disassembly start point, we
254 : // should end this run of instructions (basic-block) and let it be picked
255 : // up on the next iteration.
256 E : if (unvisited_.count(addr + inst.size) != 0 && !terminate) {
257 E : control_flow = kControlFlowContinues;
258 E : terminate = true;
259 : }
260 E : }
261 :
262 : // Notify that we are terminating an instruction run. Note that we have to
263 : // back up the address by the last instruction size.
264 : if (OnEndInstructionRun(addr - inst.size,
265 : inst,
266 E : control_flow) == kDirectiveAbort)
267 i : return kWalkError;
268 E : }
269 :
270 : // Notify when we've completed disassembly.
271 E : if (OnDisassemblyComplete() == kDirectiveAbort)
272 i : return kWalkError;
273 :
274 : // If we covered every byte in the function, we don't
275 : // care that we didn't chase all computed branches.
276 E : if (incomplete_branches && disassembled_bytes_ == code_size_)
277 i : return kWalkSuccess;
278 :
279 : // Otherwise we return success only in case of no computed branches.
280 E : return incomplete_branches ? kWalkIncomplete : kWalkSuccess;
281 E : }
282 :
283 E : bool Disassembler::Unvisited(AbsoluteAddress addr) {
284 E : DCHECK(IsInBlock(addr));
285 :
286 E : if (visited_.Intersects(addr))
287 i : return false;
288 :
289 E : return unvisited_.insert(addr).second;
290 E : }
291 :
292 : Disassembler::CallbackDirective Disassembler::NotifyOnInstruction(
293 E : AbsoluteAddress addr, const _DInst& inst) {
294 : // Invoke our local callback.
295 E : CallbackDirective directive = OnInstruction(addr, inst);
296 :
297 : // Invoke the external callback if we're not already aborted.
298 E : if (directive == kDirectiveContinue && !on_instruction_.is_null())
299 E : directive = on_instruction_.Run(*this, inst);
300 :
301 E : return directive;
302 E : }
303 :
304 E : bool Disassembler::IsInBlock(AbsoluteAddress addr) const {
305 : return addr >= code_addr_ &&
306 E : static_cast<size_t>(addr - code_addr_) + 1 <= code_size_;
307 E : }
308 :
309 : } // namespace core
|