1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of disassembler.
16 : #include "syzygy/core/disassembler.h"
17 :
18 : #include "base/logging.h"
19 : #include "base/strings/stringprintf.h"
20 : #include "syzygy/core/disassembler_util.h"
21 :
22 : namespace core {
23 :
24 : Disassembler::Disassembler(const uint8_t* code,
25 : size_t code_size,
26 : AbsoluteAddress code_addr,
27 : const InstructionCallback& on_instruction)
28 E : : code_(code),
29 E : code_size_(code_size),
30 E : code_addr_(code_addr),
31 E : on_instruction_(on_instruction),
32 E : disassembled_bytes_(0) {
33 E : }
34 :
35 : Disassembler::Disassembler(const uint8_t* code,
36 : size_t code_size,
37 : AbsoluteAddress code_addr,
38 : const AddressSet& entry_points,
39 : const InstructionCallback& on_instruction)
40 : : code_(code),
41 : code_size_(code_size),
42 : code_addr_(code_addr),
43 : on_instruction_(on_instruction),
44 : disassembled_bytes_(0) {
45 : AddressSet::const_iterator it = entry_points.begin();
46 : for (; it != entry_points.end(); ++it)
47 : Unvisited(*it);
48 : }
49 :
50 E : Disassembler::~Disassembler() {
51 E : }
52 :
53 : Disassembler::CallbackDirective Disassembler::OnInstruction(
54 E : AbsoluteAddress addr, const _DInst& inst) {
55 E : return kDirectiveContinue;
56 E : }
57 :
58 : Disassembler::CallbackDirective Disassembler::OnBranchInstruction(
59 E : AbsoluteAddress addr, const _DInst& inst, AbsoluteAddress dest) {
60 E : return kDirectiveContinue;
61 E : }
62 :
63 : Disassembler::CallbackDirective Disassembler::OnStartInstructionRun(
64 E : AbsoluteAddress start_address) {
65 E : return kDirectiveContinue;
66 E : }
67 :
68 : Disassembler::CallbackDirective Disassembler::OnEndInstructionRun(
69 E : AbsoluteAddress addr, const _DInst& inst, ControlFlowFlag control_flow) {
70 E : return kDirectiveContinue;
71 E : }
72 :
73 E : Disassembler::CallbackDirective Disassembler::OnDisassemblyComplete() {
74 E : return kDirectiveContinue;
75 E : }
76 :
77 E : Disassembler::WalkResult Disassembler::Walk() {
78 : // Initialize our disassembly state.
79 E : _CodeInfo code = {};
80 E : code.dt = Decode32Bits;
81 E : code.features = DF_NONE;
82 :
83 : // This is to keep track of whether we cover the entire function.
84 E : bool incomplete_branches = false;
85 :
86 E : while (!unvisited_.empty()) {
87 E : AddressSet::iterator it = unvisited_.begin();
88 E : AbsoluteAddress addr(*it);
89 E : unvisited_.erase(it);
90 :
91 : // Unvisited addresses must be within the code block we're currently
92 : // disassembling.
93 E : DCHECK_LE(code_addr_, addr);
94 E : DCHECK_GT(code_addr_ + code_size_, addr);
95 :
96 : // Notify of the beginning of a new instruction run.
97 E : if (OnStartInstructionRun(addr) == kDirectiveAbort)
98 i : return kWalkError;
99 :
100 : // This continues disassembly along a contiguous instruction run until we
101 : // run out of code, jump somewhere else, or are requested to terminate the
102 : // path by the OnInstruction callback. We call notification methods to
103 : // notify of the start of a run, the end of a run and when branch
104 : // instructions with computable destination addresses are hit.
105 E : bool terminate = false;
106 E : ControlFlowFlag control_flow = kControlFlowTerminates;
107 E : _DInst inst = {};
108 E : for (; addr != AbsoluteAddress(0) && !terminate; addr += inst.size) {
109 E : code.codeOffset = addr.value();
110 E : code.codeLen = code_size_ - (addr - code_addr_);
111 E : code.code = code_ + (addr - code_addr_);
112 E : if (code.codeLen == 0)
113 i : break;
114 :
115 E : bool conditional_branch_handled = false;
116 :
117 E : unsigned int decoded = 0;
118 E : _DecodeResult result = DistormDecompose(&code, &inst, 1, &decoded);
119 :
120 E : if (decoded == 0) {
121 i : LOG(ERROR) << "Unable to decode instruction at " << addr << ".";
122 :
123 : // Dump the next few bytes. The longest X86 instruction possible is 15
124 : // bytes according to distorm.
125 i : int max_bytes = code.codeLen;
126 i : if (max_bytes > 15)
127 i : max_bytes = 15;
128 i : std::string dump;
129 i : for (int i = 0; i < max_bytes; ++i) {
130 i : dump += base::StringPrintf(" 0x%02X", code.code[i]);
131 i : }
132 i : LOG(ERROR) << ".text =" << dump
133 : << (max_bytes < code.codeLen ? " ..." : ".");
134 i : return kWalkError;
135 : }
136 :
137 E : CHECK_EQ(1U, decoded);
138 E : CHECK(result == DECRES_MEMORYERR || result == DECRES_SUCCESS);
139 :
140 : // Try to visit this instruction.
141 E : VisitedSpace::Range range(addr, inst.size);
142 E : if (!visited_.Insert(range, 0)) {
143 : // If the collision is a repeat of a previously disassembled
144 : // instruction at a different offset then something went wrong.
145 i : if (!visited_.ContainsExactly(range)) {
146 i : LOG(ERROR) << "Two disassembled instructions overlap.";
147 i : return kWalkError;
148 : }
149 i : break;
150 : }
151 :
152 : // Tally the code bytes we just disassembled.
153 E : disassembled_bytes_ += inst.size;
154 :
155 : // Invoke the callback and terminate if need be.
156 E : switch (NotifyOnInstruction(addr, inst)) {
157 : case kDirectiveTerminateWalk:
158 E : return kWalkTerminated;
159 :
160 : case kDirectiveAbort:
161 i : return kWalkError;
162 :
163 : case kDirectiveTerminatePath:
164 E : terminate = true;
165 : break;
166 : }
167 :
168 E : uint8_t fc = META_GET_FC(inst.meta);
169 E : switch (fc) {
170 : case FC_NONE:
171 : case FC_CALL:
172 : case FC_CMOV:
173 : // Do nothing with these flow control types.
174 E : break;
175 :
176 : case FC_RET:
177 : // It's a RET instruction, we're done with this branch.
178 E : terminate = true;
179 E : break;
180 :
181 : case FC_SYS:
182 i : incomplete_branches = true;
183 i : terminate = true;
184 i : NOTREACHED() << "Unexpected SYS* instruction encountered";
185 i : break;
186 :
187 : case FC_CND_BRANCH:
188 : // Conditional branch, schedule a visit to the branch-not-taken
189 : // basic block.
190 E : Unvisited(addr + inst.size);
191 : // And fall through to visit branch target.
192 :
193 : case FC_UNC_BRANCH: {
194 E : terminate = true; // The basic block ends here.
195 E : AbsoluteAddress dest;
196 E : switch (inst.ops[0].type) {
197 : case O_REG:
198 : case O_MEM:
199 : // Computed branch, we can't chase this.
200 i : break;
201 :
202 : case O_SMEM:
203 : // Branch to a register, can't chase this.
204 i : break;
205 :
206 : case O_DISP:
207 : // Indirect address, this may be e.g. a jump to an import.
208 : // TODO(siggi): validate that this is so.
209 i : DCHECK_EQ(32, inst.ops[0].size);
210 i : break;
211 :
212 : case O_PC:
213 : // PC relative address.
214 E : dest = addr + static_cast<size_t>(inst.size + inst.imm.addr);
215 E : conditional_branch_handled = true;
216 E : break;
217 :
218 : default:
219 i : NOTREACHED() << "Unexpected branch destination type";
220 : break;
221 : }
222 :
223 : // Make sure to visit the branch destination.
224 E : if (dest != AbsoluteAddress(0)) {
225 E : if (IsInBlock(dest))
226 E : Unvisited(dest);
227 : }
228 :
229 : // Notify of a newly-discovered branch destination.
230 E : if (OnBranchInstruction(addr, inst, dest) == kDirectiveAbort)
231 i : return kWalkError;
232 :
233 E : if (dest == AbsoluteAddress(0)) {
234 : // We couldn't compute the destination, if not handled,
235 : // we may have incomplete coverage for the function.
236 i : incomplete_branches =
237 : incomplete_branches || !conditional_branch_handled;
238 : }
239 : }
240 E : break;
241 :
242 : case FC_INT:
243 : // We encounter int3 inline in functions sometimes.
244 i : break;
245 :
246 : default:
247 i : NOTREACHED() << "Unexpected instruction type encountered";
248 i : terminate = true;
249 : break;
250 : }
251 :
252 : // If the next instruction is flagged as a disassembly start point, we
253 : // should end this run of instructions (basic-block) and let it be picked
254 : // up on the next iteration.
255 E : if (unvisited_.count(addr + inst.size) != 0 && !terminate) {
256 E : control_flow = kControlFlowContinues;
257 E : terminate = true;
258 : }
259 E : }
260 :
261 : // Notify that we are terminating an instruction run. Note that we have to
262 : // back up the address by the last instruction size.
263 : if (OnEndInstructionRun(addr - inst.size,
264 : inst,
265 E : control_flow) == kDirectiveAbort)
266 i : return kWalkError;
267 E : }
268 :
269 : // Notify when we've completed disassembly.
270 E : if (OnDisassemblyComplete() == kDirectiveAbort)
271 i : return kWalkError;
272 :
273 : // If we covered every byte in the function, we don't
274 : // care that we didn't chase all computed branches.
275 E : if (incomplete_branches && disassembled_bytes_ == code_size_)
276 i : return kWalkSuccess;
277 :
278 : // Otherwise we return success only in case of no computed branches.
279 E : return incomplete_branches ? kWalkIncomplete : kWalkSuccess;
280 E : }
281 :
282 E : bool Disassembler::Unvisited(AbsoluteAddress addr) {
283 E : DCHECK(IsInBlock(addr));
284 :
285 E : if (visited_.Intersects(addr))
286 i : return false;
287 :
288 E : return unvisited_.insert(addr).second;
289 E : }
290 :
291 : Disassembler::CallbackDirective Disassembler::NotifyOnInstruction(
292 E : AbsoluteAddress addr, const _DInst& inst) {
293 : // Invoke our local callback.
294 E : CallbackDirective directive = OnInstruction(addr, inst);
295 :
296 : // Invoke the external callback if we're not already aborted.
297 E : if (directive == kDirectiveContinue && !on_instruction_.is_null())
298 E : directive = on_instruction_.Run(*this, inst);
299 :
300 E : return directive;
301 E : }
302 :
303 E : bool Disassembler::IsInBlock(AbsoluteAddress addr) const {
304 E : return addr >= code_addr_ &&
305 : static_cast<size_t>(addr - code_addr_) + 1 <= code_size_;
306 E : }
307 :
308 : } // namespace core
|