1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // A class that attempts to disassemble a function.
16 : #ifndef SYZYGY_CORE_DISASSEMBLER_H_
17 : #define SYZYGY_CORE_DISASSEMBLER_H_
18 :
19 : #include <stdint.h>
20 : #include <set>
21 :
22 : #include "base/callback.h"
23 : #include "syzygy/core/address.h"
24 : #include "syzygy/core/address_space.h"
25 : #include "distorm.h" // NOLINT
26 :
27 : namespace core {
28 :
29 : class Disassembler {  // Walks code along its control flow from known entry points.
30 : public:
31 : typedef std::set<AbsoluteAddress> AddressSet;
32 : typedef core::AddressSpace<AbsoluteAddress, size_t, uint8_t> VisitedSpace;
33 :
34 : enum CallbackDirective {
35 : // Indicates that the disassembler should continue.
36 : kDirectiveContinue,
37 :
38 : // Indicates that the disassembler should terminate its current
39 : // path in the walk, and continue at the next unvisited location.
40 : kDirectiveTerminatePath,
41 :
42 : // Indicates that the disassembler should halt all disassembly.
43 : kDirectiveTerminateWalk,
44 :
45 : // Indicates that the disassembler should terminate with an error.
46 : kDirectiveAbort
47 : };
48 :
49 : // The instruction callback is invoked for each instruction the disassembler
50 : // encounters. The callback receives two parameters:
51 : // 1. const Disassembler& disasm the disassembler.
52 : // 2. const _DInst& inst the current instruction.
53 : // Returns a CallbackDirective telling the disassembler how to proceed.
54 : typedef base::Callback<CallbackDirective(const Disassembler&,
55 : const _DInst&)>
56 : InstructionCallback;
57 :
58 : enum WalkResult {
59 : // Error during walk - e.g. function is not in our PEImageFile
60 : // or the section is not code, or the OnInstruction callback indicated an
61 : // error status.
62 : kWalkError,
63 :
64 : // Walk was successful and complete.
65 : kWalkSuccess,
66 :
67 : // Walk was incomplete, e.g. it encountered a computed branch or
68 : // similar, so may not have traversed every branch of the function.
69 : kWalkIncomplete,
70 :
71 : // Walk was terminated.
72 : kWalkTerminated,
73 : };
74 :
75 : // These flag values are passed to OnEndInstructionRun.
76 : enum ControlFlowFlag {
77 : // The instruction run ends with an explicit termination of control flow.
78 : kControlFlowTerminates,
79 :
80 : // The instruction run implicitly flows into the next instruction run.
81 : kControlFlowContinues,
82 : };
83 :
84 : Disassembler(const uint8_t* code,
85 : size_t code_size,
86 : AbsoluteAddress code_addr,
87 : const InstructionCallback& on_instruction);
88 :
89 : Disassembler(const uint8_t* code,
90 : size_t code_size,
91 : AbsoluteAddress code_addr,
92 : const AddressSet& entry_points,  // presumably seeds unvisited_ - confirm in the definition.
93 : const InstructionCallback& on_instruction);
94 :
95 : virtual ~Disassembler();
96 :
97 : // Add addr to unvisited set.
98 : // @returns true iff addr is unvisited.
99 : // @pre addr is in the code (IsInBlock(addr)? IsInCode is not declared here - confirm).
100 : bool Unvisited(AbsoluteAddress addr);
101 :
102 : // Attempts to walk function from known entry points.
103 : // Invokes callback for every instruction as it's encountered.
104 : // @returns the result of the walk.
105 : // @note the instructions may be encountered in any order, as the
106 : // disassembler follows the code's control flow.
107 : virtual WalkResult Walk();
108 :
109 : // @name Accessors.
110 : // @{
111 : const uint8_t* code() const { return code_; }
112 : size_t code_size() const { return code_size_; }
113 : const AbsoluteAddress code_addr() const { return code_addr_; }
114 : const AddressSet& unvisited() const { return unvisited_; }
115 : const VisitedSpace& visited() const { return visited_; }
116 E : size_t disassembled_bytes() const { return disassembled_bytes_; }
117 : // @}
118 :
119 : protected:
120 : // Called every time a basic instruction is hit.
121 : // @param addr is the address of the instruction itself.
122 : // @param inst is the disassembled instruction data.
123 : // @returns a CallbackDirective telling the disassembler how to proceed.
124 : virtual CallbackDirective OnInstruction(AbsoluteAddress addr,
125 : const _DInst& inst);
126 :
127 : // Called every time a branch instruction is hit.
128 : // @param addr is the address of the branch instruction itself.
129 : // @param inst is the disassembled instruction data.
130 : // @param dest is the destination address of the branch instruction.
131 : // @returns a CallbackDirective telling the disassembler how to proceed.
132 : virtual CallbackDirective OnBranchInstruction(AbsoluteAddress addr,
133 : const _DInst& inst,
134 : AbsoluteAddress dest);
135 :
136 : // Called every time disassembly is started from a new address. Will be
137 : // called at least once if unvisited_ is non-empty.
138 : // @param start_address denotes the beginning of the instruction run.
139 : // @returns a CallbackDirective telling the disassembler how to proceed.
140 : virtual CallbackDirective OnStartInstructionRun(
141 : AbsoluteAddress start_address);
142 :
143 : // Called when an instruction run ends.
144 : // @param addr is the address of the instruction that terminates the run.
145 : // @param inst is the terminating instruction.
146 : // @param control_flow a flag denoting whether control flow terminates
147 : // for this instruction run, or flows into the next instruction run.
148 : // @returns a CallbackDirective telling the disassembler how to proceed.
149 : virtual CallbackDirective OnEndInstructionRun(AbsoluteAddress addr,
150 : const _DInst& inst,
151 : ControlFlowFlag control_flow);
152 :
153 : // Called when disassembly is complete and no further entry points remain
154 : // to disassemble from.
155 : // @returns a CallbackDirective telling the disassembler how to proceed.
156 : virtual CallbackDirective OnDisassemblyComplete();
157 :
158 : // Wrapper invoking both the internal and external OnInstruction() callbacks.
159 : // @param addr is the address of the current instruction.
160 : // @param inst is the instruction.
161 : // @returns a CallbackDirective; how the two results combine is defined out of view.
162 : CallbackDirective NotifyOnInstruction(AbsoluteAddress addr,
163 : const _DInst& inst);
164 :
165 : // @returns true iff addr is in the function (takes a single address, no length).
166 : bool IsInBlock(AbsoluteAddress addr) const;
167 :
168 : // The code we refer to.
169 : const uint8_t* code_;
170 : const size_t code_size_;
171 :
172 : // The original address of the first byte of code_.
173 : const AbsoluteAddress code_addr_;
174 :
175 : // Invoke this callback on every instruction.
176 : InstructionCallback on_instruction_;
177 :
178 : // Unvisited instruction locations before and during a walk.
179 : // This is seeded by the code entry point(s), and will also contain
180 : // branch targets during disassembly.
181 : AddressSet unvisited_;
182 : // Each visited instruction is stored as a range in this space.
183 : VisitedSpace visited_;
184 :
185 : // Number of bytes disassembled to this point during walk.
186 : size_t disassembled_bytes_;
187 : };
188 :
189 : } // namespace core
190 :
191 : #endif // SYZYGY_CORE_DISASSEMBLER_H_
|