1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // A class that attempts to disassemble a function.
16 : #ifndef SYZYGY_CORE_DISASSEMBLER_H_
17 : #define SYZYGY_CORE_DISASSEMBLER_H_
18 :
19 : #include <set>
20 : #include "base/basictypes.h"
21 : #include "base/callback.h"
22 : #include "syzygy/core/address.h"
23 : #include "syzygy/core/address_space.h"
24 : #include "distorm.h" // NOLINT
25 :
26 : namespace core {
27 :
28 : class Disassembler {
29 : public:
30 : typedef std::set<AbsoluteAddress> AddressSet;
31 : typedef core::AddressSpace<AbsoluteAddress, size_t, uint8> VisitedSpace;
32 :
33 : enum CallbackDirective {
34 : // Indicates that the disassembler should continue.
35 : kDirectiveContinue,
36 :
37 : // Indicates that the disassembler should terminate its current
38 : // path in the walk, and continue at the next unvisited location.
39 : kDirectiveTerminatePath,
40 :
41 : // Indicates that the disassembler should halt all disassembly.
42 : kDirectiveTerminateWalk,
43 :
44 : // Indicates that the disassembler should terminate with an error.
45 : kDirectiveAbort
46 : };
47 :
48 : // The instruction callback is invoked for each instruction the disassembler
49 : // encounters. The callback receives two parameters:
50 : // 1. const Disassembler& disasm the disassembler.
51 : // 2. const _DInst& inst the current instruction.
52 : // Returns a CallbackDirective telling the disassembler how to proceed.
53 : typedef base::Callback<CallbackDirective(const Disassembler&,
54 : const _DInst&)>
55 : InstructionCallback;
56 :
57 : enum WalkResult {
58 : // Error during walk - e.g. function is not in our PEImageFile
59 : // or the section is not code, or the OnInstruction callback indicated an
60 : // error status.
61 : kWalkError,
62 :
63 : // Walk was successful and complete.
64 : kWalkSuccess,
65 :
66 : // Walk was incomplete, e.g. it encountered a computed branch or
67 : // similar, so may not have traversed every branch of the function.
68 : kWalkIncomplete,
69 :
70 : // Walk was terminated.
71 : kWalkTerminated,
72 : };
73 :
74 : // These flag values are passed to OnEndInstructionRun.
75 : enum ControlFlowFlag {
76 : // The instruction run ends with an explicit termination of control flow.
77 : kControlFlowTerminates,
78 :
79 : // The instruction implicitly flows into the next instruction run.
80 : kControlFlowContinues,
81 : };
82 :
83 : Disassembler(const uint8* code,
84 : size_t code_size,
85 : AbsoluteAddress code_addr,
86 : const InstructionCallback& on_instruction);
87 :
88 : Disassembler(const uint8* code,
89 : size_t code_size,
90 : AbsoluteAddress code_addr,
91 : const AddressSet& entry_points,
92 : const InstructionCallback& on_instruction);
93 :
94 : virtual ~Disassembler();
95 :
96 : // Adds addr to the unvisited set.
97 : // @returns true iff addr is unvisited.
98 : // @pre addr lies within the code (presumably IsInBlock(addr); TODO confirm).
99 : bool Unvisited(AbsoluteAddress addr);
100 :
101 : // Attempts to walk function from known entry points.
102 : // Invokes callback for every instruction as it's encountered.
103 : // @returns the results of the walk.
104 : // @note the instructions may be encountered in any order, as the
105 : // disassembler follows the code's control flow.
106 : virtual WalkResult Walk();
107 :
108 : // @name Accessors.
109 : // @{
110 : const uint8* code() const { return code_; }
111 : size_t code_size() const { return code_size_; }
112 : const AbsoluteAddress code_addr() const { return code_addr_; }
113 : const AddressSet& unvisited() const { return unvisited_; }
114 : const VisitedSpace& visited() const { return visited_; }
115 E : size_t disassembled_bytes() const { return disassembled_bytes_; }
116 : // @}
117 :
118 : protected:
119 : // Called every time a basic instruction is hit.
120 : // @param addr is the address of the instruction itself.
121 : // @param inst is the disassembled instruction data.
122 : // @returns a CallbackDirective telling the disassembler how to proceed.
123 : virtual CallbackDirective OnInstruction(AbsoluteAddress addr,
124 : const _DInst& inst);
125 :
126 : // Called every time a branch instruction is hit.
127 : // @param addr is the address of the branch instruction itself.
128 : // @param inst is the disassembled instruction data.
129 : // @param dest is the destination address of the branch instruction.
130 : // @returns a CallbackDirective telling the disassembler how to proceed.
131 : virtual CallbackDirective OnBranchInstruction(AbsoluteAddress addr,
132 : const _DInst& inst,
133 : AbsoluteAddress dest);
134 :
135 : // Called every time disassembly is started from a new address. Will be
136 : // called at least once if unvisited_ is non-empty.
137 : // @param start_address denotes the beginning of the instruction run.
138 : // @returns a CallbackDirective telling the disassembler how to proceed.
139 : virtual CallbackDirective OnStartInstructionRun(
140 : AbsoluteAddress start_address);
141 :
142 : // Called every time a run of instructions ends.
143 : // @param addr is the address of the instruction that terminates the run.
144 : // @param inst is the terminating instruction.
145 : // @param control_flow a flag denoting whether control flow terminates
146 : // for this instruction run, or flows into the next instruction run.
147 : // @returns a CallbackDirective telling the disassembler how to proceed.
148 : virtual CallbackDirective OnEndInstructionRun(AbsoluteAddress addr,
149 : const _DInst& inst,
150 : ControlFlowFlag control_flow);
151 :
152 : // Called when disassembly is complete and no further entry points remain
153 : // to disassemble from.
154 : // @returns a CallbackDirective telling the disassembler how to proceed.
155 : virtual CallbackDirective OnDisassemblyComplete();
156 :
157 : // Wrapper function to handle invoking both the internal and external
158 : // OnInstruction() callbacks.
159 : // @param addr is the address of the current instruction.
160 : // @param inst is the instruction.
161 : CallbackDirective NotifyOnInstruction(AbsoluteAddress addr,
162 : const _DInst& inst);
163 :
164 : // @returns true iff addr is in the function; only a single address is taken.
165 : bool IsInBlock(AbsoluteAddress addr) const;
166 :
167 : // The code we refer to.
168 : const uint8* code_;
169 : const size_t code_size_;
170 :
171 : // The original address of the first byte of code_.
172 : const AbsoluteAddress code_addr_;
173 :
174 : // Invoke this callback on every instruction.
175 : InstructionCallback on_instruction_;
176 :
177 : // Unvisited instruction locations before and during a walk.
178 : // This is seeded by the code entry point(s), and will also contain
179 : // branch targets during disassembly.
180 : AddressSet unvisited_;
181 : // Each visited instruction is stored as a range in this space.
182 : VisitedSpace visited_;
183 :
184 : // Number of bytes disassembled to this point during walk.
185 : size_t disassembled_bytes_;
186 : };
187 :
188 : } // namespace core
189 :
190 : #endif // SYZYGY_CORE_DISASSEMBLER_H_
|