1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // A class that attempts to disassemble a function into basic blocks.
16 : //
17 : // Given a function block (dubbed macro block), this disassembler attempts to
18 : // cut it up into sequences of contiguous instruction runs and data blocks. A
19 : // contiguous instruction run is defined as a set of instructions that under
20 : // normal operation will always run from start to end. This class requires that
21 : // all external references to addresses within a function block have an
22 : // associated label.
23 :
24 : #ifndef SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
25 : #define SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
26 :
27 : #include <set>
28 : #include <string>
29 :
30 : #include "base/basictypes.h"
31 : #include "base/callback.h"
32 : #include "base/strings/string_piece.h"
33 : #include "syzygy/block_graph/basic_block.h"
34 : #include "syzygy/block_graph/basic_block_subgraph.h"
35 : #include "syzygy/block_graph/block_graph.h"
36 : #include "syzygy/core/address.h"
37 : #include "syzygy/core/disassembler.h"
38 : #include "distorm.h" // NOLINT
39 :
40 : namespace block_graph {
41 :
42 : // This class re-disassembles an already-processed code block (referred to
43 : // herein as a macro block) and breaks it up into basic blocks.
44 : //
45 : // A basic block is defined here as one of:
46 : //
47 : // 1) A series of code instructions that will be executed contiguously.
48 : // 2) A chunk of data (as determined by it being labeled as such).
49 : // 3) Padding (or unreachable code)
50 : //
51 : // The break-down into basic blocks happens in six passes:
52 : //
53 : // 1) Code disassembly starting from the block's inbound code references. This
54 : // carves all of the basic code blocks and creates control flow (successor)
55 : // relationships. While the basic blocks are being created, intra-block
56 : // successors cannot be resolved and are instead referenced by block offset;
57 : // inter-block successors are immediately resolved.
58 : // 2) Data block construction to carve out statically embedded data.
59 : // 3) Padding block construction to fill any gaps.
60 : // 4) Copying all inbound references (referrers) to their corresponding
61 : // basic block.
62 : // 5) Copying all references originating in the block to their corresponding
63 : // basic block.
64 : // 6) Wiring up all unresolved intra-block successors.
65 : //
66 : // The block to be decomposed must have been produced by a successful
67 : // PE decomposition (or some facsimile thereof).
68 : class BasicBlockDecomposer {
69 : public:
70 : typedef core::AbsoluteAddress AbsoluteAddress;
71 : typedef block_graph::BasicBlock BasicBlock;
72 : typedef BasicBlock::BasicBlockType BasicBlockType;
73 : typedef block_graph::BlockGraph BlockGraph;
74 : typedef BlockGraph::Offset Offset;
75 : typedef block_graph::BasicBlockSubGraph BasicBlockSubGraph;
76 :
77 : // Initialize the BasicBlockDecomposer instance.
78 : // @param block The block to be decomposed
79 : // @param subgraph The basic-block sub-graph data structure to populate.
80 : // This can be NULL if the results of the decomposition aren't
81 : // necessary.
82 : BasicBlockDecomposer(const BlockGraph::Block* block,
83 : BasicBlockSubGraph* subgraph);
84 :
85 : // Decomposes a function macro block into its constituent basic blocks.
86 : //
87 : // Immediately following a successful decomposition of a block to
88 : // basic-blocks, subgraph will contain all the basic-blocks found in the
89 : // source block and exactly one block description: that of the source block.
90 : //
91 : // Following decomposition, additional block descriptions can be created,
92 : // new basic blocks added, and basic blocks shuffled between the descriptions.
93 : // The subgraph can then be coalesced back into the BlockGraph from
94 : // which the original block came.
95 : //
96 : // This can set various status bits indicating the reason for the failure.
97 : bool Decompose();
98 :
99 : // @returns true if the decomposition failed because of unsupported
100 : // instructions.
101 E : bool contains_unsupported_instructions() const {
102 E : return contains_unsupported_instructions_;
103 E : }
104 :
105 : protected:
106 : typedef std::map<Offset, BasicBlockReference> BasicBlockReferenceMap;
107 : typedef core::AddressSpace<Offset, size_t, BasicBlock*> BBAddressSpace;
108 : typedef BlockGraph::Block::SourceRange SourceRange;
109 : typedef BlockGraph::Size Size;
110 : typedef std::set<Offset> JumpTargets;
111 :
112 : // Returns the source range that coincides with the data range
113 : // [@p offset, [@p offset + @p size) in the original block.
114 : SourceRange GetSourceRange(Offset offset, Size size) const;
115 :
116 : // Find the basic block, and corresponding byte-range, that contains the
117 : // given offset.
118 : // @param offset the starting offset you with the returned basic-block/range
119 : // to contain.
120 : // @param basic_block the basic-block containing @p offset.
121 : // @param range the byte-range in which @p basic_offset resides, which
122 : // contains @p offset.
123 : bool FindBasicBlock(Offset offset,
124 : BasicBlock** basic_block,
125 : BBAddressSpace::Range* range) const;
126 :
127 : // Find the basic block that begins at the given offset.
128 : // @param offset The starting offset of the basic block you want to find.
129 : // @pre The basic block subgraph is derived from an original block (and
130 : // thus has an address space) and has been broken down into all of its
131 : // constituent basic blocks (i.e., post disassembly and basic-block
132 : // splitting).
133 : BasicBlock* GetBasicBlockAt(Offset offset) const;
134 :
135 : // Helper function to end the current basic block and begin a new one
136 : // at @p offset. This is only to be used during disassembly.
137 : bool EndCurrentBasicBlock(Offset end_offset);
138 :
139 : // Walk the function's code in a linear fashion, decomposing the block into
140 : // code and data basic blocks forming an original address space.
141 : bool Disassemble();
142 :
143 : // Determines the code range of the block, and creates any data blocks. This
144 : // will return false if an invalid block layout is encountered.
145 : // @param end Will be filled in with the end of the code range.
146 : bool GetCodeRangeAndCreateDataBasicBlocks(Offset* end);
147 :
148 : // Performs an initial linear pass at disassembling the code bytes of the
149 : // block into rudimentary basic blocks. The initial set of basic blocks are
150 : // each terminated at branch points. A subsequent pass will further split
151 : // basic blocks at branch destinations, see SplitCodeBlocksAtBranchTargets().
152 : bool ParseInstructions();
153 :
154 : // @name Helpers for ParseInstructions().
155 : // @{
156 :
157 : // Initializes jump_targets_ to the set of referenced code locations.
158 : // This covers all locations which are externally referenced, as well as
159 : // those that are internally referenced via jump tables. These jump targets
160 : // may be otherwise un-discoverable through disassembly.
161 : // @param code_end_offset the first offset above @p code_begin_offset at
162 : // which the bytes are no longer code.
163 : void InitJumpTargets(Offset code_end_offset);
164 :
165 : // Decode the bytes at @p offset into @p instruction. This function takes
166 : // into consideration the range of offsets which denote code.
167 : // @param offset The offset of into block_ at which to start decoding.
168 : // @param code_end_offset The offset at which the bytes cease to be code.
169 : // @param instruction this value will be populated on success.
170 : // @returns true on success; false otherwise.
171 : // @note Used by ParseInstructions().
172 : bool DecodeInstruction(Offset offset,
173 : Offset code_end_offset,
174 : Instruction* instruction) const;
175 :
176 : // Called for each instruction, this creates the Instruction object
177 : // corresponding to @p instruction, or terminates the current basic block
178 : // if @p instruction is a branch point.
179 : // @param instruction the decoded instruction.
180 : // @param offset the offset at which @p instruction occurs in the block.
181 : // @returns true on success; false otherwise.
182 : // @note Used by ParseInstructions().
183 : bool HandleInstruction(const Instruction& instruction, Offset offset);
184 :
185 : // @}
186 :
187 : // @name Validation functions.
188 : // @{
189 : // Verifies that every identified jump target in the original code block
190 : // resolves to the start of a basic code block in the original code blocks
191 : // basic-block address space. This is protected for unit-testing purposes.
192 : void CheckAllJumpTargetsStartABasicCodeBlock() const;
193 :
194 : // Verifies that the address space derived from the original code block
195 : // fully covers the original code block. This is protected for unit-testing
196 : // purposes.
197 : void CheckHasCompleteBasicBlockCoverage() const;
198 :
199 : // Verifies that all basic blocks in the address space derived from the
200 : // original code block have valid successors or end in an instruction that
201 : // does not yield successors. This is protected for unit-testing purposes.
202 : void CheckAllControlFlowIsValid() const;
203 :
204 : // Verifies that all labels in the original block are present in the
205 : // decomposed basic-block subgraph.
206 : void CheckAllLabelsArePreserved() const;
207 : // @}
208 :
209 : // Split code basic-blocks at branch targets such that no basic-block
210 : // has a reference that it not to its head.
211 : void SplitCodeBlocksAtBranchTargets();
212 :
213 : // Propagate the referrers from the original block into the basic blocks
214 : // so that referrers can be tracked as the basic blocks are manipulated.
215 : void CopyExternalReferrers();
216 :
217 : // Helper function to populate @p refs with the set of references originating
218 : // from its source range in the original block.
219 : void CopyReferences(Offset item_offset,
220 : Size item_size,
221 : BasicBlockReferenceMap* refs);
222 :
223 : // Propagate the references from the original block into the basic blocks
224 : // so that they can be tracked as the basic blocks are manipulated.
225 : void CopyReferences();
226 :
227 : // Resolve intra-block control flow references and referrers.
228 : void ResolveSuccessors();
229 :
230 : // Convert any unreachable code basic block into padding basic blocks.
231 : void MarkUnreachableCodeAsPadding();
232 :
233 : // Inserts a basic block range into the decomposition.
234 : bool InsertBasicBlockRange(Offset offset,
235 : size_t size,
236 : BasicBlockType type);
237 :
238 : // The block being disassembled.
239 : const BlockGraph::Block* const block_;
240 :
241 : // The basic-block sub-graph to which the block will be decomposed.
242 : BasicBlockSubGraph* subgraph_;
243 :
244 : // The layout of the original block into basic blocks in subgraph_.
245 : BBAddressSpace original_address_space_;
246 :
247 : // Tracks locations our conditional branches jump to. Used to fix up basic
248 : // blocks by breaking up those that have a jump target in the middle.
249 : JumpTargets jump_targets_;
250 :
251 : // The start offset of the current basic block during a walk.
252 : Offset current_block_start_;
253 :
254 : // The list of instructions in the current basic block.
255 : BasicBlock::Instructions current_instructions_;
256 :
257 : // The set of successors for the current basic block.
258 : BasicBlock::Successors current_successors_;
259 :
260 : // A debugging flag indicating whether the decomposition results should be
261 : // CHECKed.
262 : bool check_decomposition_results_;
263 :
264 : // If no explicit subgraph was provided then we need to use one as scratch
265 : // space in order to do some work.
266 : scoped_ptr<BasicBlockSubGraph> scratch_subgraph_;
267 :
268 : // Decomposition failure flags.
269 : bool contains_unsupported_instructions_;
270 : };
271 :
272 : } // namespace block_graph
273 :
274 : #endif // SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
|