1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // A class that attempts to disassemble a function into basic blocks.
16 : //
17 : // Given a function block (dubbed macro block), this disassembler attempts to
18 : // cut it up into sequences of contiguous instruction runs and data blocks. A
19 : // contiguous instruction run is defined as a set of instructions that under
20 : // normal operation will always run from start to end. This class requires that
21 : // all external references to addresses within a function block have an
22 : // associated label.
23 :
24 : #ifndef SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
25 : #define SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
26 :
27 : #include <set>
28 : #include <string>
29 :
30 : #include "base/callback.h"
31 : #include "base/strings/string_piece.h"
32 : #include "syzygy/block_graph/basic_block.h"
33 : #include "syzygy/block_graph/basic_block_subgraph.h"
34 : #include "syzygy/block_graph/block_graph.h"
35 : #include "syzygy/core/address.h"
36 : #include "syzygy/core/disassembler.h"
37 : #include "distorm.h" // NOLINT
38 :
39 : namespace block_graph {
40 :
41 : // This class re-disassembles an already-processed code block (referred to
42 : // herein as a macro block) and breaks it up into basic blocks.
43 : //
44 : // A basic block is defined here as one of:
45 : //
46 : // 1) A series of code instructions that will be executed contiguously.
47 : // 2) A chunk of data (as determined by it being labeled as such).
48 : // 3) Padding (or unreachable code)
49 : //
50 : // The break-down into basic blocks happens in six passes:
51 : //
52 : // 1) Code disassembly starting from the block's inbound code references. This
53 : // carves all of the basic code blocks and creates control flow (successor)
54 : // relationships. While the basic blocks are being created, intra-block
55 : // successors cannot be resolved and are instead referenced by block offset;
56 : // inter-block successors are immediately resolved.
57 : // 2) Data block construction to carve out statically embedded data.
58 : // 3) Padding block construction to fill any gaps.
59 : // 4) Copying all inbound references (referrers) to their corresponding
60 : // basic block.
61 : // 5) Copying all references originating in the block to their corresponding
62 : // basic block.
63 : // 6) Wiring up all unresolved intra-block successors.
64 : //
65 : // The block to be decomposed must have been produced by a successful
66 : // PE decomposition (or some facsimile thereof).
67 : class BasicBlockDecomposer {
68 : public:
69 : typedef core::AbsoluteAddress AbsoluteAddress;
70 : typedef block_graph::BasicBlock BasicBlock;
71 : typedef BasicBlock::BasicBlockType BasicBlockType;
72 : typedef block_graph::BlockGraph BlockGraph;
73 : typedef BlockGraph::Offset Offset;
74 : typedef block_graph::BasicBlockSubGraph BasicBlockSubGraph;
75 :
76 : // Initialize the BasicBlockDecomposer instance.
77 : // @param block The block to be decomposed
78 : // @param subgraph The basic-block sub-graph data structure to populate.
79 : // This can be NULL if the results of the decomposition aren't
80 : // necessary.
81 : BasicBlockDecomposer(const BlockGraph::Block* block,
82 : BasicBlockSubGraph* subgraph);
83 :
84 : // Decomposes a function macro block into its constituent basic blocks.
85 : //
86 : // Immediately following a successful decomposition of a block to
87 : // basic-blocks, subgraph will contain all the basic-blocks found in the
88 : // source block and exactly one block description: that of the source block.
89 : //
90 : // Following decomposition, additional block descriptions can be created,
91 : // new basic blocks added, and basic blocks shuffled between the descriptions.
92 : // The subgraph can then be coalesced back into the BlockGraph from
93 : // which the original block came.
94 : //
95 : // This can set various status bits indicating the reason for the failure.
96 : bool Decompose();
97 :
98 : // @returns true if the decomposition failed because of unsupported
99 : // instructions.
100 E : bool contains_unsupported_instructions() const {
101 E : return contains_unsupported_instructions_;
102 E : }
103 :
104 : protected:
105 : typedef std::map<Offset, BasicBlockReference> BasicBlockReferenceMap;
106 : typedef core::AddressSpace<Offset, size_t, BasicBlock*> BBAddressSpace;
107 : typedef BlockGraph::Block::SourceRange SourceRange;
108 : typedef BlockGraph::Size Size;
109 : typedef std::set<Offset> JumpTargets;
110 :
111 : // Returns the source range that coincides with the data range
112 : // [@p offset, [@p offset + @p size) in the original block.
113 : SourceRange GetSourceRange(Offset offset, Size size) const;
114 :
115 : // Find the basic block, and corresponding byte-range, that contains the
116 : // given offset.
117 : // @param offset the starting offset you with the returned basic-block/range
118 : // to contain.
119 : // @param basic_block the basic-block containing @p offset.
120 : // @param range the byte-range in which @p basic_offset resides, which
121 : // contains @p offset.
122 : bool FindBasicBlock(Offset offset,
123 : BasicBlock** basic_block,
124 : BBAddressSpace::Range* range) const;
125 :
126 : // Find the basic block that begins at the given offset.
127 : // @param offset The starting offset of the basic block you want to find.
128 : // @pre The basic block subgraph is derived from an original block (and
129 : // thus has an address space) and has been broken down into all of its
130 : // constituent basic blocks (i.e., post disassembly and basic-block
131 : // splitting).
132 : BasicBlock* GetBasicBlockAt(Offset offset) const;
133 :
134 : // Helper function to end the current basic block and begin a new one
135 : // at @p offset. This is only to be used during disassembly.
136 : bool EndCurrentBasicBlock(Offset end_offset);
137 :
138 : // Walk the function's code in a linear fashion, decomposing the block into
139 : // code and data basic blocks forming an original address space.
140 : bool Disassemble();
141 :
142 : // Determines the code range of the block, and creates any data blocks. This
143 : // will return false if an invalid block layout is encountered.
144 : // @param end Will be filled in with the end of the code range.
145 : bool GetCodeRangeAndCreateDataBasicBlocks(Offset* end);
146 :
147 : // Performs an initial linear pass at disassembling the code bytes of the
148 : // block into rudimentary basic blocks. The initial set of basic blocks are
149 : // each terminated at branch points. A subsequent pass will further split
150 : // basic blocks at branch destinations, see SplitCodeBlocksAtBranchTargets().
151 : bool ParseInstructions();
152 :
153 : // @name Helpers for ParseInstructions().
154 : // @{
155 :
156 : // Initializes jump_targets_ to the set of referenced code locations.
157 : // This covers all locations which are externally referenced, as well as
158 : // those that are internally referenced via jump tables. These jump targets
159 : // may be otherwise un-discoverable through disassembly.
160 : // @param code_end_offset the first offset above @p code_begin_offset at
161 : // which the bytes are no longer code.
162 : void InitJumpTargets(Offset code_end_offset);
163 :
164 : // Decode the bytes at @p offset into @p instruction. This function takes
165 : // into consideration the range of offsets which denote code.
166 : // @param offset The offset of into block_ at which to start decoding.
167 : // @param code_end_offset The offset at which the bytes cease to be code.
168 : // @param instruction this value will be populated on success.
169 : // @returns true on success; false otherwise.
170 : // @note Used by ParseInstructions().
171 : bool DecodeInstruction(Offset offset,
172 : Offset code_end_offset,
173 : Instruction* instruction) const;
174 :
175 : // Called for each instruction, this creates the Instruction object
176 : // corresponding to @p instruction, or terminates the current basic block
177 : // if @p instruction is a branch point.
178 : // @param instruction the decoded instruction.
179 : // @param offset the offset at which @p instruction occurs in the block.
180 : // @returns true on success; false otherwise.
181 : // @note Used by ParseInstructions().
182 : bool HandleInstruction(const Instruction& instruction, Offset offset);
183 :
184 : // @}
185 :
186 : // @name Validation functions.
187 : // @{
188 : // Verifies that every identified jump target in the original code block
189 : // resolves to the start of a basic code block in the original code blocks
190 : // basic-block address space. This is protected for unit-testing purposes.
191 : void CheckAllJumpTargetsStartABasicCodeBlock() const;
192 :
193 : // Verifies that the address space derived from the original code block
194 : // fully covers the original code block. This is protected for unit-testing
195 : // purposes.
196 : void CheckHasCompleteBasicBlockCoverage() const;
197 :
198 : // Verifies that all basic blocks in the address space derived from the
199 : // original code block have valid successors or end in an instruction that
200 : // does not yield successors. This is protected for unit-testing purposes.
201 : void CheckAllControlFlowIsValid() const;
202 :
203 : // Verifies that all labels in the original block are present in the
204 : // decomposed basic-block subgraph.
205 : void CheckAllLabelsArePreserved() const;
206 : // @}
207 :
208 : // Split code basic-blocks at branch targets such that no basic-block
209 : // has a reference that it not to its head.
210 : void SplitCodeBlocksAtBranchTargets();
211 :
212 : // Propagate the referrers from the original block into the basic blocks
213 : // so that referrers can be tracked as the basic blocks are manipulated.
214 : void CopyExternalReferrers();
215 :
216 : // Helper function to populate @p refs with the set of references originating
217 : // from its source range in the original block.
218 : void CopyReferences(Offset item_offset,
219 : Size item_size,
220 : BasicBlockReferenceMap* refs);
221 :
222 : // Propagate the references from the original block into the basic blocks
223 : // so that they can be tracked as the basic blocks are manipulated.
224 : void CopyReferences();
225 :
226 : // Resolve intra-block control flow references and referrers.
227 : void ResolveSuccessors();
228 :
229 : // Convert any unreachable code basic block into padding basic blocks.
230 : void MarkUnreachableCodeAsPadding();
231 :
232 : // Inserts a basic block range into the decomposition.
233 : bool InsertBasicBlockRange(Offset offset,
234 : size_t size,
235 : BasicBlockType type);
236 :
237 : // The block being disassembled.
238 : const BlockGraph::Block* const block_;
239 :
240 : // The basic-block sub-graph to which the block will be decomposed.
241 : BasicBlockSubGraph* subgraph_;
242 :
243 : // The layout of the original block into basic blocks in subgraph_.
244 : BBAddressSpace original_address_space_;
245 :
246 : // Tracks locations our conditional branches jump to. Used to fix up basic
247 : // blocks by breaking up those that have a jump target in the middle.
248 : JumpTargets jump_targets_;
249 :
250 : // The start offset of the current basic block during a walk.
251 : Offset current_block_start_;
252 :
253 : // The list of instructions in the current basic block.
254 : BasicBlock::Instructions current_instructions_;
255 :
256 : // The set of successors for the current basic block.
257 : BasicBlock::Successors current_successors_;
258 :
259 : // A debugging flag indicating whether the decomposition results should be
260 : // CHECKed.
261 : bool check_decomposition_results_;
262 :
263 : // If no explicit subgraph was provided then we need to use one as scratch
264 : // space in order to do some work.
265 : std::unique_ptr<BasicBlockSubGraph> scratch_subgraph_;
266 :
267 : // Decomposition failure flags.
268 : bool contains_unsupported_instructions_;
269 : };
270 :
271 : } // namespace block_graph
272 :
273 : #endif // SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
|