1 : // Copyright 2012 Google Inc.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // A class that attempts to disassemble a function into basic blocks.
16 : //
17 : // Given a function block (dubbed macro block), this disassembler attempts to
18 : // cut it up into sequences of contiguous instruction runs and data blocks. A
19 : // contiguous instruction run is defined as a set of instructions that under
20 : // normal operation will always run from start to end. This class requires that
21 : // all external references to addresses within a function block have an
22 : // associated label.
23 :
24 : #ifndef SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
25 : #define SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
26 :
27 : #include <set>
28 : #include <string>
29 :
30 : #include "base/basictypes.h"
31 : #include "base/callback.h"
32 : #include "base/string_piece.h"
33 : #include "syzygy/block_graph/basic_block.h"
34 : #include "syzygy/block_graph/basic_block_subgraph.h"
35 : #include "syzygy/block_graph/block_graph.h"
36 : #include "syzygy/core/address.h"
37 : #include "syzygy/core/disassembler.h"
38 : #include "distorm.h" // NOLINT
39 :
40 m : namespace block_graph {
41 :
42 : // This class re-disassembles an already-processed code block (referred to
43 : // herein as a macro block) and breaks it up into basic blocks.
44 : //
45 : // A basic block is defined here as one of:
46 : //
47 : // 1) A series of code instructions that will be executed contiguously.
48 : // 2) A chunk of data (as determined by it being labeled as such).
49 : // 3) Padding (or unreachable code)
50 : //
51 : // The break-down into basic blocks happens in six passes:
52 : //
53 : // 1) Code disassembly starting from the block's inbound code references. This
54 : // carves all of the basic code blocks and creates control flow (successor)
55 : // relationships. While the basic blocks are being created, intra-block
56 : // successors cannot be resolved and are instead referenced by block offset;
57 : // inter-block successors are immediately resolved.
58 : // 2) Data block construction to carve out statically embedded data.
59 : // 3) Padding block construction to fill any gaps.
60 : // 4) Copying all inbound references (referrers) to their corresponding
61 : // basic block.
62 : // 5) Copying all references originating in the block to their corresponding
63 : // basic block.
64 : // 6) Wiring up all unresolved intra-block successors.
65 : //
66 : // The block to be decomposed must have been produced by a successful
67 : // PE decomposition (or some facsimile thereof).
68 : //
69 : // TODO(rogerm): Refactor the Disassembler interface to expose more callbacks.
70 : // The BasicBlockDecomposer doesn't really have an IS-A relationship with
71 : // the Disassembler, but inherits so as to over-ride the various event
72 : // handlers the Disassembler exposes to itself. Private inheritance would
73 : // fit nicely here, but is not allowed by the style guide. We could also
74 : // create an adapter class that forwards the events, but that's about the
75 : // same as fixing the Disassembler properly, hence this TODO.
76 m : class BasicBlockDecomposer : public core::Disassembler {
77 m : public:
78 m : typedef core::AbsoluteAddress AbsoluteAddress;
79 m : typedef block_graph::BasicBlock BasicBlock;
80 m : typedef BasicBlock::BasicBlockType BasicBlockType;
81 m : typedef block_graph::BlockGraph BlockGraph;
82 m : typedef BlockGraph::Offset Offset;
83 m : typedef block_graph::BasicBlockSubGraph BasicBlockSubGraph;
84 :
85 : // Initialize the BasicBlockDecomposer instance.
86 : // @param block The block to be decomposed
87 : // @param subgraph The basic-block sub-graph data structure to populate.
88 : // @pre block is safe for basic-block decomposition. That is,
89 : // CodeBlockAttributesAreBasicBlockSafe(block) returns true.
90 m : BasicBlockDecomposer(const BlockGraph::Block* block,
91 m : BasicBlockSubGraph* subgraph);
92 :
93 : // Decomposes a function macro block into its constituent basic blocks.
94 : //
95 : // Immediately following a successful decomposition of a block to
96 : // basic-blocks, subgraph will contain all the basic-blocks found in the
97 : // source block and exactly one block description: that of the source block.
98 : //
99 : // Following decomposition, additional block descriptions can be created,
100 : // new basic blocks added, and basic blocks shuffled between the descriptions.
101 : // The subgraph can then be coalesced back into the BlockGraph from
102 : // which the original block came.
103 m : bool Decompose();
104 :
105 m : protected:
106 : // Set up the queue of addresses to disassemble from as well as the set of
107 : // internal jump targets. Called from the constructors.
108 m : void InitUnvisitedAndJumpTargets();
109 :
110 : // Overrides from Disassembler. See syzygy/core/disassembler.h for comments.
111 : // @{
112 m : virtual CallbackDirective OnInstruction(AbsoluteAddress addr,
113 m : const _DInst& inst) OVERRIDE;
114 m : virtual CallbackDirective OnBranchInstruction(AbsoluteAddress addr,
115 m : const _DInst& inst,
116 m : AbsoluteAddress dest) OVERRIDE;
117 m : virtual CallbackDirective OnStartInstructionRun(
118 m : AbsoluteAddress start_address) OVERRIDE;
119 m : virtual CallbackDirective OnEndInstructionRun(
120 m : AbsoluteAddress addr,
121 m : const _DInst& inst,
122 m : ControlFlowFlag control_flow) OVERRIDE;
123 m : virtual CallbackDirective OnDisassemblyComplete() OVERRIDE;
124 : // @}
125 :
126 : // @name Validation functions.
127 : // @{
128 : // Verifies that every identified jump target in the original code block
129 : // resolves to the start of a basic code block in the original code blocks
130 : // basic-block address space. This is protected for unit-testing purposes.
131 m : void CheckAllJumpTargetsStartABasicCodeBlock() const;
132 :
133 : // Verifies that the address space derived from the original code block
134 : // fully covers the original code block. This is protected for unit-testing
135 : // purposes.
136 m : void CheckHasCompleteBasicBlockCoverage() const;
137 :
138 : // Verifies that all basic blocks in the address space derived from the
139 : // original code block have valid successors or end in an instruction that
140 : // does not yield successors. This is protected for unit-testing purposes.
141 m : void CheckAllControlFlowIsValid() const;
142 :
143 : // Verifies that all labels in the original block are present in the
144 : // decomposed basic-block subgraph.
145 m : void CheckAllLabelsArePreserved() const;
146 : // @}
147 :
148 : // Split code basic-blocks at branch targets such that no basic-block
149 : // has a reference that it not to its head.
150 m : bool SplitCodeBlocksAtBranchTargets();
151 :
152 : // Creates basic blocks for all known data symbols in the block.
153 : // @returns true on success.
154 m : bool FillInDataBlocks();
155 :
156 : // Fills in all gaps in the range
157 : // [code_addr_, code_addr_ + code_size_[ with padding basic blocks.
158 : // @returns true on success.
159 m : bool FillInPaddingBlocks();
160 :
161 : // Propagate the referrers from the original block into the basic blocks
162 : // so that referrers can be tracked as the basic blocks are manipulated.
163 m : bool CopyExternalReferrers();
164 :
165 : // Helper function to populate @p item with the set of references to
166 : // originating from its source range in the original block. I.e., if item is
167 : // an instruction that occupied bytes j through k in the original block, then
168 : // all references found between bytes j through k of the original block will
169 : // be copied to the set of references tracked by @p item.
170 m : template<typename Item>
171 m : bool CopyReferences(Item* item);
172 :
173 : // Propagate the references from the original block into the basic blocks
174 : // so that they can be tracked as the basic blocks are manipulated.
175 m : bool CopyReferences();
176 :
177 : // Resolve intra-block control flow references and referrers.
178 m : bool ResolveSuccessors();
179 :
180 : // Inserts a basic block range into the decompsition.
181 m : bool InsertBasicBlockRange(AbsoluteAddress addr,
182 m : size_t size,
183 m : BasicBlockType type);
184 :
185 : // The block being disassembled.
186 m : const BlockGraph::Block* const block_;
187 :
188 : // The basic-block sub-graph to which the block will be decomposed.
189 m : BasicBlockSubGraph* subgraph_;
190 :
191 : // Tracks locations our conditional branches jump to. Used to fix up basic
192 : // blocks by breaking up those that have a jump target in the middle.
193 m : AddressSet jump_targets_;
194 :
195 : // The start of the current basic block during a walk.
196 m : AbsoluteAddress current_block_start_;
197 :
198 : // The list of instructions in the current basic block.
199 m : BasicBlock::Instructions current_instructions_;
200 :
201 : // The set of successors for the current basic block.
202 m : BasicBlock::Successors current_successors_;
203 :
204 : // A debugging flag indicating whether the decomposition results should be
205 : // CHECKed.
206 m : bool check_decomposition_results_;
207 m : };
208 :
209 m : } // namespace block_graph
210 :
211 : #endif // SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
|