1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implements the BasicBlockEntryHookTransform class.
16 :
17 : #include "syzygy/instrument/transforms/basic_block_entry_hook_transform.h"
18 :
19 : #include "base/logging.h"
20 : #include "base/string_util.h"
21 : #include "base/stringprintf.h"
22 : #include "syzygy/block_graph/block_builder.h"
23 : #include "syzygy/block_graph/block_util.h"
24 : #include "syzygy/common/basic_block_frequency_data.h"
25 : #include "syzygy/common/defs.h"
26 : #include "syzygy/instrument/transforms/entry_thunk_transform.h"
27 : #include "syzygy/pe/block_util.h"
28 : #include "syzygy/pe/pe_utils.h"
29 : #include "syzygy/pe/transforms/add_imports_transform.h"
30 :
31 : namespace instrument {
32 : namespace transforms {
33 :
34 : namespace {
35 :
36 : using ::common::kBasicBlockEntryAgentId;
37 : using block_graph::BasicBlock;
38 : using block_graph::BasicCodeBlock;
39 : using block_graph::BasicBlockAssembler;
40 : using block_graph::BlockBuilder;
41 : using block_graph::BlockGraph;
42 : using block_graph::Displacement;
43 : using block_graph::Immediate;
44 : using block_graph::Operand;
45 : using pe::transforms::AddImportsTransform;
46 :
47 : typedef AddImportsTransform::ImportedModule ImportedModule;
48 : typedef BasicBlockEntryHookTransform::RelativeAddressRange RelativeAddressRange;
49 :
50 : const char kDefaultModuleName[] = "basic_block_entry.dll";
51 : const char kBasicBlockEnter[] = "_basic_block_enter";
52 :
53 : // Compares two relative address ranges to see if they overlap. Assumes they
54 : // are already sorted. This is used to validate basic-block ranges.
55 : struct RelativeAddressRangesOverlapFunctor {
56 : bool operator()(const RelativeAddressRange& r1,
57 E : const RelativeAddressRange& r2) const {
58 E : DCHECK_LT(r1.start(), r2.start());
59 :
60 E : if (r1.end() > r2.start())
61 i : return true;
62 :
63 E : return false;
64 E : }
65 : };
66 :
67 : // Sets up the basic-block entry hook import.
68 : bool SetupEntryHook(BlockGraph* block_graph,
69 : BlockGraph::Block* header_block,
70 : const std::string& module_name,
71 E : BlockGraph::Reference* basic_block_enter) {
72 E : DCHECK(block_graph != NULL);
73 E : DCHECK(header_block != NULL);
74 E : DCHECK(basic_block_enter != NULL);
75 :
76 : // Setup the import module.
77 E : ImportedModule module(module_name);
78 : size_t bb_index = module.AddSymbol(kBasicBlockEnter,
79 E : ImportedModule::kAlwaysImport);
80 :
81 : // Setup the add-imports transform.
82 E : AddImportsTransform add_imports;
83 E : add_imports.AddModule(&module);
84 :
85 : // Add the imports to the block-graph.
86 E : if (!ApplyBlockGraphTransform(&add_imports, block_graph, header_block)) {
87 i : LOG(ERROR) << "Unable to add import entry for basic-block hook function.";
88 i : return false;
89 : }
90 :
91 : // Get a reference to the entry-hook function.
92 E : if (!module.GetSymbolReference(bb_index, basic_block_enter)) {
93 i : LOG(ERROR) << "Unable to get " << kBasicBlockEnter << ".";
94 i : return false;
95 : }
96 E : DCHECK(basic_block_enter->IsValid());
97 :
98 E : return true;
99 E : }
100 :
101 : } // namespace
102 :
103 : const char BasicBlockEntryHookTransform::kTransformName[] =
104 : "BasicBlockEntryHookTransform";
105 :
106 : BasicBlockEntryHookTransform::BasicBlockEntryHookTransform()
107 : : add_frequency_data_(kBasicBlockEntryAgentId,
108 : "Basic-Block Frequency Data",
109 : common::kBasicBlockFrequencyDataVersion),
110 : thunk_section_(NULL),
111 : instrument_dll_name_(kDefaultModuleName),
112 E : set_src_ranges_for_thunks_(false) {
113 E : }
114 :
115 : bool BasicBlockEntryHookTransform::PreBlockGraphIteration(
116 : BlockGraph* block_graph,
117 E : BlockGraph::Block* header_block) {
118 E : DCHECK(block_graph != NULL);
119 E : DCHECK(header_block != NULL);
120 :
121 : // Setup basic block entry hook.
122 : if (!SetupEntryHook(block_graph,
123 : header_block,
124 : instrument_dll_name_,
125 E : &bb_entry_hook_ref_)) {
126 i : return false;
127 : }
128 :
129 : // Add the static basic-block frequency data.
130 : if (!ApplyBlockGraphTransform(
131 E : &add_frequency_data_, block_graph, header_block)) {
132 i : LOG(ERROR) << "Failed to insert basic-block frequency data.";
133 i : return false;
134 : }
135 :
136 : // Find or create the section we put our thunks in.
137 : thunk_section_ = block_graph->FindOrAddSection(common::kThunkSectionName,
138 E : pe::kCodeCharacteristics);
139 E : DCHECK(thunk_section_ != NULL);
140 :
141 E : return true;
142 E : }
143 :
144 : bool BasicBlockEntryHookTransform::OnBlock(BlockGraph* block_graph,
145 E : BlockGraph::Block* block) {
146 E : DCHECK(block_graph != NULL);
147 E : DCHECK(block != NULL);
148 :
149 E : if (block->type() != BlockGraph::CODE_BLOCK)
150 E : return true;
151 :
152 E : if (!pe::CodeBlockIsBasicBlockDecomposable(block)) {
153 E : if (!ThunkNonDecomposableCodeBlock(block_graph, block))
154 i : return false;
155 E : return true;
156 : }
157 :
158 E : if (!ApplyBasicBlockSubGraphTransform(this, block_graph, block, NULL))
159 i : return false;
160 :
161 E : return true;
162 E : }
163 :
164 : bool BasicBlockEntryHookTransform::TransformBasicBlockSubGraph(
165 E : BlockGraph* block_graph , BasicBlockSubGraph* subgraph) {
166 : // TODO(rogerm): A lot of this is boilerplate that can be hoisted to an
167 : // IterativeBasicBlockSubgraphTransform (or some such). In particular,
168 : // iterating the subgraph, dispatch on code/data basic block, and the
169 : // bb_ranges_ are duplicated in the coverage transform.
170 E : DCHECK(block_graph != NULL);
171 E : DCHECK(subgraph != NULL);
172 E : DCHECK(bb_entry_hook_ref_.IsValid());
173 E : DCHECK(add_frequency_data_.frequency_data_block() != NULL);
174 :
175 : // Insert a call to the basic-block entry hook at the top of each code
176 : // basic-block.
177 : BasicBlockSubGraph::BBCollection::iterator it =
178 E : subgraph->basic_blocks().begin();
179 E : for (; it != subgraph->basic_blocks().end(); ++it) {
180 E : BasicCodeBlock* bb = BasicCodeBlock::Cast(*it);
181 E : if (bb == NULL || bb->is_padding())
182 E : continue;
183 :
184 : // Find the source range associated with this basic-block.
185 E : BlockGraph::Block::SourceRange source_range;
186 E : if (!GetBasicBlockSourceRange(*bb, &source_range)) {
187 i : LOG(ERROR) << "Unable to get source range for basic block '"
188 : << bb->name() << "'";
189 i : return false;
190 : }
191 :
192 : // We use the location/index in the bb_ranges vector of the current
193 : // basic-block range as the basic_block_id, and we pass a pointer to
194 : // the frequency data block as the module_data parameter. We then make
195 : // a memory indirect call to the bb_entry_hook.
196 E : Immediate basic_block_id(bb_ranges_.size(), core::kSize32Bit);
197 E : Immediate module_data(add_frequency_data_.frequency_data_block(), 0);
198 : Operand bb_entry_hook(Displacement(bb_entry_hook_ref_.referenced(),
199 E : bb_entry_hook_ref_.offset()));
200 :
201 : // Assemble entry hook instrumentation into the instruction stream.
202 E : BasicBlockAssembler bb_asm(bb->instructions().begin(), &bb->instructions());
203 E : bb_asm.push(basic_block_id);
204 E : bb_asm.push(module_data);
205 E : bb_asm.call(bb_entry_hook);
206 :
207 E : bb_ranges_.push_back(source_range);
208 E : }
209 :
210 E : return true;
211 E : }
212 :
213 : bool BasicBlockEntryHookTransform::PostBlockGraphIteration(
214 E : BlockGraph* block_graph, BlockGraph::Block* header_block) {
215 E : DCHECK(block_graph != NULL);
216 E : DCHECK(header_block != NULL);
217 :
218 E : size_t num_basic_blocks = bb_ranges_.size();
219 E : if (num_basic_blocks == 0) {
220 i : LOG(WARNING) << "Encountered no basic code blocks during instrumentation.";
221 i : return true;
222 : }
223 :
224 : if (!add_frequency_data_.ConfigureFrequencyDataBuffer(num_basic_blocks,
225 E : sizeof(uint32))) {
226 i : LOG(ERROR) << "Failed to configure frequency data buffer.";
227 i : return false;
228 : }
229 :
230 : // Add the module entry thunks.
231 E : EntryThunkTransform add_thunks;
232 E : add_thunks.set_only_instrument_module_entry(true);
233 E : add_thunks.set_instrument_dll_name(instrument_dll_name_);
234 E : add_thunks.set_src_ranges_for_thunks(set_src_ranges_for_thunks_);
235 :
236 E : Immediate module_data(add_frequency_data_.frequency_data_block(), 0);
237 E : if (!add_thunks.SetEntryThunkParameter(module_data)) {
238 i : LOG(ERROR) << "Failed to configure the entry thunks with the module_data "
239 : << "parameter.";
240 i : return false;
241 : }
242 :
243 E : if (!ApplyBlockGraphTransform(&add_thunks, block_graph, header_block)) {
244 i : LOG(ERROR) << "Unable to thunk module entry points.";
245 i : return false;
246 : }
247 E : thunk_section_ = add_thunks.thunk_section();
248 E : DCHECK(thunk_section_ != NULL);
249 :
250 : #ifndef NDEBUG
251 : // If we're in debug mode then sanity check the basic block ranges. When
252 : // sorted, they should not overlap.
253 E : RelativeAddressRangeVector bb_ranges_copy(bb_ranges_);
254 E : std::sort(bb_ranges_copy.begin(), bb_ranges_copy.end());
255 : DCHECK(std::adjacent_find(bb_ranges_copy.begin(),
256 : bb_ranges_copy.end(),
257 : RelativeAddressRangesOverlapFunctor()) ==
258 E : bb_ranges_copy.end());
259 : #endif
260 :
261 E : return true;
262 E : }
263 :
264 : bool BasicBlockEntryHookTransform::ThunkNonDecomposableCodeBlock(
265 E : BlockGraph* block_graph, BlockGraph::Block* code_block) {
266 E : DCHECK(block_graph != NULL);
267 E : DCHECK(code_block != NULL);
268 E : DCHECK(!pe::CodeBlockIsBasicBlockDecomposable(code_block));
269 :
270 : // Typedef for the thunk block map. The key is the offset within the callee
271 : // block and the value is the thunk block that forwards to the callee at that
272 : // offset.
273 : typedef std::map<BlockGraph::Offset, BlockGraph::Block*> ThunkBlockMap;
274 :
275 : // We keep a cache of thunks we've already created (by target offset of the
276 : // entry-point into the block) so that we only create one thunk per entry
277 : // point.
278 E : ThunkBlockMap thunk_block_map;
279 :
280 : // Iterate through all the block's referrers, creating thunks as we go.
281 : // We copy the referrer set for simplicity, as it's potentially mutated
282 : // in the loop.
283 E : BlockGraph::Block::ReferrerSet referrers = code_block->referrers();
284 E : BlockGraph::Block::ReferrerSet::const_iterator referrer_it(referrers.begin());
285 E : for (; referrer_it != referrers.end(); ++referrer_it) {
286 E : const BlockGraph::Block::Referrer& referrer = *referrer_it;
287 : if (!EnsureReferrerIsThunked(
288 E : referrer, block_graph, code_block, &thunk_block_map)) {
289 i : return false;
290 : }
291 E : }
292 :
293 E : return true;
294 E : }
295 :
296 : bool BasicBlockEntryHookTransform::EnsureReferrerIsThunked(
297 : const BlockGraph::Block::Referrer& referrer,
298 : BlockGraph* block_graph,
299 : BlockGraph::Block* code_block,
300 E : ThunkBlockMap* thunk_block_map) {
301 E : DCHECK(block_graph != NULL);
302 E : DCHECK(code_block != NULL);
303 E : DCHECK(thunk_block_map != NULL);
304 E : DCHECK(!pe::CodeBlockIsBasicBlockDecomposable(code_block));
305 :
306 : // Get the reference.
307 E : BlockGraph::Reference ref;
308 E : if (!referrer.first->GetReference(referrer.second, &ref)) {
309 i : LOG(ERROR) << "Unable to get reference from referrer.";
310 i : return false;
311 : }
312 E : DCHECK_EQ(code_block, ref.referenced());
313 :
314 : // Skip self-references, except long references to the start of the block.
315 : // Note: This may currently miss important cases. Notably if a block contains
316 : // more than one function, and the functions are mutually recursive, we'll
317 : // only record the original entry to the block, but will miss the internal
318 : // recursion. As-is, this does work for the common case where a block
319 : // contains one self-recursive function, however.
320 E : if (referrer.first == code_block) {
321 : // Skip short references.
322 E : if (ref.size() < sizeof(core::AbsoluteAddress))
323 E : return true;
324 :
325 : // Skip interior references. The block is not bb-decomposable so there is
326 : // nothing for us to do with them.
327 E : if (ref.offset() != 0)
328 E : return true;
329 : }
330 :
331 : // Get a thunk for the referenced offset from the thunk block map, creating
332 : // a new one if one does not already exist.
333 E : BlockGraph::Block* thunk_block = NULL;
334 : if (!FindOrCreateThunk(block_graph, thunk_block_map, code_block, ref.offset(),
335 E : &thunk_block)) {
336 i : LOG(ERROR) << "Unable to create thunk block.";
337 i : return false;
338 : }
339 E : DCHECK(thunk_block != NULL);
340 :
341 : // Update the referrer to point to the thunk.
342 : BlockGraph::Reference new_ref(ref.type(),
343 : ref.size(),
344 : thunk_block,
345 E : 0, 0);
346 E : referrer.first->SetReference(referrer.second, new_ref);
347 :
348 E : return true;
349 E : }
350 :
351 : bool BasicBlockEntryHookTransform::FindOrCreateThunk(
352 : BlockGraph* block_graph,
353 : ThunkBlockMap* thunk_block_map,
354 : BlockGraph::Block* code_block,
355 : BlockGraph::Offset offset,
356 E : BlockGraph::Block** thunk) {
357 E : DCHECK(block_graph != NULL);
358 E : DCHECK(thunk_block_map != NULL);
359 E : DCHECK(code_block != NULL);
360 E : DCHECK(thunk != NULL);
361 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
362 :
363 : // Do we already have a thunk defined for this offset? If so, return it.
364 E : ThunkBlockMap::const_iterator thunk_it = thunk_block_map->find(offset);
365 E : if (thunk_it != thunk_block_map->end()) {
366 E : *thunk = thunk_it->second;
367 E : return true;
368 : }
369 :
370 E : *thunk = NULL;
371 :
372 : // Determine the name for this thunk.
373 E : std::string name;
374 E : if (offset == 0) {
375 : name = base::StringPrintf("%s%s",
376 : code_block->name().c_str(),
377 E : common::kThunkSuffix);
378 E : } else {
379 : name = base::StringPrintf("%s%s+%d",
380 : code_block->name().c_str(),
381 : common::kThunkSuffix,
382 E : offset);
383 : }
384 :
385 : // Set up a basic block subgraph containing a single block description, with
386 : // that block description containing a single empty basic block, and get an
387 : // assembler writing into that basic block.
388 E : BasicBlockSubGraph subgraph;
389 E : BasicCodeBlock* bb = subgraph.AddBasicCodeBlock(name);
390 : BasicBlockSubGraph::BlockDescription* desc = subgraph.AddBlockDescription(
391 E : name, BlockGraph::CODE_BLOCK, thunk_section_->id(), 1, 0);
392 E : desc->basic_block_order.push_back(bb);
393 :
394 : // We use the location/index in the bb_ranges vector of the current
395 : // basic-block range as the basic_block_id, and we pass a pointer to
396 : // the frequency data block as the module_data parameter. We then make
397 : // a memory indirect call to the bb_entry_hook.
398 E : Immediate basic_block_id(bb_ranges_.size(), core::kSize32Bit);
399 E : Immediate module_data(add_frequency_data_.frequency_data_block(), 0);
400 E : Immediate original_function(Displacement(code_block, offset));
401 : Operand bb_entry_hook(Displacement(bb_entry_hook_ref_.referenced(),
402 E : bb_entry_hook_ref_.offset()));
403 :
404 : // Assemble entry hook instrumentation into the thunk's instruction stream.
405 : // Note that we turn this into a simulated call, so that the return from
406 : // the bb entry hook continues from the thunked function.
407 E : BasicBlockAssembler bb_asm(bb->instructions().begin(), &bb->instructions());
408 E : bb_asm.push(basic_block_id);
409 E : bb_asm.push(module_data);
410 E : bb_asm.push(original_function);
411 E : bb_asm.jmp(bb_entry_hook);
412 :
413 : // Find the source range associated with this block.
414 E : BlockGraph::Block::SourceRange source_range;
415 E : if (!code_block->source_ranges().empty())
416 E : source_range = code_block->source_ranges().range_pair(0).second;
417 :
418 : // Make sure we only push the source range if we have not already created
419 : // a source range mapping for this block (i.e., if the non-decomposable
420 : // block has multiple entry points, we want them to share an id). We can do
421 : // this because we handle one block at a time; so, all of a block's thunks
422 : // will be created as a group. This assertion is sanity checked in the
423 : // PostBlockGraphIteration function's check for overlapping source ranges.
424 E : if (bb_ranges_.empty() || source_range != bb_ranges_.back())
425 E : bb_ranges_.push_back(source_range);
426 :
427 : // Condense the whole mess into a block.
428 E : BlockBuilder block_builder(block_graph);
429 E : if (!block_builder.Merge(&subgraph)) {
430 i : LOG(ERROR) << "Failed to build thunk block.";
431 i : return false;
432 : }
433 :
434 : // Exactly one new block should have been created.
435 E : DCHECK_EQ(1u, block_builder.new_blocks().size());
436 E : *thunk = block_builder.new_blocks().front();
437 E : (*thunk_block_map)[offset] = *thunk;
438 :
439 E : return true;
440 E : }
441 :
442 : } // namespace transforms
443 : } // namespace instrument
|