1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implements the BasicBlockEntryHookTransform class.
16 :
17 : #include "syzygy/instrument/transforms/basic_block_entry_hook_transform.h"
18 :
19 : #include "base/logging.h"
20 : #include "base/strings/string_util.h"
21 : #include "base/strings/stringprintf.h"
22 : #include "syzygy/block_graph/block_builder.h"
23 : #include "syzygy/block_graph/block_util.h"
24 : #include "syzygy/common/defs.h"
25 : #include "syzygy/common/indexed_frequency_data.h"
26 : #include "syzygy/instrument/transforms/entry_thunk_transform.h"
27 : #include "syzygy/pe/pe_utils.h"
28 : #include "syzygy/pe/transforms/pe_add_imports_transform.h"
29 :
30 : namespace instrument {
31 : namespace transforms {
32 :
33 : namespace {
34 :
35 : using block_graph::BasicBlock;
36 : using block_graph::BasicBlockAssembler;
37 : using block_graph::BasicBlockReference;
38 : using block_graph::BasicCodeBlock;
39 : using block_graph::BlockBuilder;
40 : using block_graph::BlockGraph;
41 : using block_graph::Displacement;
42 : using block_graph::Immediate;
43 : using block_graph::Operand;
44 : using block_graph::Successor;
45 : using block_graph::TransformPolicyInterface;
46 : using common::kBasicBlockEntryAgentId;
47 : using common::ThreadLocalIndexedFrequencyData;
48 : using pe::transforms::PEAddImportsTransform;
49 :
50 : typedef BasicBlockEntryHookTransform::RelativeAddressRange RelativeAddressRange;
51 : typedef pe::transforms::ImportedModule ImportedModule;
52 :
53 : const char kDefaultModuleName[] = "basic_block_entry_client.dll";
54 : const char kBasicBlockEnter[] = "_increment_indexed_freq_data";
55 :
56 : // Compares two relative address ranges to see if they overlap. Assumes they
57 : // are already sorted. This is used to validate basic-block ranges.
58 : struct RelativeAddressRangesOverlapFunctor {
59 : bool operator()(const RelativeAddressRange& r1,
60 E : const RelativeAddressRange& r2) const {
61 E : DCHECK_LT(r1.start(), r2.start());
62 :
63 E : if (r1.end() > r2.start())
64 i : return true;
65 :
66 E : return false;
67 E : }
68 : };
69 :
70 : // Sets up the basic-block entry and the frequency data hooks import.
71 : bool SetupEntryHooks(const TransformPolicyInterface* policy,
72 : BlockGraph* block_graph,
73 : BlockGraph::Block* header_block,
74 : const std::string& module_name,
75 E : BlockGraph::Reference* basic_block_enter) {
76 E : DCHECK(block_graph != NULL);
77 E : DCHECK(header_block != NULL);
78 E : DCHECK(basic_block_enter != NULL);
79 :
80 : // Setup the import module.
81 E : ImportedModule module(module_name);
82 : size_t bb_index = module.AddSymbol(kBasicBlockEnter,
83 E : ImportedModule::kAlwaysImport);
84 :
85 : // Setup the add-imports transform.
86 E : PEAddImportsTransform add_imports;
87 E : add_imports.AddModule(&module);
88 :
89 : // Add the imports to the block-graph.
90 : if (!ApplyBlockGraphTransform(
91 E : &add_imports, policy, block_graph, header_block)) {
92 i : LOG(ERROR) << "Unable to add import entry hook functions.";
93 i : return false;
94 : }
95 :
96 : // Get a reference to the entry-hook function.
97 E : if (!module.GetSymbolReference(bb_index, basic_block_enter)) {
98 i : LOG(ERROR) << "Unable to get " << kBasicBlockEnter << ".";
99 i : return false;
100 : }
101 E : DCHECK(basic_block_enter->IsValid());
102 :
103 E : return true;
104 E : }
105 :
106 : void AddSuccessorBetween(Successor::Condition condition,
107 : BasicCodeBlock* from,
108 : BasicCodeBlock* to) {
109 : from->successors().push_back(
110 : Successor(condition,
111 : BasicBlockReference(BlockGraph::RELATIVE_REF,
112 : BlockGraph::Reference::kMaximumSize,
113 : to),
114 : 0));
115 : }
116 :
117 : } // namespace
118 :
119 : const char BasicBlockEntryHookTransform::kTransformName[] =
120 : "BasicBlockEntryHookTransform";
121 :
122 : BasicBlockEntryHookTransform::BasicBlockEntryHookTransform()
123 : : add_frequency_data_(kBasicBlockEntryAgentId,
124 : "Basic-Block Frequency Data",
125 : common::kBasicBlockFrequencyDataVersion,
126 : common::IndexedFrequencyData::BASIC_BLOCK_ENTRY,
127 : sizeof(ThreadLocalIndexedFrequencyData)),
128 : thunk_section_(NULL),
129 : instrument_dll_name_(kDefaultModuleName),
130 : set_src_ranges_for_thunks_(false),
131 E : set_inline_fast_path_(false) {
132 E : }
133 :
134 : bool BasicBlockEntryHookTransform::PreBlockGraphIteration(
135 : const TransformPolicyInterface* policy,
136 : BlockGraph* block_graph,
137 E : BlockGraph::Block* header_block) {
138 E : DCHECK_NE(reinterpret_cast<TransformPolicyInterface*>(NULL), policy);
139 E : DCHECK_NE(reinterpret_cast<BlockGraph*>(NULL), block_graph);
140 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), header_block);
141 E : DCHECK_EQ(BlockGraph::PE_IMAGE, block_graph->image_format());
142 :
143 : // Setup basic block entry and the frequency data hooks.
144 : if (!SetupEntryHooks(policy,
145 : block_graph,
146 : header_block,
147 : instrument_dll_name_,
148 E : &bb_entry_hook_ref_)) {
149 i : return false;
150 : }
151 :
152 : // Add the static basic-block frequency data.
153 : if (!ApplyBlockGraphTransform(
154 E : &add_frequency_data_, policy, block_graph, header_block)) {
155 i : LOG(ERROR) << "Failed to insert basic-block frequency data.";
156 i : return false;
157 : }
158 :
159 : // Find or create the section we put our thunks in.
160 : thunk_section_ = block_graph->FindOrAddSection(common::kThunkSectionName,
161 E : pe::kCodeCharacteristics);
162 E : DCHECK(thunk_section_ != NULL);
163 :
164 E : return true;
165 E : }
166 :
167 : bool BasicBlockEntryHookTransform::OnBlock(
168 : const TransformPolicyInterface* policy,
169 : BlockGraph* block_graph,
170 E : BlockGraph::Block* block) {
171 E : DCHECK(policy != NULL);
172 E : DCHECK(block_graph != NULL);
173 E : DCHECK(block != NULL);
174 E : DCHECK(thunk_section_ != NULL);
175 :
176 : // Skip blocks created by this transform.
177 E : if (block->section() == thunk_section_->id())
178 i : return true;
179 :
180 E : if (block->type() != BlockGraph::CODE_BLOCK)
181 E : return true;
182 :
183 E : if (!policy->BlockIsSafeToBasicBlockDecompose(block)) {
184 E : if (!ThunkNonDecomposableCodeBlock(block_graph, block))
185 i : return false;
186 E : return true;
187 : }
188 :
189 E : if (!ApplyBasicBlockSubGraphTransform(this, policy, block_graph, block, NULL))
190 i : return false;
191 :
192 E : return true;
193 E : }
194 :
195 : bool BasicBlockEntryHookTransform::TransformBasicBlockSubGraph(
196 : const TransformPolicyInterface* policy,
197 : BlockGraph* block_graph ,
198 E : BasicBlockSubGraph* subgraph) {
199 : // TODO(rogerm): A lot of this is boilerplate that can be hoisted to an
200 : // IterativeBasicBlockSubgraphTransform (or some such). In particular,
201 : // iterating the subgraph, dispatch on code/data basic block, and the
202 : // bb_ranges_ are duplicated in the coverage transform.
203 E : DCHECK(policy != NULL);
204 E : DCHECK(block_graph != NULL);
205 E : DCHECK(subgraph != NULL);
206 E : DCHECK(bb_entry_hook_ref_.IsValid());
207 E : DCHECK(add_frequency_data_.frequency_data_block() != NULL);
208 :
209 : // Insert a call to the basic-block entry hook at the top of each code
210 : // basic-block.
211 : BasicBlockSubGraph::BBCollection::iterator it =
212 E : subgraph->basic_blocks().begin();
213 E : for (; it != subgraph->basic_blocks().end(); ++it) {
214 E : BasicCodeBlock* bb = BasicCodeBlock::Cast(*it);
215 E : if (bb == NULL || bb->is_padding())
216 E : continue;
217 :
218 : // Find the source range associated with this basic-block.
219 E : BlockGraph::Block::SourceRange source_range;
220 E : if (!GetBasicBlockSourceRange(*bb, &source_range)) {
221 i : LOG(ERROR) << "Unable to get source range for basic block '"
222 : << bb->name() << "'";
223 i : return false;
224 : }
225 :
226 : // We use the location/index in the bb_ranges vector of the current
227 : // basic-block range as the basic_block_id, and we pass a pointer to
228 : // the frequency data block as the module_data parameter. We then make
229 : // a memory indirect call to the bb_entry_hook.
230 E : auto basic_block_id(Immediate(bb_ranges_.size(), assm::kSize32Bit));
231 E : auto module_data(Immediate(add_frequency_data_.frequency_data_block(), 0));
232 : auto bb_entry_hook(Operand(Displacement(bb_entry_hook_ref_.referenced(),
233 E : bb_entry_hook_ref_.offset())));
234 :
235 : // Assemble entry hook instrumentation into the instruction stream.
236 E : BasicBlockAssembler bb_asm(bb->instructions().begin(), &bb->instructions());
237 :
238 E : bb_asm.push(basic_block_id);
239 E : bb_asm.push(module_data);
240 E : bb_asm.call(bb_entry_hook);
241 :
242 E : bb_ranges_.push_back(source_range);
243 E : }
244 :
245 E : return true;
246 E : }
247 :
248 : bool BasicBlockEntryHookTransform::PostBlockGraphIteration(
249 : const TransformPolicyInterface* policy,
250 : BlockGraph* block_graph,
251 E : BlockGraph::Block* header_block) {
252 E : DCHECK(policy != NULL);
253 E : DCHECK(block_graph != NULL);
254 E : DCHECK(header_block != NULL);
255 :
256 E : size_t num_basic_blocks = bb_ranges_.size();
257 E : if (num_basic_blocks == 0) {
258 i : LOG(WARNING) << "Encountered no basic code blocks during instrumentation.";
259 i : return true;
260 : }
261 :
262 : if (!add_frequency_data_.ConfigureFrequencyDataBuffer(num_basic_blocks,
263 : 1,
264 E : sizeof(uint32))) {
265 i : LOG(ERROR) << "Failed to configure frequency data buffer.";
266 i : return false;
267 : }
268 :
269 : // Initialized BasicBlock agent specific fields
270 E : block_graph::TypedBlock<ThreadLocalIndexedFrequencyData> frequency_data;
271 E : CHECK(frequency_data.Init(0, add_frequency_data_.frequency_data_block()));
272 E : frequency_data->fs_slot = 0;
273 E : frequency_data->tls_index = TLS_OUT_OF_INDEXES;
274 :
275 : // Add the module entry thunks.
276 E : EntryThunkTransform add_thunks;
277 E : add_thunks.set_only_instrument_module_entry(true);
278 E : add_thunks.set_instrument_dll_name(instrument_dll_name_);
279 E : add_thunks.set_src_ranges_for_thunks(set_src_ranges_for_thunks_);
280 :
281 E : auto module_data(Immediate(add_frequency_data_.frequency_data_block(), 0));
282 E : if (!add_thunks.SetEntryThunkParameter(module_data)) {
283 i : LOG(ERROR) << "Failed to configure the entry thunks with the module_data "
284 : << "parameter.";
285 i : return false;
286 : }
287 :
288 : if (!ApplyBlockGraphTransform(
289 E : &add_thunks, policy, block_graph, header_block)) {
290 i : LOG(ERROR) << "Unable to thunk module entry points.";
291 i : return false;
292 : }
293 E : thunk_section_ = add_thunks.thunk_section();
294 E : DCHECK(thunk_section_ != NULL);
295 :
296 : #ifndef NDEBUG
297 : // If we're in debug mode then sanity check the basic block ranges. When
298 : // sorted, they should not overlap.
299 E : RelativeAddressRangeVector bb_ranges_copy(bb_ranges_);
300 E : std::sort(bb_ranges_copy.begin(), bb_ranges_copy.end());
301 : DCHECK(std::adjacent_find(bb_ranges_copy.begin(),
302 : bb_ranges_copy.end(),
303 : RelativeAddressRangesOverlapFunctor()) ==
304 E : bb_ranges_copy.end());
305 : #endif
306 :
307 E : return true;
308 E : }
309 :
310 : bool BasicBlockEntryHookTransform::ThunkNonDecomposableCodeBlock(
311 E : BlockGraph* block_graph, BlockGraph::Block* code_block) {
312 E : DCHECK(block_graph != NULL);
313 E : DCHECK(code_block != NULL);
314 :
315 : // Typedef for the thunk block map. The key is the offset within the callee
316 : // block and the value is the thunk block that forwards to the callee at that
317 : // offset.
318 : typedef std::map<BlockGraph::Offset, BlockGraph::Block*> ThunkBlockMap;
319 :
320 : // We keep a cache of thunks we've already created (by target offset of the
321 : // entry-point into the block) so that we only create one thunk per entry
322 : // point.
323 E : ThunkBlockMap thunk_block_map;
324 :
325 : // Iterate through all the block's referrers, creating thunks as we go.
326 : // We copy the referrer set for simplicity, as it's potentially mutated
327 : // in the loop.
328 E : BlockGraph::Block::ReferrerSet referrers = code_block->referrers();
329 E : BlockGraph::Block::ReferrerSet::const_iterator referrer_it(referrers.begin());
330 E : for (; referrer_it != referrers.end(); ++referrer_it) {
331 E : const BlockGraph::Block::Referrer& referrer = *referrer_it;
332 : if (!EnsureReferrerIsThunked(
333 E : referrer, block_graph, code_block, &thunk_block_map)) {
334 i : return false;
335 : }
336 E : }
337 :
338 E : return true;
339 E : }
340 :
341 : bool BasicBlockEntryHookTransform::EnsureReferrerIsThunked(
342 : const BlockGraph::Block::Referrer& referrer,
343 : BlockGraph* block_graph,
344 : BlockGraph::Block* code_block,
345 E : ThunkBlockMap* thunk_block_map) {
346 E : DCHECK(block_graph != NULL);
347 E : DCHECK(code_block != NULL);
348 E : DCHECK(thunk_block_map != NULL);
349 :
350 : // Get the reference.
351 E : BlockGraph::Reference ref;
352 E : if (!referrer.first->GetReference(referrer.second, &ref)) {
353 i : LOG(ERROR) << "Unable to get reference from referrer.";
354 i : return false;
355 : }
356 E : DCHECK_EQ(code_block, ref.referenced());
357 :
358 : // Skip self-references, except long references to the start of the block.
359 : // Note: This may currently miss important cases. Notably if a block contains
360 : // more than one function, and the functions are mutually recursive, we'll
361 : // only record the original entry to the block, but will miss the internal
362 : // recursion. As-is, this does work for the common case where a block
363 : // contains one self-recursive function, however.
364 E : if (referrer.first == code_block) {
365 : // Skip short references.
366 E : if (ref.size() < sizeof(core::AbsoluteAddress))
367 i : return true;
368 :
369 : // Skip interior references. The block is not bb-decomposable so there is
370 : // nothing for us to do with them.
371 E : if (ref.offset() != 0)
372 E : return true;
373 : }
374 :
375 : // Get a thunk for the referenced offset from the thunk block map, creating
376 : // a new one if one does not already exist.
377 E : BlockGraph::Block* thunk_block = NULL;
378 : if (!FindOrCreateThunk(block_graph, thunk_block_map, code_block, ref.offset(),
379 E : &thunk_block)) {
380 i : LOG(ERROR) << "Unable to create thunk block.";
381 i : return false;
382 : }
383 E : DCHECK(thunk_block != NULL);
384 :
385 : // Update the referrer to point to the thunk.
386 : BlockGraph::Reference new_ref(ref.type(),
387 : ref.size(),
388 : thunk_block,
389 E : 0, 0);
390 E : referrer.first->SetReference(referrer.second, new_ref);
391 :
392 E : return true;
393 E : }
394 :
395 : bool BasicBlockEntryHookTransform::FindOrCreateThunk(
396 : BlockGraph* block_graph,
397 : ThunkBlockMap* thunk_block_map,
398 : BlockGraph::Block* code_block,
399 : BlockGraph::Offset offset,
400 E : BlockGraph::Block** thunk) {
401 E : DCHECK(block_graph != NULL);
402 E : DCHECK(thunk_block_map != NULL);
403 E : DCHECK(code_block != NULL);
404 E : DCHECK(thunk != NULL);
405 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
406 :
407 : // Do we already have a thunk defined for this offset? If so, return it.
408 E : ThunkBlockMap::const_iterator thunk_it = thunk_block_map->find(offset);
409 E : if (thunk_it != thunk_block_map->end()) {
410 E : *thunk = thunk_it->second;
411 E : return true;
412 : }
413 :
414 E : *thunk = NULL;
415 :
416 : // Determine the name for this thunk.
417 E : std::string name;
418 E : if (offset == 0) {
419 : name = base::StringPrintf("%s%s",
420 : code_block->name().c_str(),
421 E : common::kThunkSuffix);
422 E : } else {
423 : name = base::StringPrintf("%s%s+%d",
424 : code_block->name().c_str(),
425 : common::kThunkSuffix,
426 E : offset);
427 : }
428 :
429 : // Set up a basic block subgraph containing a single block description, with
430 : // that block description containing a single empty basic block, and get an
431 : // assembler writing into that basic block.
432 E : BasicBlockSubGraph subgraph;
433 E : BasicCodeBlock* bb = subgraph.AddBasicCodeBlock(name);
434 : BasicBlockSubGraph::BlockDescription* desc = subgraph.AddBlockDescription(
435 : name,
436 : NULL,
437 : BlockGraph::CODE_BLOCK,
438 : thunk_section_->id(),
439 : 1,
440 E : 0);
441 E : desc->basic_block_order.push_back(bb);
442 :
443 : // Find the source range associated with this block.
444 E : BlockGraph::Block::SourceRange source_range;
445 E : if (!code_block->source_ranges().empty())
446 E : source_range = code_block->source_ranges().range_pair(0).second;
447 :
448 : // Make sure we only push the source range if we have not already created
449 : // a source range mapping for this block (i.e., if the non-decomposable
450 : // block has multiple entry points, we want them to share an id). We can do
451 : // this because we handle one block at a time; so, all of a block's thunks
452 : // will be created as a group. This assertion is sanity checked in the
453 : // PostBlockGraphIteration function's check for overlapping source ranges.
454 E : if (bb_ranges_.empty() || source_range != bb_ranges_.back())
455 E : bb_ranges_.push_back(source_range);
456 :
457 : // We use the location/index in the bb_ranges vector of the current
458 : // basic-block range as the basic_block_id, and we pass a pointer to
459 : // the frequency data block as the module_data parameter. We then make
460 : // a memory indirect call to the bb_entry_hook.
461 E : auto basic_block_id(Immediate(bb_ranges_.size()-1, assm::kSize32Bit));
462 E : auto module_data(Immediate(add_frequency_data_.frequency_data_block(), 0));
463 E : auto original_function(Immediate(code_block, offset));
464 : auto bb_entry_hook(Operand(Displacement(bb_entry_hook_ref_.referenced(),
465 E : bb_entry_hook_ref_.offset())));
466 :
467 : // Assemble entry hook instrumentation into the thunk's instruction stream.
468 : // Note that we turn this into a simulated call, so that the return from
469 : // the bb entry hook continues from the thunked function.
470 E : BasicBlockAssembler bb_asm(bb->instructions().begin(), &bb->instructions());
471 E : bb_asm.push(basic_block_id);
472 E : bb_asm.push(module_data);
473 E : bb_asm.push(original_function);
474 E : bb_asm.jmp(bb_entry_hook);
475 :
476 : // Condense the whole mess into a block.
477 E : BlockBuilder block_builder(block_graph);
478 E : if (!block_builder.Merge(&subgraph)) {
479 i : LOG(ERROR) << "Failed to build thunk block.";
480 i : return false;
481 : }
482 :
483 : // Exactly one new block should have been created.
484 E : DCHECK_EQ(1u, block_builder.new_blocks().size());
485 E : *thunk = block_builder.new_blocks().front();
486 E : (*thunk_block_map)[offset] = *thunk;
487 :
488 E : return true;
489 E : }
490 :
491 : } // namespace transforms
492 : } // namespace instrument
|