1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implements the BasicBlockEntryHookTransform class.
16 :
17 : #include "syzygy/instrument/transforms/basic_block_entry_hook_transform.h"
18 :
19 : #include "base/logging.h"
20 : #include "base/strings/string_util.h"
21 : #include "base/strings/stringprintf.h"
22 : #include "syzygy/block_graph/block_builder.h"
23 : #include "syzygy/block_graph/block_util.h"
24 : #include "syzygy/common/defs.h"
25 : #include "syzygy/common/indexed_frequency_data.h"
26 : #include "syzygy/instrument/transforms/entry_thunk_transform.h"
27 : #include "syzygy/pe/pe_utils.h"
28 : #include "syzygy/pe/transforms/pe_add_imports_transform.h"
29 :
30 : namespace instrument {
31 : namespace transforms {
32 :
33 : namespace {
34 :
35 : using block_graph::BasicBlock;
36 : using block_graph::BasicBlockAssembler;
37 : using block_graph::BasicBlockReference;
38 : using block_graph::BasicCodeBlock;
39 : using block_graph::BlockBuilder;
40 : using block_graph::BlockGraph;
41 : using block_graph::Displacement;
42 : using block_graph::Immediate;
43 : using block_graph::Operand;
44 : using block_graph::Successor;
45 : using block_graph::TransformPolicyInterface;
46 : using common::kBasicBlockEntryAgentId;
47 : using common::ThreadLocalIndexedFrequencyData;
48 : using pe::transforms::PEAddImportsTransform;
49 :
50 : typedef BasicBlockEntryHookTransform::RelativeAddressRange RelativeAddressRange;
51 : typedef pe::transforms::ImportedModule ImportedModule;
52 :
53 : const char kDefaultModuleName[] = "basic_block_entry_client.dll";
54 : const char kBasicBlockEnter[] = "_increment_indexed_freq_data";
55 :
56 : // Compares two relative address ranges to see if they overlap. Assumes they
57 : // are already sorted. This is used to validate basic-block ranges.
58 : struct RelativeAddressRangesOverlapFunctor {
59 : bool operator()(const RelativeAddressRange& r1,
60 E : const RelativeAddressRange& r2) const {
61 E : DCHECK_LT(r1.start(), r2.start());
62 :
63 E : if (r1.end() > r2.start())
64 i : return true;
65 :
66 E : return false;
67 E : }
68 : };
69 :
70 : // Sets up the basic-block entry and the frequency data hooks import.
71 : bool SetupEntryHooks(const TransformPolicyInterface* policy,
72 : BlockGraph* block_graph,
73 : BlockGraph::Block* header_block,
74 : const std::string& module_name,
75 E : BlockGraph::Reference* basic_block_enter) {
76 E : DCHECK(block_graph != NULL);
77 E : DCHECK(header_block != NULL);
78 E : DCHECK(basic_block_enter != NULL);
79 :
80 : // Setup the import module.
81 E : ImportedModule module(module_name);
82 E : size_t bb_index = module.AddSymbol(kBasicBlockEnter,
83 : ImportedModule::kAlwaysImport);
84 :
85 : // Setup the add-imports transform.
86 E : PEAddImportsTransform add_imports;
87 E : add_imports.AddModule(&module);
88 :
89 : // Add the imports to the block-graph.
90 E : if (!ApplyBlockGraphTransform(
91 : &add_imports, policy, block_graph, header_block)) {
92 i : LOG(ERROR) << "Unable to add import entry hook functions.";
93 i : return false;
94 : }
95 :
96 : // Get a reference to the entry-hook function.
97 E : if (!module.GetSymbolReference(bb_index, basic_block_enter)) {
98 i : LOG(ERROR) << "Unable to get " << kBasicBlockEnter << ".";
99 i : return false;
100 : }
101 E : DCHECK(basic_block_enter->IsValid());
102 :
103 E : return true;
104 E : }
105 :
106 : void AddSuccessorBetween(Successor::Condition condition,
107 : BasicCodeBlock* from,
108 : BasicCodeBlock* to) {
109 : from->successors().push_back(
110 : Successor(condition,
111 : BasicBlockReference(BlockGraph::RELATIVE_REF,
112 : BlockGraph::Reference::kMaximumSize,
113 : to),
114 : 0));
115 : }
116 :
117 : } // namespace
118 :
119 : const char BasicBlockEntryHookTransform::kTransformName[] =
120 : "BasicBlockEntryHookTransform";
121 :
122 : BasicBlockEntryHookTransform::BasicBlockEntryHookTransform()
123 E : : add_frequency_data_(kBasicBlockEntryAgentId,
124 : "Basic-Block Frequency Data",
125 : common::kBasicBlockFrequencyDataVersion,
126 : common::IndexedFrequencyData::BASIC_BLOCK_ENTRY,
127 : sizeof(ThreadLocalIndexedFrequencyData)),
128 E : thunk_section_(NULL),
129 E : instrument_dll_name_(kDefaultModuleName),
130 E : set_src_ranges_for_thunks_(false),
131 E : set_inline_fast_path_(false) {
132 E : }
133 :
134 : bool BasicBlockEntryHookTransform::PreBlockGraphIteration(
135 : const TransformPolicyInterface* policy,
136 : BlockGraph* block_graph,
137 E : BlockGraph::Block* header_block) {
138 E : DCHECK_NE(reinterpret_cast<TransformPolicyInterface*>(NULL), policy);
139 E : DCHECK_NE(reinterpret_cast<BlockGraph*>(NULL), block_graph);
140 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), header_block);
141 E : DCHECK_EQ(BlockGraph::PE_IMAGE, block_graph->image_format());
142 :
143 : // Setup basic block entry and the frequency data hooks.
144 E : if (!SetupEntryHooks(policy,
145 : block_graph,
146 : header_block,
147 : instrument_dll_name_,
148 : &bb_entry_hook_ref_)) {
149 i : return false;
150 : }
151 :
152 : // Add the static basic-block frequency data.
153 E : if (!ApplyBlockGraphTransform(
154 : &add_frequency_data_, policy, block_graph, header_block)) {
155 i : LOG(ERROR) << "Failed to insert basic-block frequency data.";
156 i : return false;
157 : }
158 :
159 : // Find or create the section we put our thunks in.
160 E : thunk_section_ = block_graph->FindOrAddSection(common::kThunkSectionName,
161 : pe::kCodeCharacteristics);
162 E : DCHECK(thunk_section_ != NULL);
163 :
164 E : return true;
165 E : }
166 :
167 : bool BasicBlockEntryHookTransform::OnBlock(
168 : const TransformPolicyInterface* policy,
169 : BlockGraph* block_graph,
170 E : BlockGraph::Block* block) {
171 E : DCHECK(policy != NULL);
172 E : DCHECK(block_graph != NULL);
173 E : DCHECK(block != NULL);
174 E : DCHECK(thunk_section_ != NULL);
175 :
176 : // Skip blocks created by this transform.
177 E : if (block->section() == thunk_section_->id())
178 i : return true;
179 :
180 E : if (block->type() != BlockGraph::CODE_BLOCK)
181 E : return true;
182 :
183 E : if (!policy->BlockIsSafeToBasicBlockDecompose(block)) {
184 E : if (!ThunkNonDecomposableCodeBlock(block_graph, block))
185 i : return false;
186 E : return true;
187 : }
188 :
189 E : if (!ApplyBasicBlockSubGraphTransform(this, policy, block_graph, block, NULL))
190 i : return false;
191 :
192 E : return true;
193 E : }
194 :
195 : bool BasicBlockEntryHookTransform::TransformBasicBlockSubGraph(
196 : const TransformPolicyInterface* policy,
197 : BlockGraph* block_graph ,
198 E : BasicBlockSubGraph* subgraph) {
199 : // TODO(rogerm): A lot of this is boilerplate that can be hoisted to an
200 : // IterativeBasicBlockSubgraphTransform (or some such). In particular,
201 : // iterating the subgraph, dispatch on code/data basic block, and the
202 : // bb_ranges_ are duplicated in the coverage transform.
203 E : DCHECK(policy != NULL);
204 E : DCHECK(block_graph != NULL);
205 E : DCHECK(subgraph != NULL);
206 E : DCHECK(bb_entry_hook_ref_.IsValid());
207 E : DCHECK(add_frequency_data_.frequency_data_block() != NULL);
208 :
209 : // Insert a call to the basic-block entry hook at the top of each code
210 : // basic-block.
211 : BasicBlockSubGraph::BBCollection::iterator it =
212 E : subgraph->basic_blocks().begin();
213 E : for (; it != subgraph->basic_blocks().end(); ++it) {
214 E : BasicCodeBlock* bb = BasicCodeBlock::Cast(*it);
215 E : if (bb == NULL || bb->is_padding())
216 E : continue;
217 :
218 : // Find the source range associated with this basic-block.
219 E : BlockGraph::Block::SourceRange source_range;
220 E : if (!GetBasicBlockSourceRange(*bb, &source_range)) {
221 i : LOG(ERROR) << "Unable to get source range for basic block '"
222 : << bb->name() << "'";
223 i : return false;
224 : }
225 :
226 : // We use the location/index in the bb_ranges vector of the current
227 : // basic-block range as the basic_block_id, and we pass a pointer to
228 : // the frequency data block as the module_data parameter. We then make
229 : // a memory indirect call to the bb_entry_hook.
230 E : auto basic_block_id(Immediate(bb_ranges_.size(), assm::kSize32Bit));
231 E : auto module_data(Immediate(add_frequency_data_.frequency_data_block(), 0));
232 E : auto bb_entry_hook(Operand(Displacement(bb_entry_hook_ref_.referenced(),
233 : bb_entry_hook_ref_.offset())));
234 :
235 : // Assemble entry hook instrumentation into the instruction stream.
236 E : BasicBlockAssembler bb_asm(bb->instructions().begin(), &bb->instructions());
237 :
238 E : bb_asm.push(basic_block_id);
239 E : bb_asm.push(module_data);
240 E : bb_asm.call(bb_entry_hook);
241 :
242 E : bb_ranges_.push_back(source_range);
243 E : }
244 :
245 E : return true;
246 E : }
247 :
248 : bool BasicBlockEntryHookTransform::PostBlockGraphIteration(
249 : const TransformPolicyInterface* policy,
250 : BlockGraph* block_graph,
251 E : BlockGraph::Block* header_block) {
252 E : DCHECK(policy != NULL);
253 E : DCHECK(block_graph != NULL);
254 E : DCHECK(header_block != NULL);
255 :
256 E : size_t num_basic_blocks = bb_ranges_.size();
257 E : if (num_basic_blocks == 0) {
258 i : LOG(WARNING) << "Encountered no basic code blocks during instrumentation.";
259 i : return true;
260 : }
261 :
262 E : if (!add_frequency_data_.ConfigureFrequencyDataBuffer(num_basic_blocks, 1,
263 : sizeof(uint32_t))) {
264 i : LOG(ERROR) << "Failed to configure frequency data buffer.";
265 i : return false;
266 : }
267 :
268 : // Initialized BasicBlock agent specific fields
269 E : block_graph::TypedBlock<ThreadLocalIndexedFrequencyData> frequency_data;
270 E : CHECK(frequency_data.Init(0, add_frequency_data_.frequency_data_block()));
271 E : frequency_data->fs_slot = 0;
272 E : frequency_data->tls_index = TLS_OUT_OF_INDEXES;
273 :
274 : // Add the module entry thunks.
275 E : EntryThunkTransform add_thunks;
276 E : add_thunks.set_only_instrument_module_entry(true);
277 E : add_thunks.set_instrument_dll_name(instrument_dll_name_);
278 E : add_thunks.set_src_ranges_for_thunks(set_src_ranges_for_thunks_);
279 :
280 E : auto module_data(Immediate(add_frequency_data_.frequency_data_block(), 0));
281 E : if (!add_thunks.SetEntryThunkParameter(module_data)) {
282 i : LOG(ERROR) << "Failed to configure the entry thunks with the module_data "
283 : << "parameter.";
284 i : return false;
285 : }
286 :
287 E : if (!ApplyBlockGraphTransform(
288 : &add_thunks, policy, block_graph, header_block)) {
289 i : LOG(ERROR) << "Unable to thunk module entry points.";
290 i : return false;
291 : }
292 E : thunk_section_ = add_thunks.thunk_section();
293 E : DCHECK(thunk_section_ != NULL);
294 :
295 : #ifndef NDEBUG
296 : // If we're in debug mode then sanity check the basic block ranges. When
297 : // sorted, they should not overlap.
298 E : RelativeAddressRangeVector bb_ranges_copy(bb_ranges_);
299 E : std::sort(bb_ranges_copy.begin(), bb_ranges_copy.end());
300 E : DCHECK(std::adjacent_find(bb_ranges_copy.begin(),
301 : bb_ranges_copy.end(),
302 : RelativeAddressRangesOverlapFunctor()) ==
303 : bb_ranges_copy.end());
304 : #endif
305 :
306 E : return true;
307 E : }
308 :
309 : bool BasicBlockEntryHookTransform::ThunkNonDecomposableCodeBlock(
310 E : BlockGraph* block_graph, BlockGraph::Block* code_block) {
311 E : DCHECK(block_graph != NULL);
312 E : DCHECK(code_block != NULL);
313 :
314 : // Typedef for the thunk block map. The key is the offset within the callee
315 : // block and the value is the thunk block that forwards to the callee at that
316 : // offset.
317 : typedef std::map<BlockGraph::Offset, BlockGraph::Block*> ThunkBlockMap;
318 :
319 : // We keep a cache of thunks we've already created (by target offset of the
320 : // entry-point into the block) so that we only create one thunk per entry
321 : // point.
322 E : ThunkBlockMap thunk_block_map;
323 :
324 : // Iterate through all the block's referrers, creating thunks as we go.
325 : // We copy the referrer set for simplicity, as it's potentially mutated
326 : // in the loop.
327 E : BlockGraph::Block::ReferrerSet referrers = code_block->referrers();
328 E : BlockGraph::Block::ReferrerSet::const_iterator referrer_it(referrers.begin());
329 E : for (; referrer_it != referrers.end(); ++referrer_it) {
330 E : const BlockGraph::Block::Referrer& referrer = *referrer_it;
331 E : if (!EnsureReferrerIsThunked(
332 : referrer, block_graph, code_block, &thunk_block_map)) {
333 i : return false;
334 : }
335 E : }
336 :
337 E : return true;
338 E : }
339 :
340 : bool BasicBlockEntryHookTransform::EnsureReferrerIsThunked(
341 : const BlockGraph::Block::Referrer& referrer,
342 : BlockGraph* block_graph,
343 : BlockGraph::Block* code_block,
344 E : ThunkBlockMap* thunk_block_map) {
345 E : DCHECK(block_graph != NULL);
346 E : DCHECK(code_block != NULL);
347 E : DCHECK(thunk_block_map != NULL);
348 :
349 : // Get the reference.
350 E : BlockGraph::Reference ref;
351 E : if (!referrer.first->GetReference(referrer.second, &ref)) {
352 i : LOG(ERROR) << "Unable to get reference from referrer.";
353 i : return false;
354 : }
355 E : DCHECK_EQ(code_block, ref.referenced());
356 :
357 : // Skip self-references, except long references to the start of the block.
358 : // Note: This may currently miss important cases. Notably if a block contains
359 : // more than one function, and the functions are mutually recursive, we'll
360 : // only record the original entry to the block, but will miss the internal
361 : // recursion. As-is, this does work for the common case where a block
362 : // contains one self-recursive function, however.
363 E : if (referrer.first == code_block) {
364 : // Skip short references.
365 E : if (ref.size() < sizeof(core::AbsoluteAddress))
366 i : return true;
367 :
368 : // Skip interior references. The block is not bb-decomposable so there is
369 : // nothing for us to do with them.
370 E : if (ref.offset() != 0)
371 E : return true;
372 : }
373 :
374 : // Get a thunk for the referenced offset from the thunk block map, creating
375 : // a new one if one does not already exist.
376 E : BlockGraph::Block* thunk_block = NULL;
377 E : if (!FindOrCreateThunk(block_graph, thunk_block_map, code_block, ref.offset(),
378 : &thunk_block)) {
379 i : LOG(ERROR) << "Unable to create thunk block.";
380 i : return false;
381 : }
382 E : DCHECK(thunk_block != NULL);
383 :
384 : // Update the referrer to point to the thunk.
385 E : BlockGraph::Reference new_ref(ref.type(),
386 : ref.size(),
387 : thunk_block,
388 : 0, 0);
389 E : referrer.first->SetReference(referrer.second, new_ref);
390 :
391 E : return true;
392 E : }
393 :
394 : bool BasicBlockEntryHookTransform::FindOrCreateThunk(
395 : BlockGraph* block_graph,
396 : ThunkBlockMap* thunk_block_map,
397 : BlockGraph::Block* code_block,
398 : BlockGraph::Offset offset,
399 E : BlockGraph::Block** thunk) {
400 E : DCHECK(block_graph != NULL);
401 E : DCHECK(thunk_block_map != NULL);
402 E : DCHECK(code_block != NULL);
403 E : DCHECK(thunk != NULL);
404 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
405 :
406 : // Do we already have a thunk defined for this offset? If so, return it.
407 E : ThunkBlockMap::const_iterator thunk_it = thunk_block_map->find(offset);
408 E : if (thunk_it != thunk_block_map->end()) {
409 E : *thunk = thunk_it->second;
410 E : return true;
411 : }
412 :
413 E : *thunk = NULL;
414 :
415 : // Determine the name for this thunk.
416 E : std::string name;
417 E : if (offset == 0) {
418 E : name = base::StringPrintf("%s%s",
419 : code_block->name().c_str(),
420 : common::kThunkSuffix);
421 E : } else {
422 E : name = base::StringPrintf("%s%s+%d",
423 : code_block->name().c_str(),
424 : common::kThunkSuffix,
425 : offset);
426 : }
427 :
428 : // Set up a basic block subgraph containing a single block description, with
429 : // that block description containing a single empty basic block, and get an
430 : // assembler writing into that basic block.
431 E : BasicBlockSubGraph subgraph;
432 E : BasicCodeBlock* bb = subgraph.AddBasicCodeBlock(name);
433 E : BasicBlockSubGraph::BlockDescription* desc = subgraph.AddBlockDescription(
434 : name,
435 : NULL,
436 : BlockGraph::CODE_BLOCK,
437 : thunk_section_->id(),
438 : 1,
439 : 0);
440 E : desc->basic_block_order.push_back(bb);
441 :
442 : // Find the source range associated with this block.
443 E : BlockGraph::Block::SourceRange source_range;
444 E : if (!code_block->source_ranges().empty())
445 E : source_range = code_block->source_ranges().range_pair(0).second;
446 :
447 : // Make sure we only push the source range if we have not already created
448 : // a source range mapping for this block (i.e., if the non-decomposable
449 : // block has multiple entry points, we want them to share an id). We can do
450 : // this because we handle one block at a time; so, all of a block's thunks
451 : // will be created as a group. This assertion is sanity checked in the
452 : // PostBlockGraphIteration function's check for overlapping source ranges.
453 E : if (bb_ranges_.empty() || source_range != bb_ranges_.back())
454 E : bb_ranges_.push_back(source_range);
455 :
456 : // We use the location/index in the bb_ranges vector of the current
457 : // basic-block range as the basic_block_id, and we pass a pointer to
458 : // the frequency data block as the module_data parameter. We then make
459 : // a memory indirect call to the bb_entry_hook.
460 E : auto basic_block_id(Immediate(bb_ranges_.size()-1, assm::kSize32Bit));
461 E : auto module_data(Immediate(add_frequency_data_.frequency_data_block(), 0));
462 E : auto original_function(Immediate(code_block, offset));
463 E : auto bb_entry_hook(Operand(Displacement(bb_entry_hook_ref_.referenced(),
464 : bb_entry_hook_ref_.offset())));
465 :
466 : // Assemble entry hook instrumentation into the thunk's instruction stream.
467 : // Note that we turn this into a simulated call, so that the return from
468 : // the bb entry hook continues from the thunked function.
469 E : BasicBlockAssembler bb_asm(bb->instructions().begin(), &bb->instructions());
470 E : bb_asm.push(basic_block_id);
471 E : bb_asm.push(module_data);
472 E : bb_asm.push(original_function);
473 E : bb_asm.jmp(bb_entry_hook);
474 :
475 : // Condense the whole mess into a block.
476 E : BlockBuilder block_builder(block_graph);
477 E : if (!block_builder.Merge(&subgraph)) {
478 i : LOG(ERROR) << "Failed to build thunk block.";
479 i : return false;
480 : }
481 :
482 : // Exactly one new block should have been created.
483 E : DCHECK_EQ(1u, block_builder.new_blocks().size());
484 E : *thunk = block_builder.new_blocks().front();
485 E : (*thunk_block_map)[offset] = *thunk;
486 :
487 E : return true;
488 E : }
489 :
490 : } // namespace transforms
491 : } // namespace instrument
|