1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/instrument/transforms/entry_thunk_transform.h"
16 :
17 : #include "base/logging.h"
18 : #include "base/strings/stringprintf.h"
19 : #include "syzygy/block_graph/block_builder.h"
20 : #include "syzygy/block_graph/block_util.h"
21 : #include "syzygy/common/defs.h"
22 : #include "syzygy/pe/pe_utils.h"
23 : #include "syzygy/pe/transforms/pe_add_imports_transform.h"
24 :
25 : namespace instrument {
26 : namespace transforms {
27 :
28 : using block_graph::BasicBlock;
29 : using block_graph::BasicCodeBlock;
30 : using block_graph::BasicBlockAssembler;
31 : using block_graph::BasicBlockReference;
32 : using block_graph::BasicBlockSubGraph;
33 : using block_graph::BlockBuilder;
34 : using block_graph::BlockGraph;
35 : using block_graph::Displacement;
36 : using block_graph::Operand;
37 : using block_graph::TransformPolicyInterface;
38 : using pe::transforms::PEAddImportsTransform;
39 :
40 : typedef pe::transforms::ImportedModule ImportedModule;
41 :
42 : const char EntryThunkTransform::kTransformName[] =
43 : "EntryThunkTransform";
44 :
45 : const char EntryThunkTransform::kEntryHookName[] = "_indirect_penter";
46 : const char EntryThunkTransform::kDllMainEntryHookName[] =
47 : "_indirect_penter_dllmain";
48 : const char EntryThunkTransform::kExeMainEntryHookName[] =
49 : "_indirect_penter_exemain";
50 : const char EntryThunkTransform::kDefaultInstrumentDll[] =
51 : "call_trace_client.dll";
52 :
53 : EntryThunkTransform::EntryThunkTransform()
54 : : thunk_section_(NULL),
55 : instrument_unsafe_references_(true),
56 : src_ranges_for_thunks_(false),
57 : only_instrument_module_entry_(false),
58 E : instrument_dll_name_(kDefaultInstrumentDll) {
59 E : }
60 :
61 : bool EntryThunkTransform::SetEntryThunkParameter(
62 E : const ImmediateType& immediate) {
63 : if (immediate.size() != assm::kSizeNone &&
64 E : immediate.size() != assm::kSize32Bit) {
65 E : return false;
66 : }
67 E : entry_thunk_parameter_ = immediate;
68 E : return true;
69 E : }
70 :
71 : bool EntryThunkTransform::SetFunctionThunkParameter(
72 E : const ImmediateType& immediate) {
73 : if (immediate.size() != assm::kSizeNone &&
74 E : immediate.size() != assm::kSize32Bit) {
75 E : return false;
76 : }
77 E : function_thunk_parameter_ = immediate;
78 E : return true;
79 E : }
80 :
81 E : bool EntryThunkTransform::EntryThunkIsParameterized() const {
82 E : return entry_thunk_parameter_.size() != assm::kSizeNone;
83 E : }
84 :
85 E : bool EntryThunkTransform::FunctionThunkIsParameterized() const {
86 E : return function_thunk_parameter_.size() != assm::kSizeNone;
87 E : }
88 :
89 : bool EntryThunkTransform::PreBlockGraphIteration(
90 : const TransformPolicyInterface* policy,
91 : BlockGraph* block_graph,
92 E : BlockGraph::Block* header_block) {
93 E : DCHECK_NE(reinterpret_cast<TransformPolicyInterface*>(NULL), policy);
94 E : DCHECK_NE(reinterpret_cast<BlockGraph*>(NULL), block_graph);
95 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), header_block);
96 E : DCHECK_EQ(BlockGraph::PE_IMAGE, block_graph->image_format());
97 E : DCHECK_EQ(reinterpret_cast<BlockGraph::Section*>(NULL), thunk_section_);
98 :
99 E : if (!GetEntryPoints(header_block))
100 i : return false;
101 :
102 E : ImportedModule import_module(instrument_dll_name_);
103 :
104 : // We import the minimal set of symbols necessary, depending on the types of
105 : // entry points we find in the module. We maintain a list of symbol indices/
106 : // reference pointers, which will be traversed after the import to populate
107 : // the references.
108 : typedef std::pair<size_t, BlockGraph::Reference*> ImportHook;
109 E : std::vector<ImportHook> import_hooks;
110 :
111 : // If there are any DllMain-like entry points (TLS initializers or DllMain
112 : // itself) then we need the DllMain entry hook.
113 E : if (dllmain_entrypoints_.size() > 0) {
114 : import_hooks.push_back(std::make_pair(
115 : import_module.AddSymbol(kDllMainEntryHookName,
116 : ImportedModule::kAlwaysImport),
117 E : &hook_dllmain_ref_));
118 : }
119 :
120 : // If this was an EXE then we need the EXE entry hook.
121 E : if (exe_entry_point_.first != NULL) {
122 : import_hooks.push_back(std::make_pair(
123 : import_module.AddSymbol(kExeMainEntryHookName,
124 : ImportedModule::kAlwaysImport),
125 E : &hook_exe_entry_ref_));
126 : }
127 :
128 : // If we're not only instrumenting module entry then we need the function
129 : // entry hook.
130 E : if (!only_instrument_module_entry_) {
131 : import_hooks.push_back(std::make_pair(
132 : import_module.AddSymbol(kEntryHookName,
133 : ImportedModule::kAlwaysImport),
134 E : &hook_ref_));
135 : }
136 :
137 : // Nothing to do if we don't need any import hooks.
138 E : if (import_hooks.empty())
139 E : return true;
140 :
141 : // Run the transform.
142 E : PEAddImportsTransform add_imports_transform;
143 E : add_imports_transform.AddModule(&import_module);
144 : if (!add_imports_transform.TransformBlockGraph(
145 E : policy, block_graph, header_block)) {
146 i : LOG(ERROR) << "Unable to add imports for instrumentation DLL.";
147 i : return false;
148 : }
149 :
150 : // Get references to each of the imported symbols.
151 E : for (size_t i = 0; i < import_hooks.size(); ++i) {
152 : if (!import_module.GetSymbolReference(import_hooks[i].first,
153 E : import_hooks[i].second)) {
154 i : LOG(ERROR) << "Unable to get reference to import.";
155 i : return false;
156 : }
157 E : }
158 :
159 : // Find or create the section we put our thunks in.
160 : thunk_section_ = block_graph->FindOrAddSection(common::kThunkSectionName,
161 E : pe::kCodeCharacteristics);
162 E : DCHECK(thunk_section_ != NULL);
163 :
164 E : return true;
165 E : }
166 :
167 : bool EntryThunkTransform::OnBlock(const TransformPolicyInterface* policy,
168 : BlockGraph* block_graph,
169 E : BlockGraph::Block* block) {
170 E : DCHECK(block != NULL);
171 :
172 E : if (block->type() != BlockGraph::CODE_BLOCK)
173 E : return true;
174 :
175 E : return InstrumentCodeBlock(block_graph, block);
176 E : }
177 :
178 : bool EntryThunkTransform::InstrumentCodeBlock(
179 E : BlockGraph* block_graph, BlockGraph::Block* block) {
180 E : DCHECK(block_graph != NULL);
181 E : DCHECK(block != NULL);
182 :
183 : // Typedef for the thunk block map. The key is the offset within the callee
184 : // block and the value is the thunk block that forwards to the callee at that
185 : // offset.
186 E : ThunkBlockMap thunk_block_map;
187 :
188 : // Iterate through all the block's referrers, creating thunks as we go.
189 : // We copy the referrer set for simplicity, as it's potentially mutated
190 : // in the loop.
191 E : BlockGraph::Block::ReferrerSet referrers = block->referrers();
192 E : BlockGraph::Block::ReferrerSet::const_iterator referrer_it(referrers.begin());
193 E : for (; referrer_it != referrers.end(); ++referrer_it) {
194 E : const BlockGraph::Block::Referrer& referrer = *referrer_it;
195 : if (!InstrumentCodeBlockReferrer(
196 E : referrer, block_graph, block, &thunk_block_map)) {
197 i : return false;
198 : }
199 E : }
200 :
201 E : return true;
202 E : }
203 :
204 : bool EntryThunkTransform::InstrumentCodeBlockReferrer(
205 : const BlockGraph::Block::Referrer& referrer,
206 : BlockGraph* block_graph,
207 : BlockGraph::Block* block,
208 E : ThunkBlockMap* thunk_block_map) {
209 E : DCHECK(block_graph != NULL);
210 E : DCHECK(block != NULL);
211 E : DCHECK(thunk_block_map != NULL);
212 :
213 : // Get the reference.
214 E : BlockGraph::Reference ref;
215 E : if (!referrer.first->GetReference(referrer.second, &ref)) {
216 i : LOG(ERROR) << "Unable to get reference from referrer.";
217 i : return false;
218 : }
219 :
220 : // Skip self-references, except long references to the start of the block.
221 : // TODO(siggi): This needs refining, as it may currently miss important
222 : // cases. Notably if a block contains more than one function, and the
223 : // functions are mutually recursive, we'll only record the original
224 : // entry to the block, but will miss the internal recursion.
225 : // As-is, this does work for the common case where a block contains
226 : // one self-recursive function, however.
227 E : if (referrer.first == block) {
228 : // Skip short references.
229 E : if (ref.size() < sizeof(core::AbsoluteAddress))
230 E : return true;
231 :
232 : // Skip interior references. The rationale for this is because these
233 : // references will tend to be switch tables, and we don't need the
234 : // overhead of instrumenting and recording all switch statement executions
235 : // for now.
236 E : if (ref.offset() != 0)
237 E : return true;
238 : }
239 :
240 : // See whether this is one of the DLL entrypoints.
241 E : pe::EntryPoint entry(ref.referenced(), ref.offset());
242 : pe::EntryPointSet::const_iterator entry_it(dllmain_entrypoints_.find(
243 E : entry));
244 E : bool is_dllmain_entry = entry_it != dllmain_entrypoints_.end();
245 :
246 : // Determine if this is an EXE entry point.
247 E : bool is_exe_entry = entry == exe_entry_point_;
248 :
249 : // It can't be both an EXE and a DLL entry.
250 E : DCHECK(!is_dllmain_entry || !is_exe_entry);
251 :
252 : // If we're only instrumenting entry points and this isn't one, then skip it.
253 E : if (only_instrument_module_entry_ && !is_dllmain_entry && !is_exe_entry)
254 E : return true;
255 :
256 : if (!instrument_unsafe_references_ &&
257 E : block_graph::IsUnsafeReference(referrer.first, ref)) {
258 E : LOG(INFO) << "Skipping reference between unsafe block pair '"
259 : << referrer.first->name() << "' and '"
260 : << block->name() << "'";
261 E : return true;
262 : }
263 :
264 : // Determine which hook function to use.
265 E : BlockGraph::Reference* hook_ref = &hook_ref_;
266 E : if (is_dllmain_entry)
267 E : hook_ref = &hook_dllmain_ref_;
268 E : else if (is_exe_entry)
269 E : hook_ref = &hook_exe_entry_ref_;
270 E : DCHECK(hook_ref->referenced() != NULL);
271 :
272 : // Determine which parameter to use, if any.
273 E : const ImmediateType* param = NULL;
274 E : if ((is_dllmain_entry || is_exe_entry) && EntryThunkIsParameterized()) {
275 E : param = &entry_thunk_parameter_;
276 E : } else if (FunctionThunkIsParameterized()) {
277 E : param = &function_thunk_parameter_;
278 : }
279 :
280 : // Look for the reference in the thunk block map, and only create a new one
281 : // if it does not already exist.
282 E : BlockGraph::Block* thunk_block = NULL;
283 E : ThunkBlockMap::const_iterator thunk_it = thunk_block_map->find(ref.offset());
284 E : if (thunk_it == thunk_block_map->end()) {
285 E : thunk_block = CreateOneThunk(block_graph, ref, *hook_ref, param);
286 E : if (thunk_block == NULL) {
287 i : LOG(ERROR) << "Unable to create thunk block.";
288 i : return false;
289 : }
290 E : (*thunk_block_map)[ref.offset()] = thunk_block;
291 E : } else {
292 E : thunk_block = thunk_it->second;
293 : }
294 E : DCHECK(thunk_block != NULL);
295 :
296 : // Update the referrer to point to the thunk.
297 : BlockGraph::Reference new_ref(ref.type(),
298 : ref.size(),
299 : thunk_block,
300 E : 0, 0);
301 E : referrer.first->SetReference(referrer.second, new_ref);
302 :
303 E : return true;
304 E : }
305 :
306 : BlockGraph::Block* EntryThunkTransform::CreateOneThunk(
307 : BlockGraph* block_graph,
308 : const BlockGraph::Reference& destination,
309 : const BlockGraph::Reference& hook,
310 E : const ImmediateType* parameter) {
311 E : std::string name;
312 E : if (destination.offset() == 0) {
313 : name = base::StringPrintf("%s%s",
314 : destination.referenced()->name().c_str(),
315 E : common::kThunkSuffix);
316 E : } else {
317 : name = base::StringPrintf("%s%s+%d",
318 : destination.referenced()->name().c_str(),
319 : common::kThunkSuffix,
320 E : destination.offset());
321 : }
322 :
323 : // Set up a basic block subgraph containing a single block description, with
324 : // that block description containing a single empty basic block, and get an
325 : // assembler writing into that basic block.
326 : // TODO(chrisha): Make this reusable somehow. Creating a code block via an
327 : // assembler is likely to be pretty common.
328 E : BasicBlockSubGraph bbsg;
329 : BasicBlockSubGraph::BlockDescription* block_desc = bbsg.AddBlockDescription(
330 : name,
331 : NULL,
332 : BlockGraph::CODE_BLOCK,
333 : thunk_section_->id(),
334 : 1,
335 E : 0);
336 E : BasicCodeBlock* bb = bbsg.AddBasicCodeBlock(name);
337 E : block_desc->basic_block_order.push_back(bb);
338 : BasicBlockAssembler assm(bb->instructions().begin(),
339 E : &bb->instructions());
340 :
341 : // Set up our thunk:
342 : // 1. push parameter
343 : // 2. push func_addr
344 : // 3. jmp hook_addr
345 E : if (parameter != NULL)
346 E : assm.push(*parameter);
347 E : assm.push(Immediate(destination.referenced(), destination.offset()));
348 E : assm.jmp(Operand(Displacement(hook.referenced(), hook.offset())));
349 :
350 : // Condense the whole mess into a block.
351 E : BlockBuilder block_builder(block_graph);
352 E : if (!block_builder.Merge(&bbsg)) {
353 i : LOG(ERROR) << "Failed to build thunk block.";
354 i : return NULL;
355 : }
356 :
357 : // Exactly one new block should have been created.
358 E : DCHECK_EQ(1u, block_builder.new_blocks().size());
359 E : BlockGraph::Block* thunk = block_builder.new_blocks().front();
360 :
361 E : if (src_ranges_for_thunks_) {
362 : // Give the thunk a source range synonymous with the destination.
363 : // That way the debugger will resolve calls and jumps to the thunk to the
364 : // destination function's name, which makes the assembly much easier to
365 : // read. The downside to this is that the symbols are now no longer unique,
366 : // and searching for a function by name may turn up either the function or
367 : // the thunk.
368 : const BlockGraph::Block::SourceRanges& source_ranges =
369 E : destination.referenced()->source_ranges();
370 : const BlockGraph::Block::SourceRanges::RangePair* source =
371 E : source_ranges.FindRangePair(destination.offset(), thunk->size());
372 E : if (source != NULL) {
373 : // Calculate the offset into the range.
374 E : size_t offs = destination.offset() - source->first.start();
375 E : BlockGraph::Block::DataRange data(0, thunk->size());
376 : BlockGraph::Block::SourceRange src(source->second.start() + offs,
377 E : thunk->size());
378 E : bool pushed = thunk->source_ranges().Push(data, src);
379 E : DCHECK(pushed);
380 : }
381 : }
382 :
383 E : return thunk;
384 E : }
385 :
386 E : bool EntryThunkTransform::GetEntryPoints(BlockGraph::Block* header_block) {
387 : // Get the TLS initializer entry-points. These have the same signature and
388 : // call patterns to DllMain.
389 E : if (!pe::GetTlsInitializers(header_block, &dllmain_entrypoints_)) {
390 i : LOG(ERROR) << "Failed to populate the TLS Initializer entry-points.";
391 i : return false;
392 : }
393 :
394 : // Get the DLL entry-point.
395 E : pe::EntryPoint dll_entry_point;
396 E : if (!pe::GetDllEntryPoint(header_block, &dll_entry_point)) {
397 i : LOG(ERROR) << "Failed to resolve the DLL entry-point.";
398 i : return false;
399 : }
400 :
401 : // If the image is an EXE or is a DLL that does not specify an entry-point
402 : // (the entry-point is optional for DLLs) then the dll_entry_point will have
403 : // a NULL block pointer. Otherwise, add it to the entry-point set.
404 E : if (dll_entry_point.first != NULL) {
405 E : dllmain_entrypoints_.insert(dll_entry_point);
406 E : } else {
407 : // Get the EXE entry point. We only need to bother looking if we didn't get
408 : // a DLL entry point, as we can't have both.
409 E : if (!pe::GetExeEntryPoint(header_block, &exe_entry_point_)) {
410 i : LOG(ERROR) << "Failed to resolve the EXE entry-point.";
411 i : return false;
412 : }
413 : }
414 :
415 E : return true;
416 E : }
417 :
418 : } // namespace transforms
419 : } // namespace instrument
|