1 : // Copyright 2012 Google Inc.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/instrument/transforms/entry_thunk_transform.h"
16 :
17 : #include "base/logging.h"
18 : #include "base/stringprintf.h"
19 : #include "syzygy/block_graph/typed_block.h"
20 : #include "syzygy/pe/pe_utils.h"
21 : #include "syzygy/pe/transforms/add_imports_transform.h"
22 :
23 : namespace instrument {
24 : namespace transforms {
25 :
26 : namespace {
27 :
28 : using block_graph::BlockGraph;
29 : using block_graph::TypedBlock;
30 :
// Suffix appended to the name of the destination block to form the name of
// the thunk block that forwards to it.
const char kThunkSuffix[] = "_thunk";
33 :
34 : bool IsUnsafeReference(const BlockGraph::Block* referrer,
35 E : const BlockGraph::Reference& ref) {
36 : // Skip references with a non-zero offset if we're
37 : // not instrumenting unsafe references.
38 E : if (ref.offset() != 0)
39 E : return true;
40 :
41 : BlockGraph::BlockAttributes kUnsafeAttribs =
42 : BlockGraph::HAS_INLINE_ASSEMBLY |
43 E : BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER;
44 :
45 E : bool unsafe_referrer = false;
46 : if (referrer->type() == BlockGraph::CODE_BLOCK &&
47 E : (referrer->attributes() & kUnsafeAttribs) != 0) {
48 E : unsafe_referrer = true;
49 : }
50 :
51 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, ref.referenced()->type());
52 E : bool unsafe_block = (ref.referenced()->attributes() & kUnsafeAttribs) != 0;
53 :
54 : // If both the referrer and the referenced blocks are unsafe, we can't
55 : // safely assume that this reference represents a call semantics,
56 : // e.g. where a return address is at the top of stack at entry.
57 : // Ideally we'd decide this on the basis of a full stack analysis, but
58 : // beggers can't be choosers, plus for hand-coded assembly that's
59 : // the halting problem :).
60 : // For instrumentation that uses return address swizzling, instrumenting
61 : // an unsafe reference leads to crashes, so better to back off and get
62 : // slightly less coverage.
63 E : return unsafe_referrer && unsafe_block;
64 E : }
65 :
66 : } // namespace
67 :
68 : using pe::transforms::AddImportsTransform;
69 :
// The name of this transform.
const char EntryThunkTransform::kTransformName[] =
    "EntryThunkTransform";

// Names of the hook symbols imported from the instrumentation DLL. Which of
// them actually get imported is decided in PreBlockGraphIteration:
//   - kEntryHookName: imported unless only module entry is instrumented.
//   - kDllMainEntryHookName: imported when there are DllMain-like entry
//     points (DllMain itself or TLS initializers).
//   - kExeEntryHookName: imported when the image has an EXE entry point.
const char EntryThunkTransform::kEntryHookName[] = "_indirect_penter";
const char EntryThunkTransform::kDllMainEntryHookName[] =
    "_indirect_penter_dllmain";
const char EntryThunkTransform::kExeEntryHookName[] =
    "_indirect_penter_exe_entry";

// The instrumentation DLL imported from when the constructor's default
// instrument_dll_name_ is left in place.
const char EntryThunkTransform::kDefaultInstrumentDll[] =
    "call_trace_client.dll";
80 :
81 : // We push the absolute address of the function to be called on the
82 : // stack, and then we invoke the instrumentation function indirectly
83 : // through the import table.
84 : // 6844332211 push offset (11223344)
85 : // FF2588776655 jmp dword ptr [(55667788)]
86 : const EntryThunkTransform::Thunk EntryThunkTransform::kThunkTemplate = {
87 : 0x68, NULL, // push immediate
88 : 0x25FF, NULL // jmp DWORD PTR[immediate]
89 : };
90 :
// Constructs a transform with its default configuration: no thunk section
// selected yet, unsafe references instrumented, no source ranges attached to
// thunks, all code block entries instrumented (not just module entry points),
// and hooks imported from kDefaultInstrumentDll.
EntryThunkTransform::EntryThunkTransform()
    : thunk_section_(NULL),
      instrument_unsafe_references_(true),
      src_ranges_for_thunks_(false),
      only_instrument_module_entry_(false),
      instrument_dll_name_(kDefaultInstrumentDll) {
}
98 :
99 : bool EntryThunkTransform::PreBlockGraphIteration(
100 E : BlockGraph* block_graph, BlockGraph::Block* header_block) {
101 E : DCHECK(thunk_section_ == NULL);
102 :
103 E : if (!GetEntryPoints(header_block))
104 i : return false;
105 :
106 : AddImportsTransform::ImportedModule import_module(
107 E : instrument_dll_name_.c_str());
108 :
109 : // We import the minimal set of symbols necessary, depending on the types of
110 : // entry points we find in the module. We maintain a list of symbol indices/
111 : // reference pointers, which will be traversed after the import to populate
112 : // the references.
113 : typedef std::pair<size_t, BlockGraph::Reference*> ImportHook;
114 E : std::vector<ImportHook> import_hooks;
115 :
116 : // If there are any DllMain-like entry points (TLS initializers or DllMain
117 : // itself) then we need the DllMain entry hook.
118 E : if (dllmain_entrypoints_.size() > 0) {
119 : import_hooks.push_back(std::make_pair(
120 : import_module.AddSymbol(kDllMainEntryHookName),
121 E : &hook_dllmain_ref_));
122 : }
123 :
124 : // If this was an EXE then we need the EXE entry hook.
125 E : if (exe_entry_point_.first != NULL) {
126 : import_hooks.push_back(std::make_pair(
127 : import_module.AddSymbol(kExeEntryHookName),
128 E : &hook_exe_entry_ref_));
129 : }
130 :
131 : // If we're not only instrumenting module entry then we need the function
132 : // entry hook.
133 E : if (!only_instrument_module_entry_) {
134 : import_hooks.push_back(std::make_pair(
135 : import_module.AddSymbol(kEntryHookName),
136 E : &hook_ref_));
137 : }
138 :
139 : // Nothing to do if we don't need any import hooks.
140 E : if (import_hooks.empty())
141 E : return true;
142 :
143 : // Run the transform.
144 E : AddImportsTransform add_imports_transform;
145 E : add_imports_transform.AddModule(&import_module);
146 E : if (!add_imports_transform.TransformBlockGraph(block_graph, header_block)) {
147 i : LOG(ERROR) << "Unable to add imports for instrumentation DLL.";
148 i : return false;
149 : }
150 :
151 : // Get references to each of the imported symbols.
152 E : for (size_t i = 0; i < import_hooks.size(); ++i) {
153 : if (!import_module.GetSymbolReference(import_hooks[i].first,
154 E : import_hooks[i].second)) {
155 i : LOG(ERROR) << "Unable to get reference to import.";
156 i : return false;
157 : }
158 E : }
159 :
160 : // Find or create the section we put our thunks in.
161 : thunk_section_ = block_graph->FindOrAddSection(".thunks",
162 E : pe::kCodeCharacteristics);
163 E : DCHECK(thunk_section_ != NULL);
164 :
165 E : return true;
166 E : }
167 :
168 : bool EntryThunkTransform::OnBlock(BlockGraph* block_graph,
169 E : BlockGraph::Block* block) {
170 E : DCHECK(block != NULL);
171 :
172 E : if (block->type() != BlockGraph::CODE_BLOCK)
173 E : return true;
174 :
175 E : return InstrumentCodeBlock(block_graph, block);
176 E : }
177 :
178 : bool EntryThunkTransform::InstrumentCodeBlock(
179 E : BlockGraph* block_graph, BlockGraph::Block* block) {
180 E : DCHECK(block_graph != NULL);
181 E : DCHECK(block != NULL);
182 :
183 : // Typedef for the thunk block map. The key is the offset within the callee
184 : // block and the value is the thunk block that forwards to the callee at that
185 : // offset.
186 E : ThunkBlockMap thunk_block_map;
187 :
188 : // Iterate through all the block's referrers, creating thunks as we go.
189 : // We copy the referrer set for simplicity, as it's potentially mutated
190 : // in the loop.
191 E : BlockGraph::Block::ReferrerSet referrers = block->referrers();
192 E : BlockGraph::Block::ReferrerSet::const_iterator referrer_it(referrers.begin());
193 E : for (; referrer_it != referrers.end(); ++referrer_it) {
194 E : const BlockGraph::Block::Referrer& referrer = *referrer_it;
195 : if (!InstrumentCodeBlockReferrer(
196 E : referrer, block_graph, block, &thunk_block_map)) {
197 i : return false;
198 : }
199 E : }
200 :
201 E : return true;
202 E : }
203 :
204 : bool EntryThunkTransform::InstrumentCodeBlockReferrer(
205 : const BlockGraph::Block::Referrer& referrer,
206 : BlockGraph* block_graph,
207 : BlockGraph::Block* block,
208 E : ThunkBlockMap* thunk_block_map) {
209 E : DCHECK(block_graph != NULL);
210 E : DCHECK(block != NULL);
211 E : DCHECK(thunk_block_map != NULL);
212 :
213 : // Get the reference.
214 E : BlockGraph::Reference ref;
215 E : if (!referrer.first->GetReference(referrer.second, &ref)) {
216 i : LOG(ERROR) << "Unable to get reference from referrer.";
217 i : return false;
218 : }
219 :
220 : // Skip self-references, except long references to the start of the block.
221 : // TODO(siggi): This needs refining, as it may currently miss important
222 : // cases. Notably if a block contains more than one function, and the
223 : // functions are mutually recursive, we'll only record the original
224 : // entry to the block, but will miss the internal recursion.
225 : // As-is, this does work for the common case where a block contains
226 : // one self-recursive function, however.
227 E : if (referrer.first == block) {
228 : // Skip short references.
229 E : if (ref.size() < sizeof(core::AbsoluteAddress))
230 i : return true;
231 :
232 : // Skip interior references. The rationale for this is because these
233 : // references will tend to be switch tables, and we don't need the
234 : // overhead of instrumenting and recording all switch statement executions
235 : // for now.
236 E : if (ref.offset() != 0)
237 i : return true;
238 : }
239 :
240 : // See whether this is one of the DLL entrypoints.
241 E : pe::EntryPoint entry(ref.referenced(), ref.offset());
242 : pe::EntryPointSet::const_iterator entry_it(dllmain_entrypoints_.find(
243 E : entry));
244 E : bool is_dllmain_entry = entry_it != dllmain_entrypoints_.end();
245 :
246 : // Determine if this is an EXE entry point.
247 E : bool is_exe_entry = entry == exe_entry_point_;
248 :
249 : // It can't be both an EXE and a DLL entry.
250 E : DCHECK(!is_dllmain_entry || !is_exe_entry);
251 :
252 : // If we're only instrumenting entry points and this isn't one, then skip it.
253 E : if (only_instrument_module_entry_ && !is_dllmain_entry && !is_exe_entry)
254 E : return true;
255 :
256 : if (!instrument_unsafe_references_ &&
257 E : IsUnsafeReference(referrer.first, ref)) {
258 E : LOG(INFO) << "Skipping reference between unsafe block pair '"
259 : << referrer.first->name() << "' and '"
260 : << block->name() << "'";
261 E : return true;
262 : }
263 :
264 : // Determine which hook function to use.
265 E : BlockGraph::Reference* hook_ref = &hook_ref_;
266 E : if (is_dllmain_entry)
267 E : hook_ref = &hook_dllmain_ref_;
268 E : else if (is_exe_entry)
269 E : hook_ref = &hook_exe_entry_ref_;
270 E : DCHECK(hook_ref->referenced() != NULL);
271 :
272 : // Look for the reference in the thunk block map, and only create a new one
273 : // if it does not already exist.
274 E : BlockGraph::Block* thunk_block = NULL;
275 E : ThunkBlockMap::const_iterator thunk_it = thunk_block_map->find(ref.offset());
276 E : if (thunk_it == thunk_block_map->end()) {
277 E : thunk_block = CreateOneThunk(block_graph, ref, *hook_ref);
278 E : if (thunk_block == NULL) {
279 i : LOG(ERROR) << "Unable to create thunk block.";
280 i : return false;
281 : }
282 E : (*thunk_block_map)[ref.offset()] = thunk_block;
283 E : } else {
284 E : thunk_block = thunk_it->second;
285 : }
286 E : DCHECK(thunk_block != NULL);
287 :
288 : // Update the referrer to point to the thunk.
289 : BlockGraph::Reference new_ref(ref.type(),
290 : ref.size(),
291 : thunk_block,
292 E : 0, 0);
293 E : referrer.first->SetReference(referrer.second, new_ref);
294 :
295 E : return true;
296 E : }
297 :
298 : BlockGraph::Block* EntryThunkTransform::CreateOneThunk(
299 : BlockGraph* block_graph,
300 : const BlockGraph::Reference& destination,
301 E : const BlockGraph::Reference& hook) {
302 E : std::string name;
303 E : if (destination.offset() == 0) {
304 : name = base::StringPrintf("%s%s",
305 : destination.referenced()->name().c_str(),
306 E : kThunkSuffix);
307 E : } else {
308 : name = base::StringPrintf("%s%s+%d",
309 : destination.referenced()->name().c_str(),
310 : kThunkSuffix,
311 E : destination.offset());
312 : }
313 :
314 : // Create and initialize the new thunk.
315 : BlockGraph::Block* thunk = block_graph->AddBlock(BlockGraph::CODE_BLOCK,
316 : sizeof(kThunkTemplate),
317 E : name.c_str());
318 E : if (thunk == NULL)
319 i : return NULL;
320 :
321 E : thunk->set_section(thunk_section_->id());
322 : thunk->SetData(reinterpret_cast<const uint8*>(&kThunkTemplate),
323 E : sizeof(kThunkTemplate));
324 :
325 E : if (src_ranges_for_thunks_) {
326 : // Give the thunk a source range synonymous with the destination.
327 : // That way the debugger will resolve calls and jumps to the thunk to the
328 : // destination function's name, which makes the assembly much easier to
329 : // read. The downside to this is that the symbols are now no longer unique,
330 : // and searching for a function by name may turn up either the function or
331 : // the thunk.
332 : const BlockGraph::Block::SourceRanges& source_ranges =
333 E : destination.referenced()->source_ranges();
334 : const BlockGraph::Block::SourceRanges::RangePair* source =
335 E : source_ranges.FindRangePair(destination.offset(), thunk->size());
336 E : if (source != NULL) {
337 : // Calculate the offset into the range.
338 E : size_t offs = destination.offset() - source->first.start();
339 E : BlockGraph::Block::DataRange data(0, thunk->size());
340 : BlockGraph::Block::SourceRange src(source->second.start() + offs,
341 E : thunk->size());
342 E : bool pushed = thunk->source_ranges().Push(data, src);
343 E : DCHECK(pushed);
344 : }
345 : }
346 :
347 E : if (!InitializeThunk(thunk, destination, hook)) {
348 i : bool removed = block_graph->RemoveBlock(thunk);
349 i : DCHECK(removed);
350 :
351 i : thunk = NULL;
352 : }
353 :
354 E : return thunk;
355 E : }
356 :
357 : bool EntryThunkTransform::InitializeThunk(
358 : BlockGraph::Block* thunk_block,
359 : const BlockGraph::Reference& destination,
360 E : const BlockGraph::Reference& import_entry) {
361 E : TypedBlock<Thunk> thunk;
362 E : if (!thunk.Init(0, thunk_block))
363 i : return false;
364 :
365 : if (!thunk.SetReference(BlockGraph::ABSOLUTE_REF,
366 : thunk->func_addr,
367 : destination.referenced(),
368 : destination.offset(),
369 E : destination.offset())) {
370 i : return false;
371 : }
372 :
373 : if (!thunk.SetReference(BlockGraph::ABSOLUTE_REF,
374 : thunk->hook_addr,
375 : import_entry.referenced(),
376 : import_entry.offset(),
377 E : import_entry.offset())) {
378 i : return false;
379 : }
380 :
381 E : return true;
382 E : }
383 :
384 E : bool EntryThunkTransform::GetEntryPoints(BlockGraph::Block* header_block) {
385 : // Get the TLS initializer entry-points. These have the same signature and
386 : // call patterns to DllMain.
387 E : if (!pe::GetTlsInitializers(header_block, &dllmain_entrypoints_)) {
388 i : LOG(ERROR) << "Failed to populate the TLS Initializer entry-points.";
389 i : return false;
390 : }
391 :
392 : // Get the DLL entry-point.
393 E : pe::EntryPoint dll_entry_point;
394 E : if (!pe::GetDllEntryPoint(header_block, &dll_entry_point)) {
395 i : LOG(ERROR) << "Failed to resolve the DLL entry-point.";
396 i : return false;
397 : }
398 :
399 : // If the image is an EXE or is a DLL that does not specify an entry-point
400 : // (the entry-point is optional for DLLs) then the dll_entry_point will have
401 : // a NULL block pointer. Otherwise, add it to the entry-point set.
402 E : if (dll_entry_point.first != NULL) {
403 E : dllmain_entrypoints_.insert(dll_entry_point);
404 E : } else {
405 : // Get the EXE entry point. We only need to bother looking if we didn't get
406 : // a DLL entry point, as we can't have both.
407 E : if (!pe::GetExeEntryPoint(header_block, &exe_entry_point_)) {
408 i : LOG(ERROR) << "Failed to resolve the EXE entry-point.";
409 i : return false;
410 : }
411 : }
412 :
413 E : return true;
414 E : }
415 :
416 : } // namespace transforms
417 : } // namespace instrument
|