1 : // Copyright 2012 Google Inc.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/instrument/transforms/asan_transform.h"
16 :
17 : #include <vector>
18 :
19 : #include "base/logging.h"
20 : #include "base/stringprintf.h"
21 : #include "base/memory/ref_counted.h"
22 : #include "syzygy/block_graph/basic_block_assembler.h"
23 : #include "syzygy/block_graph/block_builder.h"
24 : #include "syzygy/pe/block_util.h"
25 : #include "syzygy/pe/transforms/add_imports_transform.h"
26 : #include "third_party/distorm/files/include/mnemonics.h"
27 : #include "third_party/distorm/files/src/x86defs.h"
28 :
29 : namespace instrument {
30 : namespace transforms {
31 : namespace {
32 :
33 : using block_graph::BasicBlock;
34 : using block_graph::BasicBlockAssembler;
35 : using block_graph::BasicBlockSubGraph;
36 : using block_graph::BasicBlockReference;
37 : using block_graph::BlockBuilder;
38 : using block_graph::BlockGraph;
39 : using block_graph::Displacement;
40 : using block_graph::Immediate;
41 : using block_graph::Instruction;
42 : using block_graph::Operand;
43 : using block_graph::Value;
44 : using core::Register;
45 : using core::RegisterCode;
46 : using pe::transforms::AddImportsTransform;
47 :
// Classifies how an instruction touches memory through the operand that
// DecodeMemoryAccess selects.
enum MemoryAccessMode {
  // Neither of the first two operands is a memory operand.
  kNoAccess,
  // The selected memory operand is not flagged as a written destination.
  kReadAccess,
  // The first operand is a memory operand and is flagged as written.
  kWriteAccess,
};
54 :
55 : // Returns true iff opcode should be instrumented.
56 E : bool ShouldInstrumentOpcode(uint16 opcode) {
57 E : switch (opcode) {
58 : case I_LEA:
59 : case I_CALL:
60 : case I_JMP:
61 E : return false;
62 : default:
63 E : return true;
64 : }
65 E : }
66 :
67 : // Computes the correct displacement, if any, for operand
68 : // number @p operand of @p instr.
69 : Displacement ComputeDisplacementForOperand(const Instruction& instr,
70 E : size_t operand) {
71 E : const _DInst& repr = instr.representation();
72 :
73 : DCHECK(repr.ops[operand].type == O_SMEM ||
74 E : repr.ops[operand].type == O_MEM);
75 :
76 E : size_t access_size_bytes = repr.ops[operand].size / 8;
77 E : if (repr.dispSize == 0)
78 E : return Displacement(access_size_bytes - 1);
79 :
80 E : BasicBlockReference reference;
81 E : if (instr.FindOperandReference(operand, &reference)) {
82 E : if (reference.referred_type() == BasicBlockReference::REFERRED_TYPE_BLOCK) {
83 : return Displacement(reference.block(),
84 E : reference.offset() + access_size_bytes - 1);
85 : } else {
86 E : return Displacement(reference.basic_block());
87 : }
88 : } else {
89 E : return Displacement(repr.disp + access_size_bytes - 1);
90 : }
91 E : }
92 :
93 : // Returns true if operand @p op is instrumentable, e.g.
94 : // if it implies a memory access.
95 E : bool IsInstrumentable(const _Operand& op) {
96 E : switch (op.type) {
97 : case O_SMEM:
98 : case O_MEM:
99 E : return true;
100 :
101 : default:
102 E : return false;
103 : }
104 E : }
105 :
106 : // Decodes the first O_MEM or O_SMEM operand of @p instr, if any to the
107 : // corresponding Operand.
108 E : MemoryAccessMode DecodeMemoryAccess(const Instruction& instr, Operand* access) {
109 E : DCHECK(access != NULL);
110 E : const _DInst& repr = instr.representation();
111 :
112 : // Figure out which operand we're instrumenting.
113 E : size_t mem_op_id = -1;
114 E : if (IsInstrumentable(repr.ops[0])) {
115 : // The first operand is instrumentable.
116 E : mem_op_id = 0;
117 E : } else if (IsInstrumentable(repr.ops[1])) {
118 : // The second operand is instrumentable.
119 E : mem_op_id = 1;
120 E : } else {
121 : // Neither of the first two operands is instrumentable.
122 E : return kNoAccess;
123 : }
124 :
125 E : if (repr.ops[mem_op_id].type == O_SMEM) {
126 : // Simple memory dereference with optional displacement.
127 E : Register base_reg(RegisterCode(repr.ops[mem_op_id].index - R_EAX));
128 : // Get the displacement for the operand.
129 E : Displacement displ = ComputeDisplacementForOperand(instr, mem_op_id);
130 :
131 E : *access = Operand(base_reg, displ);
132 E : } else if (repr.ops[0].type == O_MEM || repr.ops[1].type == O_MEM) {
133 : // Complex memory dereference.
134 E : Register index_reg(RegisterCode(repr.ops[mem_op_id].index - R_EAX));
135 E : core::ScaleFactor scale = core::kTimes1;
136 E : switch (repr.scale) {
137 : case 2:
138 E : scale = core::kTimes2;
139 E : break;
140 : case 4:
141 E : scale = core::kTimes4;
142 E : break;
143 : case 8:
144 E : scale = core::kTimes8;
145 : break;
146 : default:
147 : break;
148 : }
149 :
150 : // Get the displacement for the operand (if any).
151 E : Displacement displ = ComputeDisplacementForOperand(instr, mem_op_id);
152 :
153 : // Compute the full operand.
154 E : if (repr.base != R_NONE) {
155 E : Register base_reg(RegisterCode(repr.base - R_EAX));
156 E : if (displ.size() == core::kSizeNone) {
157 : // No displacement, it's a [base + index * scale] access.
158 i : *access = Operand(base_reg, index_reg, scale);
159 i : } else {
160 : // This is a [base + index * scale + displ] access.
161 E : *access = Operand(base_reg, index_reg, scale, displ);
162 : }
163 E : } else {
164 : // No base, this is an [index * scale + displ] access.
165 : // TODO(siggi): AFAIK, there's no encoding for [index * scale] without
166 : // a displacement. If this assert fires, I'm proven wrong.
167 E : DCHECK_NE(core::kSizeNone, displ.size());
168 :
169 E : *access = Operand(index_reg, scale, displ);
170 : }
171 E : } else {
172 i : NOTREACHED();
173 :
174 i : return kNoAccess;
175 : }
176 :
177 E : if ((repr.flags & FLAG_DST_WR) && mem_op_id == 0) {
178 : // The first operand is written to.
179 E : return kWriteAccess;
180 : } else {
181 E : return kReadAccess;
182 : }
183 E : }
184 :
185 : // Use @p bb_asm to inject a hook to @p hook to instrument the access to the
186 : // address stored in the operand @p op.
187 : void InjectAsanHook(BasicBlockAssembler* bb_asm,
188 : const Operand& op,
189 E : BlockGraph::Reference* hook) {
190 E : DCHECK(hook != NULL);
191 E : bb_asm->push(core::eax);
192 E : bb_asm->lea(core::eax, op);
193 E : bb_asm->call(Operand(Displacement(hook->referenced(), hook->offset())));
194 E : }
195 :
196 : typedef std::pair<BlockGraph::Block*, BlockGraph::Offset> ReferenceDest;
197 : typedef std::map<ReferenceDest, ReferenceDest> ReferenceMap;
198 : typedef std::set<BlockGraph::Block*> BlockSet;
199 :
200 : // For every block referencing @p dst_blocks, redirects any reference "ref" in
201 : // @p redirects to @p redirects[ref].
202 : void RedirectReferences(const BlockSet& dst_blocks,
203 E : const ReferenceMap& redirects) {
204 : // For each block referenced by any source reference.
205 E : BlockSet::const_iterator dst_block_it = dst_blocks.begin();
206 E : for (; dst_block_it != dst_blocks.end(); ++dst_block_it) {
207 : // Iterate over all their referrers.
208 E : BlockGraph::Block* referred_block = *dst_block_it;
209 E : BlockGraph::Block::ReferrerSet referrers = referred_block->referrers();
210 E : BlockGraph::Block::ReferrerSet::iterator referrer_it = referrers.begin();
211 E : for (; referrer_it != referrers.end(); ++referrer_it) {
212 E : BlockGraph::Block* referrer = referrer_it->first;
213 :
214 : // And redirect any references that happen to match a source reference.
215 : BlockGraph::Block::ReferenceMap::const_iterator reference_it =
216 E : referrer->references().begin();
217 :
218 E : for (; reference_it != referrer->references().end(); ++reference_it) {
219 E : const BlockGraph::Reference& ref(reference_it->second);
220 E : ReferenceDest dest(std::make_pair(ref.referenced(), ref.offset()));
221 :
222 E : ReferenceMap::const_iterator it(redirects.find(dest));
223 E : if (it != redirects.end()) {
224 : BlockGraph::Reference new_reference(ref.type(),
225 : ref.size(),
226 : it->second.first,
227 : it->second.second,
228 E : 0);
229 :
230 E : referrer->SetReference(reference_it->first, new_reference);
231 : }
232 E : }
233 E : }
234 E : }
235 E : }
236 :
237 : } // namespace
238 :
239 : const char AsanBasicBlockTransform::kTransformName[] =
240 : "SyzyAsanBasicBlockTransform";
241 :
242 E : bool AsanBasicBlockTransform::InstrumentBasicBlock(BasicBlock* basic_block) {
243 E : DCHECK(basic_block != NULL);
244 : BasicBlock::Instructions::iterator iter_inst =
245 E : basic_block->instructions().begin();
246 :
247 : // Process each instruction and inject a call to Asan when we find an
248 : // instrumentable memory access.
249 E : for (; iter_inst != basic_block->instructions().end(); ++iter_inst) {
250 E : Operand operand(core::eax);
251 E : const Instruction& instr = *iter_inst;
252 E : const _DInst& repr = instr.representation();
253 :
254 E : MemoryAccessMode access_mode = DecodeMemoryAccess(instr, &operand);
255 :
256 : // Bail if this is not a memory access.
257 E : if (access_mode == kNoAccess)
258 E : continue;
259 :
260 : // A basic block reference means that can be either a computed jump,
261 : // or a load from a case table. In either case it doesn't make sense
262 : // to instrument the access.
263 : if (operand.displacement().reference().referred_type() ==
264 E : BasicBlockReference::REFERRED_TYPE_BASIC_BLOCK) {
265 E : continue;
266 : }
267 :
268 : // A block reference means this instruction is reading or writing to
269 : // a global variable or some such. It's viable to pad and align global
270 : // variables and to red-zone the padding, but without that, there's nothing
271 : // to gain by instrumenting these accesses.
272 : if (operand.displacement().reference().referred_type() ==
273 E : BasicBlockReference::REFERRED_TYPE_BLOCK) {
274 E : continue;
275 : }
276 :
277 : // Is this an instruction we should be instrumenting.
278 E : if (!ShouldInstrumentOpcode(repr.opcode))
279 E : continue;
280 :
281 : // No point in instrumenting ESP-relative accesses.
282 E : if (operand.base() == core::kRegisterEsp)
283 E : continue;
284 :
285 : // We can't deal with repeated (string) instructions.
286 E : if (FLAG_GET_PREFIX(repr.flags) & (FLAG_REPNZ | FLAG_REP))
287 E : continue;
288 :
289 E : BasicBlockAssembler bb_asm(iter_inst, &basic_block->instructions());
290 E : Instruction::Representation inst = iter_inst->representation();
291 E : InjectAsanHook(&bb_asm, operand, hook_access_);
292 E : }
293 E : return true;
294 E : }
295 :
296 : bool AsanBasicBlockTransform::TransformBasicBlockSubGraph(
297 E : BlockGraph* block_graph, BasicBlockSubGraph* subgraph) {
298 E : DCHECK(block_graph != NULL);
299 E : DCHECK(subgraph != NULL);
300 :
301 : // Iterates through each basic block and instruments it.
302 : BasicBlockSubGraph::BBCollection::iterator it =
303 E : subgraph->basic_blocks().begin();
304 E : for (; it != subgraph->basic_blocks().end(); ++it) {
305 E : if (!InstrumentBasicBlock(&it->second))
306 i : return false;
307 E : }
308 E : return true;
309 E : }
310 :
311 : const char AsanTransform::kTransformName[] =
312 : "SyzyAsanTransform";
313 :
314 : const char AsanTransform::kCheckAccessName[] =
315 : "asan_check_access";
316 :
317 : const char AsanTransform::kSyzyAsanDll[] = "asan_rtl.dll";
318 :
319 E : AsanTransform::AsanTransform() : asan_dll_name_(kSyzyAsanDll) {
320 E : }
321 :
322 : bool AsanTransform::PreBlockGraphIteration(BlockGraph* block_graph,
323 E : BlockGraph::Block* header_block) {
324 : // Add an import entry for the ASAN runtime.
325 E : AddImportsTransform::ImportedModule import_module(asan_dll_name_.c_str());
326 :
327 : // Add the probe function import.
328 : size_t asan_hook_check_access_index =
329 E : import_module.AddSymbol(kCheckAccessName);
330 :
331 E : AddImportsTransform add_imports_transform;
332 E : add_imports_transform.AddModule(&import_module);
333 :
334 E : if (!add_imports_transform.TransformBlockGraph(block_graph, header_block)) {
335 i : LOG(ERROR) << "Unable to add imports for Asan instrumentation DLL.";
336 i : return false;
337 : }
338 :
339 : if (!import_module.GetSymbolReference(asan_hook_check_access_index ,
340 E : &hook_asan_check_access_)) {
341 i : LOG(ERROR) << "Unable to get import reference for Asan.";
342 i : return false;
343 : }
344 :
345 E : return true;
346 E : }
347 :
348 : bool AsanTransform::OnBlock(BlockGraph* block_graph,
349 E : BlockGraph::Block* block) {
350 E : DCHECK(block_graph != NULL);
351 E : DCHECK(block != NULL);
352 E : if (block->type() != BlockGraph::CODE_BLOCK)
353 E : return true;
354 :
355 E : if (!pe::CodeBlockIsBasicBlockDecomposable(block))
356 E : return true;
357 :
358 E : AsanBasicBlockTransform transform(&hook_asan_check_access_);
359 E : if (!ApplyBasicBlockSubGraphTransform(&transform, block_graph, block, NULL))
360 i : return false;
361 :
362 E : return true;
363 E : }
364 :
365 : bool AsanTransform::PostBlockGraphIteration(BlockGraph* block_graph,
366 E : BlockGraph::Block* header_block) {
367 : // This function redirects a the heap-related kernel32 imports to point to
368 : // a set of "override" imports in the ASAN runtime.
369 :
370 : // Import entries for the ASAN runtime and kernel32.
371 E : AddImportsTransform::ImportedModule module_kernel32("kernel32.dll");
372 E : AddImportsTransform::ImportedModule module_asan(asan_dll_name_.c_str());
373 :
374 : struct Kernel32ImportRedirect {
375 : const char* import_name;
376 : const char* redirect_name;
377 : };
378 : static const Kernel32ImportRedirect kKernel32Redirects[] = {
379 : { "HeapCreate", "asan_HeapCreate" },
380 : { "HeapDestroy", "asan_HeapDestroy" },
381 : { "HeapAlloc", "asan_HeapAlloc" },
382 : { "HeapReAlloc", "asan_HeapReAlloc" },
383 : { "HeapFree", "asan_HeapFree" },
384 : { "HeapSize", "asan_HeapSize" },
385 : { "HeapValidate", "asan_HeapValidate" },
386 : { "HeapCompact", "asan_HeapCompact" },
387 : { "HeapLock", "asan_HeapLock" },
388 : { "HeapUnlock", "asan_HeapUnlock" },
389 : { "HeapWalk", "asan_HeapWalk" },
390 : { "HeapSetInformation", "asan_HeapSetInformation" },
391 : { "HeapQueryInformation", "asan_HeapQueryInformation" },
392 : };
393 :
394 : // Add imports for the overrides to the respective modules.
395 : // HACK ALERT: This uses the AddImportsTransform to:
396 : // 1. Find existing imports we want to redirect. This has the unfortunate
397 : // side effect of adding all of the imports we query for.
398 : // 2. Create imports for the redirects, which will create imports for
399 : // all of the redirects, irrespective of whether we have anything to
400 : // redirect them to.
401 : // TODO(siggi): Clean this up by factoring import discovery/probing out of the
402 : // AddImports transform, and perhaps write yet another transform to remove
403 : // unused imports.
404 E : std::vector<std::pair<size_t, size_t>> override_indexes;
405 E : for (size_t i = 0; i < arraysize(kKernel32Redirects); ++i) {
406 : size_t kernel32_index =
407 E : module_kernel32.AddSymbol(kKernel32Redirects[i].import_name);
408 : size_t asan_index =
409 E : module_asan.AddSymbol(kKernel32Redirects[i].redirect_name);
410 :
411 E : override_indexes.push_back(std::make_pair(kernel32_index, asan_index));
412 E : }
413 :
414 E : AddImportsTransform add_imports_transform;
415 E : add_imports_transform.AddModule(&module_asan);
416 E : add_imports_transform.AddModule(&module_kernel32);
417 E : if (!add_imports_transform.TransformBlockGraph(block_graph, header_block)) {
418 i : LOG(ERROR) << "Unable to add imports for import redirection.";
419 i : return false;
420 : }
421 :
422 : // Keeps track of all the blocks referenced by the original references.
423 E : BlockSet dst_blocks;
424 : // Stores the reference mapping we want to rewrite.
425 E : ReferenceMap reference_redirect_map;
426 :
427 E : for (size_t i = 0; i < override_indexes.size(); ++i) {
428 E : BlockGraph::Reference src;
429 E : BlockGraph::Reference dst;
430 : if (!module_kernel32.GetSymbolReference(override_indexes[i].first, &src) ||
431 E : !module_asan.GetSymbolReference(override_indexes[i].second, &dst)) {
432 i : NOTREACHED() << "Unable to get references after a successful transform.";
433 i : return false;
434 : }
435 :
436 : // Add the destination block to the set of referred blocks.
437 E : dst_blocks.insert(src.referenced());
438 : reference_redirect_map.insert(
439 : std::make_pair(ReferenceDest(src.referenced(), src.offset()),
440 E : ReferenceDest(dst.referenced(), dst.offset())));
441 E : }
442 :
443 E : RedirectReferences(dst_blocks, reference_redirect_map);
444 :
445 E : return true;
446 E : }
447 :
448 : } // namespace transforms
449 : } // namespace instrument
|