Coverage for /Syzygy/instrument/transforms/asan_transform.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
92.3% | 167 / 181 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/instrument/transforms/asan_transform.h"
  16    :  
  17    :  #include <vector>
  18    :  
  19    :  #include "base/logging.h"
  20    :  #include "base/stringprintf.h"
  21    :  #include "base/memory/ref_counted.h"
  22    :  #include "syzygy/block_graph/basic_block_assembler.h"
  23    :  #include "syzygy/block_graph/block_builder.h"
  24    :  #include "syzygy/pe/block_util.h"
  25    :  #include "syzygy/pe/transforms/add_imports_transform.h"
  26    :  #include "third_party/distorm/files/include/mnemonics.h"
  27    :  #include "third_party/distorm/files/src/x86defs.h"
  28    :  
  29    :  namespace instrument {
  30    :  namespace transforms {
  31    :  namespace {
  32    :  
  33    :  using block_graph::BasicBlock;
  34    :  using block_graph::BasicBlockAssembler;
  35    :  using block_graph::BasicBlockSubGraph;
  36    :  using block_graph::BasicBlockReference;
  37    :  using block_graph::BlockBuilder;
  38    :  using block_graph::BlockGraph;
  39    :  using block_graph::Displacement;
  40    :  using block_graph::Immediate;
  41    :  using block_graph::Instruction;
  42    :  using block_graph::Operand;
  43    :  using block_graph::Value;
  44    :  using core::Register;
  45    :  using core::RegisterCode;
  46    :  using pe::transforms::AddImportsTransform;
  47    :  
  48    :  // Represents the different kinds of memory access.
  49    :  enum MemoryAccessMode {
  50    :    kNoAccess,
  51    :    kReadAccess,
  52    :    kWriteAccess,
  53    :  };
  54    :  
  55    :  // Returns true iff opcode should be instrumented.
  56  E :  bool ShouldInstrumentOpcode(uint16 opcode) {
  57  E :    switch (opcode) {
  58    :      case I_LEA:
  59    :      case I_CALL:
  60    :      case I_JMP:
  61  E :        return false;
  62    :      default:
  63  E :        return true;
  64    :    }
  65  E :  }
  66    :  
  67    :  // Computes the correct displacement, if any, for operand
  68    :  // number @p operand of @p instr.
  69    :  Displacement ComputeDisplacementForOperand(const Instruction& instr,
  70  E :                                             size_t operand) {
  71  E :    const _DInst& repr = instr.representation();
  72    :  
  73    :    DCHECK(repr.ops[operand].type == O_SMEM ||
  74  E :           repr.ops[operand].type == O_MEM);
  75    :  
  76  E :    size_t access_size_bytes = repr.ops[operand].size / 8;
  77  E :    if (repr.dispSize == 0)
  78  E :      return Displacement(access_size_bytes - 1);
  79    :  
  80  E :    BasicBlockReference reference;
  81  E :    if (instr.FindOperandReference(operand, &reference)) {
  82  E :      if (reference.referred_type() == BasicBlockReference::REFERRED_TYPE_BLOCK) {
  83    :        return Displacement(reference.block(),
  84  E :                            reference.offset() + access_size_bytes - 1);
  85    :      } else {
  86  E :        return Displacement(reference.basic_block());
  87    :      }
  88    :    } else {
  89  E :      return Displacement(repr.disp + access_size_bytes - 1);
  90    :    }
  91  E :  }
  92    :  
  93    :  // Returns true if operand @p op is instrumentable, i.e.
  94    :  // if it implies a memory access.
  95  E :  bool IsInstrumentable(const _Operand& op) {
  96  E :    switch (op.type) {
  97    :      case O_SMEM:
  98    :      case O_MEM:
  99  E :        return true;
 100    :  
 101    :      default:
 102  E :        return false;
 103    :    }
 104  E :  }
 105    :  
 106    :  // Decodes the first O_MEM or O_SMEM operand of @p instr, if any, into the
 107    :  // corresponding Operand.
 108  E :  MemoryAccessMode DecodeMemoryAccess(const Instruction& instr, Operand* access) {
 109  E :    DCHECK(access != NULL);
 110  E :    const _DInst& repr = instr.representation();
 111    :  
 112    :    // Figure out which operand we're instrumenting.
 113  E :    size_t mem_op_id = -1;
 114  E :    if (IsInstrumentable(repr.ops[0])) {
 115    :      // The first operand is instrumentable.
 116  E :      mem_op_id = 0;
 117  E :    } else if (IsInstrumentable(repr.ops[1])) {
 118    :      // The second operand is instrumentable.
 119  E :      mem_op_id = 1;
 120  E :    } else {
 121    :      // Neither of the first two operands is instrumentable.
 122  E :      return kNoAccess;
 123    :    }
 124    :  
 125  E :    if (repr.ops[mem_op_id].type == O_SMEM) {
 126    :      // Simple memory dereference with optional displacement.
 127  E :      Register base_reg(RegisterCode(repr.ops[mem_op_id].index - R_EAX));
 128    :      // Get the displacement for the operand.
 129  E :      Displacement displ = ComputeDisplacementForOperand(instr, mem_op_id);
 130    :  
 131  E :      *access = Operand(base_reg, displ);
 132  E :    } else if (repr.ops[0].type == O_MEM || repr.ops[1].type == O_MEM) {
 133    :      // Complex memory dereference.
 134  E :      Register index_reg(RegisterCode(repr.ops[mem_op_id].index - R_EAX));
 135  E :      core::ScaleFactor scale = core::kTimes1;
 136  E :      switch (repr.scale) {
 137    :        case 2:
 138  E :          scale = core::kTimes2;
 139  E :          break;
 140    :        case 4:
 141  E :          scale = core::kTimes4;
 142  E :          break;
 143    :        case 8:
 144  E :          scale = core::kTimes8;
 145    :          break;
 146    :        default:
 147    :          break;
 148    :      }
 149    :  
 150    :      // Get the displacement for the operand (if any).
 151  E :      Displacement displ = ComputeDisplacementForOperand(instr, mem_op_id);
 152    :  
 153    :      // Compute the full operand.
 154  E :      if (repr.base != R_NONE) {
 155  E :        Register base_reg(RegisterCode(repr.base - R_EAX));
 156  E :        if (displ.size() == core::kSizeNone) {
 157    :          // No displacement, it's a [base + index * scale] access.
 158  i :          *access = Operand(base_reg, index_reg, scale);
 159  i :        } else {
 160    :          // This is a [base + index * scale + displ] access.
 161  E :          *access = Operand(base_reg, index_reg, scale, displ);
 162    :        }
 163  E :      } else {
 164    :        // No base, this is an [index * scale + displ] access.
 165    :        // TODO(siggi): AFAIK, there's no encoding for [index * scale] without
 166    :        //    a displacement. If this assert fires, I'm proven wrong.
 167  E :        DCHECK_NE(core::kSizeNone, displ.size());
 168    :  
 169  E :        *access = Operand(index_reg, scale, displ);
 170    :      }
 171  E :    } else {
 172  i :      NOTREACHED();
 173    :  
 174  i :      return kNoAccess;
 175    :    }
 176    :  
 177  E :    if ((repr.flags & FLAG_DST_WR) && mem_op_id == 0) {
 178    :      // The first operand is written to.
 179  E :      return kWriteAccess;
 180    :    } else {
 181  E :      return kReadAccess;
 182    :    }
 183  E :  }
 184    :  
 185    :  // Use @p bb_asm to inject a hook to @p hook to instrument the access to the
 186    :  // address stored in the operand @p op.
 187    :  void InjectAsanHook(BasicBlockAssembler* bb_asm,
 188    :                      const Operand& op,
 189  E :                      BlockGraph::Reference* hook) {
 190  E :    DCHECK(hook != NULL);
 191  E :    bb_asm->push(core::eax);
 192  E :    bb_asm->lea(core::eax, op);
 193  E :    bb_asm->call(Operand(Displacement(hook->referenced(), hook->offset())));
 194  E :  }
 195    :  
 196    :  typedef std::pair<BlockGraph::Block*, BlockGraph::Offset> ReferenceDest;
 197    :  typedef std::map<ReferenceDest, ReferenceDest> ReferenceMap;
 198    :  typedef std::set<BlockGraph::Block*> BlockSet;
 199    :  
 200    :  // For every block referencing @p dst_blocks, redirects any reference "ref" in
 201    :  // @p redirects to @p redirects[ref].
 202    :  void RedirectReferences(const BlockSet& dst_blocks,
 203  E :                          const ReferenceMap& redirects) {
 204    :    // For each block referenced by any source reference.
 205  E :    BlockSet::const_iterator dst_block_it = dst_blocks.begin();
 206  E :    for (; dst_block_it != dst_blocks.end(); ++dst_block_it) {
 207    :      // Iterate over all their referrers.
 208  E :      BlockGraph::Block* referred_block = *dst_block_it;
 209  E :      BlockGraph::Block::ReferrerSet referrers = referred_block->referrers();
 210  E :      BlockGraph::Block::ReferrerSet::iterator referrer_it = referrers.begin();
 211  E :      for (; referrer_it != referrers.end(); ++referrer_it) {
 212  E :        BlockGraph::Block* referrer = referrer_it->first;
 213    :  
 214    :        // And redirect any references that happen to match a source reference.
 215    :        BlockGraph::Block::ReferenceMap::const_iterator reference_it =
 216  E :            referrer->references().begin();
 217    :  
 218  E :        for (; reference_it != referrer->references().end(); ++reference_it) {
 219  E :          const BlockGraph::Reference& ref(reference_it->second);
 220  E :          ReferenceDest dest(std::make_pair(ref.referenced(), ref.offset()));
 221    :  
 222  E :          ReferenceMap::const_iterator it(redirects.find(dest));
 223  E :          if (it != redirects.end()) {
 224    :            BlockGraph::Reference new_reference(ref.type(),
 225    :                                                ref.size(),
 226    :                                                it->second.first,
 227    :                                                it->second.second,
 228  E :                                                0);
 229    :  
 230  E :            referrer->SetReference(reference_it->first, new_reference);
 231    :          }
 232  E :        }
 233  E :      }
 234  E :    }
 235  E :  }
 236    :  
 237    :  }  // namespace
 238    :  
 239    :  const char AsanBasicBlockTransform::kTransformName[] =
 240    :      "SyzyAsanBasicBlockTransform";
 241    :  
 242  E :  bool AsanBasicBlockTransform::InstrumentBasicBlock(BasicBlock* basic_block) {
 243  E :    DCHECK(basic_block != NULL);
 244    :    BasicBlock::Instructions::iterator iter_inst =
 245  E :        basic_block->instructions().begin();
 246    :  
 247    :    // Process each instruction and inject a call to Asan when we find an
 248    :    // instrumentable memory access.
 249  E :    for (; iter_inst != basic_block->instructions().end(); ++iter_inst) {
 250  E :      Operand operand(core::eax);
 251  E :      const Instruction& instr = *iter_inst;
 252  E :      const _DInst& repr = instr.representation();
 253    :  
 254  E :      MemoryAccessMode access_mode = DecodeMemoryAccess(instr, &operand);
 255    :  
 256    :      // Bail if this is not a memory access.
 257  E :      if (access_mode == kNoAccess)
 258  E :        continue;
 259    :  
 260    :      // A basic block reference means that this can be either a computed jump,
 261    :      // or a load from a case table. In either case it doesn't make sense
 262    :      // to instrument the access.
 263    :      if (operand.displacement().reference().referred_type() ==
 264  E :          BasicBlockReference::REFERRED_TYPE_BASIC_BLOCK) {
 265  E :        continue;
 266    :      }
 267    :  
 268    :      // A block reference means this instruction is reading or writing to
 269    :      // a global variable or some such. It's viable to pad and align global
 270    :      // variables and to red-zone the padding, but without that, there's nothing
 271    :      // to gain by instrumenting these accesses.
 272    :      if (operand.displacement().reference().referred_type() ==
 273  E :          BasicBlockReference::REFERRED_TYPE_BLOCK) {
 274  E :        continue;
 275    :      }
 276    :  
 277    :      // Is this an instruction we should be instrumenting.
 278  E :      if (!ShouldInstrumentOpcode(repr.opcode))
 279  E :        continue;
 280    :  
 281    :      // No point in instrumenting ESP-relative accesses.
 282  E :      if (operand.base() == core::kRegisterEsp)
 283  E :        continue;
 284    :  
 285    :      // We can't deal with repeated (string) instructions.
 286  E :      if (FLAG_GET_PREFIX(repr.flags) & (FLAG_REPNZ | FLAG_REP))
 287  E :        continue;
 288    :  
 289  E :      BasicBlockAssembler bb_asm(iter_inst, &basic_block->instructions());
 290  E :      Instruction::Representation inst = iter_inst->representation();
 291  E :      InjectAsanHook(&bb_asm, operand, hook_access_);
 292  E :    }
 293  E :    return true;
 294  E :  }
 295    :  
 296    :  bool AsanBasicBlockTransform::TransformBasicBlockSubGraph(
 297  E :      BlockGraph* block_graph, BasicBlockSubGraph* subgraph) {
 298  E :    DCHECK(block_graph != NULL);
 299  E :    DCHECK(subgraph != NULL);
 300    :  
 301    :    // Iterates through each basic block and instruments it.
 302    :    BasicBlockSubGraph::BBCollection::iterator it =
 303  E :        subgraph->basic_blocks().begin();
 304  E :    for (; it != subgraph->basic_blocks().end(); ++it) {
 305  E :      if (!InstrumentBasicBlock(&it->second))
 306  i :        return false;
 307  E :    }
 308  E :    return true;
 309  E :  }
 310    :  
 311    :  const char AsanTransform::kTransformName[] =
 312    :      "SyzyAsanTransform";
 313    :  
 314    :  const char AsanTransform::kCheckAccessName[] =
 315    :      "asan_check_access";
 316    :  
 317    :  const char AsanTransform::kSyzyAsanDll[] = "asan_rtl.dll";
 318    :  
 319  E :  AsanTransform::AsanTransform() : asan_dll_name_(kSyzyAsanDll) {
 320  E :  }
 321    :  
 322    :  bool AsanTransform::PreBlockGraphIteration(BlockGraph* block_graph,
 323  E :                                             BlockGraph::Block* header_block) {
 324    :    // Add an import entry for the ASAN runtime.
 325  E :    AddImportsTransform::ImportedModule import_module(asan_dll_name_.c_str());
 326    :  
 327    :    // Add the probe function import.
 328    :    size_t asan_hook_check_access_index =
 329  E :        import_module.AddSymbol(kCheckAccessName);
 330    :  
 331  E :    AddImportsTransform add_imports_transform;
 332  E :    add_imports_transform.AddModule(&import_module);
 333    :  
 334  E :    if (!add_imports_transform.TransformBlockGraph(block_graph, header_block)) {
 335  i :      LOG(ERROR) << "Unable to add imports for Asan instrumentation DLL.";
 336  i :      return false;
 337    :    }
 338    :  
 339    :    if (!import_module.GetSymbolReference(asan_hook_check_access_index ,
 340  E :                                          &hook_asan_check_access_)) {
 341  i :      LOG(ERROR) << "Unable to get import reference for Asan.";
 342  i :      return false;
 343    :    }
 344    :  
 345  E :    return true;
 346  E :  }
 347    :  
 348    :  bool AsanTransform::OnBlock(BlockGraph* block_graph,
 349  E :                              BlockGraph::Block* block) {
 350  E :    DCHECK(block_graph != NULL);
 351  E :    DCHECK(block != NULL);
 352  E :    if (block->type() != BlockGraph::CODE_BLOCK)
 353  E :      return true;
 354    :  
 355  E :    if (!pe::CodeBlockIsBasicBlockDecomposable(block))
 356  E :      return true;
 357    :  
 358  E :    AsanBasicBlockTransform transform(&hook_asan_check_access_);
 359  E :    if (!ApplyBasicBlockSubGraphTransform(&transform, block_graph, block, NULL))
 360  i :      return false;
 361    :  
 362  E :    return true;
 363  E :  }
 364    :  
 365    :  bool AsanTransform::PostBlockGraphIteration(BlockGraph* block_graph,
 366  E :                                              BlockGraph::Block* header_block) {
 367    :    // This function redirects the heap-related kernel32 imports to point to
 368    :    // a set of "override" imports in the ASAN runtime.
 369    :  
 370    :    // Import entries for the ASAN runtime and kernel32.
 371  E :    AddImportsTransform::ImportedModule module_kernel32("kernel32.dll");
 372  E :    AddImportsTransform::ImportedModule module_asan(asan_dll_name_.c_str());
 373    :  
 374    :    struct Kernel32ImportRedirect {
 375    :      const char* import_name;
 376    :      const char* redirect_name;
 377    :    };
 378    :    static const Kernel32ImportRedirect kKernel32Redirects[] = {
 379    :      { "HeapCreate", "asan_HeapCreate" },
 380    :      { "HeapDestroy", "asan_HeapDestroy" },
 381    :      { "HeapAlloc", "asan_HeapAlloc" },
 382    :      { "HeapReAlloc", "asan_HeapReAlloc" },
 383    :      { "HeapFree", "asan_HeapFree" },
 384    :      { "HeapSize", "asan_HeapSize" },
 385    :      { "HeapValidate", "asan_HeapValidate" },
 386    :      { "HeapCompact", "asan_HeapCompact" },
 387    :      { "HeapLock", "asan_HeapLock" },
 388    :      { "HeapUnlock", "asan_HeapUnlock" },
 389    :      { "HeapWalk", "asan_HeapWalk" },
 390    :      { "HeapSetInformation", "asan_HeapSetInformation" },
 391    :      { "HeapQueryInformation", "asan_HeapQueryInformation" },
 392    :    };
 393    :  
 394    :    // Add imports for the overrides to the respective modules.
 395    :    // HACK ALERT: This uses the AddImportsTransform to:
 396    :    // 1. Find existing imports we want to redirect. This has the unfortunate
 397    :    //    side effect of adding all of the imports we query for.
 398    :    // 2. Create imports for the redirects, which will create imports for
 399    :    //    all of the redirects, irrespective of whether we have anything to
 400    :    //    redirect them to.
 401    :    // TODO(siggi): Clean this up by factoring import discovery/probing out of the
 402    :    //     AddImports transform, and perhaps write yet another transform to remove
 403    :    //     unused imports.
 404  E :    std::vector<std::pair<size_t, size_t>> override_indexes;
 405  E :    for (size_t i = 0; i < arraysize(kKernel32Redirects); ++i) {
 406    :      size_t kernel32_index =
 407  E :          module_kernel32.AddSymbol(kKernel32Redirects[i].import_name);
 408    :      size_t asan_index =
 409  E :          module_asan.AddSymbol(kKernel32Redirects[i].redirect_name);
 410    :  
 411  E :      override_indexes.push_back(std::make_pair(kernel32_index, asan_index));
 412  E :    }
 413    :  
 414  E :    AddImportsTransform add_imports_transform;
 415  E :    add_imports_transform.AddModule(&module_asan);
 416  E :    add_imports_transform.AddModule(&module_kernel32);
 417  E :    if (!add_imports_transform.TransformBlockGraph(block_graph, header_block)) {
 418  i :      LOG(ERROR) << "Unable to add imports for import redirection.";
 419  i :      return false;
 420    :    }
 421    :  
 422    :    // Keeps track of all the blocks referenced by the original references.
 423  E :    BlockSet dst_blocks;
 424    :    // Stores the reference mapping we want to rewrite.
 425  E :    ReferenceMap reference_redirect_map;
 426    :  
 427  E :    for (size_t i = 0; i < override_indexes.size(); ++i) {
 428  E :      BlockGraph::Reference src;
 429  E :      BlockGraph::Reference dst;
 430    :      if (!module_kernel32.GetSymbolReference(override_indexes[i].first, &src) ||
 431  E :          !module_asan.GetSymbolReference(override_indexes[i].second, &dst)) {
 432  i :         NOTREACHED() << "Unable to get references after a successful transform.";
 433  i :        return false;
 434    :      }
 435    :  
 436    :      // Add the destination block to the set of referred blocks.
 437  E :      dst_blocks.insert(src.referenced());
 438    :      reference_redirect_map.insert(
 439    :          std::make_pair(ReferenceDest(src.referenced(), src.offset()),
 440  E :                         ReferenceDest(dst.referenced(), dst.offset())));
 441  E :    }
 442    :  
 443  E :    RedirectReferences(dst_blocks, reference_redirect_map);
 444    :  
 445  E :    return true;
 446  E :  }
 447    :  
 448    :  }  // namespace transforms
 449    :  }  // namespace instrument

Coverage information generated Thu Sep 06 11:30:46 2012.