Coverage for /Syzygy/instrument/transforms/entry_thunk_transform.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
84.0% | 131 / 156 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/instrument/transforms/entry_thunk_transform.h"
  16    :  
  17    :  #include "base/logging.h"
  18    :  #include "base/stringprintf.h"
  19    :  #include "syzygy/block_graph/typed_block.h"
  20    :  #include "syzygy/pe/pe_utils.h"
  21    :  #include "syzygy/pe/transforms/add_imports_transform.h"
  22    :  
  23    :  namespace instrument {
  24    :  namespace transforms {
  25    :  
  26    :  namespace {
  27    :  
  28    :  using block_graph::BlockGraph;
  29    :  using block_graph::TypedBlock;
  30    :  
  31    :  // We add this suffix to the destination block's name to name its thunk.
  32    :  const char kThunkSuffix[] = "_thunk";
  33    :  
  34    :  bool IsUnsafeReference(const BlockGraph::Block* referrer,
  35  E :                         const BlockGraph::Reference& ref) {
  36    :    // Skip references with a non-zero offset if we're
  37    :    // not instrumenting unsafe references.
  38  E :    if (ref.offset() != 0)
  39  E :      return true;
  40    :  
  41    :    BlockGraph::BlockAttributes kUnsafeAttribs =
  42    :        BlockGraph::HAS_INLINE_ASSEMBLY |
  43  E :        BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER;
  44    :  
  45  E :    bool unsafe_referrer = false;
  46    :    if (referrer->type() == BlockGraph::CODE_BLOCK &&
  47  E :        (referrer->attributes() & kUnsafeAttribs) != 0) {
  48  E :      unsafe_referrer = true;
  49    :    }
  50    :  
  51  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, ref.referenced()->type());
  52  E :    bool unsafe_block = (ref.referenced()->attributes() & kUnsafeAttribs) != 0;
  53    :  
  54    :    // If both the referrer and the referenced blocks are unsafe, we can't
  55    :    // safely assume that this reference has call semantics, i.e. that a
  56    :    // return address is at the top of the stack at entry.
  57    :    // Ideally we'd decide this on the basis of a full stack analysis, but
  58    :    // beggars can't be choosers, plus for hand-coded assembly that's
  59    :    // the halting problem :).
  60    :    // For instrumentation that uses return address swizzling, instrumenting
  61    :    // an unsafe reference leads to crashes, so better to back off and get
  62    :    // slightly less coverage.
  63  E :    return unsafe_referrer && unsafe_block;
  64  E :  }
  65    :  
  66    :  }  // namespace
  67    :  
  68    :  using pe::transforms::AddImportsTransform;
  69    :  
  70    :  const char EntryThunkTransform::kTransformName[] =
  71    :      "EntryThunkTransform";
  72    :  
  73    :  const char EntryThunkTransform::kEntryHookName[] = "_indirect_penter";
  74    :  const char EntryThunkTransform::kDllMainEntryHookName[] =
  75    :      "_indirect_penter_dllmain";
  76    :  const char EntryThunkTransform::kExeEntryHookName[] =
  77    :      "_indirect_penter_exe_entry";
  78    :  const char EntryThunkTransform::kDefaultInstrumentDll[] =
  79    :      "call_trace_client.dll";
  80    :  
  81    :  // We push the absolute address of the function to be called on the
  82    :  // stack, and then we invoke the instrumentation function indirectly
  83    :  // through the import table.
  84    :  // 6844332211    push  offset (11223344)
  85    :  // FF2588776655  jmp   dword ptr [(55667788)]
  86    :  const EntryThunkTransform::Thunk EntryThunkTransform::kThunkTemplate = {
  87    :      0x68, NULL, // push immediate
  88    :      0x25FF, NULL  // jmp DWORD PTR[immediate]
  89    :    };
  90    :  
  91    :  EntryThunkTransform::EntryThunkTransform()
  92    :      : thunk_section_(NULL),
  93    :        instrument_unsafe_references_(true),
  94    :        src_ranges_for_thunks_(false),
  95    :        only_instrument_module_entry_(false),
  96  E :        instrument_dll_name_(kDefaultInstrumentDll) {
  97  E :  }
  98    :  
  99    :  bool EntryThunkTransform::PreBlockGraphIteration(
 100  E :      BlockGraph* block_graph, BlockGraph::Block* header_block) {
 101  E :    DCHECK(thunk_section_ == NULL);
 102    :  
 103  E :    if (!GetEntryPoints(header_block))
 104  i :      return false;
 105    :  
 106    :    AddImportsTransform::ImportedModule import_module(
 107  E :        instrument_dll_name_.c_str());
 108    :  
 109    :    // We import the minimal set of symbols necessary, depending on the types of
 110    :    // entry points we find in the module. We maintain a list of symbol indices/
 111    :    // reference pointers, which will be traversed after the import to populate
 112    :    // the references.
 113    :    typedef std::pair<size_t, BlockGraph::Reference*> ImportHook;
 114  E :    std::vector<ImportHook> import_hooks;
 115    :  
 116    :    // If there are any DllMain-like entry points (TLS initializers or DllMain
 117    :    // itself) then we need the DllMain entry hook.
 118  E :    if (dllmain_entrypoints_.size() > 0) {
 119    :      import_hooks.push_back(std::make_pair(
 120    :          import_module.AddSymbol(kDllMainEntryHookName),
 121  E :          &hook_dllmain_ref_));
 122    :    }
 123    :  
 124    :    // If this was an EXE then we need the EXE entry hook.
 125  E :    if (exe_entry_point_.first != NULL) {
 126    :      import_hooks.push_back(std::make_pair(
 127    :          import_module.AddSymbol(kExeEntryHookName),
 128  E :          &hook_exe_entry_ref_));
 129    :    }
 130    :  
 131    :    // If we're not only instrumenting module entry then we need the function
 132    :    // entry hook.
 133  E :    if (!only_instrument_module_entry_) {
 134    :      import_hooks.push_back(std::make_pair(
 135    :          import_module.AddSymbol(kEntryHookName),
 136  E :          &hook_ref_));
 137    :    }
 138    :  
 139    :    // Nothing to do if we don't need any import hooks.
 140  E :    if (import_hooks.empty())
 141  E :      return true;
 142    :  
 143    :    // Run the transform.
 144  E :    AddImportsTransform add_imports_transform;
 145  E :    add_imports_transform.AddModule(&import_module);
 146  E :    if (!add_imports_transform.TransformBlockGraph(block_graph, header_block)) {
 147  i :      LOG(ERROR) << "Unable to add imports for instrumentation DLL.";
 148  i :      return false;
 149    :    }
 150    :  
 151    :    // Get references to each of the imported symbols.
 152  E :    for (size_t i = 0; i < import_hooks.size(); ++i) {
 153    :      if (!import_module.GetSymbolReference(import_hooks[i].first,
 154  E :                                            import_hooks[i].second)) {
 155  i :        LOG(ERROR) << "Unable to get reference to import.";
 156  i :        return false;
 157    :      }
 158  E :    }
 159    :  
 160    :    // Find or create the section we put our thunks in.
 161    :    thunk_section_ = block_graph->FindOrAddSection(".thunks",
 162  E :                                                   pe::kCodeCharacteristics);
 163  E :    DCHECK(thunk_section_ != NULL);
 164    :  
 165  E :    return true;
 166  E :  }
 167    :  
 168    :  bool EntryThunkTransform::OnBlock(BlockGraph* block_graph,
 169  E :                                    BlockGraph::Block* block) {
 170  E :    DCHECK(block != NULL);
 171    :  
 172  E :    if (block->type() != BlockGraph::CODE_BLOCK)
 173  E :      return true;
 174    :  
 175  E :    return InstrumentCodeBlock(block_graph, block);
 176  E :  }
 177    :  
 178    :  bool EntryThunkTransform::InstrumentCodeBlock(
 179  E :      BlockGraph* block_graph, BlockGraph::Block* block) {
 180  E :    DCHECK(block_graph != NULL);
 181  E :    DCHECK(block != NULL);
 182    :  
 183    :    // The thunk block map for this block. The key is the offset within the
 184    :    // callee block and the value is the thunk block that forwards to the callee
 185    :    // at that offset.
 186  E :    ThunkBlockMap thunk_block_map;
 187    :  
 188    :    // Iterate through all the block's referrers, creating thunks as we go.
 189    :    // We copy the referrer set for simplicity, as it's potentially mutated
 190    :    // in the loop.
 191  E :    BlockGraph::Block::ReferrerSet referrers = block->referrers();
 192  E :    BlockGraph::Block::ReferrerSet::const_iterator referrer_it(referrers.begin());
 193  E :    for (; referrer_it != referrers.end(); ++referrer_it) {
 194  E :      const BlockGraph::Block::Referrer& referrer = *referrer_it;
 195    :      if (!InstrumentCodeBlockReferrer(
 196  E :          referrer, block_graph, block, &thunk_block_map)) {
 197  i :        return false;
 198    :      }
 199  E :    }
 200    :  
 201  E :    return true;
 202  E :  }
 203    :  
 204    :  bool EntryThunkTransform::InstrumentCodeBlockReferrer(
 205    :      const BlockGraph::Block::Referrer& referrer,
 206    :      BlockGraph* block_graph,
 207    :      BlockGraph::Block* block,
 208  E :      ThunkBlockMap* thunk_block_map) {
 209  E :    DCHECK(block_graph != NULL);
 210  E :    DCHECK(block != NULL);
 211  E :    DCHECK(thunk_block_map != NULL);
 212    :  
 213    :    // Get the reference.
 214  E :    BlockGraph::Reference ref;
 215  E :    if (!referrer.first->GetReference(referrer.second, &ref)) {
 216  i :      LOG(ERROR) << "Unable to get reference from referrer.";
 217  i :      return false;
 218    :    }
 219    :  
 220    :    // Skip self-references, except long references to the start of the block.
 221    :    // TODO(siggi): This needs refining, as it may currently miss important
 222    :    //     cases. Notably if a block contains more than one function, and the
 223    :    //     functions are mutually recursive, we'll only record the original
 224    :    //     entry to the block, but will miss the internal recursion.
 225    :    //     As-is, this does work for the common case where a block contains
 226    :    //     one self-recursive function, however.
 227  E :    if (referrer.first == block) {
 228    :      // Skip short references.
 229  E :      if (ref.size() < sizeof(core::AbsoluteAddress))
 230  i :        return true;
 231    :  
 232    :      // Skip interior references. The rationale is that these references
 233    :      // tend to be switch tables, and we don't need the overhead of
 234    :      // instrumenting and recording all switch statement executions
 235    :      // for now.
 236  E :      if (ref.offset() != 0)
 237  i :        return true;
 238    :    }
 239    :  
 240    :    // See whether this is one of the DLL entrypoints.
 241  E :    pe::EntryPoint entry(ref.referenced(), ref.offset());
 242    :    pe::EntryPointSet::const_iterator entry_it(dllmain_entrypoints_.find(
 243  E :        entry));
 244  E :    bool is_dllmain_entry = entry_it != dllmain_entrypoints_.end();
 245    :  
 246    :    // Determine if this is an EXE entry point.
 247  E :    bool is_exe_entry = entry == exe_entry_point_;
 248    :  
 249    :    // It can't be both an EXE and a DLL entry.
 250  E :    DCHECK(!is_dllmain_entry || !is_exe_entry);
 251    :  
 252    :    // If we're only instrumenting entry points and this isn't one, then skip it.
 253  E :    if (only_instrument_module_entry_ && !is_dllmain_entry && !is_exe_entry)
 254  E :      return true;
 255    :  
 256    :    if (!instrument_unsafe_references_ &&
 257  E :        IsUnsafeReference(referrer.first, ref)) {
 258  E :      LOG(INFO) << "Skipping reference between unsafe block pair '"
 259    :                << referrer.first->name() << "' and '"
 260    :                << block->name() << "'";
 261  E :      return true;
 262    :    }
 263    :  
 264    :    // Determine which hook function to use.
 265  E :    BlockGraph::Reference* hook_ref = &hook_ref_;
 266  E :    if (is_dllmain_entry)
 267  E :      hook_ref = &hook_dllmain_ref_;
 268  E :    else if (is_exe_entry)
 269  E :      hook_ref = &hook_exe_entry_ref_;
 270  E :    DCHECK(hook_ref->referenced() != NULL);
 271    :  
 272    :    // Look for the reference in the thunk block map, and only create a new one
 273    :    // if it does not already exist.
 274  E :    BlockGraph::Block* thunk_block = NULL;
 275  E :    ThunkBlockMap::const_iterator thunk_it = thunk_block_map->find(ref.offset());
 276  E :    if (thunk_it == thunk_block_map->end()) {
 277  E :      thunk_block = CreateOneThunk(block_graph, ref, *hook_ref);
 278  E :      if (thunk_block == NULL) {
 279  i :        LOG(ERROR) << "Unable to create thunk block.";
 280  i :        return false;
 281    :      }
 282  E :      (*thunk_block_map)[ref.offset()] = thunk_block;
 283  E :    } else {
 284  E :      thunk_block = thunk_it->second;
 285    :    }
 286  E :    DCHECK(thunk_block != NULL);
 287    :  
 288    :    // Update the referrer to point to the thunk.
 289    :    BlockGraph::Reference new_ref(ref.type(),
 290    :                                  ref.size(),
 291    :                                  thunk_block,
 292  E :                                  0, 0);
 293  E :    referrer.first->SetReference(referrer.second, new_ref);
 294    :  
 295  E :    return true;
 296  E :  }
 297    :  
 298    :  BlockGraph::Block* EntryThunkTransform::CreateOneThunk(
 299    :      BlockGraph* block_graph,
 300    :      const BlockGraph::Reference& destination,
 301  E :      const BlockGraph::Reference& hook) {
 302  E :    std::string name;
 303  E :    if (destination.offset() == 0) {
 304    :      name = base::StringPrintf("%s%s",
 305    :                                destination.referenced()->name().c_str(),
 306  E :                                kThunkSuffix);
 307  E :    } else {
 308    :      name = base::StringPrintf("%s%s+%d",
 309    :                                destination.referenced()->name().c_str(),
 310    :                                kThunkSuffix,
 311  E :                                destination.offset());
 312    :    }
 313    :  
 314    :    // Create and initialize the new thunk.
 315    :    BlockGraph::Block* thunk = block_graph->AddBlock(BlockGraph::CODE_BLOCK,
 316    :                                                     sizeof(kThunkTemplate),
 317  E :                                                     name.c_str());
 318  E :    if (thunk == NULL)
 319  i :      return NULL;
 320    :  
 321  E :    thunk->set_section(thunk_section_->id());
 322    :    thunk->SetData(reinterpret_cast<const uint8*>(&kThunkTemplate),
 323  E :                   sizeof(kThunkTemplate));
 324    :  
 325  E :    if (src_ranges_for_thunks_) {
 326    :      // Give the thunk a source range synonymous with the destination.
 327    :      // That way the debugger will resolve calls and jumps to the thunk to the
 328    :      // destination function's name, which makes the assembly much easier to
 329    :      // read. The downside to this is that the symbols are now no longer unique,
 330    :      // and searching for a function by name may turn up either the function or
 331    :      // the thunk.
 332    :      const BlockGraph::Block::SourceRanges& source_ranges =
 333  E :          destination.referenced()->source_ranges();
 334    :      const BlockGraph::Block::SourceRanges::RangePair* source =
 335  E :          source_ranges.FindRangePair(destination.offset(), thunk->size());
 336  E :      if (source != NULL) {
 337    :        // Calculate the offset into the range.
 338  E :        size_t offs = destination.offset() - source->first.start();
 339  E :        BlockGraph::Block::DataRange data(0, thunk->size());
 340    :        BlockGraph::Block::SourceRange src(source->second.start() + offs,
 341  E :                                            thunk->size());
 342  E :        bool pushed = thunk->source_ranges().Push(data, src);
 343  E :        DCHECK(pushed);
 344    :      }
 345    :    }
 346    :  
 347  E :    if (!InitializeThunk(thunk, destination, hook)) {
 348  i :      bool removed = block_graph->RemoveBlock(thunk);
 349  i :      DCHECK(removed);
 350    :  
 351  i :      thunk = NULL;
 352    :    }
 353    :  
 354  E :    return thunk;
 355  E :  }
 356    :  
 357    :  bool EntryThunkTransform::InitializeThunk(
 358    :      BlockGraph::Block* thunk_block,
 359    :      const BlockGraph::Reference& destination,
 360  E :      const BlockGraph::Reference& import_entry) {
 361  E :    TypedBlock<Thunk> thunk;
 362  E :    if (!thunk.Init(0, thunk_block))
 363  i :      return false;
 364    :  
 365    :    if (!thunk.SetReference(BlockGraph::ABSOLUTE_REF,
 366    :                            thunk->func_addr,
 367    :                            destination.referenced(),
 368    :                            destination.offset(),
 369  E :                            destination.offset())) {
 370  i :      return false;
 371    :    }
 372    :  
 373    :    if (!thunk.SetReference(BlockGraph::ABSOLUTE_REF,
 374    :                            thunk->hook_addr,
 375    :                            import_entry.referenced(),
 376    :                            import_entry.offset(),
 377  E :                            import_entry.offset())) {
 378  i :      return false;
 379    :    }
 380    :  
 381  E :    return true;
 382  E :  }
 383    :  
 384  E :  bool EntryThunkTransform::GetEntryPoints(BlockGraph::Block* header_block) {
 385    :    // Get the TLS initializer entry-points. These have the same signature and
 386    :    // call patterns as DllMain.
 387  E :    if (!pe::GetTlsInitializers(header_block, &dllmain_entrypoints_)) {
 388  i :      LOG(ERROR) << "Failed to populate the TLS Initializer entry-points.";
 389  i :      return false;
 390    :    }
 391    :  
 392    :    // Get the DLL entry-point.
 393  E :    pe::EntryPoint dll_entry_point;
 394  E :    if (!pe::GetDllEntryPoint(header_block, &dll_entry_point)) {
 395  i :      LOG(ERROR) << "Failed to resolve the DLL entry-point.";
 396  i :      return false;
 397    :    }
 398    :  
 399    :    // If the image is an EXE or is a DLL that does not specify an entry-point
 400    :    // (the entry-point is optional for DLLs) then the dll_entry_point will have
 401    :    // a NULL block pointer. Otherwise, add it to the entry-point set.
 402  E :    if (dll_entry_point.first != NULL) {
 403  E :      dllmain_entrypoints_.insert(dll_entry_point);
 404  E :    } else {
 405    :      // Get the EXE entry point. We only need to bother looking if we didn't get
 406    :      // a DLL entry point, as we can't have both.
 407  E :      if (!pe::GetExeEntryPoint(header_block, &exe_entry_point_)) {
 408  i :        LOG(ERROR) << "Failed to resolve the EXE entry-point.";
 409  i :        return false;
 410    :      }
 411    :    }
 412    :  
 413  E :    return true;
 414  E :  }
 415    :  
 416    :  }  // namespace transforms
 417    :  }  // namespace instrument

Coverage information generated Thu Sep 06 11:30:46 2012.