Coverage for /Syzygy/pe/decomposer.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
78.3% | 1147 / 1464 / 0. | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/pe/decomposer.h"
  16    :  
  17    :  #include <cvconst.h>
  18    :  #include <algorithm>
  19    :  
  20    :  #include "base/bind.h"
  21    :  #include "base/file_path.h"
  22    :  #include "base/logging.h"
  23    :  #include "base/path_service.h"
  24    :  #include "base/string_util.h"
  25    :  #include "base/stringprintf.h"
  26    :  #include "base/utf_string_conversions.h"
  27    :  #include "base/memory/scoped_ptr.h"
  28    :  #include "base/win/scoped_bstr.h"
  29    :  #include "base/win/scoped_comptr.h"
  30    :  #include "sawbuck/common/com_utils.h"
  31    :  #include "sawbuck/sym_util/types.h"
  32    :  #include "syzygy/block_graph/block_util.h"
  33    :  #include "syzygy/block_graph/typed_block.h"
  34    :  #include "syzygy/core/disassembler_util.h"
  35    :  #include "syzygy/core/zstream.h"
  36    :  #include "syzygy/pdb/omap.h"
  37    :  #include "syzygy/pdb/pdb_byte_stream.h"
  38    :  #include "syzygy/pdb/pdb_util.h"
  39    :  #include "syzygy/pe/dia_util.h"
  40    :  #include "syzygy/pe/find.h"
  41    :  #include "syzygy/pe/metadata.h"
  42    :  #include "syzygy/pe/pdb_info.h"
  43    :  #include "syzygy/pe/pe_file_parser.h"
  44    :  #include "syzygy/pe/pe_utils.h"
  45    :  #include "syzygy/pe/serialization.h"
  46    :  
  47    :  namespace pe {
  48    :  namespace {
  49    :  
  50    :  using base::win::ScopedBstr;
  51    :  using base::win::ScopedComPtr;
  52    :  using block_graph::BlockGraph;
  53    :  using block_graph::ConstTypedBlock;
  54    :  using builder::Opt;
  55    :  using builder::Seq;
  56    :  using builder::Star;
  57    :  using core::AbsoluteAddress;
  58    :  using core::Disassembler;
  59    :  using core::RelativeAddress;
  60    :  
  61    :  typedef Disassembler::CallbackDirective CallbackDirective;
  62    :  
  63    :  const size_t kPointerSize = sizeof(AbsoluteAddress);
  64    :  
  65    :  // Maps a PdbFixup::Type onto the equivalent BlockGraph::ReferenceType.
  66    :  BlockGraph::ReferenceType PdbFixupTypeToReferenceType(
  67  E :      pdb::PdbFixup::Type type) {
  68  E :    switch (type) {
  69    :      case pdb::PdbFixup::TYPE_ABSOLUTE:
  70  E :        return BlockGraph::ABSOLUTE_REF;
  71    :  
  72    :      case pdb::PdbFixup::TYPE_RELATIVE:
  73  E :        return BlockGraph::RELATIVE_REF;
  74    :  
  75    :      case pdb::PdbFixup::TYPE_PC_RELATIVE:
  76  E :        return BlockGraph::PC_RELATIVE_REF;
  77    :  
  78    :      default:
  79  i :        NOTREACHED() << "Invalid PdbFixup::Type.";
  80    :        // An arbitrary value, returned only to satisfy the signature.
  81  i :        return BlockGraph::ABSOLUTE_REF;
  82    :    }
  83  E :  }
  84    :  
  85    :  // Inserts an intermediate reference keyed by |src_addr| into |references|. If
  86    :  // an entry already exists there it must match exactly, else false is returned.
  87    :  bool AddReference(RelativeAddress src_addr,
  88    :                    BlockGraph::ReferenceType type,
  89    :                    BlockGraph::Size size,
  90    :                    RelativeAddress dst_base,
  91    :                    BlockGraph::Offset dst_offset,
  92  E :                    Decomposer::IntermediateReferenceMap* references) {
  93  E :    DCHECK(references != NULL);
  94    :  
  95    :    // If an entry with the same source address is already present, the new
  96    :    // reference must agree with it in type, size, base and offset.
  97    :    Decomposer::IntermediateReferenceMap::iterator it =
  98  E :        references->lower_bound(src_addr);
  99  E :    if (it != references->end() && it->first == src_addr) {
 100    :      if (type != it->second.type || size != it->second.size ||
 101  E :          dst_base != it->second.base || dst_offset != it->second.offset) {
 102  i :        LOG(ERROR) << "Trying to insert inconsistent and colliding intermediate "
 103    :                      "references.";
 104  i :        return false;
 105    :      }
 106    :    }
 107    :  
 108  E :    Decomposer::IntermediateReference ref = { type,
 109  E :                                              size,
 110  E :                                              dst_base,
 111  E :                                              dst_offset };
 112    :  
 113    :    // The iterator from lower_bound above doubles as the insertion hint, so
 114    :    // the lookup cost is not paid a second time.
 115  E :    references->insert(it, std::make_pair(src_addr, ref));
 116  E :    return true;
 117  E :  }
 118    :  
 119    :  // Checks that a reference's type matches the fixup at |fixup_it| and that its
 120    :  // size equals kPointerSize. On success, flags the fixup as visited.
 121    :  bool ValidateReference(RelativeAddress src_addr,
 122    :                         BlockGraph::ReferenceType type,
 123    :                         BlockGraph::Size size,
 124  E :                         Decomposer::FixupMap::iterator fixup_it) {
 125  E :    if (type != fixup_it->second.type || size != kPointerSize) {
 126  i :      LOG(ERROR) << "Reference at " << src_addr
 127    :                 << " not consistent with corresponding fixup.";
 128  i :      return false;
 129    :    }
 130    :  
 131    :    // Record that this fixup has been matched by a reference.
 132  E :    fixup_it->second.visited = true;
 133    :  
 134  E :    return true;
 135  E :  }
 136    :  // Policy for handling a fixup found at the source address of a reference.
 137    :  enum ValidateOrAddReferenceMode {
 138    :    // Look for an existing fixup. If we find one, validate against it. In
 139    :    // either case, create a new intermediate reference if validation passes.
 140    :    FIXUP_MAY_EXIST,
 141    :    // Compare against an existing fixup, bailing if there is none. Does not
 142    :    // create a new intermediate reference.
 143    :    FIXUP_MUST_EXIST,
 144    :    // Look for an existing fixup, and fail if one exists. Otherwise, create
 145    :    // a new intermediate reference.
 146    :    FIXUP_MUST_NOT_EXIST
 147    :  };
 148    :  bool ValidateOrAddReference(ValidateOrAddReferenceMode mode,
 149    :                              RelativeAddress src_addr,
 150    :                              BlockGraph::ReferenceType type,
 151    :                              BlockGraph::Size size,
 152    :                              RelativeAddress dst_base,
 153    :                              BlockGraph::Offset dst_offset,
 154    :                              Decomposer::FixupMap* fixup_map,
 155  E :                              Decomposer::IntermediateReferenceMap* references) {
 156  E :    DCHECK(fixup_map != NULL);
 157  E :    DCHECK(references != NULL);
 158    :  
 159  E :    Decomposer::FixupMap::iterator it = fixup_map->find(src_addr);
 160    :  
 161  E :    switch (mode) {
 162    :      case FIXUP_MAY_EXIST: {
 163    :        if (it != fixup_map->end() &&
 164  E :            !ValidateReference(src_addr, type, size, it))
 165  i :          return false;
 166    :        return AddReference(src_addr, type, size, dst_base, dst_offset,
 167  E :                            references);
 168    :      }
 169    :  
 170    :      case FIXUP_MUST_EXIST: {
 171  E :        if (it == fixup_map->end()) {
 172  i :          LOG(ERROR) << "Reference at " << src_addr << " has no matching fixup.";
 173  i :          return false;
 174    :        }
 175  E :        if (!ValidateReference(src_addr, type, size, it))
 176  i :          return false;
 177    :        // In this mode the fixup is only validated; nothing is added.
 178  E :        return true;
 179    :      }
 180    :  
 181    :      case FIXUP_MUST_NOT_EXIST: {
 182  E :        if (it != fixup_map->end()) {
 183  i :          LOG(ERROR) << "Reference at " << src_addr
 184    :                     << " collides with an existing fixup.";
 185  i :          return false;
 186    :        }
 187    :        return AddReference(src_addr, type, size, dst_base, dst_offset,
 188  E :                            references);
 189    :      }
 190    :  
 191    :      default: {
 192  i :        NOTREACHED() << "Invalid ValidateOrAddReferenceMode.";
 193  i :        return false;
 194    :      }
 195    :    }
 196  E :  }
 197    :  // Reads |symbol|'s tag into |sym_tag|. Logs and returns false on failure.
 198  E :  bool GetSymTag(IDiaSymbol* symbol, DWORD* sym_tag) {
 199  E :    DCHECK(sym_tag != NULL);
 200  E :    *sym_tag = SymTagNull;
 201  E :    HRESULT hr = symbol->get_symTag(sym_tag);
 202  E :    if (hr != S_OK) {
 203  i :      LOG(ERROR) << "Error getting sym tag: " << com::LogHr(hr) << ".";
 204  i :      return false;
 205    :    }
 206  E :    return true;
 207  E :  }
 208    :  // Stores the length of |symbol|'s type in |length| (0 if it has no type).
 209  E :  bool GetTypeInfo(IDiaSymbol* symbol, size_t* length) {
 210  E :    DCHECK(symbol != NULL);
 211  E :    DCHECK(length != NULL);
 212    :  
 213  E :    *length = 0;
 214  E :    ScopedComPtr<IDiaSymbol> type;
 215  E :    HRESULT hr = symbol->get_type(type.Receive());
 216    :    // S_FALSE means the symbol has no type information; this is not an error.
 217  E :    if (hr == S_FALSE)
 218  E :      return true;
 219  E :    if (hr != S_OK) {
 220  i :      LOG(ERROR) << "Failed to get type symbol: " << com::LogHr(hr) << ".";
 221  i :      return false;
 222    :    }
 223    :  
 224  E :    ULONGLONG ull_length = 0;
 225  E :    hr = type->get_length(&ull_length);
 226  E :    if (hr != S_OK) {
 227  i :      LOG(ERROR) << "Failed to retrieve type length properties: "
 228    :                 << com::LogHr(hr) << ".";
 229  i :      return false;
 230    :    }
 231  E :    *length = ull_length;
 232    :  
 233  E :    return true;
 234  E :  }
 235    :  // Coarse classification of an image section, as computed by GetSectionType.
 236    :  enum SectionType {
 237    :    kSectionCode,
 238    :    kSectionData,
 239    :    kSectionUnknown
 240    :  };
 241    :  // Classifies a section from its header's Characteristics; code flag wins.
 242  E :  SectionType GetSectionType(const IMAGE_SECTION_HEADER* header) {
 243  E :    DCHECK(header != NULL);
 244  E :    if ((header->Characteristics & IMAGE_SCN_CNT_CODE) != 0)
 245  E :      return kSectionCode;
 246  E :    if ((header->Characteristics & kReadOnlyDataCharacteristics) != 0)
 247  E :      return kSectionData;
 248  i :    return kSectionUnknown;
 249  E :  }
 250    :  // Returns true iff |symbol|'s tag can be read and equals |expected_sym_tag|.
 251  E :  bool IsSymTag(IDiaSymbol* symbol, DWORD expected_sym_tag) {
 252  E :    DWORD sym_tag = SymTagNull;
 253  E :    if (!GetSymTag(symbol, &sym_tag))
 254  i :      return false;
 255    :  
 256  E :    return sym_tag == expected_sym_tag;
 257  E :  }
 258    :  // Returns the largest power of two, capped at 16, that divides |address|.
 259  E :  size_t GuessAddressAlignment(RelativeAddress address) {
 260    :    // Count the trailing zeros in the original address. We only care
 261    :    // about alignment up to 16, so only have to check the lowest 4 bits.
 262    :    // TODO(chrisha): This can be done quite efficiently using various bit
 263    :    //     twiddling tricks, and there may very well be a library implementation
 264    :    //     of this somewhere (typically named ctz for 'count trailing zeros').
 265  E :    size_t i = address.value();
 266  E :    if ((i & ((1 << 4) - 1)) == 0)
 267  E :      return (1 << 4);  // 16.
 268    :  
 269  E :    if ((i & ((1 << 3) - 1)) == 0)
 270  E :      return (1 << 3);  // 8.
 271    :  
 272  E :    if ((i & ((1 << 2) - 1)) == 0)
 273  E :      return (1 << 2);  // 4.
 274    :  
 275  E :    if ((i & ((1 << 1) - 1)) == 0)
 276  E :      return (1 << 1);  // 2.
 277    :  
 278  E :    return 1;
 279  E :  }
 280    :  // Sets |block|'s alignment to the value guessed from its address.
 281  E :  void GuessDataBlockAlignment(BlockGraph::Block* block) {
 282  E :    DCHECK(block != NULL);
 283  E :    block->set_alignment(GuessAddressAlignment(block->addr()));
 284  E :  }
 285    :  // True for a code block with a CODE_LABEL or a data block with a DATA_LABEL.
 286    :  bool AreMatchedBlockAndLabelAttributes(
 287    :      BlockGraph::BlockType bt,
 288    :      BlockGraph::LabelAttributes la) {
 289    :    return (bt == BlockGraph::CODE_BLOCK && (la & BlockGraph::CODE_LABEL) != 0) ||
 290    :        (bt == BlockGraph::DATA_BLOCK && (la & BlockGraph::DATA_LABEL) != 0);
 291    :  }
 292    :  // Maps a DIA symbol tag onto label attributes; other tags hit NOTREACHED.
 293  E :  BlockGraph::LabelAttributes SymTagToLabelAttributes(enum SymTagEnum sym_tag) {
 294  E :    switch (sym_tag) {
 295    :      case SymTagData:
 296  E :        return BlockGraph::DATA_LABEL;
 297    :      case SymTagLabel:
 298  E :        return BlockGraph::CODE_LABEL;
 299    :      case SymTagFuncDebugStart:
 300  E :        return BlockGraph::DEBUG_START_LABEL;
 301    :      case SymTagFuncDebugEnd:
 302  E :        return BlockGraph::DEBUG_END_LABEL;
 303    :      case SymTagBlock:
 304  E :        return BlockGraph::SCOPE_START_LABEL;
 305    :  #if _MSC_VER >= 1600
 306    :      // The DIA SDK shipping with MSVS 2010 includes additional symbol types.
 307    :      case SymTagCallSite:
 308    :        return BlockGraph::CALL_SITE_LABEL;
 309    :  #endif
 310    :    }
 311    :  
 312  i :    NOTREACHED();
 313  i :    return 0;
 314  E :  }
 315    :  // Labels |block| at |addr|, merging with any label already at that offset.
 316    :  bool AddLabelToBlock(RelativeAddress addr,
 317    :                       const base::StringPiece& name,
 318    :                       BlockGraph::LabelAttributes label_attributes,
 319  E :                       BlockGraph::Block* block) {
 320  E :    DCHECK(block != NULL);
 321  E :    DCHECK_LE(block->addr(), addr);
 322  E :    DCHECK_GT(block->addr() + block->size(), addr);
 323    :  
 324  E :    BlockGraph::Offset offset = addr - block->addr();
 325    :  
 326    :    // Try to create the label.
 327  E :    if (block->SetLabel(offset, name, label_attributes)) {
 328    :      // If there was no label at offset 0, then this block has not yet been
 329    :      // renamed, and still has its section contribution as a name. Update it to
 330    :      // the first symbol we get for it. We parse symbols from most useful
 331    :      // (undecorated function names) to least useful (mangled public symbols), so
 332    :      // this ensures a block has the most useful name.
 333  E :      if (offset == 0)
 334  E :        block->set_name(name);
 335    :  
 336  E :      return true;
 337    :    }
 338    :  
 339    :    // If we get here there's an already existing label. Update it.
 340  E :    BlockGraph::Label label;
 341  E :    CHECK(block->GetLabel(offset, &label));
 342    :  
 343    :    // It is conceivable that there could be more than one scope with either the
 344    :    // same beginning or the same ending. However, this doesn't appear to happen
 345    :    // in any version of Chrome up to 20. We add this check so that we'd at least
 346    :    // be made aware of the situation. (We don't rely on these labels, so we
 347    :    // merely output a warning rather than an error.)
 348    :    {
 349    :      const BlockGraph::LabelAttributes kScopeAttributes =
 350    :          BlockGraph::SCOPE_START_LABEL |
 351  E :          BlockGraph::SCOPE_END_LABEL;
 352    :      BlockGraph::LabelAttributes scope_attributes =
 353  E :          label_attributes & kScopeAttributes;
 354  E :      if (scope_attributes != 0) {
 355  E :        if (label.has_any_attributes(scope_attributes)) {
 356  i :          LOG(WARNING) << "Detected colliding scope labels at offset "
 357    :                       << offset << " of block \"" << block->name() << "\".";
 358    :        }
 359    :      }
 360    :    }
 361    :  
 362    :    // Merge the names if this isn't a repeated name (|name| may lack a NUL).
 363  E :    std::string new_name = label.name();
 364  E :    if (new_name.find(name.data(), 0, name.size()) == new_name.npos) {
 365  E :      new_name.append(", ");
 366  E :      name.AppendToString(&new_name);
 367    :    }
 368    :  
 369    :    // Merge the attributes.
 370    :    BlockGraph::LabelAttributes new_label_attr = label.attributes() |
 371  E :        label_attributes;
 372  E :    if (!BlockGraph::Label::AreValidAttributes(new_label_attr)) {
 373    :      // It's not clear which attributes should be the winner here, so we log an
 374    :      // error.
 375  i :      LOG(ERROR) << "Trying to merge conflicting label attributes \""
 376    :                 << BlockGraph::LabelAttributesToString(label_attributes)
 377    :                 << "\" for label \"" << label.ToString() << "\" at offset "
 378    :                 << offset << " of block \"" << block->name() << "\".";
 379  i :      return false;
 380    :    }
 381    :  
 382    :    // Replace the existing label with the merged one.
 383  E :    label = BlockGraph::Label(new_name, new_label_attr);
 384  E :    CHECK(block->RemoveLabel(offset));
 385  E :    CHECK(block->SetLabel(offset, label));
 386    :  
 387  E :    return true;
 388  E :  }
 389    :  
 390    :  // The int3 opcode; the MS linker pads between code blocks with int3s.
 391    :  static const uint8 kInt3 = 0xCC;
 392    :  
 393    :  // If the given run of bytes consists of a single value repeated, returns that
 394    :  // value. Otherwise (including for an empty run), returns -1.
 395  E :  int RepeatedValue(const uint8* data, size_t size) {
 396  E :    DCHECK(data != NULL);
 397  E :    if (size == 0)
 398  E :      return -1;
 399  E :    for (size_t i = 1; i < size; ++i) {
 400  E :      if (data[i] != data[0])
 401  i :        return -1;
 402  E :    }
 403  E :    return data[0];
 404  E :  }
 405    :  // Block id of -1; presumably a sentinel for "no block" (TODO confirm usage).
 406    :  const BlockGraph::BlockId kNullBlockId(-1);
 407    :  // Fills |addresses| with the absolute address of each code label in |block|.
 408    :  void GetDisassemblyStartingPoints(
 409    :      const BlockGraph::Block* block,
 410    :      AbsoluteAddress abs_block_addr,
 411    :      const PEFile::RelocSet& reloc_set,
 412  E :      Disassembler::AddressSet* addresses) {
 413  E :    DCHECK(block != NULL);
 414  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
 415  E :    DCHECK(addresses != NULL);
 416    :  
 417  E :    addresses->clear();
 418    :  
 419    :    // Use code labels as starting points.
 420  E :    BlockGraph::Block::LabelMap::const_iterator it(block->labels().begin());
 421  E :    for (; it != block->labels().end(); ++it) {
 422  E :      BlockGraph::Offset offset = it->first;
 423  E :      DCHECK_LE(0, offset);
 424  E :      DCHECK_GT(block->size(), static_cast<size_t>(offset));
 425    :  
 426  E :      if (it->second.has_attributes(BlockGraph::CODE_LABEL)) {
 427    :        // We sometimes receive code labels that land on lookup tables; we can
 428    :        // detect these because the label will point directly to a reloc. These
 429    :        // should have already been marked as data by now. DCHECK to validate.
 430    :        // TODO(chrisha): Get rid of this DCHECK, and allow mixed CODE and DATA
 431    :        //     labels. Simply only use ones that are DATA only.
 432  E :        DCHECK_EQ(0u, reloc_set.count(block->addr() + offset));
 433    :  
 434  E :        addresses->insert(abs_block_addr + offset);
 435    :      }
 436  E :    }
 437  E :  }
 438    :  
 439    :  // Checks that every data label in the code block |block| lies in one run at
 440    :  // its tail, i.e. code first, data second; returns false otherwise. |code_size|
 441    :  // starts at the block's data size and is trimmed to the first tail data label.
 442    :  bool BlockHasExpectedCodeDataLayout(const BlockGraph::Block* block,
 443  E :                                      size_t* code_size) {
 444  E :    DCHECK(block != NULL);
 445  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
 446  E :    DCHECK(code_size != NULL);
 447    :  
 448  E :    *code_size = block->data_size();
 449    :  
 450    :    BlockGraph::Block::LabelMap::const_reverse_iterator label_it =
 451  E :        block->labels().rbegin();
 452    :    BlockGraph::Block::LabelMap::const_reverse_iterator label_end =
 453  E :        block->labels().rend();
 454    :  
 455  E :    bool seen_non_data = false;
 456    :  
 457    :    // Walk through the labels in reverse order (by decreasing offset). Trim
 458    :    // any data labels from this block's code size.
 459  E :    for (; label_it != label_end; ++label_it) {
 460  E :      if (label_it->second.has_attributes(BlockGraph::DATA_LABEL)) {
 461    :        // We've encountered data not strictly at the end of the block. This
 462    :        // violates assumptions about code generated by cl.exe.
 463  E :        if (seen_non_data)
 464  E :          return false;
 465    :  
 466    :        // Otherwise, we're still in a run of data labels at the tail of the
 467    :        // block. Keep trimming the code size.
 468  E :        size_t offset = static_cast<size_t>(label_it->first);
 469  E :        if (offset < *code_size)
 470  E :          *code_size = offset;
 471  E :      } else {
 472  E :        seen_non_data = true;
 473    :      }
 474  E :    }
 475    :  
 476  E :    return true;
 477  E :  }
 478    :  
 479    :  // Fetches the first compiland-details child of |compiland|; false if none.
 480    :  bool GetCompilandDetailsForCompiland(IDiaSymbol* compiland,
 481  E :                                       IDiaSymbol** compiland_details) {
 482  E :    DCHECK(compiland != NULL);
 483  E :    DCHECK(compiland_details != NULL);
 484  E :    DCHECK(IsSymTag(compiland, SymTagCompiland));
 485    :  
 486  E :    *compiland_details = NULL;
 487    :  
 488    :    // Enumerate the compiland's children of type SymTagCompilandDetails.
 489  E :    ScopedComPtr<IDiaEnumSymbols> enum_symbols;
 490    :    HRESULT hr = compiland->findChildren(SymTagCompilandDetails, NULL, 0,
 491  E :                                         enum_symbols.Receive());
 492  E :    DCHECK_EQ(S_OK, hr);
 493    :  
 494    :    // We expect there to be compiland details. For compilands built by
 495    :    // non-standard toolchains, there usually aren't any.
 496  E :    LONG count = 0;
 497  E :    hr = enum_symbols->get_Count(&count);
 498  E :    DCHECK_EQ(S_OK, hr);
 499  E :    if (count == 0)
 500  i :      return false;
 501    :  
 502    :    // Take the first entry; any further entries are not examined.
 503  E :    ULONG fetched = 0;
 504  E :    hr = enum_symbols->Next(1, compiland_details, &fetched);
 505  E :    DCHECK_EQ(S_OK, hr);
 506  E :    DCHECK_EQ(1u, fetched);
 507  E :    return true;
 508  E :  }
 509    :  
 510    :  // Pairs a compiler name, as matched against DIA, with its support status.
 511    :  struct KnownCompilerInfo {
 512    :    const wchar_t* compiler_name;
 513    :    bool supported;
 514    :  };
 515    :  
 516    :  // A list of known compilers, and their status as being supported or not.
 517    :  const KnownCompilerInfo kKnownCompilerInfos[] = {
 518    :    { L"Microsoft (R) Macro Assembler", false },
 519    :    { L"Microsoft (R) Optimizing Compiler", true },
 520    :    { L"Microsoft (R) LINK", false }
 521    :  };
 522    :  
 523    :  // Returns true only if |compiland| reports a compiler name that appears in
 524    :  // kKnownCompilerInfos and is marked as supported there.
 525  E :  bool IsBuiltBySupportedCompiler(IDiaSymbol* compiland) {
 526  E :    DCHECK(compiland != NULL);
 527  E :    DCHECK(IsSymTag(compiland, SymTagCompiland));
 528    :  
 529  E :    ScopedComPtr<IDiaSymbol> compiland_details;
 530    :    if (!GetCompilandDetailsForCompiland(compiland,
 531  E :                                         compiland_details.Receive())) {
 532    :      // If the compiland has no compiland details we assume the compiler is not
 533    :      // supported.
 534  i :      ScopedBstr compiland_name;
 535  i :      if (compiland->get_name(compiland_name.Receive()) == S_OK) {
 536  i :        VLOG(1) << "Compiland has no compiland details: "
 537    :                << com::ToString(compiland_name);
 538    :      }
 539  i :      return false;
 540    :    }
 541  E :    DCHECK(compiland_details.get() != NULL);
 542    :  
 543    :    // Get the compiler name. NOTE(review): failure here is only DCHECKed.
 544  E :    ScopedBstr compiler_name;
 545  E :    HRESULT hr = compiland_details->get_compilerName(compiler_name.Receive());
 546  E :    DCHECK_EQ(S_OK, hr);
 547    :  
 548    :    // Check the compiler name against the list of known compilers.
 549  E :    for (size_t i = 0; i < arraysize(kKnownCompilerInfos); ++i) {
 550  E :      if (::wcscmp(kKnownCompilerInfos[i].compiler_name, compiler_name) == 0) {
 551  E :        return kKnownCompilerInfos[i].supported;
 552    :      }
 553  E :    }
 554    :  
 555    :    // Anything we don't explicitly know about is not supported.
 556  i :    VLOG(1) << "Encountered unknown compiler: " << compiler_name;
 557  i :    return false;
 558  E :  }
 559    :  
 560    :  // Streams at ERROR severity if @p error is true, at verbose level 1 otherwise.
 561    :  #define LOG_ERROR_OR_VLOG1(error) LAZY_STREAM( \
 562    :      ::logging::LogMessage(__FILE__, \
 563    :                            __LINE__, \
 564    :                            (error) ? ::logging::LOG_ERROR : -1).stream(), \
 565    :      (error ? LOG_IS_ON(ERROR) : VLOG_IS_ON(1)))
 566    :  
 567    :  // Streams at WARNING severity if @p warn is true, at verbose level 1 otherwise.
 568    :  #define LOG_WARNING_OR_VLOG1(warn) LAZY_STREAM( \
 569    :      ::logging::LogMessage(__FILE__, \
 570    :                            __LINE__, \
 571    :                            (warn) ? ::logging::LOG_WARNING : -1).stream(), \
 572    :      (warn ? LOG_IS_ON(WARNING) : VLOG_IS_ON(1)))
 573    :  
 574    :  // Returns kDirectiveAbort if @p strict is true, and kDirectiveTerminateWalk
 575    :  // (an early termination rather than an error) otherwise.
 576  E :  CallbackDirective AbortOrTerminateDisassembly(bool strict) {
 577  E :    if (strict)
 578  i :      return Disassembler::kDirectiveAbort;
 579    :    else
 580  E :      return Disassembler::kDirectiveTerminateWalk;
 581  E :  }
 582    :  
 583    :  // Returns true for the directives that end the whole walk (terminate-walk and
 584    :  // abort), which should be returned immediately. Unknown values count as fatal.
 585  E :  bool IsFatalCallbackDirective(CallbackDirective directive) {
 586  E :    switch (directive) {
 587    :      case Disassembler::kDirectiveContinue:
 588    :      case Disassembler::kDirectiveTerminatePath:
 589  E :        return false;
 590    :  
 591    :      case Disassembler::kDirectiveTerminateWalk:
 592    :      case Disassembler::kDirectiveAbort:
 593  i :        return true;
 594    :  
 595    :      default:
 596  i :        NOTREACHED();
 597    :    }
 598    :  
 599  i :    return true;
 600  E :  }
 601    :  
 602    :  // Merges two callback directives by returning the larger (superseding) one.
 603    :  CallbackDirective CombineCallbackDirectives(CallbackDirective d1,
 604  E :                                              CallbackDirective d2) {
 605    :    // std::max below is only correct while the enum values stay in this
 606    :    // increasing order; fail the build if CallbackDirective is ever reordered.
 607    :    COMPILE_ASSERT(Disassembler::kDirectiveContinue <
 608    :                       Disassembler::kDirectiveTerminatePath &&
 609    :                   Disassembler::kDirectiveTerminatePath <
 610    :                       Disassembler::kDirectiveTerminateWalk &&
 611    :                   Disassembler::kDirectiveTerminateWalk <
 612    :                       Disassembler::kDirectiveAbort,
 613    :                   callback_directive_enum_is_not_sorted);
 614  E :    return std::max(d1, d2);
 615  E :  }
 616    :  
 617    :  // Returns true if |block| has a data label in [offset, offset + size).
 618    :  bool HasDataLabelInRange(const BlockGraph::Block* block,
 619    :                           BlockGraph::Offset offset,
 620  E :                           BlockGraph::Size size) {
 621    :    BlockGraph::Block::LabelMap::const_iterator it =
 622  E :        block->labels().lower_bound(offset);
 623    :    BlockGraph::Block::LabelMap::const_iterator end =
 624  E :        block->labels().lower_bound(offset + size);
 625    :  
 626  E :    for (; it != end; ++it) {
 627  i :      if (it->second.has_attributes(BlockGraph::DATA_LABEL))
 628  i :        return true;
 629  i :    }
 630    :  
 631  E :    return false;
 632  E :  }
 633    :  // Warns about a suspected non-returning call unless the target is marked so.
 634    :  void ReportPotentialNonReturningFunction(
 635    :      const Decomposer::IntermediateReferenceMap& refs,
 636    :      const BlockGraph::AddressSpace& image,
 637    :      const BlockGraph::Block* block,
 638    :      BlockGraph::Offset call_ref_offset,
 639  i :      const char* reason) {
 640    :    typedef Decomposer::IntermediateReferenceMap::const_iterator RefIter;
 641    :  
 642    :    // Try and track down the block being pointed at by the call. If this is a
 643    :    // computed address there will be no reference.
 644  i :    RefIter ref_it = refs.find(block->addr() + call_ref_offset);
 645  i :    if (ref_it == refs.end()) {
 646  i :      LOG(WARNING) << "Suspected non-returning function call from offset "
 647    :                   << call_ref_offset << " (followed by " << reason
 648    :                   << ") of block \"" << block->name()
 649    :                   << "\", but target can not be tracked down.";
 650  i :      return;
 651    :    }
 652    :  
 653  i :    BlockGraph::Block* target = image.GetBlockByAddress(ref_it->second.base);
 654  i :    DCHECK(target != NULL);
 655    :  
 656    :    // If this was marked as non-returning, then it's not suspicious.
 657  i :    if ((target->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
 658  i :      return;
 659    :  
 660    :    // If the target is a code block then this is a direct call.
 661  i :    if (target->type() == BlockGraph::CODE_BLOCK) {
 662  i :      LOG(WARNING) << "Suspected non-returning call from offset "
 663    :                   << call_ref_offset << " (followed by " << reason
 664    :                   << ") of block \"" << block->name() << "\" to code block \""
 665    :                   << target->name() << "\".";
 666  i :      return;
 667    :    }
 668    :    // Otherwise the target is a data block and this is a memory indirect call
 669    :    // to a thunk.
 670  i :    DCHECK_EQ(BlockGraph::DATA_BLOCK, target->type());
 671    :  
 672    :    // Track down the import thunk. NOTE(review): |thunk| is never NULL-checked.
 673  i :    RefIter thunk_ref_it = refs.find(ref_it->second.base);
 674  i :    DCHECK(thunk_ref_it != refs.end());
 675  i :    BlockGraph::Block* thunk = image.GetBlockByAddress(thunk_ref_it->second.base);
 676    :  
 677    :    // If this was marked as non-returning, then it's not suspicious.
 678  i :    if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
 679  i :      return;
 680    :  
 681    :    // Otherwise, this is an import thunk. Report it by its block name.
 682  i :    LOG(WARNING) << "Suspected non-returning call from offset "
 683    :                 << call_ref_offset << " (followed by " << reason
 684    :                 << ") of block \"" << block->name() << "\" to import thunk \""
 685    :                 << thunk->name() << "\".";
 686  i :  }
 687    :  
           // Walks the instructions recorded in disasm.visited() and reports, via
           // ReportPotentialNonReturningFunction, every call that looks like it does
           // not return. A call is reported when it is:
           //   - followed (optionally after nops) by a gap in the visited space that
           //     contains a data label of the block,
           //   - immediately followed by a debug interrupt (int3), or
           //   - the last thing seen (optionally followed by nops) in a block that
           //     carries the DISASSEMBLED_PAST_END attribute.
           // refs and image are only forwarded to the reporting routine.
 688    :  void LookForNonReturningFunctions(
 689    :      const Decomposer::IntermediateReferenceMap& refs,
 690    :      const BlockGraph::AddressSpace& image,
 691    :      const BlockGraph::Block* block,
 692  E :      const Disassembler& disasm) {
           // State machine flags: at most one of these is true at any time.
 693  E :    bool saw_call = false;
 694  E :    bool saw_call_then_nop = false;
           // Offset (relative to disasm.code_addr()) recorded for the most recent
           // call; only meaningful while one of the flags above is set.
 695  E :    BlockGraph::Offset call_ref_offset = 0;
 696    :  
           // End address of the previously processed instruction. It is only
           // default-constructed until the first instruction has been handled.
 697  E :    AbsoluteAddress end_of_last_inst;
 698    :    Disassembler::VisitedSpace::const_iterator inst_it =
 699  E :        disasm.visited().begin();
 700  E :    for (; inst_it != disasm.visited().end(); ++inst_it) {
 701    :      // Not contiguous with the last instruction? Then we're spanning a gap. If
 702    :      // it's an instruction then we didn't parse it; thus, we already know that
 703    :      // if the last instruction is a call it's to a non-returning function. So,
 704    :      // we only need to check for data.
 705  E :      if (inst_it->first.start() != end_of_last_inst) {
 706  E :        if (saw_call || saw_call_then_nop) {
 707  E :          BlockGraph::Offset offset = end_of_last_inst - disasm.code_addr();
 708  E :          BlockGraph::Size size = inst_it->first.start() - end_of_last_inst;
 709  E :          if (HasDataLabelInRange(block, offset, size))
 710    :            // We do not expect this to ever occur in cl.exe generated code.
 711    :            // However, it is entirely possible in hand-written assembly.
 712    :            ReportPotentialNonReturningFunction(
 713    :                refs, image, block, call_ref_offset,
 714  i :                saw_call ? "data" : "nop(s) and data");
 715    :        }
 716    :  
               // A gap always ends the current call/nop pattern.
 717  E :        saw_call = false;
 718  E :        saw_call_then_nop = false;
 719    :      }
 720    :  
             // Re-decode the instruction from the raw code bytes; a decoding failure
             // trips the CHECK.
 721  E :      _DInst inst = { 0 };
 722  E :      BlockGraph::Offset offset = inst_it->first.start() - disasm.code_addr();
 723  E :      const uint8* code = disasm.code() + offset;
 724  E :      CHECK(core::DecodeOneInstruction(code, inst_it->first.size(), &inst));
 725    :  
 726    :      // Previous instruction was a call?
 727  E :      if (saw_call) {
 728  E :        if (core::IsNop(inst)) {
 729  i :          saw_call_then_nop = true;
 730  E :        } else if (core::IsDebugInterrupt(inst)) {
 731    :          ReportPotentialNonReturningFunction(
 732  i :              refs, image, block, call_ref_offset, "int3");
 733    :        }
 734  E :        saw_call = false;
 735  E :      } else if (saw_call_then_nop) {
 736    :        // The previous instructions we've seen have been a call followed by
 737    :        // arbitrary many nops. Look for another nop to continue the pattern.
               // Only nops are examined here: a debug interrupt that follows the nops
               // is not reported.
 738  i :        saw_call_then_nop = core::IsNop(inst);
 739  i :      } else {
 740    :        // The previous instruction was not a call, so we're looking for one.
 741    :        // If this instruction is a call, remember that fact and also remember
 742    :        // the offset of its operand (the call target).
               // NOTE(review): this assumes the operand occupies the final
               // BlockGraph::Reference::kMaximumSize bytes of the instruction --
               // confirm this holds for every call encoding that can reach here.
 743  E :        if (core::IsCall(inst)) {
 744  E :          saw_call = true;
 745    :          call_ref_offset = offset + inst_it->first.size() -
 746  E :              BlockGraph::Reference::kMaximumSize;
 747    :        }
 748    :      }
 749    :  
 750    :      // Remember the end of the last instruction we processed.
 751  E :      end_of_last_inst = inst_it->first.end();
 752  E :    }
 753    :  
 754    :    // If the last instruction was a call and we've marked that we've disassembled
 755    :    // past the end, then this is also a suspected non-returning function.
 756    :    if ((saw_call || saw_call_then_nop) &&
 757  E :        (block->attributes() & BlockGraph::DISASSEMBLED_PAST_END) != 0) {
 758  i :      const char* reason = saw_call ? "end of block" : "nop(s) and end of block";
 759    :      ReportPotentialNonReturningFunction(
 760  i :          refs, image, block, call_ref_offset, reason);
 761    :    }
 762  E :  }
 763    :  
 764    :  }  // namespace
 765    :  
           // Constructs a decomposer for image_file and registers the built-in
           // static initializer patterns, non-returning functions and non-returning
           // imports listed below. Every registration is wrapped in CHECK, so a
           // registration that returns false trips the CHECK.
 766    :  Decomposer::Decomposer(const PEFile& image_file)
 767    :      : image_(NULL),
 768    :        image_file_(image_file),
 769    :        current_block_(NULL),
 770  E :        be_strict_with_current_block_(true) {
 771    :    // Register static initializer patterns that we know are always present.
 772    :    // CRT C/C++/etc initializers.
 773  E :    CHECK(RegisterStaticInitializerPatterns("(__x.*)_a", "(__x.*)_z"));
 774    :    // RTC (run-time checks) initializers (part of CRT).
 775  E :    CHECK(RegisterStaticInitializerPatterns("(__rtc_[it])aa", "(__rtc_[it])zz"));
 776    :    // ATL object map initializers.
 777    :    CHECK(RegisterStaticInitializerPatterns("(__pobjMapEntry)First",
 778  E :                                            "(__pobjMapEntry)Last"));
 779    :    // Thread-local storage template.
 780  E :    CHECK(RegisterStaticInitializerPatterns("(_tls_)start", "(_tls_)end"));
 781    :  
 782    :    // Register non-returning functions that for some reason the symbols lie to
 783    :    // us about.
 784  E :    CHECK(RegisterNonReturningFunction("_CxxThrowException"));
 785  E :    CHECK(RegisterNonReturningFunction("_longjmp"));
 786    :  
 787    :    // Register non-returning imports that we know about.
 788  E :    CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitProcess"));
 789  E :    CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitThread"));
 790  E :    CHECK(RegisterNonReturningImport("KERNEL32.dll", "RaiseException"));
 791  E :  }
 792    :  
           // Decomposes the image into image_layout. Returns true on success.
           //
           // The PDB is located and validated first. If LoadBlockGraphFromPdb
           // succeeds the function returns true immediately; if it fails while
           // reporting that the stream exists, the function returns false. Otherwise
           // a DIA session is opened on the PDB and the decomposition stages below
           // run in order, each one only if all previous stages succeeded.
           //
           // image_ points at image_layout->blocks while the stages run and is reset
           // to NULL before returning. The early returns that precede that
           // assignment do not touch image_.
 793  E :  bool Decomposer::Decompose(ImageLayout* image_layout) {
 794    :    // We start by finding the PDB path.
 795  E :    if (!FindAndValidatePdbPath())
 796  E :      return false;
 797  E :    DCHECK(!pdb_path_.empty());
 798    :  
 799    :    // Check if the block-graph has already been serialized into the PDB and load
 800    :    // it from here in this case. This allows round-trip decomposition.
 801  E :    bool stream_exists = false;
 802    :    if (LoadBlockGraphFromPdb(pdb_path_, image_file_, image_layout,
 803  E :                              &stream_exists)) {
 804  E :      return true;
 805    :    } else {
 806    :      // If the stream exists but hasn't been loaded we return an error. At this
 807    :      // point an error message has already been logged if there was one.
 808  E :      if (stream_exists)
 809  i :        return false;
 810    :    }
 811    :  
 812    :    // Move on to instantiating and initializing our Debug Interface Access
 813    :    // session.
 814  E :    ScopedComPtr<IDiaDataSource> dia_source;
 815  E :    if (!CreateDiaSource(dia_source.Receive()))
 816  i :      return false;
 817    :  
 818    :    // We create the session using the PDB file directly, as we've already
 819    :    // validated that it matches the module.
 820  E :    ScopedComPtr<IDiaSession> dia_session;
 821    :    if (!CreateDiaSession(pdb_path_,
 822    :                          dia_source.get(),
 823  E :                          dia_session.Receive())) {
 824  i :      return false;
 825    :    }
 826    :  
           // Use the image's preferred base address (OptionalHeader.ImageBase) as
           // the DIA load address.
 827    :    HRESULT hr = dia_session->put_loadAddress(
 828  E :        image_file_.nt_headers()->OptionalHeader.ImageBase);
 829  E :    if (hr != S_OK) {
 830  i :      LOG(ERROR) << "Failed to set the DIA load address: "
 831    :                 << com::LogHr(hr) << ".";
 832  i :      return false;
 833    :    }
 834    :  
 835  E :    ScopedComPtr<IDiaSymbol> global;
 836  E :    hr = dia_session->get_globalScope(global.Receive());
 837  E :    if (hr != S_OK) {
 838  i :      LOG(ERROR) << "Failed to get the DIA global scope: "
 839    :                 << com::LogHr(hr) << ".";
 840  i :      return false;
 841    :    }
 842    :  
           // From here on there are no early returns, so image_ is always reset to
           // NULL at the bottom of the function.
 843  E :    image_ = &image_layout->blocks;
 844    :  
 845    :    // Create the sections for the image.
 846  E :    bool success = CreateSections();
 847    :  
 848    :    // Load FIXUP information from the PDB file. We do this early on so that we
 849    :    // can do accounting with references that are created later on.
 850  E :    if (success)
 851  E :      success = LoadDebugStreams(dia_session);
 852    :  
 853    :    // Create intermediate references for each fixup entry.
 854  E :    if (success)
 855  E :      success = CreateReferencesFromFixups();
 856    :  
 857    :    // Chunk out important PE image structures, like the headers and such.
 858  E :    PEFileParser::PEHeader header;
 859  E :    if (success)
 860  E :      success = CreatePEImageBlocksAndReferences(&header);
 861    :  
 862    :    // Parse and validate the relocation entries.
 863  E :    if (success)
 864  E :      success = ParseRelocs();
 865    :  
 866    :    // Our first round of parsing is using section contributions. This creates
 867    :    // both code and data blocks.
 868  E :    if (success)
 869  E :      success = CreateBlocksFromSectionContribs(dia_session);
 870    :  
 871    :    // Process the function and thunk symbols in the image. This does not create
 872    :    // any blocks, as all functions are covered by section contributions.
 873  E :    if (success)
 874  E :      success = ProcessCodeSymbols(global);
 875    :  
 876    :    // Process data symbols. This can cause the creation of some blocks as the
 877    :    // data sections are not fully covered by section contributions.
 878  E :    if (success)
 879  E :      success = ProcessDataSymbols(global);
 880    :  
 881    :    // Create labels in code blocks.
 882  E :    if (success)
 883  E :      success = CreateGlobalLabels(global);
 884    :  
 885    :    // Create gap blocks. This ensures that we have complete coverage of the
 886    :    // entire image.
 887  E :    if (success)
 888  E :      success = CreateGapBlocks();
 889    :  
 890    :    // Parse public symbols, augmenting code and data labels where possible.
 891    :    // Some public symbols land on gap blocks, so they need to have been parsed
 892    :    // already.
 893  E :    if (success)
 894  E :      success = ProcessPublicSymbols(global);
 895    :  
 896    :    // Parse initialization bracketing symbols. This needs to happen after
 897    :    // PublicSymbols have been parsed.
 898  E :    if (success)
 899  E :      success = ProcessStaticInitializers();
 900    :  
 901    :    // We know that some data blocks need to have alignment precisely preserved.
 902    :    // For now, we very conservatively (guaranteed to be correct, but causes many
 903    :    // blocks to be aligned that don't strictly need alignment) guess alignment
 904    :    // for each block. This must be run after static initializers have been
 905    :    // parsed.
 906  E :    if (success)
 907  E :      success = GuessDataBlockAlignments();
 908    :  
 909    :    // Disassemble code blocks and create PC-relative references
 910  E :    if (success)
 911  E :      success = CreateCodeReferences();
 912    :  
 913    :    // Turn the address->address format references we've created into
 914    :    // block->block references on the blocks in the image.
 915  E :    if (success)
 916  E :      success = FinalizeIntermediateReferences();
 917    :  
 918    :    // Everything called after this point requires the references to have been
 919    :    // finalized.
 920    :  
 921    :    // One way of ensuring full coverage is to check that all of the fixups
 922    :    // were visited during decomposition.
 923  E :    if (success)
 924  E :      success = ConfirmFixupsVisited();
 925    :  
 926    :    // Now, find and label any padding blocks.
 927  E :    if (success)
 928  E :      success = FindPaddingBlocks();
 929    :  
 930    :    // Finally, copy the image headers over to the layout.
 931  E :    if (success)
 932  E :      success = CopyHeaderToImageLayout(header.nt_headers, image_layout);
 933    :  
 934  E :    image_ = NULL;
 935    :  
 936  E :    return success;
 937  E :  }
 938    :  
           // Makes sure pdb_path_ names an existing PDB file that matches
           // image_file_. When pdb_path_ is empty it is first filled in with
           // FindPdbForModule. Returns false, after logging an error, if no PDB can
           // be found, the path does not exist, either the PDB header or the
           // module's PDB info cannot be read, or the two are not consistent.
 939  E :  bool Decomposer::FindAndValidatePdbPath() {
 940    :    // Manually find the PDB path if it is not specified.
 941  E :    if (pdb_path_.empty()) {
             // A successful search that still leaves the path empty is treated as a
             // failure too.
 942    :      if (!FindPdbForModule(image_file_.path(), &pdb_path_) ||
 943  E :          pdb_path_.empty()) {
 944  i :        LOG(ERROR) << "Unable to find PDB file for module: "
 945    :                   << image_file_.path().value();
 946  i :        return false;
 947    :      }
 948    :    }
 949  E :    DCHECK(!pdb_path_.empty());
 950    :  
 951  E :    if (!file_util::PathExists(pdb_path_)) {
 952  E :      LOG(ERROR) << "Path not found: " << pdb_path_.value();
 953  E :      return false;
 954    :    }
 955    :  
 956    :    // Get the PDB info from the PDB file.
 957    :    pdb::PdbInfoHeader70 pdb_info_header;
 958  E :    if (!pdb::ReadPdbHeader(pdb_path_, &pdb_info_header)) {
 959  i :      LOG(ERROR) << "Unable to read PDB info header from PDB file: "
 960    :                 << pdb_path_.value();
 961  i :      return false;
 962    :    }
 963    :  
 964    :    // Get the PDB info from the module.
 965  E :    PdbInfo pdb_info;
 966  E :    if (!pdb_info.Init(image_file_)) {
 967  i :      LOG(ERROR) << "Unable to read PDB info from PE file: "
 968    :                 << image_file_.path().value();
 969  i :      return false;
 970    :    }
 971    :  
 972    :    // Ensure that they are consistent.
 973  E :    if (!pdb_info.IsConsistent(pdb_info_header)) {
 974  i :      LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
 975    :                 << "module \"" << image_file_.path().value() << "\".";
 976  i :      return false;
 977    :    }
 978    :  
 979  E :    return true;
 980  E :  }
 981    :  
           // Processes the function symbols and then the thunk symbols found under
           // global. Returns false as soon as either step fails; thunks are not
           // processed if function processing fails.
 982  E :  bool Decomposer::ProcessCodeSymbols(IDiaSymbol* global) {
 983  E :    if (!ProcessFunctionSymbols(global))
 984  i :      return false;
 985  E :    if (!ProcessThunkSymbols(global))
 986  i :      return false;
 987    :  
 988  E :    return true;
 989  E :  }
 990    :  
           // Enumerates the SymTagFunction children of global (expected to be the
           // SymTagExe scope) and hands each one to ProcessFunctionOrThunkSymbol.
           // Returns false if the enumerator cannot be created or queried, if
           // fetching a symbol fails, or if processing any function fails.
 991  E :  bool Decomposer::ProcessFunctionSymbols(IDiaSymbol* global) {
 992  E :    DCHECK(IsSymTag(global, SymTagExe));
 993    :  
 994    :    // Enumerate the function symbols that are children of the global scope.
 995  E :    ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
 996    :    HRESULT hr = global->findChildren(SymTagFunction,
 997    :                                      NULL,
 998    :                                      nsNone,
 999  E :                                      dia_enum_symbols.Receive());
1000  E :    if (hr != S_OK) {
1001  i :      LOG(ERROR) << "Failed to get the DIA function enumerator: "
1002    :                 << com::LogHr(hr) << ".";
1003  i :      return false;
1004    :    }
1005    :  
1006  E :    LONG count = 0;
1007  E :    if (dia_enum_symbols->get_Count(&count) != S_OK) {
1008  i :      LOG(ERROR) << "Failed to get function enumeration length.";
1009  i :      return false;
1010    :    }
1011    :  
           // The loop is bounded by the reported count rather than by the
           // enumerator's end-of-sequence result.
1012  E :    for (LONG visited = 0; visited < count; ++visited) {
1013  E :      ScopedComPtr<IDiaSymbol> function;
1014  E :      ULONG fetched = 0;
1015  E :      hr = dia_enum_symbols->Next(1, function.Receive(), &fetched);
             // Unlike the other enumeration loops in this file, any result other
             // than S_OK (including non-failure codes) is treated as an error here.
1016  E :      if (hr != S_OK) {
1017  i :        LOG(ERROR) << "Failed to enumerate functions: " << com::LogHr(hr) << ".";
1018  i :        return false;
1019    :      }
1020  E :      if (fetched == 0)
1021  i :        break;
1022    :  
1023    :      // Create the block representing the function.
1024  E :      DCHECK(IsSymTag(function, SymTagFunction));
1025  E :      if (!ProcessFunctionOrThunkSymbol(function))
1026  i :        return false;
1027  E :    }
1028    :  
1029  E :    return true;
1030  E :  }
1031    :  
           // Processes one function or thunk symbol. The block containing the
           // symbol's address receives a CODE_LABEL with the symbol's name, the
           // symbol's no-return / inline-assembly / exception-handling flags are
           // copied to block attributes, and labels are created for the symbol's
           // children.
           //
           // Symbols whose location type is not LocIsStatic are skipped and the
           // function returns true. Returns false if symbol properties cannot be
           // read, the name cannot be converted to UTF8, no block contains the
           // symbol's address, the block does not fully cover the symbol, or
           // CreateLabelsForFunction fails.
1032  E :  bool Decomposer::ProcessFunctionOrThunkSymbol(IDiaSymbol* function) {
1033  E :    DCHECK(IsSymTag(function, SymTagFunction) || IsSymTag(function, SymTagThunk));
1034    :  
1035  E :    DWORD location_type = LocIsNull;
1036  E :    HRESULT hr = E_FAIL;
1037  E :    if (FAILED(hr = function->get_locationType(&location_type))) {
1038  i :      LOG(ERROR) << "Failed to retrieve function address type: "
1039    :                 << com::LogHr(hr) << ".";
1040  i :      return false;
1041    :    }
           // Anything that is not statically located is skipped; the only such
           // location type expected here is LocIsNull.
1042  E :    if (location_type != LocIsStatic) {
1043  i :      DCHECK_EQ(static_cast<DWORD>(LocIsNull), location_type);
1044  i :      return true;
1045    :    }
1046    :  
           // The address, length and name are mandatory: anything other than S_OK
           // from any of these getters fails the call.
1047  E :    DWORD rva = 0;
1048  E :    ULONGLONG length = 0;
1049  E :    ScopedBstr name;
1050    :    if ((hr = function->get_relativeVirtualAddress(&rva)) != S_OK ||
1051    :        (hr = function->get_length(&length)) != S_OK ||
1052  E :        (hr = function->get_name(name.Receive())) != S_OK) {
1053  i :      LOG(ERROR) << "Failed to retrieve function information: "
1054    :                 << com::LogHr(hr) << ".";
1055  i :      return false;
1056    :    }
1057    :  
1058    :    // Certain properties are not defined on all blocks, so the following calls
1059    :    // may return S_FALSE.
1060  E :    BOOL no_return = FALSE;
1061  E :    if (function->get_noReturn(&no_return) != S_OK)
1062  E :      no_return = FALSE;
1063    :  
1064  E :    BOOL has_inl_asm = FALSE;
1065  E :    if (function->get_hasInlAsm(&has_inl_asm) != S_OK)
1066  E :      has_inl_asm = FALSE;
1067    :  
1068  E :    BOOL has_eh = FALSE;
1069  E :    if (function->get_hasEH(&has_eh) != S_OK)
1070  E :      has_eh = FALSE;
1071    :  
1072  E :    BOOL has_seh = FALSE;
1073  E :    if (function->get_hasSEH(&has_seh) != S_OK)
1074  E :      has_seh = FALSE;
1075    :  
1076  E :    std::string block_name;
1077  E :    if (!WideToUTF8(name, name.Length(), &block_name)) {
1078  i :      LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1079  i :      return false;
1080    :    }
1081    :  
1082    :    // Find the block to which this symbol maps, and ensure it fully covers the
1083    :    // symbol.
1084  E :    RelativeAddress block_addr(rva);
1085  E :    BlockGraph::Block* block = image_->GetBlockByAddress(block_addr);
1086  E :    if (block == NULL) {
1087  i :      LOG(ERROR) << "No block found for function/thunk symbol \""
1088    :                 << block_name << "\".";
1089  i :      return false;
1090    :    }
1091  E :    if (block->addr() + block->size() < block_addr + length) {
1092  i :      LOG(ERROR) << "Section contribution \"" << block->name() << "\" does not "
1093    :                 << "fully cover function/thunk symbol \"" << block_name << "\".";
1094  i :      return false;
1095    :    }
1096    :  
1097    :    // Annotate the block with a label, as this is an entry point to it. This is
1098    :    // the routine that adds labels, so there should never be any collisions.
1099  E :    CHECK(AddLabelToBlock(block_addr, block_name, BlockGraph::CODE_LABEL, block));
1100    :  
1101    :    // If we didn't get an explicit no-return flag from the symbols check our
1102    :    // list of exceptions.
           // The lookup is keyed on the block's name, not on the symbol name that
           // was converted into block_name above.
1103  E :    if (no_return == FALSE && non_returning_functions_.count(block->name()) > 0) {
1104  E :      VLOG(1) << "Forcing non-returning attribute on function \""
1105    :              << block->name() << "\".";
1106  E :      no_return = TRUE;
1107    :    }
1108    :  
1109    :    // Set the block attributes.
           // NOTE(review): no_return and has_inl_asm are compared against TRUE
           // exactly, whereas has_eh / has_seh are tested for any non-zero value.
           // Confirm DIA only ever reports TRUE or FALSE for these flags.
1110  E :    if (no_return == TRUE)
1111  E :      block->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
1112  E :    if (has_inl_asm == TRUE)
1113  E :      block->set_attribute(BlockGraph::HAS_INLINE_ASSEMBLY);
1114  E :    if (has_eh || has_seh)
1115  E :      block->set_attribute(BlockGraph::HAS_EXCEPTION_HANDLING);
1116    :  
1117  E :    if (!CreateLabelsForFunction(function, block)) {
1118  i :      LOG(ERROR) << "Failed to create labels for '" << block->name() << "'.";
1119  i :      return false;
1120    :    }
1121    :  
1122  E :    return true;
1123  E :  }
1124    :  
           // Adds a label to block for every child symbol of function that has a
           // relative virtual address inside the block. Children without an RVA, or
           // with an RVA outside the block, are ignored. Unnamed symbols get a
           // synthesized name based on their symbol tag. For SymTagBlock symbols a
           // matching "<scope-end>" label is also added at the scope's start
           // address plus its length.
           //
           // Returns false if the block has no address, the DIA enumeration fails,
           // a symbol tag or scope length cannot be read, a name cannot be
           // converted to UTF8, or AddLabelToBlock fails.
1125    :  bool Decomposer::CreateLabelsForFunction(IDiaSymbol* function,
1126  E :                                           BlockGraph::Block* block) {
1127  E :    DCHECK(function != NULL);
1128  E :    DCHECK(block != NULL);
1129    :  
1130    :    // Lookup the block address.
1131  E :    RelativeAddress block_addr;
1132  E :    if (!image_->GetAddressOf(block, &block_addr)) {
1133  i :      NOTREACHED() << "Block " << block->name() << " has no address.";
1134  i :      return false;
1135    :    }
1136    :  
1137    :    // Enumerate all symbols which are children of function.
1138  E :    ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
1139    :    HRESULT hr = function->findChildren(SymTagNull,
1140    :                                        NULL,
1141    :                                        nsNone,
1142  E :                                        dia_enum_symbols.Receive());
1143  E :    if (FAILED(hr)) {
1144  i :      LOG(ERROR) << "Failed to get the DIA label enumerator: "
1145    :                 << com::LogHr(hr) << ".";
1146  i :      return false;
1147    :    }
1148    :  
1149  E :    while (true) {
1150  E :      ScopedComPtr<IDiaSymbol> symbol;
1151  E :      ULONG fetched = 0;
1152  E :      hr = dia_enum_symbols->Next(1, symbol.Receive(), &fetched);
1153  E :      if (FAILED(hr)) {
1154  i :        LOG(ERROR) << "Failed to enumerate the DIA symbol: "
1155    :                   << com::LogHr(hr) << ".";
1156  i :        return false;
1157    :      }
             // A non-failure result other than S_OK, or nothing fetched, ends the
             // enumeration.
1158  E :      if (hr != S_OK || fetched == 0)
1159  E :        break;
1160    :  
1161    :      // If it doesn't have an RVA then it's not interesting to us.
1162  E :      DWORD temp_rva = 0;
1163  E :      if (symbol->get_relativeVirtualAddress(&temp_rva) != S_OK)
1164  E :        continue;
1165    :  
1166    :      // Get the type of symbol we're looking at.
1167  E :      DWORD temp_sym_tag = 0;
1168  E :      if (symbol->get_symTag(&temp_sym_tag) != S_OK) {
1169  i :        LOG(ERROR) << "Failed to retrieve label information.";
1170  i :        return false;
1171    :      }
1172    :  
1173  E :      enum SymTagEnum sym_tag = static_cast<enum SymTagEnum>(temp_sym_tag);
1174  E :      BlockGraph::LabelAttributes label_attr = SymTagToLabelAttributes(sym_tag);
1175    :  
1176    :      // TODO(rogerm): Add a flag to include/exclude the symbol types that are
1177    :      //     interesting for debugging purposes, but not actually needed for
1178    :      //     decomposition: FuncDebugStart/End, Block, etc.
1179    :  
1180    :      // We ignore labels that fall outside of the code block. We sometimes
1181    :      // get labels at the end of a code block, and if the binary has any OMAP
1182    :      // information these follow the original successor block, and they can
1183    :      // end up most anywhere in the binary.
1184  E :      RelativeAddress label_rva(temp_rva);
1185  E :      if (label_rva < block_addr || label_rva >= block_addr + block->size())
1186  E :        continue;
1187    :  
1188    :      // Extract the symbol's name.
             // A symbol whose name cannot be fetched is not an error: label_name
             // simply stays empty and a name is synthesized below.
1189  E :      std::string label_name;
1190    :      {
1191  E :        ScopedBstr temp_name;
1192    :        if (symbol->get_name(temp_name.Receive()) == S_OK &&
1193  E :            !WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1194  i :          LOG(ERROR) << "Failed to convert label name to UTF8.";
1195  i :          return false;
1196    :        }
1197  E :      }
1198    :  
1199    :      // Not all symbols have a name, if we've found one without a name, make
1200    :      // one up.
1201  E :      BlockGraph::Offset offset = label_rva - block_addr;
1202  E :      if (label_name.empty()) {
1203  E :        switch (sym_tag) {
1204    :          case SymTagFuncDebugStart: {
1205  E :            label_name = "<debug-start>";
1206  E :            break;
1207    :          }
1208    :  
1209    :          case SymTagFuncDebugEnd: {
1210  E :            label_name = "<debug-end>";
1211  E :            break;
1212    :          }
1213    :  
                 // Unnamed data inside a code block: it is labeled as a jump table
                 // when its address is in reloc_set_, and as a case table otherwise.
1214    :          case SymTagData: {
1215  E :            if (reloc_set_.count(label_rva)) {
1216  E :              label_name = base::StringPrintf("<jump-table-%d>", offset);
1217  E :              label_attr |= BlockGraph::JUMP_TABLE_LABEL;
1218  E :            } else {
1219  E :              label_name = base::StringPrintf("<case-table-%d>", offset);
1220  E :              label_attr |= BlockGraph::CASE_TABLE_LABEL;
1221    :            }
1222  E :            break;
1223    :          }
1224    :  
1225    :          case SymTagBlock: {
1226  E :            label_name = "<scope-start>";
1227  E :            break;
1228    :          }
1229    :  
1230    :  #if _MSC_VER >= 1600
1231    :          // The DIA SDK shipping with MSVS 2010 includes additional symbol types.
1232    :          case SymTagCallSite: {
1233    :            label_name = "<call-site>";
1234    :            break;
1235    :          }
1236    :  #endif
1237    :  
                 // Last case of the switch, so the missing break is harmless.
1238    :          default: {
1239  i :            LOG(WARNING) << "Unexpected symbol type " << sym_tag << " in "
1240    :                         << block->name() << " at "
1241    :                         << base::StringPrintf("0x%08X.", label_rva.value());
1242  i :            label_name = base::StringPrintf("<anonymous-%d>", sym_tag);
1243    :          }
1244    :        }
1245    :      }
1246    :  
1247    :      // We expect that we'll never see a code label that refers to a reloc.
1248    :      // This happens sometimes, however, as we generally get a code label for
1249    :      // the first byte after a switch statement. This can sometimes land on the
1250    :      // following jump table.
1251  E :      if ((label_attr & BlockGraph::CODE_LABEL) && reloc_set_.count(label_rva)) {
1252  E :        VLOG(1) << "Collision between reloc and code label in "
1253    :                << block->name() << " at " << label_name
1254    :                << base::StringPrintf(" (0x%08X).", label_rva.value())
1255    :                << " Falling back to data label.";
               // The attributes are replaced outright, not merged with the ones
               // derived from the symbol tag.
1256  E :        label_attr = BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL;
1257  E :        DCHECK_EQ(block_addr, block->addr());
               // An existing label at this offset that is not a data label is
               // removed before the new label is added below.
1258  E :        BlockGraph::Label label;
1259    :        if (block->GetLabel(offset, &label) &&
1260  E :            !label.has_attributes(BlockGraph::DATA_LABEL)) {
1261  i :          VLOG(1) << block->name() << ": Replacing label " << label.name()
1262    :                  << " ("
1263    :                  << BlockGraph::LabelAttributesToString(label.attributes())
1264    :                  << ") at offset " << offset << ".";
1265  i :          block->RemoveLabel(offset);
1266    :        }
1267  E :      }
1268    :  
1269    :      // Add the label to the block.
1270  E :      if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1271  i :        LOG(ERROR) << "Failed to add label to code block.";
1272  i :        return false;
1273    :      }
1274    :  
1275    :      // Is this a scope? Then it also has a length. Use it to create the matching
1276    :      // scope end.
1277  E :      if (sym_tag == SymTagBlock) {
1278  E :        ULONGLONG length = 0;
1279  E :        if (symbol->get_length(&length) != S_OK) {
1280  i :          LOG(ERROR) << "Failed to extract code scope length for "
1281    :                     << block->name();
1282  i :          return false;
1283    :        }
               // NOTE(review): unlike the scope start, the scope-end address is not
               // range-checked against the block here. Presumably AddLabelToBlock
               // copes with an address at or past the end of the block -- confirm.
1284  E :        label_rva += length;
1285  E :        label_name = "<scope-end>";
1286  E :        label_attr = BlockGraph::SCOPE_END_LABEL;
1287  E :        if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1288  i :          LOG(ERROR) << "Failed to add label to code block.";
1289  i :          return false;
1290    :        }
1291    :      }
1292  E :    }
1293    :  
1294  E :    return true;
1295  E :  }
1296    :  
           // Enumerates every compiland under globals and passes each of its
           // SymTagThunk children to ProcessFunctionOrThunkSymbol. Returns false if
           // an enumerator cannot be retrieved, an enumeration step fails, or
           // processing any thunk fails.
1297  E :  bool Decomposer::ProcessThunkSymbols(IDiaSymbol* globals) {
1298  E :    ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1299    :    HRESULT hr = globals->findChildren(SymTagCompiland,
1300    :                                       NULL,
1301    :                                       nsNone,
1302  E :                                       enum_compilands.Receive());
1303  E :    if (FAILED(hr)) {
1304  i :      LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1305    :                 << com::LogHr(hr) << ".";
1306  i :      return false;
1307    :    }
1308    :  
           // Outer loop: one iteration per compiland.
1309  E :    while (true) {
1310  E :      ScopedComPtr<IDiaSymbol> compiland;
1311  E :      ULONG fetched = 0;
1312  E :      hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1313  E :      if (FAILED(hr)) {
1314  i :        LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1315    :                   << com::LogHr(hr) << ".";
1316  i :        return false;
1317    :      }
1318  E :      if (hr != S_OK || fetched == 0)
1319  E :        break;
1320    :  
1321  E :      ScopedComPtr<IDiaEnumSymbols> enum_thunks;
1322    :      hr = compiland->findChildren(SymTagThunk,
1323    :                                   NULL,
1324    :                                   nsNone,
1325  E :                                   enum_thunks.Receive());
1326  E :      if (FAILED(hr)) {
1327  i :        LOG(ERROR) << "Failed to retrieve thunk enumerator: "
1328    :                   << com::LogHr(hr) << ".";
1329  i :        return false;
1330    :      }
1331    :  
             // Inner loop: one iteration per thunk of the current compiland. It
             // reuses the fetched variable declared by the outer loop.
1332  E :      while (true) {
1333  E :        ScopedComPtr<IDiaSymbol> thunk;
1334  E :        hr = enum_thunks->Next(1, thunk.Receive(), &fetched);
1335  E :        if (FAILED(hr)) {
1336  i :          LOG(ERROR) << "Failed to enumerate thunk enumerator: "
1337    :                     << com::LogHr(hr) << ".";
1338  i :          return false;
1339    :        }
1340  E :        if (hr != S_OK || fetched == 0)
1341  E :          break;
1342    :  
1343    :  
1344  E :        DCHECK(IsSymTag(thunk, SymTagThunk));
1345    :  
1346  E :        if (!ProcessFunctionOrThunkSymbol(thunk))
1347  i :          return false;
1348  E :      }
1349  E :    }
1350    :  
1351  E :    return true;
1352  E :  }
1353    :  
           // Enumerates the SymTagLabel children of every compiland under globals
           // and adds each one as a CODE_LABEL to the block containing its address.
           // Returns false if an enumerator cannot be retrieved, an enumeration
           // step fails, a label's address or name cannot be read or converted to
           // UTF8, no block contains the label's address, or AddLabelToBlock fails.
1354  E :  bool Decomposer::CreateGlobalLabels(IDiaSymbol* globals) {
1355  E :    ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1356    :    HRESULT hr = globals->findChildren(SymTagCompiland,
1357    :                                       NULL,
1358    :                                       nsNone,
1359  E :                                       enum_compilands.Receive());
1360  E :    if (FAILED(hr)) {
1361  i :      LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1362    :                 << com::LogHr(hr) << ".";
1363  i :      return false;
1364    :    }
1365    :  
           // Outer loop: one iteration per compiland.
1366  E :    while (true) {
1367  E :      ScopedComPtr<IDiaSymbol> compiland;
1368  E :      ULONG fetched = 0;
1369  E :      hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1370  E :      if (FAILED(hr)) {
1371  i :        LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1372    :                   << com::LogHr(hr) << ".";
1373  i :        return false;
1374    :      }
1375  E :      if (hr != S_OK || fetched == 0)
1376  E :        break;
1377    :  
1378  E :      ScopedComPtr<IDiaEnumSymbols> enum_labels;
1379    :      hr = compiland->findChildren(SymTagLabel,
1380    :                                   NULL,
1381    :                                   nsNone,
1382  E :                                   enum_labels.Receive());
1383  E :      if (FAILED(hr)) {
1384  i :        LOG(ERROR) << "Failed to retrieve label enumerator: "
1385    :                   << com::LogHr(hr) << ".";
1386  i :        return false;
1387    :      }
1388    :  
             // Inner loop: one iteration per label of the current compiland. It
             // reuses the fetched variable declared by the outer loop.
1389  E :      while (true) {
1390  E :        ScopedComPtr<IDiaSymbol> label;
1391  E :        hr = enum_labels->Next(1, label.Receive(), &fetched);
1392  E :        if (FAILED(hr)) {
1393  i :          LOG(ERROR) << "Failed to enumerate label enumerator: "
1394    :                     << com::LogHr(hr) << ".";
1395  i :          return false;
1396    :        }
1397  E :        if (hr != S_OK || fetched == 0)
1398  E :          break;
1399    :  
1400  E :        DCHECK(IsSymTag(label, SymTagLabel));
1401    :  
               // Both the address and the name are required for a label.
1402  E :        DWORD addr = 0;
1403  E :        ScopedBstr temp_name;
1404    :        if (label->get_relativeVirtualAddress(&addr) != S_OK ||
1405  E :            label->get_name(temp_name.Receive()) != S_OK) {
1406  i :          LOG(ERROR) << "Failed to retrieve label address or name.";
1407  i :          return false;
1408    :        }
1409    :  
1410  E :        std::string label_name;
1411  E :        if (!WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1412  i :          LOG(ERROR) << "Failed to convert label name to UTF8.";
1413  i :          return false;
1414    :        }
1415    :  
1416  E :        RelativeAddress label_addr(addr);
1417  E :        BlockGraph::Block* block = image_->GetBlockByAddress(label_addr);
1418  E :        if (block == NULL) {
1419  i :          LOG(ERROR) << "No block for label " << label_name << " at " << addr;
1420  i :          return false;
1421    :        }
1422    :  
1423    :        if (!AddLabelToBlock(label_addr,
1424    :                             label_name,
1425    :                             BlockGraph::CODE_LABEL,
1426  E :                             block)) {
1427  i :          LOG(ERROR) << "Failed to add label to code block.";
1428  i :          return false;
1429    :        }
1430  E :      }
1431  E :    }
1432    :  
1433  E :    return true;
1434  E :  }
1435    :  
1436    :    bool Decomposer::CreateGapBlock(BlockGraph::BlockType block_type,
1437    :                                  RelativeAddress address,
1438  E :                                  BlockGraph::Size size) {
        :    // Creates a block of |block_type| spanning |size| bytes at |address|,
        :    // names it "Gap Block 0x<address>" and sets the GAP_BLOCK attribute on
        :    // it. Returns false (after logging) if FindOrCreateBlock yields NULL.
        :    // NOTE(review): kExpectNoBlock presumably means the range must not
        :    // already be covered by a block - confirm against FindOrCreateBlock.
1439    :    BlockGraph::Block* block = FindOrCreateBlock(block_type, address, size,
1440    :        StringPrintf("Gap Block 0x%08X", address.value()).c_str(),
1441  E :        kExpectNoBlock);
1442  E :    if (block == NULL) {
1443  i :      LOG(ERROR) << "Unable to create gap block.";
1444  i :      return false;
1445    :    }
1446  E :    block->set_attribute(BlockGraph::GAP_BLOCK);
1447    :  
1448  E :    return true;
1449  E :  }
1450    :  
1451    :    bool Decomposer::CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
1452  E :                                          BlockGraph::BlockType block_type) {
        :    // Covers every part of the section described by |header| that no
        :    // existing block in image_ occupies with a gap block of |block_type|.
        :    // Returns false as soon as one gap block cannot be created.
1453  E :    RelativeAddress section_begin(header->VirtualAddress);
1454  E :    RelativeAddress section_end(section_begin + header->Misc.VirtualSize);
1455    :    RelativeAddress image_end(
1456  E :        image_file_.nt_headers()->OptionalHeader.SizeOfImage);
1457    :  
1458    :    // Search for the first and last blocks intersecting from the start and end
1459    :    // of the section to the end of the image.
        :    // |it| is the first block intersecting [section_begin, image_end) and
        :    // |end| is the first block intersecting [section_end, image_end); |end|
        :    // serves as the bound for the iteration below.
1460    :    BlockGraph::AddressSpace::RangeMap::const_iterator it(
1461    :        image_->address_space_impl().FindFirstIntersection(
1462    :            BlockGraph::AddressSpace::Range(section_begin,
1463  E :                                            image_end - section_begin)));
1464    :    BlockGraph::AddressSpace::RangeMap::const_iterator end(
1465    :        image_->address_space_impl().FindFirstIntersection(
1466    :            BlockGraph::AddressSpace::Range(section_end,
1467  E :                                            image_end - section_end)));
1468    :  
1469    :    // The whole section is missing. Cover it with one gap block.
1470  E :    if (it == end)
1471    :      return CreateGapBlock(
1472  i :          block_type, section_begin, section_end - section_begin);
1473    :  
1474    :    // Create the head gap block if need be.
1475  E :    if (section_begin < it->first.start())
1476    :      if (!CreateGapBlock(
1477  i :          block_type, section_begin, it->first.start() - section_begin))
1478  i :        return false;
1479    :  
1480    :    // Now iterate the blocks and fill in gaps.
1481  E :    for (; it != end; ++it) {
1482  E :      const BlockGraph::Block* block = it->second;
1483  E :      DCHECK(block != NULL);
1484  E :      RelativeAddress block_end = it->first.start() + block->size();
        :      // A block reaching (or passing) the section end leaves no tail gap.
1485  E :      if (block_end >= section_end)
1486  E :        break;
1487    :  
1488    :      // Walk to the next address in turn.
1489  E :      BlockGraph::AddressSpace::RangeMap::const_iterator next = it;
1490  E :      ++next;
1491  E :      if (next == end) {
1492    :        // We're at the end of the list. Create the tail gap block.
1493  E :        DCHECK_GT(section_end, block_end);
1494  E :        if (!CreateGapBlock(block_type, block_end, section_end - block_end))
1495  i :          return false;
1496  E :        break;
1497    :      }
1498    :  
1499    :      // Create the interstitial gap block.
1500  E :      if (block_end < next->first.start())
1501    :        if (!CreateGapBlock(
1502  E :            block_type, block_end, next->first.start() - block_end))
1503  i :          return false;
1504  E :    }
1505    :  
1506  E :    return true;
1507  E :  }
1508    :  
1509  E :  bool Decomposer::CreateGapBlocks() {
        :    // Creates gap blocks for every code and data section of the image.
        :    // Sections of any other type are skipped. Returns false (after logging
        :    // the section kind and name) if a section cannot be processed.
1510  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1511    :  
1512    :    // Iterate through all the image sections.
1513  E :    for (size_t i = 0; i < num_sections; ++i) {
1514  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1515  E :      DCHECK(header != NULL);
1516    :  
        :      // Map the section type to the block type used for its gap blocks;
        :      // |section_type| is only used in the error message below.
1517  E :      BlockGraph::BlockType type = BlockGraph::CODE_BLOCK;
1518  E :      const char* section_type = NULL;
1519  E :      switch (GetSectionType(header)) {
1520    :        case kSectionCode:
1521  E :          type = BlockGraph::CODE_BLOCK;
1522  E :          section_type = "code";
1523  E :          break;
1524    :  
1525    :        case kSectionData:
1526  E :          type = BlockGraph::DATA_BLOCK;
1527  E :          section_type = "data";
1528  E :          break;
1529    :  
1530    :        default:
1531  i :          continue;
1532    :      }
1533    :  
1534  E :      if (!CreateSectionGapBlocks(header, type)) {
1535  i :        LOG(ERROR) << "Unable to create gap blocks for " << section_type
1536    :                   << " section \"" << header->Name << "\".";
1537  i :        return false;
1538    :      }
1539  E :    }
1540    :  
1541  E :    return true;
1542  E :  }
1543    :  
1544    :    bool Decomposer::AddReferenceCallback(RelativeAddress src_addr,
1545    :                                        BlockGraph::ReferenceType type,
1546    :                                        BlockGraph::Size size,
1547  E :                                        RelativeAddress dst_addr) {
        :    // Forwards a reference of |type| and |size| from |src_addr| to |dst_addr|
        :    // (with an offset of 0) to ValidateOrAddReference, operating on
        :    // fixup_map_ and references_, and returns its result.
1548    :    // This is only called by the PEFileParser, and it creates some references
1549    :    // for which there are no corresponding fixup entries.
1550    :    return ValidateOrAddReference(FIXUP_MAY_EXIST, src_addr, type, size, dst_addr,
1551  E :                                  0, &fixup_map_, &references_);
1552  E :  }
1553    :  
1554  E :  bool Decomposer::ParseRelocs() {
        :    // Decodes the image relocations into reloc_set_, reads them into a local
        :    // map, records the relative address of every relocation destination in
        :    // reloc_refs_, and finally validates the map with ValidateRelocs.
        :    // Returns false (after logging) if any of these steps fails.
1555  E :    if (!image_file_.DecodeRelocs(&reloc_set_)) {
1556  i :      LOG(ERROR) << "Unable to decode image relocs.";
1557  i :      return false;
1558    :    }
1559    :  
1560  E :    PEFile::RelocMap reloc_map;
1561  E :    if (!image_file_.ReadRelocs(reloc_set_, &reloc_map)) {
1562  i :      LOG(ERROR) << "Unable to read image relocs.";
1563  i :      return false;
1564    :    }
1565    :  
1566    :    // Get a set of relocation destinations. These are effectively 'references'
1567    :    // to labels, and will be used to weed out unreferenced labels.
1568  E :    PEFile::RelocMap::const_iterator it = reloc_map.begin();
1569  E :    for (; it != reloc_map.end(); ++it) {
1570  E :      RelativeAddress rva;
1571  E :      if (!image_file_.Translate(it->second, &rva)) {
1572  i :        LOG(ERROR) << "Unable to translate absolute address to relative: "
1573    :                   << it->second;
1574  i :        return false;
1575    :      }
1576  E :      reloc_refs_.insert(rva);
1577  E :    }
1578    :  
1579    :    // Validate each relocation entry against the corresponding fixup entry.
1580  E :    if (!ValidateRelocs(reloc_map))
1581  i :      return false;
1582    :  
1583  E :    return true;
1584  E :  }
1585    :  
1586  E :  bool Decomposer::CreateReferencesFromFixups() {
        :    // Converts every entry of fixup_map_ into a reference in references_.
        :    // For each fixup the 32-bit value stored at the fixup location is read
        :    // from the image and turned into a destination address according to the
        :    // fixup type; the reference is then added relative to the fixup's base.
        :    // Returns false (after logging) if the image data cannot be read, an
        :    // absolute destination cannot be translated, the fixup type is not one
        :    // of the handled ones, or the reference cannot be added.
1587  E :    FixupMap::const_iterator it(fixup_map_.begin());
1588  E :    for (; it != fixup_map_.end(); ++it) {
1589  E :      RelativeAddress src_addr(it->second.location);
1590  E :      uint32 data = 0;
1591  E :      if (!image_file_.ReadImage(src_addr, &data, sizeof(data))) {
1592  i :        LOG(ERROR) << "Unable to read image data for fixup with source at "
1593    :                   << src_addr;
1594  i :        return false;
1595    :      }
1596    :  
1597  E :      RelativeAddress dst_addr;
1598  E :      switch (it->second.type) {
1599    :        case BlockGraph::PC_RELATIVE_REF: {
        :          // The stored value is a displacement measured from the end of the
        :          // pointer-sized operand.
1600  E :          dst_addr = src_addr + kPointerSize + data;
1601  E :          break;
1602    :        }
1603    :  
1604    :        case BlockGraph::ABSOLUTE_REF: {
1605  E :          AbsoluteAddress dst_addr_abs(data);
        :          // Fail explicitly rather than only DCHECKing: in builds without
        :          // DCHECKs a failed translation would otherwise leave |dst_addr|
        :          // default-constructed and produce a bogus reference.
        :          if (!image_file_.Translate(dst_addr_abs, &dst_addr)) {
        :            LOG(ERROR) << "Unable to translate absolute address to relative: "
        :                       << dst_addr_abs;
        :            return false;
        :          }
1608  E :          break;
1609    :        }
1610    :  
1611    :        case BlockGraph::RELATIVE_REF: {
1612  E :          dst_addr = RelativeAddress(data);
1613  E :          break;
1614    :        }
1615    :  
1616    :        default: {
1617  i :          NOTREACHED() << "Invalid reference type.";
        :          // Do not build a reference from a destination that was never set.
        :          return false;
1619    :        }
1620    :      }
1621    :  
1622  E :      RelativeAddress dst_base(it->second.base);
1623  E :      BlockGraph::Offset dst_offset = dst_addr - dst_base;
1624    :      if (!AddReference(src_addr, it->second.type, kPointerSize, dst_base,
1625  E :                        dst_offset, &references_)) {
1626  i :        return false;
1627    :      }
1628  E :    }
1629    :  
1630  E :    return true;
1631  E :  }
1632    :  
1633  E :  bool Decomposer::ValidateRelocs(const PEFile::RelocMap& reloc_map) {
        :    // For every (source, absolute destination) entry of |reloc_map|,
        :    // translates the destination to a relative address and passes the pair
        :    // to ValidateOrAddReference as an ABSOLUTE_REF with FIXUP_MUST_EXIST.
        :    // Returns false on the first translation or validation failure.
        :    // NOTE(review): the reference size is given as sizeof(dst), i.e. the
        :    // size of a RelativeAddress object - presumably pointer sized; confirm.
1634  E :    PEFile::RelocMap::const_iterator it(reloc_map.begin());
1635  E :    PEFile::RelocMap::const_iterator end(reloc_map.end());
1636  E :    for (; it != end; ++it) {
1637  E :      RelativeAddress src(it->first);
1638  E :      RelativeAddress dst;
1639  E :      if (!image_file_.Translate(it->second, &dst)) {
1640  i :        LOG(ERROR) << "Unable to translate relocation destination.";
1641  i :        return false;
1642    :      }
1643    :  
1644    :      if (!ValidateOrAddReference(FIXUP_MUST_EXIST, src, BlockGraph::ABSOLUTE_REF,
1645  E :                                  sizeof(dst), dst, 0, &fixup_map_, &references_))
1646  i :        return false;
1647  E :    }
1648    :  
1649  E :    return true;
1650  E :  }
1651    :  
1652  E :  bool Decomposer::CreateBlocksFromSectionContribs(IDiaSession* session) {
        :    // Creates one block per section contribution found in the DIA section
        :    // contribution table of |session|. Each block is named after the
        :    // contribution's compiland, typed as code or data according to the
        :    // contribution's code flag, and tagged SECTION_CONTRIB. Contributions
        :    // in the resource section are skipped. Returns false on any DIA,
        :    // conversion or block creation failure.
1653  E :    ScopedComPtr<IDiaEnumSectionContribs> section_contribs;
1654    :    SearchResult search_result = FindDiaTable(session,
1655  E :                                              section_contribs.Receive());
        :    // Only kSearchFailed is logged here; any other non-success result
        :    // returns false silently.
1656  E :    if (search_result != kSearchSucceeded) {
1657  i :      if (search_result == kSearchFailed)
1658  i :        LOG(ERROR) << "No section contribution table found.";
1659  i :      return false;
1660    :    }
1661    :  
1662  E :    size_t rsrc_id = image_file_.GetSectionIndex(kResourceSectionName);
1663    :  
1664  E :    LONG count = 0;
1665  E :    if (section_contribs->get_Count(&count) != S_OK) {
1666  i :      LOG(ERROR) << "Failed to get section contributions enumeration length.";
1667  i :      return false;
1668    :    }
1669    :  
1670  E :    for (LONG visited = 0; visited < count; ++visited) {
1671  E :      ScopedComPtr<IDiaSectionContrib> section_contrib;
1672  E :      ULONG fetched = 0;
1673  E :      HRESULT hr = section_contribs->Next(1, section_contrib.Receive(), &fetched);
1674  E :      if (hr != S_OK) {
1675  i :        LOG(ERROR) << "Failed to get DIA section contribution: "
1676    :                   << com::LogHr(hr) << ".";
1677  i :        return false;
1678    :      }
1679  E :      if (fetched == 0)
1680  i :        break;
1681    :  
        :      // Fetch all the properties we need; |hr| holds the result of the first
        :      // call that did not return S_OK so it can be logged.
1682  E :      hr = E_FAIL;
1683  E :      DWORD rva = 0;
1684  E :      DWORD length = 0;
1685  E :      DWORD section_id = 0;
1686  E :      BOOL code = FALSE;
1687  E :      ScopedComPtr<IDiaSymbol> compiland;
1688  E :      ScopedBstr bstr_name;
1689    :      if ((hr = section_contrib->get_relativeVirtualAddress(&rva)) != S_OK ||
1690    :          (hr = section_contrib->get_length(&length)) != S_OK ||
1691    :          (hr = section_contrib->get_addressSection(&section_id)) != S_OK ||
1692    :          (hr = section_contrib->get_code(&code)) != S_OK ||
1693    :          (hr = section_contrib->get_compiland(compiland.Receive())) != S_OK ||
1694  E :          (hr = compiland->get_name(bstr_name.Receive())) != S_OK) {
1695  i :        LOG(ERROR) << "Failed to get section contribution properties: "
1696    :                   << com::LogHr(hr) << ".";
1697  i :        return false;
1698    :      }
1699    :  
1700    :      // Determine if this function was built by a supported compiler.
1701    :      bool is_built_by_supported_compiler =
1702  E :          IsBuiltBySupportedCompiler(compiland.get());
1703    :  
1704    :      // DIA numbers sections from 1 to n, while we do 0 to n - 1.
1705  E :      DCHECK_LT(0u, section_id);
1706  E :      --section_id;
1707    :  
1708    :      // We don't parse the resource section, as it is parsed by the PEFileParser.
1709  E :      if (section_id == rsrc_id)
1710  i :        continue;
1711    :  
1712  E :      std::string name;
1713  E :      if (!WideToUTF8(bstr_name, bstr_name.Length(), &name)) {
1714  i :        LOG(ERROR) << "Failed to convert compiland name to UTF8.";
1715  i :        return false;
1716    :      }
1717    :  
1718    :      // Create the block.
1719    :      BlockGraph::BlockType block_type =
1720  E :          code ? BlockGraph::CODE_BLOCK : BlockGraph::DATA_BLOCK;
1721    :      BlockGraph::Block* block = FindOrCreateBlock(block_type,
1722    :                                                   RelativeAddress(rva),
1723    :                                                   length,
1724    :                                                   name.c_str(),
1725  E :                                                   kExpectNoBlock);
1726  E :      if (block == NULL) {
1727  i :        LOG(ERROR) << "Unable to create block.";
1728  i :        return false;
1729    :      }
1730    :  
1731    :      // Set the block attributes.
1732  E :      block->set_attribute(BlockGraph::SECTION_CONTRIB);
1733  E :      if (!is_built_by_supported_compiler)
1734  E :        block->set_attribute(BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER);
1735  E :    }
1736    :  
1737  E :    return true;
1738  E :  }
1739    :  
1740    :    DiaBrowser::BrowserDirective Decomposer::OnDataSymbol(
1741    :      const DiaBrowser& dia_browser,
1742    :      const DiaBrowser::SymTagVector& sym_tags,
1743  E :      const DiaBrowser::SymbolPtrVector& symbols) {
        :    // DiaBrowser callback for data symbols; the symbol of interest is the
        :    // last element of |symbols|. Symbols that are not statically located,
        :    // have an RVA of zero, or have zero length are ignored. For the others
        :    // a data block is found or created and a label named after the symbol
        :    // is added to it. Returns kBrowserAbort on failure and
        :    // kBrowserContinue otherwise.
1744  E :    DCHECK_LT(0u, sym_tags.size());
1745  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1746  E :    DCHECK_EQ(SymTagData, sym_tags.back());
1747    :  
1748  E :    const DiaBrowser::SymbolPtr& data(symbols.back());
1749    :  
1750  E :    HRESULT hr = E_FAIL;
1751  E :    DWORD location_type = LocIsNull;
1752  E :    DWORD rva = 0;
1753  E :    ScopedBstr name_bstr;
1754    :    if (FAILED(hr = data->get_locationType(&location_type)) ||
1755    :        FAILED(hr = data->get_relativeVirtualAddress(&rva)) ||
1756  E :        FAILED(hr = data->get_name(name_bstr.Receive()))) {
1757  i :      LOG(ERROR) << "Failed to get data properties: " << com::LogHr(hr) << ".";
1758  i :      return DiaBrowser::kBrowserAbort;
1759    :    }
1760    :  
1761    :    // We only parse data symbols with static storage.
1762  E :    if (location_type != LocIsStatic)
1763  E :      return DiaBrowser::kBrowserContinue;
1764    :  
1765    :    // Symbols with an address of zero are essentially invalid. They appear to
1766    :    // have been optimized away by the compiler, but they are still reported.
1767  E :    if (rva == 0)
1768  E :      return DiaBrowser::kBrowserContinue;
1769    :  
1770    :    // TODO(chrisha): We eventually want to get alignment info from the type
1771    :    //     information. This is strictly a lower bound, however, as certain
1772    :    //     data may be used in instructions that impose stricter alignment
1773    :    //     requirements.
1774  E :    size_t length = 0;
1775  E :    if (!GetTypeInfo(data, &length)) {
1776  i :      return DiaBrowser::kBrowserAbort;
1777    :    }
1778    :    // Zero-length data symbols act as 'forward declares' in some sense. They
1779    :    // are always followed by a non-zero length data symbol with the same name
1780    :    // and location.
1781  E :    if (length == 0)
1782  E :      return DiaBrowser::kBrowserContinue;
1783    :  
1784  E :    RelativeAddress addr(rva);
1785  E :    std::string name;
1786  E :    if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1787  i :      LOG(ERROR) << "Failed to convert data symbol name to UTF8.";
1788  i :      return DiaBrowser::kBrowserAbort;
1789    :    }
1790    :  
1791    :    BlockGraph::Block* block = FindOrCreateBlock(BlockGraph::DATA_BLOCK,
1792    :                                                 addr, length, name.c_str(),
1793  E :                                                 kAllowCoveringBlock);
        :    // FindOrCreateBlock reports failure with NULL (other callers in this
        :    // file check for it); bail out instead of dereferencing it below.
        :    if (block == NULL) {
        :      LOG(ERROR) << "Unable to find or create block for data symbol \""
        :                 << name << "\".";
        :      return DiaBrowser::kBrowserAbort;
        :    }
1794    :  
1795  E :    if (block->type() == BlockGraph::CODE_BLOCK) {
1796    :      // The NativeClient bits of chrome.dll consists of hand-written assembly
1797    :      // that is compiled using a custom non-Microsoft toolchain. Unfortunately
1798    :      // for us this toolchain emits 1-byte data symbols instead of code labels.
1799    :      static const char kNaClPrefix[] = "NaCl";
1800    :      if (length == 1 &&
1801  E :          name.compare(0, arraysize(kNaClPrefix) - 1, kNaClPrefix) == 0) {
1802  i :        if (!AddLabelToBlock(addr, name, BlockGraph::CODE_LABEL, block)) {
1803  i :          LOG(ERROR) << "Failed to add label to code block.";
1804  i :          return DiaBrowser::kBrowserAbort;
1805    :        }
1806    :  
1807  i :        return DiaBrowser::kBrowserContinue;
1808    :      }
1809    :    }
1810    :  
1811  E :    if (!AddLabelToBlock(addr, name, BlockGraph::DATA_LABEL, block)) {
1812  i :      LOG(ERROR) << "Failed to add data label to block.";
1813  i :      return DiaBrowser::kBrowserAbort;
1814    :    }
1815    :  
1816  E :    return DiaBrowser::kBrowserContinue;
1817  E :  }
1818    :  
1819    :    DiaBrowser::BrowserDirective Decomposer::OnPublicSymbol(
1820    :      const DiaBrowser& dia_browser,
1821    :      const DiaBrowser::SymTagVector& sym_tags,
1822  E :      const DiaBrowser::SymbolPtrVector& symbols) {
        :    // DiaBrowser callback for public symbols; the symbol of interest is the
        :    // last element of |symbols|. Symbols without a relative virtual address
        :    // are ignored. Otherwise the block containing the address is looked up
        :    // and a label carrying the symbol name (minus one leading underscore)
        :    // is added to it. Returns kBrowserAbort on failure and
        :    // kBrowserContinue otherwise.
1823  E :    DCHECK_LT(0u, sym_tags.size());
1824  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1825  E :    DCHECK_EQ(SymTagPublicSymbol, sym_tags.back());
1826  E :    const DiaBrowser::SymbolPtr& symbol(symbols.back());
1827    :  
1828    :    // We don't care about symbols that don't have addresses.
1829  E :    DWORD rva = 0;
1830  E :    if (S_OK != symbol->get_relativeVirtualAddress(&rva))
1831  E :      return DiaBrowser::kBrowserContinue;
1832    :  
1833  E :    ScopedBstr name_bstr;
1834  E :    if (S_OK != symbol->get_name(name_bstr.Receive())) {
1835  i :      LOG(ERROR) << "Failed to get public symbol name.";
1836  i :      return DiaBrowser::kBrowserAbort;
1837    :    }
1838    :  
1839  E :    std::string name;
1840  E :    if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1841  i :      LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1842  i :      return DiaBrowser::kBrowserAbort;
1843    :    }
1844    :  
1845  E :    RelativeAddress addr(rva);
1846  E :    BlockGraph::Block* block = image_->GetBlockByAddress(addr);
1847  E :    if (block == NULL) {
1848  i :      LOG(ERROR) << "No block found for public symbol \"" << name << "\".";
1849  i :      return DiaBrowser::kBrowserAbort;
1850    :    }
1851    :  
1852    :    // Public symbol names are mangled. Remove leading '_' as per
1853    :    // http://msdn.microsoft.com/en-us/library/00kh39zz(v=vs.80).aspx
        :    // Guard against an empty name: indexing element 0 of an empty non-const
        :    // string is undefined behaviour before C++11.
        :    if (!name.empty() && name[0] == '_')
1855  E :      name = name.substr(1);
1856    :  
1857    :    // Set the block name or add a label. For code blocks these are entry points,
1858    :    // while for data blocks these are simply to aid debugging.
1859    :    BlockGraph::LabelAttributes label_attributes =
1860    :        block->type() == BlockGraph::CODE_BLOCK ? BlockGraph::CODE_LABEL :
1861  E :                                                  BlockGraph::DATA_LABEL;
1862  E :    if (!AddLabelToBlock(addr, name, label_attributes, block))
1863  i :      return DiaBrowser::kBrowserAbort;
1864    :  
1865  E :    return DiaBrowser::kBrowserContinue;
1866  E :  }
1867    :  
1868  E :  bool Decomposer::ProcessStaticInitializers() {
        :    // Finds data blocks whose names match the begin/end patterns in
        :    // static_initializer_patterns_, pairs them up by the string captured by
        :    // the pattern, and merges all blocks between each begin/end pair into a
        :    // single block named "Bracketed Initializers: <key>". Returns false
        :    // (after logging) if a bracketing symbol is seen twice, is missing, or
        :    // if a pair is out of order.
1869    :    typedef std::pair<RelativeAddress, RelativeAddress> AddressPair;
1870    :    typedef std::map<std::string, AddressPair> AddressPairMap;
1871    :  
1872  E :    const RelativeAddress kNull(0);
1873    :  
1874    :    // This stores pairs of addresses, representing the beginning and the end
1875    :    // of each static initializer block. It is keyed with a string, which is
1876    :    // returned by the match group of the corresponding initializer pattern.
1877    :    // The key is necessary to correlate matching labels (as multiple pairs
1878    :    // of labels may match through a single pattern).
1879  E :    AddressPairMap addr_pair_map;
1880    :  
1881    :    // Used for keeping track of which label, if any, we matched.
1882    :    enum MatchType {
1883    :      kMatchNone,
1884    :      kMatchBeginLabel,
1885    :      kMatchEndLabel
1886    :    };
1887    :  
1888    :    // Iterate through all data blocks, looking for known initializer labels.
1889  E :    BlockGraph::AddressSpace::RangeMapConstIter block_it = image_->begin();
1890  E :    for (; block_it != image_->end(); ++block_it) {
1891  E :      const BlockGraph::Block* block = block_it->second;
1892    :      // Skip non-data blocks.
1893  E :      if (block->type() != BlockGraph::DATA_BLOCK)
1894  E :        continue;
1895    :  
1896    :      // Check the block name against each of the initializer patterns.
1897  E :      MatchType match = kMatchNone;
1898  E :      std::string block_name = block->name();
1899  E :      std::string name;
1900  E :      for (size_t i = 0; i < static_initializer_patterns_.size(); ++i) {
1901  E :        REPair& re_pair(static_initializer_patterns_[i]);
1902  E :        if (re_pair.first.FullMatch(block_name, &name))
1903  E :          match = kMatchBeginLabel;
1904  E :        else if (re_pair.second.FullMatch(block_name, &name))
1905  E :          match = kMatchEndLabel;
1906    :  
1907  E :        if (match != kMatchNone)
1908  E :          break;
1909  E :      }
1910    :  
1911    :      // No pattern matched this symbol? Continue to the next one.
1912  E :      if (match == kMatchNone)
1913  E :        continue;
1914    :  
1915    :      // Ensure this symbol exists in the map. Thankfully, addresses default
1916    :      // construct to NULL.
1917  E :      AddressPair& addr_pair = addr_pair_map[name];
1918    :  
1919    :      // Update the bracketing symbol endpoint. Make sure each symbol endpoint
1920    :      // is only seen once.
1921  E :      RelativeAddress* addr = NULL;
1922  E :      RelativeAddress new_addr;
1923  E :      if (match == kMatchBeginLabel) {
1924  E :        addr = &addr_pair.first;
1925  E :        new_addr = block->addr();
1926  E :      } else {
1927  E :        addr = &addr_pair.second;
1928  E :        new_addr = block->addr() + block->size();
1929    :      }
1930  E :      if (*addr != kNull) {
1931  i :        LOG(ERROR) << "Bracketing symbol appears multiple times: "
1932    :                   << block_name;
1933  i :        return false;
1934    :      }
1935  E :      *addr = new_addr;
1936  E :    }
1937    :  
1938    :    // Use the bracketing symbols to make the initializers contiguous.
1939  E :    AddressPairMap::const_iterator init_it = addr_pair_map.begin();
1940  E :    for (; init_it != addr_pair_map.end(); ++init_it) {
1941  E :      RelativeAddress begin_addr = init_it->second.first;
1942  E :      if (begin_addr == kNull) {
1943  i :        LOG(ERROR) << "Bracketing start symbol missing: " << init_it->first;
1944  i :        return false;
1945    :      }
1946    :  
1947  E :      RelativeAddress end_addr = init_it->second.second;
1948  E :      if (end_addr == kNull) {
1949  i :        LOG(ERROR) << "Bracketing end symbol missing: " << init_it->first;
1950  i :        return false;
1951    :      }
1952    :  
1953  E :      if (begin_addr > end_addr) {
1954  i :        LOG(ERROR) << "Bracketing symbols out of order: " << init_it->first;
1955  i :        return false;
1956    :      }
1957    :  
1958    :      // Merge the initializers.
1959  E :      DataSpace::Range range(begin_addr, end_addr - begin_addr);
1960  E :      BlockGraph::Block* merged = image_->MergeIntersectingBlocks(range);
        :      // Verify the merge result before it is dereferenced below.
        :      DCHECK(merged != NULL);
1961    :      std::string name = StringPrintf("Bracketed Initializers: %s",
1962  E :                                      init_it->first.c_str());
1963  E :      merged->set_name(name);
1965  E :    }
1966    :  
1967  E :    return true;
1968  E :  }
1969    :  
1970  E :  bool Decomposer::ProcessDataSymbols(IDiaSymbol* root) {
        :    // Browses the symbol tree rooted at |root| and invokes OnDataSymbol for
        :    // every data symbol matched by the patterns registered below. Returns
        :    // the result of DiaBrowser::Browse.
1971    :    DiaBrowser::MatchCallback on_data_symbol(
1972  E :        base::Bind(&Decomposer::OnDataSymbol, base::Unretained(this)));
1973    :  
1974  E :    DiaBrowser dia_browser;
        :    // Data symbols, optionally nested in a compiland.
1975    :    dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagData),
1976  E :                           on_data_symbol);
        :    // Data symbols nested in a function of a compiland, at any block depth.
1977    :    dia_browser.AddPattern(Seq(SymTagCompiland, SymTagFunction,
1978    :                               Star(SymTagBlock), SymTagData),
1979  E :                           on_data_symbol);
1980    :  
1981  E :    return dia_browser.Browse(root);
1982  E :  }
1983    :  
1984  E :  bool Decomposer::ProcessPublicSymbols(IDiaSymbol* root) {
        :    // Browses the symbol tree rooted at |root| and invokes OnPublicSymbol
        :    // for every symbol matching SymTagPublicSymbol. Returns the result of
        :    // DiaBrowser::Browse.
1985    :    DiaBrowser::MatchCallback on_public_symbol(
1986  E :        base::Bind(&Decomposer::OnPublicSymbol, base::Unretained(this)));
1987    :  
1988  E :    DiaBrowser dia_browser;
1989  E :    dia_browser.AddPattern(SymTagPublicSymbol, on_public_symbol);
1990    :  
1991  E :    return dia_browser.Browse(root);
1992  E :  }
1993    :  
1994  E :  bool Decomposer::GuessDataBlockAlignments() {
        :    // Calls GuessDataBlockAlignment on every block that intersects a data
        :    // section of the image. As written this always returns true.
1995  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1996    :    // Iterate through all the image sections.
1997  E :    for (size_t i = 0; i < num_sections; ++i) {
1998  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1999  E :      DCHECK(header != NULL);
2000    :  
2001    :      // Only iterate through data sections.
2002  E :      if (GetSectionType(header) != kSectionData)
2003  E :        continue;
2004    :  
2005  E :      RelativeAddress section_begin(header->VirtualAddress);
2006  E :      size_t section_length = header->Misc.VirtualSize;
2007    :  
2008    :      // Get the range of blocks in this section.
2009    :      BlockGraph::AddressSpace::RangeMapIterPair it_pair =
2010  E :          image_->GetIntersectingBlocks(section_begin, section_length);
2011    :  
2012    :      // Iterate through the blocks in the section, setting their alignment.
2013  E :      BlockGraph::AddressSpace::RangeMapIter it = it_pair.first;
2014  E :      for (; it != it_pair.second; ++it) {
2015  E :        BlockGraph::Block* block = it->second;
2016  E :        GuessDataBlockAlignment(block);
2017  E :      }
2018  E :    }
2019    :  
2020  E :    return true;
2021  E :  }
2022    :  
2023  E :  bool Decomposer::CreateCodeReferences() {
        :    // Runs CreateCodeReferencesForBlock on every code block in the block
        :    // graph, skipping blocks of any other type. Returns false as soon as
        :    // one block fails.
2024  E :    BlockGraph::BlockMap::iterator it(image_->graph()->blocks_mutable().begin());
2025  E :    BlockGraph::BlockMap::iterator end(image_->graph()->blocks_mutable().end());
2026  E :    for (; it != end; ++it) {
2027  E :      BlockGraph::Block* block = &it->second;
2028  E :      if (block->type() != BlockGraph::CODE_BLOCK)
2029  E :        continue;
2030    :  
2031  E :      if (!CreateCodeReferencesForBlock(block))
2032  i :        return false;
2033  E :    }
2034    :  
2035  E :    return true;
2036  E :  }
2037    :  
2038  E :  bool Decomposer::CreateCodeReferencesForBlock(BlockGraph::Block* block) {
        :    // Disassembles the code portion of |block|, with OnInstruction invoked
        :    // as the disassembler's instruction callback, and records the outcome
        :    // of the walk as block attributes. Returns false if the block has no
        :    // address, the address cannot be translated, a strict block has an
        :    // unexpected code/data layout, or the walk ends with kWalkError.
        :    // NOTE(review): the early "return false" paths below leave
        :    // current_block_ pointing at |block|, so a later call would trip the
        :    // DCHECK on entry - presumably callers abort on failure; confirm.
2039  E :    DCHECK(current_block_ == NULL);
2040  E :    current_block_ = block;
2041    :  
2042  E :    RelativeAddress block_addr;
2043  E :    if (!image_->GetAddressOf(block, &block_addr)) {
2044  i :      LOG(ERROR) << "Block \"" << block->name() << "\" has no address.";
2045  i :      return false;
2046    :    }
2047    :  
2048  E :    AbsoluteAddress abs_block_addr;
2049  E :    if (!image_file_.Translate(block_addr, &abs_block_addr)) {
2050  i :      LOG(ERROR) << "Unable to get absolute address for " << block_addr;
2051  i :      return false;
2052    :    }
2053    :  
2054    :    Disassembler::InstructionCallback on_instruction(
2055  E :        base::Bind(&Decomposer::OnInstruction, base::Unretained(this)));
2056    :  
2057    :    // Use block labels and code references as starting points for disassembly.
2058  E :    Disassembler::AddressSet starting_points;
2059    :    GetDisassemblyStartingPoints(block, abs_block_addr, reloc_set_,
2060  E :                                 &starting_points);
2061    :  
2062    :    // Determine whether or not we are being strict during disassembly.
2063  E :    bool strict = block_graph::CodeBlockAttributesAreBasicBlockSafe(block);
2064  E :    be_strict_with_current_block_ = strict;
2065    :  
2066    :    // Determine the length of the code portion of the block by trimming off any
2067    :    // known trailing data. Also, if we're in strict mode, ensure that our
2068    :    // assumption regarding code/data layout is met.
2069  E :    size_t code_size = 0;
2070    :    if (!BlockHasExpectedCodeDataLayout(block, &code_size) &&
2071  E :        be_strict_with_current_block_) {
2072  i :      LOG(ERROR) << "Block \"" << block->name() << "\" has unexpected code/data "
2073    :                 << "layout.";
2074  i :      return false;
2075    :    }
2076    :  
2077    :    // Disassemble the block.
2078    :    Disassembler disasm(block->data(),
2079    :                        code_size,
2080    :                        abs_block_addr,
2081    :                        starting_points,
2082  E :                        on_instruction);
2083  E :    Disassembler::WalkResult result = disasm.Walk();
2084    :  
2085    :    // If we're strict (that is, we're confident that the block was produced by
2086    :    // cl.exe), then we can use that knowledge to look for calls that appear to be
2087    :    // to non-returning functions that we may not have symbol info for.
2088  E :    if (be_strict_with_current_block_)
2089  E :      LookForNonReturningFunctions(references_, *image_, current_block_, disasm);
2090    :  
        :    // Reset the per-block state now that the walk is finished.
2091  E :    DCHECK_EQ(block, current_block_);
2092  E :    current_block_ = NULL;
2093  E :    be_strict_with_current_block_ = true;
2094    :  
2095  E :    switch (result) {
2096    :      case Disassembler::kWalkIncomplete:
2097    :        // There were computed branches that couldn't be chased down.
2098  E :        block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2099  E :        return true;
2100    :  
2101    :      case Disassembler::kWalkTerminated:
2102    :        // This exit condition should only ever occur for non-strict disassembly.
2103    :        // If strict, we should always get kWalkError.
2104  E :        DCHECK(!strict);
2105    :        // This means that the code was malformed, or broke some expected
2106    :        // conventions. This code is not safe for basic block disassembly.
2107  E :        block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
2108  E :        return true;
2109    :  
2110    :      case Disassembler::kWalkSuccess:
2111    :        // Were any bytes in the block not accounted for? This generally means
2112    :        // unreachable code, which we see quite often, especially in debug builds.
2113  E :        if (disasm.code_size() != disasm.disassembled_bytes())
2114  E :          block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2115  E :        return true;
2116    :  
2117    :      case Disassembler::kWalkError:
2118  i :        return false;
2119    :  
2120    :      default:
2121  i :        NOTREACHED() << "Unhandled Disassembler WalkResult.";
2122  i :        return false;
2123    :    }
2124  E :  }
2125    :  
2126    :  BlockGraph::Block* Decomposer::CreateBlock(BlockGraph::BlockType type,
2127    :                                             RelativeAddress address,
2128    :                                             BlockGraph::Size size,
2129  E :                                             const base::StringPiece& name) {
2130  E :    BlockGraph::Block* block = image_->AddBlock(type, address, size, name);
2131  E :    if (block == NULL) {
2132  i :      LOG(ERROR) << "Unable to add block at " << address << " with size "
2133    :                 << size << ".";
2134  i :      return NULL;
2135    :    }
2136    :  
2137    :    // Mark the source range from whence this block originates.
2138    :    bool pushed = block->source_ranges().Push(
2139    :        BlockGraph::Block::DataRange(0, size),
2140  E :        BlockGraph::Block::SourceRange(address, size));
2141  E :    DCHECK(pushed);
2142    :  
2143  E :    BlockGraph::SectionId section = image_file_.GetSectionIndex(address, size);
2144  E :    if (section == BlockGraph::kInvalidSectionId) {
2145  i :      LOG(ERROR) << "Block at " << address << " with size " << size
2146    :                 << " lies outside of all sections.";
2147  i :      return NULL;
2148    :    }
2149  E :    block->set_section(section);
2150    :  
2151  E :    const uint8* data = image_file_.GetImageData(address, size);
2152  E :    if (data != NULL)
2153  E :      block->SetData(data, size);
2154    :  
2155  E :    return block;
2156  E :  }
2157    :  
2158    :  BlockGraph::Block* Decomposer::FindOrCreateBlock(
2159    :      BlockGraph::BlockType type,
2160    :      RelativeAddress addr,
2161    :      BlockGraph::Size size,
2162    :      const base::StringPiece& name,
2163  E :      FindOrCreateBlockDirective directive) {
2164  E :    BlockGraph::Block* block = image_->GetBlockByAddress(addr);
2165  E :    if (block != NULL) {
2166    :      // Always allow collisions where the new block is a proper subset of
2167    :      // an existing PE parsed block. The PE parser often knows more than we do
2168    :      // about blocks that need to stick together.
2169  E :      if (block->attributes() & BlockGraph::PE_PARSED)
2170  E :        directive = kAllowCoveringBlock;
2171    :  
2172  E :      bool collision = false;
2173  E :      switch (directive) {
2174    :        case kExpectNoBlock: {
2175  i :          collision = true;
2176  i :          break;
2177    :        }
2178    :        case kAllowIdenticalBlock: {
2179  i :          collision = (block->addr() != addr || block->size() != size);
2180  i :          break;
2181    :        }
2182    :        default: {
2183  E :          DCHECK(directive == kAllowCoveringBlock);
2184    :          collision = block->addr() > addr ||
2185  E :              (block->addr() + block->size()) < addr + size;
2186    :          break;
2187    :        }
2188    :      }
2189    :  
2190  E :      if (collision) {
2191  i :        LOG(ERROR) << "Block collision for function at "
2192    :                   << addr.value() << "(" << size << ") with " << block->name();
2193  i :        return NULL;
2194    :      }
2195    :  
2196  E :      return block;
2197    :    }
2198  E :    DCHECK(block == NULL);
2199    :  
2200  E :    return CreateBlock(type, addr, size, name);
2201  E :  }
2202    :  
2203    :  CallbackDirective Decomposer::LookPastInstructionForData(
2204  E :      RelativeAddress instr_end) {
2205    :    // If this instruction terminates at a data boundary (ie: the *next*
2206    :    // instruction will be data or a reloc), we can be certain that a new
2207    :    // lookup table is starting at this address.
2208  E :    if (reloc_set_.find(instr_end) == reloc_set_.end())
2209  E :      return Disassembler::kDirectiveContinue;
2210    :  
2211    :    // Find the block housing the reloc. We expect the reloc to be contained
2212    :    // completely within this block.
2213  E :    BlockGraph::Block* block = image_->GetContainingBlock(instr_end, 4);
2214  E :    if (block != current_block_) {
2215  i :      CHECK(block != NULL);
2216  i :      LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2217    :          << "Found an instruction/data boundary between blocks: "
2218    :          << current_block_->name() << " and " << block->name();
2219  i :      return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2220    :    }
2221    :  
2222  E :    BlockGraph::Offset offset = instr_end - block->addr();
2223    :  
2224    :    // We expect there to be a jump-table data label already.
2225  E :    BlockGraph::Label label;
2226  E :    bool have_label = block->GetLabel(offset, &label);
2227    :    if (!have_label || !label.has_attributes(
2228  E :            BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)) {
2229  i :      LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2230    :          << "Expected there to be a data label marking the jump "
2231    :          << "table at " << block->name() << " + " << offset << ".";
2232    :  
2233    :      // If we're in strict mode, we're a block that obeys standard conventions,
2234    :      // which means we should already be aware of any jump tables in this block.
2235  i :      if (be_strict_with_current_block_)
2236  i :        return Disassembler::kDirectiveAbort;
2237    :  
2238    :      // If we're not in strict mode, add the jump-table label.
2239  i :      if (have_label) {
2240  i :        CHECK(block->RemoveLabel(offset));
2241    :      }
2242    :  
2243    :      CHECK(block->SetLabel(offset, BlockGraph::Label(
2244    :          base::StringPrintf("<JUMP-TABLE-%d>", offset),
2245  i :          BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)));
2246    :    }
2247    :  
2248  E :    return Disassembler::kDirectiveTerminatePath;
2249  E :  }
2250    :  
2251  i :  void Decomposer::MarkDisassembledPastEnd() {
2252    :    static size_t count = 0;
2253  i :    DCHECK(current_block_ != NULL);
2254  i :    current_block_->set_attribute(BlockGraph::DISASSEMBLED_PAST_END);
2255    :    // TODO(chrisha): The entire "disassembled past end" and non-returning
2256    :    //     function infrastructure can be ripped out once we rework the BB
2257    :    //     disassembler to be straight path, and remove the disassembly phase
2258    :    //     from the decomposer (where it's no longer needed). In the meantime
2259    :    //     we simply crank down this log verbosity due to all of the false
2260    :    //     positives.
2261  i :    VLOG(1) << "Disassembled past end of block or into known data for block \""
2262    :            << current_block_->name() << "\" at " << current_block_->addr()
2263    :            << ".";
2264  i :  }
2265    :  
2266    :  CallbackDirective Decomposer::VisitNonFlowControlInstruction(
2267  E :      RelativeAddress instr_start, RelativeAddress instr_end) {
2268    :    // TODO(chrisha): We could walk the operands and follow references
2269    :    //     explicitly. If any of them are of reference type and there's no
2270    :    //     matching reference, this would be cause to blow up and die (we
2271    :    //     should get all of these as relocs and/or fixups).
2272    :  
2273    :    IntermediateReferenceMap::const_iterator ref_it =
2274  E :        references_.upper_bound(instr_start);
2275    :    IntermediateReferenceMap::const_iterator ref_end =
2276  E :        references_.lower_bound(instr_end);
2277    :  
2278  E :    for (; ref_it != ref_end; ++ref_it) {
2279    :      BlockGraph::Block* ref_block = image_->GetContainingBlock(
2280  E :          ref_it->second.base, 1);
2281  E :      DCHECK(ref_block != NULL);
2282    :  
2283    :      // This is an inter-block reference.
2284  E :      if (ref_block != current_block_) {
2285    :        // There should be no cross-block references to the middle of other
2286    :        // code blocks (to the top is fine, as we could be passing around a
2287    :        // function pointer). The exception is if the remote block is not
2288    :        // generated by cl.exe. In this case, there could be arbitrary labels
2289    :        // that act like functions within the body of that block, and referring
2290    :        // to them is perfectly fine.
2291    :        if (ref_block->type() == BlockGraph::CODE_BLOCK &&
2292    :            ref_it->second.base != ref_block->addr() &&
2293  E :            block_graph::CodeBlockAttributesAreBasicBlockSafe(ref_block)) {
2294  E :          LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2295    :              << "Found a non-control-flow code-block to middle-of-code-block "
2296    :              << "reference from block \"" << current_block_->name()
2297    :              << "\" to block \"" << ref_block->name() << "\".";
2298  E :          return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2299    :        }
2300  E :      } else {
2301    :        // This is an intra-block reference.
2302    :        BlockGraph::Offset ref_offset =
2303  E :            ref_it->second.base - current_block_->addr();
2304    :  
2305    :        // If this is to offset zero, we assume we are taking a pointer to
2306    :        // ourself, which is safe.
2307  E :        if (ref_offset != 0) {
2308    :          // If this is 'clean' code it should be to data, and there should be a
2309    :          // label.
2310  E :          BlockGraph::Label label;
2311  E :          if (!current_block_->GetLabel(ref_offset, &label)) {
2312  i :            LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2313    :                << "Found an intra-block data-reference with no label.";
2314  i :            return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2315    :          } else {
2316    :            if (!label.has_attributes(BlockGraph::DATA_LABEL) ||
2317  E :                label.has_attributes(BlockGraph::CODE_LABEL)) {
2318  E :              LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2319    :                  << "Found an intra-block data-like reference to a non-data "
2320    :                  << "or code label in block \"" << current_block_->name()
2321    :                  << "\".";
2322  E :              return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2323    :            }
2324    :          }
2325  E :        }
2326    :      }
2327  E :    }
2328    :  
2329  E :    return Disassembler::kDirectiveContinue;
2330  E :  }
2331    :  
2332    :  CallbackDirective Decomposer::VisitPcRelativeFlowControlInstruction(
2333    :      AbsoluteAddress instr_abs,
2334    :      RelativeAddress instr_rel,
2335    :      const _DInst& instruction,
2336  E :      bool end_of_code) {
2337  E :    int fc = META_GET_FC(instruction.meta);
2338  E :    DCHECK(fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH);
2339  E :    DCHECK_EQ(O_PC, instruction.ops[0].type);
2340  E :    DCHECK_EQ(O_NONE, instruction.ops[1].type);
2341  E :    DCHECK_EQ(O_NONE, instruction.ops[2].type);
2342  E :    DCHECK_EQ(O_NONE, instruction.ops[3].type);
2343    :    DCHECK(instruction.ops[0].size == 8 ||
2344    :        instruction.ops[0].size == 16 ||
2345  E :        instruction.ops[0].size == 32);
2346    :    // Distorm gives us size in bits, we want bytes.
2347  E :    BlockGraph::Size size = instruction.ops[0].size / 8;
2348    :  
2349    :    // Get the reference's address. Note we assume it's in the instruction's
2350    :    // tail end - I don't know of a case where a PC-relative offset in a branch
2351    :    // or call is not the very last thing in an x86 instruction.
2352  E :    AbsoluteAddress abs_src = instr_abs + instruction.size - size;
2353    :    AbsoluteAddress abs_dst = instr_abs + instruction.size +
2354  E :        static_cast<size_t>(instruction.imm.addr);
2355    :  
2356  E :    RelativeAddress src, dst;
2357    :    if (!image_file_.Translate(abs_src, &src) ||
2358  E :        !image_file_.Translate(abs_dst, &dst)) {
2359  i :      LOG(ERROR) << "Unable to translate absolute to relative addresses.";
2360  i :      return Disassembler::kDirectiveAbort;
2361    :    }
2362    :  
2363    :    // Get the block associated with the destination address. It must exist
2364    :    // and be a code block.
2365  E :    BlockGraph::Block* block = image_->GetContainingBlock(dst, 1);
2366  E :    DCHECK(block != NULL);
2367  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
2368    :  
2369    :    // For short references, we should not see a fixup.
2370  E :    ValidateOrAddReferenceMode mode = FIXUP_MUST_NOT_EXIST;
2371  E :    if (size == kPointerSize) {
2372    :      // Long PC_RELATIVE reference within a single block? FIXUPs aren't
2373    :      // strictly necessary.
2374  E :      if (block->Contains(src, kPointerSize))
2375  E :        mode = FIXUP_MAY_EXIST;
2376  E :      else
2377    :        // But if they're between blocks (section contributions), we expect to
2378    :        // find them.
2379  E :        mode = FIXUP_MUST_EXIST;
2380  E :    } else {
2381    :      // Since we slice by section contributions we no longer see short
2382    :      // references across blocks. If we do, bail!
2383  E :      if (block != current_block_) {
2384  i :        LOG(ERROR) << "Found a short PC-relative reference out of block \""
2385    :                   << current_block_->name() << "\".";
2386  i :        return Disassembler::kDirectiveAbort;
2387    :      }
2388    :    }
2389    :  
2390    :    // Validate or create the reference, as necessary.
2391    :    if (!ValidateOrAddReference(mode, src, BlockGraph::PC_RELATIVE_REF, size,
2392  E :                                dst, 0, &fixup_map_, &references_)) {
2393  i :      LOG(ERROR) << "Failed to validate/create reference originating from "
2394    :                 << "block \"" << current_block_->name() << "\".";
2395  i :      return Disassembler::kDirectiveAbort;
2396    :    }
2397    :  
2398    :    // If this is a call and the destination is a non-returning function,
2399    :    // then indicate that we should terminate this disassembly path.
2400    :    if (fc == FC_CALL &&
2401  E :        (block->attributes() & BlockGraph::NON_RETURN_FUNCTION)) {
2402    :      // TODO(chrisha): For now, we enforce that the call be to the beginning
2403    :      //    of the function. This may not be necessary, but better safe than
2404    :      //    sorry for now.
2405  E :      if (block->addr() != dst) {
2406  i :        LOG(ERROR) << "Calling inside the body of a non-returning function: "
2407    :                   << block->name();
2408  i :        return Disassembler::kDirectiveAbort;
2409    :      }
2410    :  
2411  E :      return Disassembler::kDirectiveTerminatePath;
2412    :    }
2413    :  
2414    :    // If we get here, then we don't think it's a non-returning call. If it's
2415    :    // not an unconditional jump and we're at the end of the code for this block
2416    :    // then we consider this as disassembling past the end.
2417  E :    if (fc != FC_UNC_BRANCH && end_of_code)
2418  i :      MarkDisassembledPastEnd();
2419    :  
2420  E :    return Disassembler::kDirectiveContinue;
2421  E :  }
2422    :  
2423    :  CallbackDirective Decomposer::VisitIndirectMemoryCallInstruction(
2424  E :        const _DInst& instruction, bool end_of_code) {
2425  E :    DCHECK_EQ(FC_CALL, META_GET_FC(instruction.meta));
2426  E :    DCHECK_EQ(O_DISP, instruction.ops[0].type);
2427    :  
2428  E :    AbsoluteAddress disp_addr_abs(static_cast<uint32>(instruction.disp));
2429  E :    RelativeAddress disp_addr_rel;
2430  E :    if (!image_file_.Translate(disp_addr_abs, &disp_addr_rel)) {
2431  i :      LOG(ERROR) << "Unable to translate call address.";
2432  i :      return Disassembler::kDirectiveAbort;
2433    :    }
2434    :  
2435    :    // Try to dereference the address of the call instruction. This can fail
2436    :    // for blocks that are only initialized at runtime, so we don't fail if
2437    :    // we don't find a reference.
2438    :    IntermediateReferenceMap::const_iterator ref_it =
2439  E :        references_.find(disp_addr_rel);
2440  E :    if (ref_it == references_.end())
2441  E :      return Disassembler::kDirectiveContinue;
2442    :  
2443    :    // NOTE: This process derails for bound import tables. In this case the
2444    :    //     attempted dereference above will fail, but we could still actually
2445    :    //     find the import name thunk by inspecting the offset of the memory
2446    :    //     location.
2447    :  
2448    :    // The reference must be direct and 32-bit.
2449  E :    const IntermediateReference& ref = ref_it->second;
2450  E :    DCHECK_EQ(BlockGraph::Reference::kMaximumSize, ref.size);
2451  E :    DCHECK_EQ(0, ref.offset);
2452    :  
2453    :    // Look up the thunk this refers to.
2454  E :    BlockGraph::Block* thunk = image_->GetBlockByAddress(ref.base);
2455  E :    if (thunk == NULL) {
2456  i :      LOG(ERROR) << "Unable to dereference intermediate reference at "
2457    :                 << disp_addr_rel << " to " << ref.base << ".";
2458  i :      return Disassembler::kDirectiveAbort;
2459    :    }
2460    :  
2461  E :    if (ref.type == BlockGraph::RELATIVE_REF) {
2462    :      // If this is a relative reference it must be part of an import address
2463    :      // table (during runtime this address would be patched up with an absolute
2464    :      // reference). Thus we expect the referenced block to be data, an import
2465    :      // name thunk.
2466  E :      DCHECK_EQ(BlockGraph::DATA_BLOCK, thunk->type());
2467  E :    } else {
2468    :      // If this is an absolute address it should actually point directly to
2469    :      // code.
2470  E :      DCHECK_EQ(BlockGraph::ABSOLUTE_REF, ref.type);
2471  E :      DCHECK_EQ(BlockGraph::CODE_BLOCK, thunk->type());
2472    :    }
2473    :  
2474    :    // Either way, if the block is non-returning we terminate this path of
2475    :    // disassembly.
2476  E :    if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
2477  E :      return Disassembler::kDirectiveTerminatePath;
2478    :  
2479  E :    if (end_of_code)
2480  i :      MarkDisassembledPastEnd();
2481    :  
2482  E :    return Disassembler::kDirectiveContinue;
2483  E :  }
2484    :  
2485    :  CallbackDirective Decomposer::OnInstruction(const Disassembler& walker,
2486  E :                                              const _DInst& instruction) {
2487    :    // Get the relative address of this instruction.
2488  E :    AbsoluteAddress instr_abs(static_cast<uint32>(instruction.addr));
2489  E :    RelativeAddress instr_rel;
2490  E :    if (!image_file_.Translate(instr_abs, &instr_rel)) {
2491  i :      LOG(ERROR) << "Unable to translate instruction address.";
2492  i :      return Disassembler::kDirectiveAbort;
2493    :    }
2494  E :    RelativeAddress after_instr_rel = instr_rel + instruction.size;
2495    :  
2496    :  #ifndef NDEBUG
2497    :    // If we're in debug mode, it's helpful to have a pointer directly to the
2498    :    // beginning of this instruction in memory.
2499  E :    BlockGraph::Offset instr_offset = instr_rel - current_block_->addr();
2500  E :    const uint8* instr_data = current_block_->data() + instr_offset;
2501    :  #endif
2502    :  
2503    :    // TODO(chrisha): Certain instructions require aligned data (ie: MMX/SSE
2504    :    //     instructions). We need to follow the data that these instructions
2505    :    //     refer to, and set their alignment appropriately. For now, alignment
2506    :    //     is simply preserved from the original image.
2507    :  
2508  E :    CallbackDirective directive = LookPastInstructionForData(after_instr_rel);
2509  E :    if (IsFatalCallbackDirective(directive))
2510  i :      return directive;
2511    :  
2512    :    // We're at the end of code in this block if we encountered data, or this is
2513    :    // the last instruction to be processed.
2514  E :    RelativeAddress block_end(current_block_->addr() + current_block_->size());
2515    :    bool end_of_code = (directive == Disassembler::kDirectiveTerminatePath) ||
2516  E :        (after_instr_rel >= block_end);
2517    :  
2518  E :    int fc = META_GET_FC(instruction.meta);
2519    :  
2520  E :    if (fc == FC_NONE) {
2521    :      // There's no control flow and we're at the end of the block. Mark the
2522    :      // block as dirty.
2523  E :      if (end_of_code)
2524  i :        MarkDisassembledPastEnd();
2525    :  
2526    :      return CombineCallbackDirectives(directive,
2527  E :          VisitNonFlowControlInstruction(instr_rel, after_instr_rel));
2528    :    }
2529    :  
2530    :    if ((fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH) &&
2531  E :        instruction.ops[0].type == O_PC) {
2532    :      // For all branches, calls and conditional branches to PC-relative
2533    :      // addresses, record a PC-relative reference.
2534    :      return CombineCallbackDirectives(directive,
2535    :          VisitPcRelativeFlowControlInstruction(instr_abs,
2536    :                                                instr_rel,
2537    :                                                instruction,
2538  E :                                                end_of_code));
2539    :    }
2540    :  
2541    :    // We explicitly handle indirect memory call instructions. These can often
2542    :    // be tracked down as pointing to a block in this image, or to an import
2543    :    // name thunk from another module.
2544  E :    if (fc == FC_CALL && instruction.ops[0].type == O_DISP) {
2545    :      return CombineCallbackDirectives(directive,
2546  E :          VisitIndirectMemoryCallInstruction(instruction, end_of_code));
2547    :    }
2548    :  
2549    :    // Look out for blocks where disassembly seems to run off the end of the
2550    :    // block. We do not treat interrupts as flow control as execution can
2551    :    // continue past the interrupt.
2552  E :    if (fc != FC_RET && fc != FC_UNC_BRANCH && end_of_code)
2553  i :      MarkDisassembledPastEnd();
2554    :  
2555  E :    return directive;
2556  E :  }
2557    :  
2558    :  bool Decomposer::CreatePEImageBlocksAndReferences(
2559  E :      PEFileParser::PEHeader* header) {
2560    :    PEFileParser::AddReferenceCallback add_reference(
2561  E :        base::Bind(&Decomposer::AddReferenceCallback, base::Unretained(this)));
2562  E :    PEFileParser parser(image_file_, image_, add_reference);
2563    :    parser.set_on_import_thunk(
2564  E :        base::Bind(&Decomposer::OnImportThunkCallback, base::Unretained(this)));
2565    :  
2566  E :    if (!parser.ParseImage(header)) {
2567  i :      LOG(ERROR) << "Unable to parse PE image.";
2568  i :      return false;
2569    :    }
2570    :  
2571  E :    return true;
2572  E :  }
2573    :  
2574  E :  bool Decomposer::FinalizeIntermediateReferences() {
2575  E :    IntermediateReferenceMap::const_iterator it(references_.begin());
2576  E :    IntermediateReferenceMap::const_iterator end(references_.end());
2577    :  
2578  E :    for (; it != end; ++it) {
2579  E :      RelativeAddress src_addr(it->first);
2580  E :      BlockGraph::Block* src = image_->GetBlockByAddress(src_addr);
2581  E :      RelativeAddress dst_base_addr(it->second.base);
2582  E :      RelativeAddress dst_addr(dst_base_addr + it->second.offset);
2583  E :      BlockGraph::Block* dst = image_->GetBlockByAddress(dst_base_addr);
2584    :  
2585  E :      if (src == NULL || dst == NULL) {
2586  i :        LOG(ERROR) << "Reference source or base destination address is out of "
2587    :                   << "range, src: " << src << ", dst: " << dst;
2588  i :        return false;
2589    :      }
2590    :  
2591  E :      RelativeAddress src_start = src->addr();
2592  E :      RelativeAddress dst_start = dst->addr();
2593    :  
2594    :      // Get the offset of the ultimate destination relative to the start of the
2595    :      // destination block.
2596  E :      BlockGraph::Offset dst_offset = dst_addr - dst_start;
2597    :  
2598    :      // Get the offset of the actual referenced object relative to the start of
2599    :      // the destination block.
2600  E :      BlockGraph::Offset dst_base = dst_base_addr - dst_start;
2601    :  
2602    :      BlockGraph::Reference ref(it->second.type,
2603    :                                it->second.size,
2604    :                                dst,
2605    :                                dst_offset,
2606  E :                                dst_base);
2607  E :      src->SetReference(src_addr - src_start, ref);
2608  E :    }
2609    :  
2610  E :    references_.clear();
2611    :  
2612  E :    return true;
2613  E :  }
2614    :  
2615  E :  bool Decomposer::ConfirmFixupsVisited() const {
2616  E :    bool success = true;
2617    :  
2618    :    // Ideally, all fixups should have been visited during decomposition.
2619    :    // TODO(chrisha): Address the root problems underlying the following
2620    :    //     temporary fix.
2621  E :    FixupMap::const_iterator fixup_it = fixup_map_.begin();
2622  E :    for (; fixup_it != fixup_map_.end(); ++fixup_it) {
2623  E :      if (fixup_it->second.visited)
2624  E :        continue;
2625    :  
2626    :      const BlockGraph::Block* block =
2627  E :          image_->GetContainingBlock(fixup_it->first, kPointerSize);
2628  E :      DCHECK(block != NULL);
2629    :  
2630    :      // We know that we currently do not have full disassembly coverage as there
2631    :      // are several orphaned pieces of apparently unreachable code in the CRT
2632    :      // that we do not disassemble, but which may contain jmp or call commands.
2633    :      // Thus, we expect that missed fixups are all PC-relative and lie within
2634    :      // code blocks.
2635    :      if (block->type() == BlockGraph::CODE_BLOCK &&
2636  E :          fixup_it->second.type == BlockGraph::PC_RELATIVE_REF)
2637  E :        continue;
2638    :  
2639  i :      success = false;
2640  i :      LOG(ERROR) << "Unexpected unseen fixup at " << fixup_it->second.location;
2641  i :    }
2642    :  
2643  E :    return success;
2644  E :  }
2645    :  
2646  E :  bool Decomposer::FindPaddingBlocks() {
2647  E :    DCHECK(image_ != NULL);
2648  E :    DCHECK(image_->graph() != NULL);
2649    :  
2650    :    BlockGraph::BlockMap::iterator block_it =
2651  E :        image_->graph()->blocks_mutable().begin();
2652  E :    for (; block_it != image_->graph()->blocks_mutable().end(); ++block_it) {
2653  E :      BlockGraph::Block& block = block_it->second;
2654    :  
2655    :      // Padding blocks must not have any symbol information: no labels,
2656    :      // no references, no referrers, and they must be a gap block.
2657    :      if (block.labels().size() != 0 ||
2658    :          block.references().size() != 0 ||
2659    :          block.referrers().size() != 0 ||
2660  E :          (block.attributes() & BlockGraph::GAP_BLOCK) == 0)
2661  E :        continue;
2662    :  
2663  E :      switch (block.type()) {
2664    :        // Code blocks should be fully defined and consist of only int3s.
2665    :        case BlockGraph::CODE_BLOCK: {
2666    :          if (block.data_size() != block.size() ||
2667  E :              RepeatedValue(block.data(), block.data_size()) != kInt3)
2668  i :            continue;
2669  E :          break;
2670    :        }
2671    :  
2672    :        // Data blocks should be uninitialized or have fully defined data
2673    :        // consisting only of zeros.
2674    :        default: {
2675  E :          DCHECK_EQ(BlockGraph::DATA_BLOCK, block.type());
2676  E :          if (block.data_size() == 0)  // Uninitialized data blocks are padding.
2677  E :            break;
2678    :          if (block.data_size() != block.size() ||
2679  E :              RepeatedValue(block.data(), block.data_size()) != 0)
2680  i :            continue;
2681    :        }
2682    :      }
2683    :  
2684    :      // If we fall through to this point, then the block is a padding block.
2685  E :      block.set_attribute(BlockGraph::PADDING_BLOCK);
2686  E :    }
2687    :  
2688  E :    return true;
2689  E :  }
2690    :  
2691  E :  bool Decomposer::CreateSections() {
2692    :    // Iterate through the image sections, and create sections in the BlockGraph.
2693  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2694  E :    for (size_t i = 0; i < num_sections; ++i) {
2695  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2696  E :      std::string name = pe::PEFile::GetSectionName(*header);
2697    :      BlockGraph::Section* section = image_->graph()->AddSection(
2698  E :          name, header->Characteristics);
2699  E :      DCHECK(section != NULL);
2700    :  
2701    :      // For now, we expect them to have been created with the same IDs as those
2702    :      // in the original image.
2703  E :      if (section->id() != i) {
2704  i :        LOG(ERROR) << "Unexpected section ID.";
2705  i :        return false;
2706    :      }
2707  E :    }
2708    :  
2709  E :    return true;
2710  E :  }
2711    :  
2712  E :  bool Decomposer::LoadDebugStreams(IDiaSession* dia_session) {
2713  E :    DCHECK(dia_session != NULL);
2714    :  
2715    :    // Load the fixups. These must exist.
2716  E :    PdbFixups pdb_fixups;
2717    :    SearchResult search_result = FindAndLoadDiaDebugStreamByName(
2718  E :        kFixupDiaDebugStreamName, dia_session, &pdb_fixups);
2719  E :    if (search_result != kSearchSucceeded) {
2720  i :      if (search_result == kSearchFailed) {
2721  i :        LOG(ERROR) << "PDB file does not contain a FIXUP stream. Module must be "
2722    :                      "linked with '/PROFILE' or '/DEBUGINFO:FIXUP' flag.";
2723    :      }
2724  i :      return false;
2725    :    }
2726    :  
2727    :    // Load the omap_from table. It is not necessary that one exist.
2728  E :    std::vector<OMAP> omap_from;
2729    :    search_result = FindAndLoadDiaDebugStreamByName(
2730  E :        kOmapFromDiaDebugStreamName, dia_session, &omap_from);
2731  E :    if (search_result == kSearchErrored)
2732  i :      return false;
2733    :  
2734    :    // Translate and validate fixups.
2735  E :    if (!OmapAndValidateFixups(omap_from, pdb_fixups))
2736  i :      return false;
2737    :  
2738  E :    return true;
2739  E :  }
2740    :  // Validates |pdb_fixups| (mapped through |omap_from| when non-empty) and fills fixup_map_; returns false on failure.
2741    :  bool Decomposer::OmapAndValidateFixups(const std::vector<OMAP>& omap_from,
2742  E :                                         const PdbFixups& pdb_fixups) {
2743  E :    bool have_omap = omap_from.size() != 0;
2744    :  
2745    :    // The resource section in Chrome is modified post-link by a tool that adds a
2746    :    // manifest to it. This causes all of the fixups in the resource section (and
2747    :    // anything beyond it) to be invalid. As long as the resource section is the
2748    :    // last section in the image, this is not a problem (we can safely ignore the
2749    :    // .rsrc fixups, which we know how to parse without them). However, if there
2750    :    // is a section after the resource section, things will have been shifted
2751    :    // and potentially crucial fixups will be invalid.
2752  E :    RelativeAddress rsrc_start(0xffffffff), max_start;
2753  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2754  E :    for (size_t i = 0; i < num_sections; ++i) {
2755  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2756  E :      RelativeAddress start(header->VirtualAddress);
2757  E :      if (start > max_start)
2758  E :        max_start = start;
2759    :      if (strncmp(kResourceSectionName,
2760    :                  reinterpret_cast<const char*>(header->Name),
2761  E :                  IMAGE_SIZEOF_SHORT_NAME) == 0) {
2762  E :        rsrc_start = start;
2763  E :        break;
2764    :      }
2765  E :    }
2766    :    // Ensure there are no sections after the resource section. NOTE(review): the
2767    :    // loop above breaks at that section, so headers listed after it are never compared.
2768  E :    if (max_start > rsrc_start) {
2769  i :      LOG(ERROR) << kResourceSectionName << " section is not the last section.";
2770  i :      return false;
2771    :    }
2772    :  
2773    :    // Ensure the fixups are all valid, and populate the fixup map.
2774    :    // Offset fixups and fixups located at or beyond rsrc_start are skipped.
2775  E :    for (size_t i = 0; i < pdb_fixups.size(); ++i) {
2776  E :      if (!pdb_fixups[i].ValidHeader()) {
2777  i :        LOG(ERROR) << "Unknown fixup header: "
2778    :                   << StringPrintf("0x%08X.", pdb_fixups[i].header);
2779  i :        return false;
2780    :      }
2781    :  
2782    :      // For now, we skip any offset fixups. We've only seen this in the context
2783    :      // of TLS data access, and we don't mess with TLS structures.
2784  E :      if (pdb_fixups[i].is_offset())
2785  E :        continue;
2786    :  
2787    :      // All fixups we handle should be full size pointers.
2788  E :      DCHECK_EQ(kPointerSize, pdb_fixups[i].size());
2789    :  
2790    :      // Get the original addresses, and map them through OMAP information.
2791    :      // Normally DIA takes care of this for us, but there is no API for
2792    :      // getting DIA to give us FIXUP information, so we have to do it manually.
2793  E :      RelativeAddress rva_location(pdb_fixups[i].rva_location);
2794  E :      RelativeAddress rva_base(pdb_fixups[i].rva_base);
2795  E :      if (have_omap) {
2796  i :        rva_location = pdb::TranslateAddressViaOmap(omap_from, rva_location);
2797  i :        rva_base = pdb::TranslateAddressViaOmap(omap_from, rva_base);
2798    :      }
2799    :  
2800    :      // If these are part of the .rsrc section, ignore them.
2801  E :      if (rva_location >= rsrc_start)
2802  i :        continue;
2803    :  
2804    :      // Ensure they live within the image, and refer to things within the
2805    :      // image.
2806    :      if (!image_file_.Contains(rva_location, kPointerSize) ||
2807  E :          !image_file_.Contains(rva_base, 1)) {
2808  i :        LOG(ERROR) << "Fixup refers to addresses outside of image.";
2809  i :        return false;
2810    :      }
2811    :  
2812    :      // Add the fix up, and ensure the source address is unique.
2813  E :      Fixup fixup = { PdbFixupTypeToReferenceType(pdb_fixups[i].type),
2814  E :                      pdb_fixups[i].refers_to_code(),
2815  E :                      pdb_fixups[i].is_data(),
2816  E :                      false,
2817  E :                      rva_location,
2818  E :                      rva_base };
2819  E :      bool added = fixup_map_.insert(std::make_pair(rva_location, fixup)).second;
2820  E :      if (!added) {
2821  i :        LOG(ERROR) << "Colliding fixups at " << rva_location;
2822  i :        return false;
2823    :      }
2824  E :    }
2825    :  
2826  E :    return true;
2827  E :  }
2828    :  // Stores the |begin|/|end| regex pair; returns false if either lacks exactly one capturing group.
2829    :  bool Decomposer::RegisterStaticInitializerPatterns(
2830  E :      const base::StringPiece& begin, const base::StringPiece& end) {
2831    :    // Build the RE pair and ensure each pattern has exactly one capturing group.
2832    :    REPair re_pair = std::make_pair(RE(begin.as_string()),
2833  E :                                    RE(end.as_string()));
2834    :    if (re_pair.first.NumberOfCapturingGroups() != 1 ||
2835  E :        re_pair.second.NumberOfCapturingGroups() != 1)
2836  i :      return false;
2837    :    // Both patterns passed the check: append the pair to the registered list.
2838  E :    static_initializer_patterns_.push_back(re_pair);
2839    :  
2840  E :    return true;
2841  E :  }
2842    :  // Inserts |function_name| into non_returning_functions_; returns insert()'s |second| (presumably false for a duplicate).
2843    :  bool Decomposer::RegisterNonReturningFunction(
2844  E :      const base::StringPiece& function_name) {
2845  E :    return non_returning_functions_.insert(function_name.as_string()).second;
2846  E :  }
2847    :  // Inserts |function_name| into the non_returning_imports_ set for |module_name|; returns insert()'s |second|.
2848    :  bool Decomposer::RegisterNonReturningImport(
2849    :      const base::StringPiece& module_name,
2850  E :      const base::StringPiece& function_name) {
2851  E :    StringSet& module_set = non_returning_imports_[module_name.as_string()];  // Presumably created on demand.
2852  E :    return module_set.insert(function_name.as_string()).second;
2853  E :  }
2854    :  // Deserializes the block-graph and |image_layout| from |block_graph_stream|; returns false on any failure.
2855    :  bool Decomposer::LoadBlockGraphFromPdbStream(const PEFile& image_file,
2856    :                                               pdb::PdbStream* block_graph_stream,
2857  E :                                               ImageLayout* image_layout) {
2858  E :    DCHECK(block_graph_stream != NULL);
2859  E :    DCHECK(image_layout != NULL);
2860  E :    LOG(INFO) << "Reading block-graph and image layout from the PDB.";
2861    :  
2862    :    // Initialize an input archive pointing to the stream.
2863  E :    scoped_refptr<pdb::PdbByteStream> byte_stream = new pdb::PdbByteStream();
2864  E :    DCHECK(byte_stream.get() != NULL);  // Checked before the first dereference.
2865  E :    if (!byte_stream->Init(block_graph_stream))
2866  i :      return false;
2867    :  
2868  E :    core::ScopedInStreamPtr pdb_in_stream;
2869    :    pdb_in_stream.reset(core::CreateByteInStream(
2870  E :        byte_stream->data(), byte_stream->data() + byte_stream->length()));
2871    :  
2872    :    // Read the header: a uint32 stream version, then a one-byte compression flag.
2873  E :    uint32 stream_version = 0;
2874  E :    unsigned char compressed = 0;
2875    :    if (!pdb_in_stream->Read(sizeof(stream_version),
2876    :                             reinterpret_cast<core::Byte*>(&stream_version)) ||
2877    :        !pdb_in_stream->Read(sizeof(compressed),
2878  E :                             reinterpret_cast<core::Byte*>(&compressed))) {
2879  i :      LOG(ERROR) << "Failed to read existing Syzygy block-graph stream header.";
2880  i :      return false;
2881    :    }
2882    :  
2883    :    // Check the stream version.
2884  E :    if (stream_version != pdb::kSyzygyBlockGraphStreamVersion) {
2885  E :      LOG(ERROR) << "PDB contains an unsupported Syzygy block-graph stream"
2886    :                 << " version (got " << stream_version << ", expected "
2887    :                 << pdb::kSyzygyBlockGraphStreamVersion << ").";
2888  E :      return false;
2889    :    }
2890    :  
2891    :    // If the stream is compressed insert the decompression filter.
2892  E :    core::InStream* in_stream = pdb_in_stream.get();
2893  E :    scoped_ptr<core::ZInStream> zip_in_stream;
2894  E :    if (compressed != 0) {
2895  E :      zip_in_stream.reset(new core::ZInStream(in_stream));
2896  E :      if (!zip_in_stream->Init()) {
2897  i :        LOG(ERROR) << "Unable to initialize ZInStream.";
2898  i :        return false;
2899    :      }
2900  E :      in_stream = zip_in_stream.get();
2901    :    }
2902    :  
2903    :    // Deserialize the image-layout.
2904  E :    core::NativeBinaryInArchive in_archive(in_stream);
2905  E :    block_graph::BlockGraphSerializer::Attributes attributes = 0;
2906    :    if (!LoadBlockGraphAndImageLayout(
2907  E :        image_file, &attributes, image_layout, &in_archive)) {
2908  i :      LOG(ERROR) << "Failed to deserialize block-graph and image layout.";
2909  i :      return false;
2910    :    }
2911    :  
2912  E :    return true;
2913  E :  }
2914    :  // Loads |image_layout| from the block-graph stream of the PDB at |pdb_path|; |*stream_exists| reports whether one was found.
2915    :  bool Decomposer::LoadBlockGraphFromPdb(const FilePath& pdb_path,
2916    :                                         const PEFile& image_file,
2917    :                                         ImageLayout* image_layout,
2918  E :                                         bool* stream_exists) {
2919  E :    DCHECK(image_layout != NULL);
2920  E :    DCHECK(stream_exists != NULL);
2921    :  
2922  E :    pdb::PdbFile pdb_file;
2923  E :    pdb::PdbReader pdb_reader;
2924  E :    if (!pdb_reader.Read(pdb_path, &pdb_file)) {
2925  i :      LOG(ERROR) << "Unable to read the PDB named \"" << pdb_path.value()
2926    :                 << "\".";
2927  i :      return false;  // |*stream_exists| is left untouched on this path.
2928    :    }
2929    :  
2930    :    // Try to get the block-graph stream from the PDB.
2931    :    scoped_refptr<pdb::PdbStream> block_graph_stream =
2932  E :        GetBlockGraphStreamFromPdb(&pdb_file);
2933  E :    if (block_graph_stream.get() == NULL) {
2934  E :      *stream_exists = false;
2935  E :      return false;
2936    :    }
2937    :  
2938    :    // The PDB contains a block-graph stream, the block-graph and the image layout
2939    :    // will be read from this stream.
2940  E :    *stream_exists = true;
2941    :    if (!LoadBlockGraphFromPdbStream(image_file, block_graph_stream.get(),
2942  E :                                     image_layout)) {
2943  i :      return false;
2944    :    }
2945    :  
2946  E :    return true;
2947  E :  }
2948    :  // Returns the stream named pdb::kSyzygyBlockGraphStreamName in |pdb_file|, or an unset ref if it cannot be located.
2949    :  scoped_refptr<pdb::PdbStream> Decomposer::GetBlockGraphStreamFromPdb(
2950  E :      pdb::PdbFile* pdb_file) {
2951  E :    scoped_refptr<pdb::PdbStream> block_graph_stream;
2952    :    // Get the PDB header and try to get the block-graph ID stream from it.
2953  E :    pdb::PdbInfoHeader70 pdb_header = {0};
2954  E :    pdb::NameStreamMap name_stream_map;
2955    :    if (!ReadHeaderInfoStream(pdb_file->GetStream(pdb::kPdbHeaderInfoStream),
2956    :                             &pdb_header,
2957  E :                             &name_stream_map)) {
2958  i :      LOG(ERROR) << "Failed to read header info stream.";
2959  i :      return block_graph_stream;
2960    :    }
2961    :    pdb::NameStreamMap::const_iterator name_it = name_stream_map.find(
2962  E :        pdb::kSyzygyBlockGraphStreamName);
2963  E :    if (name_it == name_stream_map.end()) {
2964  E :      return block_graph_stream;
2965    :    }
2966    :    // Get the block-graph stream and ensure that it's not empty. NOTE(review):
2967    :    // an empty stream is logged as an error below, yet it is still returned as-is.
2968  E :    block_graph_stream = pdb_file->GetStream(name_it->second);
2969  E :    if (block_graph_stream.get() == NULL) {
2970  i :      LOG(ERROR) << "Failed to read the block-graph stream from the PDB.";
2971  i :      return block_graph_stream;
2972    :    }
2973  E :    if (block_graph_stream->length() == 0) {
2974  i :      LOG(ERROR) << "The block-graph stream is empty.";
2975  i :      return block_graph_stream;
2976    :    }
2977    :  
2978  E :    return block_graph_stream;
2979  E :  }
2980    :  // Flags |thunk| as NON_RETURN_FUNCTION when |module_name|/|symbol_name| is in non_returning_imports_; always returns true.
2981    :  bool Decomposer::OnImportThunkCallback(const char* module_name,
2982    :                                         const char* symbol_name,
2983  E :                                         BlockGraph::Block* thunk) {
2984  E :    DCHECK(module_name != NULL);
2985  E :    DCHECK(symbol_name != NULL);
2986  E :    DCHECK(thunk != NULL);
2987    :  
2988    :    // Look for the module first; a module with no registered imports is left alone.
2989    :    StringSetMap::const_iterator module_it =
2990  E :        non_returning_imports_.find(std::string(module_name));
2991  E :    if (module_it == non_returning_imports_.end())
2992  E :      return true;
2993    :  
2994    :    // Look for the symbol within the module; an unregistered symbol is left alone.
2995  E :    if (module_it->second.count(std::string(symbol_name)) == 0)
2996  E :      return true;
2997    :  
2998    :    // If we get here then the imported symbol is found. Decorate the thunk.
2999  E :    thunk->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
3000  E :    VLOG(1) << "Forcing non-returning attribute on imported symbol \""
3001    :            << symbol_name << "\" from module \"" << module_name << "\".";
3002    :  
3003  E :    return true;
3004  E :  }
3005    :  
3006    :  }  // namespace pe

Coverage information generated Thu Sep 06 11:30:46 2012.