Coverage for /Syzygy/pe/new_decomposer.cc

Coverage | Lines executed / instrumented / missing | Language | Group
79.0% | 870 / 1101 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/pe/new_decomposer.h"
  16    :  
  17    :  #include "pcrecpp.h"  // NOLINT
  18    :  #include "base/bind.h"
  19    :  #include "base/string_split.h"
  20    :  #include "base/stringprintf.h"
  21    :  #include "base/utf_string_conversions.h"
  22    :  #include "base/win/scoped_bstr.h"
  23    :  #include "base/win/scoped_comptr.h"
  24    :  #include "syzygy/core/disassembler_util.h"
  25    :  #include "syzygy/core/zstream.h"
  26    :  #include "syzygy/pdb/omap.h"
  27    :  #include "syzygy/pdb/pdb_byte_stream.h"
  28    :  #include "syzygy/pdb/pdb_constants.h"
  29    :  #include "syzygy/pdb/pdb_dbi_stream.h"
  30    :  #include "syzygy/pdb/pdb_file.h"
  31    :  #include "syzygy/pdb/pdb_reader.h"
  32    :  #include "syzygy/pdb/pdb_symbol_record.h"
  33    :  #include "syzygy/pdb/pdb_util.h"
  34    :  #include "syzygy/pe/dia_util.h"
  35    :  #include "syzygy/pe/find.h"
  36    :  #include "syzygy/pe/pe_file_parser.h"
  37    :  #include "syzygy/pe/pe_utils.h"
  38    :  #include "syzygy/pe/serialization.h"
  39    :  #include "third_party/cci/Files/CvInfo.h"
  40    :  
  41    :  namespace cci = Microsoft_Cci_Pdb;
  42    :  
  43    :  namespace {
  44    :  
  45    :  // A small helper struct for dumping block information to log messages.
  46    :  // TODO(chrisha): Move this to block_graph and reuse it everywhere!
  47    :  struct BlockInfo {
  48    :    enum AddressType {
  49    :      kNoAddress,
  50    :      kAbsoluteAddress,
  51    :      kFileOffsetAddress,
  52    :      kRelativeAddress,
  53    :    };
  54    :  
  55  i :    explicit BlockInfo(const block_graph::BlockGraph::Block* block)
  56    :        : block(block), type(kNoAddress) {
  57  i :      DCHECK(block != NULL);
  58  i :    }
  59    :  
  60  i :    BlockInfo(const block_graph::BlockGraph::Block* block,
  61    :              core::AbsoluteAddress address)
  62    :        : block(block), type(kAbsoluteAddress), abs_addr(address) {
  63  i :      DCHECK(block != NULL);
  64  i :    }
  65    :    BlockInfo(const block_graph::BlockGraph::Block* block,
  66    :              core::FileOffsetAddress address)
  67    :        : block(block), type(kFileOffsetAddress), file_addr(address) {
  68    :      DCHECK(block != NULL);
  69    :    }
  70    :    BlockInfo(const block_graph::BlockGraph::Block* block,
  71    :              core::RelativeAddress address)
  72    :        : block(block), type(kRelativeAddress), rel_addr(address) {
  73    :      DCHECK(block != NULL);
  74    :    }
  75    :  
  76    :    const block_graph::BlockGraph::Block* block;
  77    :    AddressType type;
  78    :  
  79    :    // Ideally these would be a in a union but because they have non-trivial
  80    :    // constructors they are not allowed.
  81    :    core::AbsoluteAddress abs_addr;
  82    :    core::FileOffsetAddress file_addr;
  83    :    core::RelativeAddress rel_addr;
  84    :  
  85    :   private:
  86    :    DISALLOW_COPY_AND_ASSIGN(BlockInfo);
  87    :  };
  88    :  
  89    :  }  // anonymous namespace
  90    :  
  91    :  // Pretty prints a BlockInfo to an ostream. This has to be outside of any
  92    :  // namespaces so that operator<< is found properly.
  93  i :  std::ostream& operator<<(std::ostream& os, const BlockInfo& bi) {
  94    :    os << "Block(id=" << bi.block->id() << ", name=\"" << bi.block->name()
  95  i :       << "\", size=" << bi.block->size();
  96  i :    if (bi.type != BlockInfo::kNoAddress) {
  97  i :      os << ", address=";
  98  i :      switch (bi.type) {
  99    :        case BlockInfo::kAbsoluteAddress: {
 100  i :          os << bi.abs_addr;
 101  i :          break;
 102    :        }
 103    :        case BlockInfo::kFileOffsetAddress: {
 104  i :          os << bi.file_addr;
 105  i :          break;
 106    :        }
 107    :        case BlockInfo::kRelativeAddress: {
 108  i :          os << bi.rel_addr;
 109    :          break;
 110    :        }
 111    :        default: break;
 112    :      }
 113    :    }
 114  i :    os << ")";
 115  i :    return os;
 116  i :  }
 117    :  
 118    :  namespace pe {
 119    :  
 120    :  // An intermediate reference representation used while parsing PE blocks.
 121    :  // This is necessary because at that point we haven't yet chunked the whole
 122    :  // image into blocks thus some references cannot be resolved.
 123    :  struct NewDecomposer::IntermediateReference {
 124    :    RelativeAddress src_addr;
 125    :    BlockGraph::ReferenceType type;
 126    :    BlockGraph::Size size;
 127    :    RelativeAddress dst_addr;
 128    :  };
 129    :  
 130    :  namespace {
 131    :  
 132    :  using base::win::ScopedBstr;
 133    :  using base::win::ScopedComPtr;
 134    :  using block_graph::BlockGraph;
 135    :  using builder::Callback;
 136    :  using builder::Opt;
 137    :  using builder::Or;
 138    :  using builder::Seq;
 139    :  using builder::Star;
 140    :  using core::AbsoluteAddress;
 141    :  using core::RelativeAddress;
 142    :  
 143    :  typedef BlockGraph::Block Block;
 144    :  typedef BlockGraph::BlockType BlockType;
 145    :  typedef BlockGraph::Offset Offset;
 146    :  typedef BlockGraph::Reference Reference;
 147    :  typedef BlockGraph::ReferenceType ReferenceType;
 148    :  typedef core::AddressRange<RelativeAddress, size_t> RelativeRange;
 149    :  typedef NewDecomposer::IntermediateReference IntermediateReference;
 150    :  typedef NewDecomposer::IntermediateReferences IntermediateReferences;
 151    :  typedef pcrecpp::RE RE;
 152    :  typedef std::vector<OMAP> OMAPs;
 153    :  typedef std::vector<pdb::PdbFixup> PdbFixups;
 154    :  
 155    :  const char kJumpTable[] = "<jump-table>";
 156    :  const char kCaseTable[] = "<case-table>";
 157    :  
 158    :  // Some helper functions for testing ranges.
 159    :  template<typename T1, typename T2, typename T3>
 160  E :  bool InRange(T1 value, T2 lower_bound_incl, T3 length_excl) {
 161  E :    T1 upper_bound_excl = static_cast<T1>(lower_bound_incl) + length_excl;
 162    :    return static_cast<T1>(lower_bound_incl) <= value &&
 163  E :        value < static_cast<T2>(upper_bound_excl);
 164  E :  }
 165    :  template<typename T1, typename T2, typename T3>
 166  E :  bool InRangeIncl(T1 value, T2 lower_bound_incl, T3 length_incl) {
 167  E :    T1 upper_bound_incl = static_cast<T1>(lower_bound_incl) + length_incl;
 168    :    return static_cast<T1>(lower_bound_incl) <= value &&
 169  E :        value <= upper_bound_incl;
 170  E :  }
 171    :  
 172    :  bool InitializeDia(const PEFile& image_file,
 173    :                     const FilePath& pdb_path,
 174    :                     IDiaDataSource** dia_source,
 175    :                     IDiaSession** dia_session,
 176  E :                     IDiaSymbol** global) {
 177  E :    DCHECK(*dia_source == NULL);
 178  E :    DCHECK(*dia_session == NULL);
 179  E :    DCHECK(*global == NULL);
 180    :  
 181  E :    if (!CreateDiaSource(dia_source))
 182  i :      return false;
 183  E :    DCHECK(*dia_source != NULL);
 184    :  
 185    :    // We create the session using the PDB file directly, as we've already
 186    :    // validated that it matches the module.
 187  E :    if (!CreateDiaSession(pdb_path, *dia_source, dia_session))
 188  i :      return false;
 189  E :    DCHECK(*dia_session != NULL);
 190    :  
 191  E :    HRESULT hr = (*dia_session)->get_globalScope(global);
 192  E :    if (hr != S_OK) {
 193  i :      LOG(ERROR) << "Failed to get the DIA global scope: "
 194    :                 << com::LogHr(hr) << ".";
 195  i :      return false;
 196    :    }
 197    :  
 198  E :    return true;
 199  E :  }
 200    :  
 201    :  enum SectionType {
 202    :    kSectionCode,
 203    :    kSectionData,
 204    :    kSectionUnknown
 205    :  };
 206    :  
 207    :  // Determines the type of a section based on its attributes. This is used to
 208    :  // tag blocks with an appropriate type.
 209  E :  SectionType GetSectionType(const IMAGE_SECTION_HEADER* header) {
 210  E :    DCHECK(header != NULL);
 211  E :    if ((header->Characteristics & IMAGE_SCN_CNT_CODE) != 0)
 212  E :      return kSectionCode;
 213  E :    if ((header->Characteristics & kReadOnlyDataCharacteristics) != 0)
 214  E :      return kSectionData;
 215  i :    return kSectionUnknown;
 216  E :  }
 217    :  
 218    :  // Given a compiland, returns its compiland details.
 219    :  bool GetCompilandDetailsForCompiland(IDiaSymbol* compiland,
 220  E :                                       IDiaSymbol** compiland_details) {
 221  E :    DCHECK(compiland != NULL);
 222  E :    DCHECK(compiland_details != NULL);
 223  E :    DCHECK(IsSymTag(compiland, SymTagCompiland));
 224  E :    DCHECK(*compiland_details == NULL);
 225    :  
 226    :    // Get the enumeration of compiland details.
 227  E :    ScopedComPtr<IDiaEnumSymbols> enum_symbols;
 228    :    HRESULT hr = compiland->findChildren(SymTagCompilandDetails, NULL, 0,
 229  E :                                         enum_symbols.Receive());
 230  E :    DCHECK_EQ(S_OK, hr);
 231    :  
 232    :    // We expect there to be compiland details. For compilands built by
 233    :    // non-standard toolchains, there usually aren't any.
 234  E :    LONG count = 0;
 235  E :    hr = enum_symbols->get_Count(&count);
 236  E :    DCHECK_EQ(S_OK, hr);
 237  E :    if (count == 0) {
 238    :      // We don't log here because we see this quite often.
 239  i :      return false;
 240    :    }
 241    :  
 242    :    // We do sometimes encounter more than one compiland detail. In fact, for
 243    :    // import and export tables we get one compiland detail per table entry.
 244    :    // They are all marked as having been generated by the linker, so using the
 245    :    // first one is sufficient.
 246    :  
 247    :    // Get the compiland details.
 248  E :    ULONG fetched = 0;
 249  E :    hr = enum_symbols->Next(1, compiland_details, &fetched);
 250  E :    DCHECK_EQ(S_OK, hr);
 251  E :    DCHECK_EQ(1u, fetched);
 252    :  
 253  E :    return true;
 254  E :  }
 255    :  
 256    :  // Stores information regarding known compilers.
 257    :  struct KnownCompilerInfo {
 258    :    wchar_t* compiler_name;
 259    :    bool supported;
 260    :  };
 261    :  
 262    :  // A list of known compilers, and their status as being supported or not.
 263    :  KnownCompilerInfo kKnownCompilerInfos[] = {
 264    :    { L"Microsoft (R) Macro Assembler", false },
 265    :    { L"Microsoft (R) Optimizing Compiler", true },
 266    :    { L"Microsoft (R) LINK", false }
 267    :  };
 268    :  
 269    :  // Given a compiland, determines whether the compiler used is one of those that
 270    :  // we whitelist.
 271  E :  bool IsBuiltBySupportedCompiler(IDiaSymbol* compiland) {
 272  E :    DCHECK(compiland != NULL);
 273  E :    DCHECK(IsSymTag(compiland, SymTagCompiland));
 274    :  
 275  E :    ScopedComPtr<IDiaSymbol> compiland_details;
 276    :    if (!GetCompilandDetailsForCompiland(compiland,
 277  E :                                         compiland_details.Receive())) {
 278    :      // If the compiland has no compiland details we assume the compiler is not
 279    :      // supported.
 280  i :      ScopedBstr compiland_name;
 281  i :      if (compiland->get_name(compiland_name.Receive()) == S_OK) {
 282  i :        VLOG(1) << "Compiland has no compiland details: "
 283    :                << com::ToString(compiland_name);
 284    :      }
 285  i :      return false;
 286    :    }
 287  E :    DCHECK(compiland_details.get() != NULL);
 288    :  
 289    :    // Get the compiler name.
 290  E :    ScopedBstr compiler_name;
 291  E :    HRESULT hr = compiland_details->get_compilerName(compiler_name.Receive());
 292  E :    DCHECK_EQ(S_OK, hr);
 293    :  
 294    :    // Check the compiler name against the list of known compilers.
 295  E :    for (size_t i = 0; i < arraysize(kKnownCompilerInfos); ++i) {
 296  E :      if (::wcscmp(kKnownCompilerInfos[i].compiler_name, compiler_name) == 0) {
 297  E :        return kKnownCompilerInfos[i].supported;
 298    :      }
 299  E :    }
 300    :  
 301    :    // Anything we don't explicitly know about is not supported.
 302  E :    VLOG(1) << "Encountered unknown compiler: " << compiler_name;
 303  E :    return false;
 304  E :  }
 305    :  
 306    :  // Adds an intermediate reference to the provided vector. The vector is
 307    :  // specified as the first parameter (in slight violation of our coding
 308    :  // standards) because this function is intended to be used by Bind.
 309    :  bool AddIntermediateReference(IntermediateReferences* references,
 310    :                                RelativeAddress src_addr,
 311    :                                ReferenceType type,
 312    :                                BlockGraph::Size size,
 313  E :                                RelativeAddress dst_addr) {
 314  E :    DCHECK(references != NULL);
 315  E :    IntermediateReference ref = { src_addr, type, size, dst_addr };
 316  E :    references->push_back(ref);
 317  E :    return true;
 318  E :  }
 319    :  
 320    :  // Create a reference as specified. Ignores existing references if they are of
 321    :  // the exact same type.
 322    :  bool CreateReference(RelativeAddress src_addr,
 323    :                       BlockGraph::Size ref_size,
 324    :                       ReferenceType ref_type,
 325    :                       RelativeAddress base_addr,
 326    :                       RelativeAddress dst_addr,
 327  E :                       BlockGraph::AddressSpace* image) {
 328  E :    DCHECK(image != NULL);
 329    :  
 330    :    // Get the source block and offset, and ensure that the reference fits
 331    :    // within it.
 332  E :    Block* src_block = image->GetBlockByAddress(src_addr);
 333  E :    if (src_block == NULL) {
 334  i :      LOG(ERROR) << "Unable to find block for reference originating at "
 335    :                 << src_addr << ".";
 336  i :      return false;
 337    :    }
 338  E :    RelativeAddress src_block_addr;
 339  E :    CHECK(image->GetAddressOf(src_block, &src_block_addr));
 340  E :    Offset src_block_offset = src_addr - src_block_addr;
 341  E :    if (src_block_offset + ref_size > src_block->size()) {
 342  i :      LOG(ERROR) << "Reference originating at " << src_addr
 343    :                 << " extends beyond block \"" << src_block->name() << "\".";
 344  i :      return false;
 345    :    }
 346    :  
 347    :    // Get the destination block and offset.
 348  E :    Block* dst_block = image->GetBlockByAddress(base_addr);
 349  E :    if (dst_block == NULL) {
 350  i :      LOG(ERROR) << "Unable to find block for reference pointing at "
 351    :                  << base_addr << ".";
 352  i :      return false;
 353    :    }
 354  E :    RelativeAddress dst_block_addr;
 355  E :    CHECK(image->GetAddressOf(dst_block, &dst_block_addr));
 356  E :    Offset base = base_addr - dst_block_addr;
 357  E :    Offset offset = dst_addr - dst_block_addr;
 358    :  
 359  E :    Reference ref(ref_type, ref_size, dst_block, offset, base);
 360    :  
 361    :    // Check if a reference already exists at this offset.
 362    :    Block::ReferenceMap::const_iterator ref_it =
 363  E :        src_block->references().find(src_block_offset);
 364  E :    if (ref_it != src_block->references().end()) {
 365    :      // If an identical reference already exists then we're done.
 366  E :      if (ref == ref_it->second)
 367  E :        return true;
 368  i :      LOG(ERROR) << "Block \"" << src_block->name() << "\" has a conflicting "
 369    :                  << "reference at offset " << src_block_offset << ".";
 370  i :      return false;
 371    :    }
 372    :  
 373  E :    CHECK(src_block->SetReference(src_block_offset, ref));
 374    :  
 375  E :    return true;
 376  E :  }
 377    :  
 378    :  // Loads FIXUP and OMAP_FROM debug streams.
 379    :  bool LoadDebugStreams(IDiaSession* dia_session,
 380    :                        PdbFixups* pdb_fixups,
 381  E :                        OMAPs* omap_from) {
 382  E :    DCHECK(dia_session != NULL);
 383  E :    DCHECK(pdb_fixups != NULL);
 384  E :    DCHECK(omap_from != NULL);
 385    :  
 386    :    // Load the fixups. These must exist.
 387    :    SearchResult search_result = FindAndLoadDiaDebugStreamByName(
 388  E :        kFixupDiaDebugStreamName, dia_session, pdb_fixups);
 389  E :    if (search_result != kSearchSucceeded) {
 390  i :      if (search_result == kSearchFailed) {
 391  i :        LOG(ERROR) << "PDB file does not contain a FIXUP stream. Module must be "
 392    :                      "linked with '/PROFILE' or '/DEBUGINFO:FIXUP' flag.";
 393    :      }
 394  i :      return false;
 395    :    }
 396    :  
 397    :    // Load the omap_from table. It is not necessary that one exist.
 398    :    search_result = FindAndLoadDiaDebugStreamByName(
 399  E :        kOmapFromDiaDebugStreamName, dia_session, omap_from);
 400  E :    if (search_result == kSearchErrored) {
 401  i :      LOG(ERROR) << "Error trying to read " << kOmapFromDiaDebugStreamName
 402    :                 << " stream.";
 403  i :      return false;
 404    :    }
 405    :  
 406  E :    return true;
 407  E :  }
 408    :  
 409    :  bool GetFixupDestinationAndType(const PEFile& image_file,
 410    :                                  const pdb::PdbFixup& fixup,
 411    :                                  RelativeAddress* dst_addr,
 412  E :                                  ReferenceType* ref_type) {
 413  E :    DCHECK(dst_addr != NULL);
 414  E :    DCHECK(ref_type != NULL);
 415    :  
 416  E :    RelativeAddress src_addr(fixup.rva_location);
 417    :  
 418    :    // Get the destination address from the actual image itself. We only see
 419    :    // fixups for 32-bit references.
 420  E :    uint32 data = 0;
 421  E :    if (!image_file.ReadImage(src_addr, &data, sizeof(data))) {
 422  i :      LOG(ERROR) << "Unable to read image data for fixup with source address "
 423    :                  << "at" << src_addr << ".";
 424  i :      return false;
 425    :    }
 426    :  
 427    :    // Translate this to a relative address.
 428  E :    switch (fixup.type) {
 429    :      case pdb::PdbFixup::TYPE_ABSOLUTE: {
 430  E :        *ref_type = BlockGraph::ABSOLUTE_REF;
 431  E :        AbsoluteAddress dst_addr_abs(data);
 432  E :        if (!image_file.Translate(dst_addr_abs, dst_addr)) {
 433  i :          LOG(ERROR) << "Unable to translate " << dst_addr_abs << ".";
 434  i :          return false;
 435    :        }
 436  E :        break;
 437    :      }
 438    :  
 439    :      case pdb::PdbFixup::TYPE_PC_RELATIVE: {
 440  E :        *ref_type = BlockGraph::PC_RELATIVE_REF;
 441  E :        *dst_addr = RelativeAddress(fixup.rva_location) + sizeof(data) + data;
 442  E :        break;
 443    :      }
 444    :  
 445    :      case pdb::PdbFixup::TYPE_RELATIVE: {
 446  E :        *ref_type = BlockGraph::RELATIVE_REF;
 447  E :        *dst_addr = RelativeAddress(data);
 448  E :        break;
 449    :      }
 450    :  
 451    :      default: {
 452  i :        LOG(ERROR) << "Unexpected fixup type (" << fixup.type << ").";
 453  i :        return false;
 454    :      }
 455    :    }
 456    :  
 457  E :    return true;
 458  E :  }
 459    :  
 460    :  // Creates references from the @p pdb_fixups (translating them via the
 461    :  // provided @p omap_from information if it is not empty), all while removing the
 462    :  // corresponding entries from @p reloc_set. If @p reloc_set is not empty after
 463    :  // this then the PDB fixups are out of sync with the image and we are unable to
 464    :  // safely decompose.
 465    :  //
 466    :  // @note This function deliberately ignores fixup information for the resource
 467    :  //     section. This is because chrome.dll gets modified by a manifest tool
 468    :  //     which doesn't update the FIXUPs in the corresponding PDB. They are thus
 469    :  //     out of sync. Even if they were in sync this doesn't harm us as we have no
 470    :  //     need to reach in and modify resource data.
 471    :  bool CreateReferencesFromFixupsImpl(
 472    :      const PEFile& image_file,
 473    :      const PdbFixups& pdb_fixups,
 474    :      const OMAPs& omap_from,
 475    :      PEFile::RelocSet* reloc_set,
 476  E :      BlockGraph::AddressSpace* image) {
 477  E :    DCHECK(reloc_set != NULL);
 478  E :    DCHECK(image != NULL);
 479    :  
 480  E :    bool have_omap = omap_from.size() != 0;
 481  E :    size_t fixups_used = 0;
 482    :  
 483    :    // The resource section in Chrome is modified post-link by a tool that adds a
 484    :    // manifest to it. This causes all of the fixups in the resource section (and
 485    :    // anything beyond it) to be invalid. As long as the resource section is the
 486    :    // last section in the image, this is not a problem (we can safely ignore the
 487    :    // .rsrc fixups, which we know how to parse without them). However, if there
 488    :    // is a section after the resource section, things will have been shifted
 489    :    // and potentially crucial fixups will be invalid.
 490    :    const IMAGE_SECTION_HEADER* rsrc_header = image_file.GetSectionHeader(
 491  E :        kResourceSectionName);
 492  E :    RelativeAddress rsrc_start(0xffffffff);
 493  E :    RelativeAddress rsrc_end(0xffffffff);
 494  E :    if (rsrc_header != NULL) {
 495  E :      rsrc_start = RelativeAddress(rsrc_header->VirtualAddress);
 496  E :      rsrc_end = rsrc_start + rsrc_header->Misc.VirtualSize;
 497    :    }
 498    :  
 499    :    // Ensure the fixups are all valid.
 500  E :    size_t skipped = 0;
 501  E :    for (size_t i = 0; i < pdb_fixups.size(); ++i) {
 502  E :      if (!pdb_fixups[i].ValidHeader()) {
 503  i :        LOG(ERROR) << "Unknown fixup header: "
 504    :                   << StringPrintf("0x%08X.", pdb_fixups[i].header);
 505  i :        return false;
 506    :      }
 507    :  
 508    :      // For now, we skip any offset fixups. We've only seen this in the context
 509    :      // of TLS data access, and we don't mess with TLS structures.
 510  E :      if (pdb_fixups[i].is_offset())
 511  E :        continue;
 512    :  
 513    :      // All fixups we handle should be full size pointers.
 514  E :      DCHECK_EQ(Reference::kMaximumSize, pdb_fixups[i].size());
 515    :  
 516    :      // Get the original addresses, and map them through OMAP information.
 517    :      // Normally DIA takes care of this for us, but there is no API for
 518    :      // getting DIA to give us FIXUP information, so we have to do it manually.
 519  E :      RelativeAddress src_addr(pdb_fixups[i].rva_location);
 520  E :      RelativeAddress base_addr(pdb_fixups[i].rva_base);
 521  E :      if (have_omap) {
 522  i :        src_addr = pdb::TranslateAddressViaOmap(omap_from, src_addr);
 523  i :        base_addr = pdb::TranslateAddressViaOmap(omap_from, base_addr);
 524    :      }
 525    :  
 526    :      // If the reference originates beyond the .rsrc section then we can't
 527    :      // trust it.
 528  E :      if (src_addr >= rsrc_end) {
 529  i :        LOG(ERROR) << "Found fixup originating beyond .rsrc section.";
 530  i :        return false;
 531    :      }
 532    :  
 533    :      // If the reference originates from a part of the .rsrc section, ignore it.
 534  E :      if (src_addr >= rsrc_start)
 535  E :        continue;
 536    :  
 537    :      // Get the destination address of the fixup. This logs verbosely for us.
 538  E :      RelativeAddress dst_addr;
 539  E :      ReferenceType type = BlockGraph::RELATIVE_REF;
 540    :      if (!GetFixupDestinationAndType(image_file, pdb_fixups[i], &dst_addr,
 541  E :                                      &type)) {
 542  i :        return false;
 543    :      }
 544    :  
 545    :      // Finally, create the reference. This logs verbosely for us on failure.
 546    :      if (!CreateReference(src_addr, Reference::kMaximumSize, type, base_addr,
 547  E :                           dst_addr, image)) {
 548  i :        return false;
 549    :      }
 550    :  
 551    :      // Remove this reference from the relocs.
 552  E :      PEFile::RelocSet::iterator reloc_it = reloc_set->find(src_addr);
 553  E :      if (reloc_it != reloc_set->end()) {
 554    :        // We should only find a reloc if the fixup was of absolute type.
 555  E :        if (type != BlockGraph::ABSOLUTE_REF) {
 556  i :          LOG(ERROR) << "Found a reloc corresponding to a non-absolute fixup.";
 557  i :          return false;
 558    :        }
 559    :  
 560  E :        reloc_set->erase(reloc_it);
 561    :      }
 562    :  
 563  E :      ++fixups_used;
 564  E :    }
 565    :  
 566  E :    LOG(INFO) << "Used " << fixups_used << " of " << pdb_fixups.size() << ".";
 567    :  
 568  E :    return true;
 569  E :  }
 570    :  
 571  E :  bool GetDataSymbolSize(IDiaSymbol* symbol, size_t* length) {
 572  E :    DCHECK(symbol != NULL);
 573  E :    DCHECK(length != NULL);
 574    :  
 575  E :    *length = 0;
 576  E :    ScopedComPtr<IDiaSymbol> type;
 577  E :    HRESULT hr = symbol->get_type(type.Receive());
 578    :    // This happens if the symbol has no type information.
 579  E :    if (hr == S_FALSE)
 580  E :      return true;
 581  E :    if (hr != S_OK) {
 582  i :      LOG(ERROR) << "Failed to get type symbol: " << com::LogHr(hr) << ".";
 583  i :      return false;
 584    :    }
 585    :  
 586  E :    ULONGLONG ull_length = 0;
 587  E :    hr = type->get_length(&ull_length);
 588  E :    if (hr != S_OK) {
 589  i :      LOG(ERROR) << "Failed to retrieve type length properties: "
 590    :                 << com::LogHr(hr) << ".";
 591  i :      return false;
 592    :    }
 593  E :    DCHECK_LE(ull_length, 0xFFFFFFFF);
 594  E :    *length = static_cast<size_t>(ull_length);
 595    :  
 596  E :    return true;
 597  E :  }
 598    :  
 599    :  bool ScopeSymTagToLabelProperties(enum SymTagEnum sym_tag,
 600    :                                    size_t scope_count,
 601    :                                    BlockGraph::LabelAttributes* attr,
 602  E :                                    std::string* name) {
 603  E :    DCHECK(attr != NULL);
 604  E :    DCHECK(name != NULL);
 605    :  
 606  E :    switch (sym_tag) {
 607    :      case SymTagFuncDebugStart: {
 608  E :        *attr = BlockGraph::DEBUG_START_LABEL;
 609  E :        *name = "<debug-start>";
 610  E :        return true;
 611    :      }
 612    :      case SymTagFuncDebugEnd: {
 613  E :        *attr = BlockGraph::DEBUG_END_LABEL;
 614  E :        *name = "<debug-end>";
 615  E :        return true;
 616    :      }
 617    :      case SymTagBlock: {
 618  E :        *attr = BlockGraph::SCOPE_START_LABEL;
 619  E :        *name = base::StringPrintf("<scope-start-%d>", scope_count);
 620  E :        return true;
 621    :      }
 622    :      default:
 623  i :      return false;
 624    :    }
 625  i :    return false;
 626  E :  }
 627    :  
 628    :  bool AddLabelToBlock(Offset offset,
 629    :                       const base::StringPiece& name,
 630    :                       BlockGraph::LabelAttributes label_attributes,
 631  E :                       Block* block) {
 632  E :    DCHECK(block != NULL);
 633    :  
 634    :    // It is possible for labels to be attached to the first byte past a block
 635    :    // (things like debug end, scope end, etc). It is up to the caller to be more
 636    :    // strict about the offset if need be.
 637  E :    DCHECK_LE(0, offset);
 638  E :    DCHECK_LE(offset, static_cast<Offset>(block->size()));
 639    :  
 640    :    // Try to create the label.
 641  E :    if (block->SetLabel(offset, name, label_attributes))
 642  E :      return true;
 643    :  
 644    :    // If we get here there's an already existing label. Update it.
 645  E :    BlockGraph::Label label;
 646  E :    CHECK(block->GetLabel(offset, &label));
 647    :  
 648    :    // Merge the names if this isn't a repeated name.
 649  E :    std::string name_str = name.as_string();
 650  E :    std::string new_name = label.name();
 651  E :    std::vector<std::string> names;
 652    :    base::SplitStringUsingSubstr(label.name(), NewDecomposer::kLabelNameSep,
 653  E :                                 &names);
 654  E :    if (std::find(names.begin(), names.end(), name_str) == names.end()) {
 655  E :      names.push_back(name_str);
 656  E :      new_name.append(NewDecomposer::kLabelNameSep);
 657  E :      new_name.append(name_str);
 658    :    }
 659    :  
 660    :    // Merge the attributes.
 661    :    BlockGraph::LabelAttributes new_label_attr = label.attributes() |
 662  E :        label_attributes;
 663    :  
 664    :    // Update the label.
 665  E :    label = BlockGraph::Label(new_name, new_label_attr);
 666  E :    CHECK(block->RemoveLabel(offset));
 667  E :    CHECK(block->SetLabel(offset, label));
 668    :  
 669  E :    return true;
 670  E :  }
 671    :  
 672    :  // Reads the linker module symbol stream from the given PDB file. This should
 673    :  // always exist as the last module.
 674    :  scoped_refptr<pdb::PdbStream> GetLinkerSymbolStream(
 675  E :      const pdb::PdbFile& pdb_file) {
 676    :    static const char kLinkerModuleName[] = "* Linker *";
 677    :  
 678    :    scoped_refptr<pdb::PdbStream> dbi_stream =
 679  E :        pdb_file.GetStream(pdb::kDbiStream);
 680  E :    if (dbi_stream.get() == NULL) {
 681  i :      LOG(ERROR) << "PDB does not contain a DBI stream.";
 682  i :      return false;
 683    :    }
 684    :  
 685  E :    pdb::DbiStream dbi;
 686  E :    if (!dbi.Read(dbi_stream.get())) {
 687  i :      LOG(ERROR) << "Unable to parse DBI stream.";
 688  i :      return false;
 689    :    }
 690    :  
 691  E :    if (dbi.modules().empty()) {
 692  i :      LOG(ERROR) << "DBI stream contains no modules.";
 693  i :      return false;
 694    :    }
 695    :  
 696    :    // The last module has always been observed to be the linker module.
 697  E :    const pdb::DbiModuleInfo& linker = dbi.modules().back();
 698  E :    if (linker.module_name() != kLinkerModuleName) {
 699  i :      LOG(ERROR) << "Last module is not the linker module.";
 700  i :      return false;
 701    :    }
 702    :  
 703    :    scoped_refptr<pdb::PdbStream> symbols = pdb_file.GetStream(
 704  E :        linker.module_info_base().stream);
 705  E :    if (symbols.get() == NULL) {
 706  i :      LOG(ERROR) << "Unable to open linker symbol stream.";
 707  i :      return false;
 708    :    }
 709    :  
 710  E :    return symbols;
 711  E :  }
 712    :  
 713    :  // Parses a symbol from a PDB symbol stream. The @p buffer is populated with the
 714    :  // data and upon success this returns the symbol directly cast onto the
 715    :  // @p buffer data. On failure this returns NULL.
 716    :  template<typename SymbolType>
 717    :  const SymbolType* ParseSymbol(uint16 symbol_length,
 718    :                                pdb::PdbStream* stream,
 719  E :                                std::vector<uint8>* buffer) {
 720  E :    DCHECK(stream != NULL);
 721  E :    DCHECK(buffer != NULL);
 722    :  
 723  E :    buffer->clear();
 724    :  
 725  E :    if (symbol_length < sizeof(SymbolType)) {
 726  i :      LOG(ERROR) << "Symbol too small for casting.";
 727  i :      return NULL;
 728    :    }
 729    :  
 730  E :    if (!stream->Read(buffer, symbol_length)) {
 731  i :      LOG(ERROR) << "Failed to read symbol.";
 732  i :      return NULL;
 733    :    }
 734    :  
 735  E :    return reinterpret_cast<const SymbolType*>(buffer->data());
 736  E :  }
 737    :  
 738    :  bool VisitNonControlFlowInstruction(const _DInst& instr,
 739    :                                      AbsoluteAddress block_addr,
 740    :                                      AbsoluteAddress instr_addr,
 741  E :                                      Block* block) {
 742  E :    DCHECK_NE(0u, block_addr.value());
 743  E :    DCHECK_NE(0u, instr_addr.value());
 744  E :    DCHECK_LE(block_addr, instr_addr);
 745  E :    DCHECK(block != NULL);
 746    :  
 747    :    // TODO(chrisha): We could walk the operands and follow references
 748    :    //     explicitly. If any of them are of reference type and there's no
 749    :    //     matching reference, this would be cause to blow up and die (we
 750    :    //     should get all of these as relocs and/or fixups).
 751    :  
 752  E :    Offset instr_offset = instr_addr - block_addr;
 753    :    Block::ReferenceMap::const_iterator ref_it =
 754  E :        block->references().upper_bound(instr_offset);
 755    :    Block::ReferenceMap::const_iterator ref_end =
 756  E :        block->references().lower_bound(instr_offset + instr.size);
 757    :  
 758  E :    for (; ref_it != ref_end; ++ref_it) {
 759  E :      const Block* ref_block = ref_it->second.referenced();
 760    :  
 761    :      // We only care about inter-block references.
 762  E :      if (ref_block == block)
 763  E :        continue;
 764    :  
 765    :      // There should be no cross-block references to the middle of other
 766    :      // code blocks (to the top is fine, as we could be passing around a
 767    :      // function pointer). The exception is if the remote block is not
 768    :      // generated by cl.exe. In this case, there could be arbitrary labels
 769    :      // that act like functions within the body of that block, and referring
 770    :      // to them is perfectly fine.
 771    :      if (ref_block->type() == BlockGraph::CODE_BLOCK &&
 772    :          ref_it->second.base() != 0 &&
 773  E :          (block->attributes() & BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER)) {
 774  i :        block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
 775  i :        LOG(WARNING) << "Found a non-control-flow code-block to "
 776    :                     << "middle-of-code-block reference from "
 777    :                     << BlockInfo(block, block_addr) << " to "
 778    :                     << BlockInfo(ref_block) << ".";
 779  i :        return true;
 780    :      }
 781  E :    }
 782    :  
 783  E :    return true;
 784  E :  }
 785    :  
 786    :  bool VisitPcRelativeControlFlowInstruction(bool create_missing_refs,
 787    :                                             const _DInst& instr,
 788    :                                             AbsoluteAddress image_addr,
 789    :                                             AbsoluteAddress block_addr,
 790    :                                             AbsoluteAddress instr_addr,
 791    :                                             BlockGraph::AddressSpace* image,
 792  E :                                             Block* block) {
 793  E :    DCHECK_NE(0u, image_addr.value());
 794  E :    DCHECK_NE(0u, block_addr.value());
 795  E :    DCHECK_NE(0u, instr_addr.value());
 796  E :    DCHECK_LT(image_addr, block_addr);
 797  E :    DCHECK_LE(block_addr, instr_addr);
 798  E :    DCHECK(image != NULL);
 799  E :    DCHECK(block != NULL);
 800    :  
 801  E :    int fc = META_GET_FC(instr.meta);
 802  E :    DCHECK(fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH);
 803  E :    DCHECK_EQ(O_PC, instr.ops[0].type);
 804  E :    DCHECK_EQ(O_NONE, instr.ops[1].type);
 805  E :    DCHECK_EQ(O_NONE, instr.ops[2].type);
 806  E :    DCHECK_EQ(O_NONE, instr.ops[3].type);
 807    :    DCHECK(instr.ops[0].size == 8 ||
 808    :           instr.ops[0].size == 16 ||
 809  E :           instr.ops[0].size == 32);
 810    :  
 811    :    // Distorm gives us size in bits, we want bytes.
 812  E :    BlockGraph::Size size = instr.ops[0].size / 8;
 813    :  
 814    :    // Get the reference's address. Note we assume it's in the instruction's
 815    :    // tail end - I don't know of a case where a PC-relative offset in a branch
 816    :    // or call is not the very last thing in an x86 instruction.
 817  E :    AbsoluteAddress abs_src = instr_addr + instr.size - size;
 818    :    AbsoluteAddress abs_dst = instr_addr + instr.size +
 819  E :        static_cast<size_t>(instr.imm.addr);
 820  E :    RelativeAddress rel_dst(abs_dst.value() - image_addr.value());
 821  E :    Offset offset_src = abs_src - block_addr;
 822    :  
 823  E :    Block* dst_block = block;
 824  E :    RelativeAddress dst_block_addr(block_addr.value() - image_addr.value());
 825    :  
 826    :    // Is the reference to something outside this block?
 827  E :    if (abs_dst < block_addr || abs_dst >= block_addr + block->size()) {
 828    :      // Short PC-relative references should be to this block, otherwise this
 829    :      // block is not MSVC-like.
 830  E :      if (size < Reference::kMaximumSize) {
 831  i :        block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
 832  i :        Offset offset_instr = instr_addr - block_addr;
 833  i :        LOG(WARNING) << "Found a " << size << "-byte PC-relative instruction to "
 834    :                     << "an external " << abs_dst << " at offset "
 835    :                     << offset_instr << " of " << BlockInfo(block, block_addr)
 836    :                     << ".";
 837  i :        return true;
 838  i :      } else {
 839    :        // Long PC-relative references to other blocks should have been given to
 840    :        // us via FIXUPs, otherwise we risk breaking the world when moving blocks
 841    :        // around!
 842  E :        if (block->references().find(offset_src) == block->references().end()) {
 843  i :          LOG(ERROR) << "Missing fixup for a " << size << "-byte PC-relative "
 844    :                     << "reference to " << abs_dst << " at offset "
 845    :                     << offset_src << " of " << BlockInfo(block, block_addr)
 846    :                     << ".";
 847  i :          return false;
 848    :        }
 849    :      }
 850    :  
 851    :      // Find the destination block and its address.
 852  E :      dst_block = image->GetContainingBlock(rel_dst, 1);
 853  E :      CHECK(image->GetAddressOf(dst_block, &dst_block_addr));
 854  E :      if (dst_block == NULL) {
 855  i :        LOG(ERROR) << "Found a " << size << "-byte PC-relative reference to a "
 856    :                   << abs_dst << " outside of the image at offset "
 857    :                   << offset_src << " of " << BlockInfo(block, block_addr) << ".";
 858  i :        return false;
 859    :      }
 860    :    }
 861    :  
 862    :    // Create the missing reference if need be. These are found by basic-block
 863    :    // disassembly so aren't strictly needed, but are useful debug information.
 864  E :    if (!create_missing_refs)
 865  E :      return true;
 866    :  
 867  E :    Offset offset_dst = rel_dst - dst_block_addr;
 868    :    Reference ref(BlockGraph::PC_RELATIVE_REF, size, dst_block, offset_dst,
 869  E :                  offset_dst);
 870  E :    block->SetReference(offset_src, ref);
 871    :  
 872  E :    return true;
 873  E :  }
 874    :  
 875    :  bool VisitInstruction(bool create_missing_refs,
 876    :                        const _DInst& instr,
 877    :                        AbsoluteAddress image_addr,
 878    :                        AbsoluteAddress block_addr,
 879    :                        AbsoluteAddress instr_addr,
 880    :                        BlockGraph::AddressSpace* image,
 881  E :                        Block* block) {
 882  E :    DCHECK_NE(0u, image_addr.value());
 883  E :    DCHECK_NE(0u, block_addr.value());
 884  E :    DCHECK_NE(0u, instr_addr.value());
 885  E :    DCHECK_LT(image_addr, block_addr);
 886  E :    DCHECK_LE(block_addr, instr_addr);
 887  E :    DCHECK(image != NULL);
 888  E :    DCHECK(block != NULL);
 889    :  
 890  E :    int fc = META_GET_FC(instr.meta);
 891    :  
 892  E :    if (fc == FC_NONE) {
 893    :      return VisitNonControlFlowInstruction(
 894  E :          instr, block_addr, instr_addr, block);
 895    :    }
 896    :  
 897    :    if ((fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH) &&
 898  E :        instr.ops[0].type == O_PC) {
 899    :      return VisitPcRelativeControlFlowInstruction(create_missing_refs,
 900  E :          instr, image_addr, block_addr, instr_addr, image, block);
 901    :    }
 902    :  
 903  E :    return true;
 904  E :  }
 905    :  
 906    :  bool DisassembleCodeBlockAndLabelData(bool create_missing_refs,
 907    :                                        AbsoluteAddress image_addr,
 908    :                                        AbsoluteAddress block_addr,
 909    :                                        BlockGraph::AddressSpace* image,
 910  E :                                        Block* block) {
 911  E :    DCHECK(image != NULL);
 912  E :    DCHECK(block != NULL);
 913  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
 914    :  
 915    :    // We simultaneously walk through the block's references while disassembling
 916    :    // instructions. This is used to determine when (if) data starts. MSVC
 917    :    // always places jump tables first, which consist of absolute references.
 918  E :    const Block::ReferenceMap& ref_map(block->references());
 919  E :    Block::ReferenceMap::const_iterator ref_it = ref_map.begin();
 920    :  
 921    :    // We keep track of any self-references. If the block contains data these
 922    :    // are used as beginning points of tables. We rely on the sorted nature of
 923    :    // std::set when using these later on.
 924  E :    std::set<Offset> self_refs;
 925    :  
 926  E :    const uint8* data = block->data();
 927  E :    const uint8* data_end = block->data() + block->data_size();
 928    :  
 929    :    // If some of the data in this block is implicit then make it explicit for
 930    :    // ease of decoding.
 931  E :    std::vector<uint8> data_copy;
 932  E :    if (block->data_size() < block->size()) {
 933  i :      data_copy.resize(block->size(), 0);
 934  i :      ::memcpy(data_copy.data(), block->data(), block->data_size());
 935  i :      data = data_copy.data();
 936  i :      data_end = data + data_copy.size();
 937    :    }
 938    :  
 939    :    // Decode instructions one by one.
 940  E :    AbsoluteAddress addr(block_addr);
 941  E :    Offset offset = 0;
 942  E :    while (true) {
 943    :      // Stop the disassembly if we're at the end of the data.
 944  E :      if (data == data_end)
 945  E :        return true;
 946    :  
 947  E :      if (ref_it != ref_map.end()) {
 948    :        // Step past any references.
 949  E :        while (ref_it != ref_map.end() && ref_it->first < offset)
 950  E :          ++ref_it;
 951    :  
 952    :        // Stop the disassembly if the next byte is data. Namely, it coincides
 953    :        // with a reference.
 954  E :        if (ref_it->first == offset)
 955  E :          break;
 956    :      }
 957    :  
 958    :      // If we can't decode an instruction then we mark the block as not safe
 959    :      // for disassembly.
 960  E :      _DInst inst = { 0 };
 961    :      if (!core::DecodeOneInstruction(addr.value(), data, data_end - data,
 962  E :                                      &inst)) {
 963  i :        block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
 964  i :        VLOG(1) << "Unable to decode instruction at offset " << offset
 965    :                << " of " << BlockInfo(block, block_addr) << ".";
 966  i :        return true;
 967    :      }
 968    :  
 969    :      // Visit the instruction itself. This validates that the instruction is of
 970    :      // a type we expect to encounter, and may also cause internal references to
 971    :      // be created.
 972    :      if (!VisitInstruction(create_missing_refs, inst, image_addr, block_addr,
 973  E :                            addr, image, block)) {
 974  i :        return false;
 975    :      }
 976    :  
 977    :      // Step past the instruction.
 978  E :      addr += inst.size;
 979  E :      data += inst.size;
 980  E :      offset += inst.size;
 981    :  
 982    :      // References to data are by absolute pointer, for which we always receive
 983    :      // a reloc/fixup, thus no need to parse the instruction. Moreover, ref_it
 984    :      // points to the first reference after the beginning of the instruction at
 985    :      // this point.
 986  E :      if (ref_it != ref_map.end() && ref_it->first < offset) {
 987    :        // The reference should be wholly contained in the instruction.
 988  E :        if (static_cast<Offset>(ref_it->first + ref_it->second.size()) > offset) {
 989  i :          block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
 990  i :          VLOG(1) << "Unexpected reference in instruction at offset "
 991    :                  << ref_it->first << " of " << BlockInfo(block, block_addr)
 992    :                  << ".";
 993  i :          return true;
 994    :        }
 995    :  
 996    :        // Store self-references to locations beyond our current cursor.
 997    :        if (ref_it->second.referenced() == block &&
 998  E :            ref_it->second.offset() > offset) {
 999  E :          self_refs.insert(ref_it->second.offset());
1000    :        }
1001    :  
1002  E :        ++ref_it;
1003    :      }
1004  E :    }
1005    :  
1006    :    // If we get here then we've encountered data. We need to label data
1007    :    // sections as appropriate.
1008    :  
1009  E :    bool data_label_added = false;
1010  E :    Offset end_of_code_offset = offset;
1011    :  
1012  E :    std::set<Offset>::const_iterator off_it = self_refs.begin();
1013  E :    for (; off_it != self_refs.end(); ++off_it) {
1014  E :      Offset referred_offset = *off_it;
1015    :  
1016    :      // References to data must be beyond the decoded instructions.
1017  E :      if (referred_offset < end_of_code_offset)
1018  E :        continue;
1019    :  
1020    :      // Determine if this offset points at another reference.
1021  E :      bool ref_at_offset = false;
1022  E :      if (ref_it != ref_map.end()) {
1023    :        // Step past any references.
1024  E :        while (ref_it != ref_map.end() && ref_it->first < referred_offset)
1025  E :          ++ref_it;
1026    :  
1027    :        // Stop the disassembly if the next byte is data. Namely, it coincides
1028    :        // with a reference.
1029  E :        if (ref_it->first == referred_offset)
1030  E :          ref_at_offset = true;
1031    :      }
1032    :  
1033    :      // Build and set the data label.
1034  E :      BlockGraph::LabelAttributes attr = BlockGraph::DATA_LABEL;
1035  E :      const char* name = NULL;
1036  E :      if (ref_at_offset) {
1037  E :        name = kJumpTable;
1038  E :        attr |= BlockGraph::JUMP_TABLE_LABEL;
1039  E :      } else {
1040  E :        name = kCaseTable;
1041  E :        attr |= BlockGraph::CASE_TABLE_LABEL;
1042    :      }
1043  E :      if (!AddLabelToBlock(referred_offset, name, attr, block))
1044  i :        return false;
1045  E :      data_label_added = true;
1046  E :    }
1047    :  
1048  E :    if (!data_label_added) {
1049  i :      block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
1050  i :      VLOG(1) << "Disassembled into data but found no references to it for "
1051    :              << BlockInfo(block, block_addr) << ".";
1052  i :      return true;
1053    :    }
1054    :  
1055  E :    return true;
1056  E :  }
1057    :  
1058    :  bool JumpAndCaseTableAlreadyLabelled(const Block* block,
1059    :                                       Offset offset,
1060  E :                                       BlockGraph::LabelAttributes attr) {
1061  E :    DCHECK(block != NULL);
1062    :  
1063    :    // We can't say anything about blocks that we were not able to disassemble.
1064  E :    if (block->attributes() & BlockGraph::ERRORED_DISASSEMBLY)
1065  i :      return true;
1066    :  
1067  E :    BlockGraph::Label label;
1068  E :    if (!block->GetLabel(offset, &label)) {
1069  i :      LOG(ERROR) << "Expected data label at offset " << offset << " of "
1070    :                 << BlockInfo(block) << ".";
1071  i :      return false;
1072    :    }
1073    :  
1074  E :    if ((label.attributes() & attr) == attr)
1075  E :      return true;
1076    :  
1077  i :    LOG(ERROR) << "Label at offset " << offset << " of " << BlockInfo(block)
1078    :               << " has attributes "
1079    :               << BlockGraph::BlockAttributesToString(block->attributes())
1080    :               << " but expected at least "
1081    :               << BlockGraph::BlockAttributesToString(attr) << ".";
1082    :  
1083  i :    return false;
1084  E :  }
1085    :  
1086    :  }  // namespace
1087    :  
1088    :  // We use ", " as a separator between symbol names. We sometimes see commas
1089    :  // in symbol names but do not see whitespace. Thus, this provides a useful
1090    :  // separator that is also human friendly to read.
1091    :  const char NewDecomposer::kLabelNameSep[] = ", ";
1092    :  
1093    :  // This is by CreateBlocksFromCoffGroups to communicate shared state to
1094    :  // VisitLinkerSymbol via the VisitSymbols helper function.
1095    :  struct NewDecomposer::VisitLinkerSymbolContext {
1096    :    int current_group_index;
1097    :    std::string current_group_prefix;
1098    :    RelativeAddress current_group_start;
1099    :  
1100    :    // These are the set of patterns that indicate bracketing groups. They
1101    :    // should match both the opening and the closing symbol, and have at least
1102    :    // one match group returning the common prefix.
1103    :    std::vector<RE> bracketing_groups;
1104    :  
1105  E :    VisitLinkerSymbolContext() : current_group_index(-1) {
1106    :      // Matches groups like: .CRT$XCA -> .CRT$XCZ
1107  E :      bracketing_groups.push_back(RE("(\\.CRT\\$X.)[AZ]"));
1108    :      // Matches groups like: .rtc$IAA -> .rtc$IZZ
1109  E :      bracketing_groups.push_back(RE("(\\.rtc\\$.*)(AA|ZZ)"));
1110    :      // Matches exactly: ATL$__a -> ATL$__z
1111  E :      bracketing_groups.push_back(RE("(ATL\\$__)[az]"));
1112    :      // Matches exactly: .tls -> .tls$ZZZ
1113  E :      bracketing_groups.push_back(RE("(\\.tls)(\\$ZZZ)?"));
1114  E :    }
1115    :  
1116    :   private:
1117    :    DISALLOW_COPY_AND_ASSIGN(VisitLinkerSymbolContext);
1118    :  };
1119    :  
1120    :  NewDecomposer::NewDecomposer(const PEFile& image_file)
1121    :      : image_file_(image_file), parse_debug_info_(true), image_layout_(NULL),
1122  E :        image_(NULL), current_block_(NULL), current_scope_count_(0) {
1123  E :  }
1124    :  
1125  E :  bool NewDecomposer::Decompose(ImageLayout* image_layout) {
1126  E :    DCHECK(image_layout != NULL);
1127    :  
1128    :    // The temporaries should be NULL.
1129  E :    DCHECK(image_layout_ == NULL);
1130  E :    DCHECK(image_ == NULL);
1131    :  
1132    :    // We start by finding the PDB path.
1133  E :    if (!FindAndValidatePdbPath())
1134  E :      return false;
1135  E :    DCHECK(!pdb_path_.empty());
1136    :  
1137    :    // Load the serialized block-graph from the PDB if it exists. This allows
1138    :    // round-trip decomposition.
1139  E :    bool stream_exists = false;
1140    :    if (LoadBlockGraphFromPdb(
1141  E :            pdb_path_, image_file_, image_layout, &stream_exists)) {
1142  E :      return true;
1143  E :    } else if (stream_exists) {
1144    :      // If the stream exists but hasn't been loaded we return an error. At this
1145    :      // point an error message has already been logged if there was one.
1146  i :      return false;
1147    :    }
1148    :  
1149    :    // At this point a full decomposition needs to be performed.
1150  E :    image_layout_ = image_layout;
1151  E :    image_ = &(image_layout->blocks);
1152  E :    bool success = DecomposeImpl();
1153  E :    image_layout_ = NULL;
1154  E :    image_ = NULL;
1155    :  
1156  E :    return success;
1157  E :  }
1158    :  
1159  E :  bool NewDecomposer::FindAndValidatePdbPath() {
1160    :    // Manually find the PDB path if it is not specified.
1161  E :    if (pdb_path_.empty()) {
1162    :      if (!FindPdbForModule(image_file_.path(), &pdb_path_) ||
1163  E :          pdb_path_.empty()) {
1164  i :        LOG(ERROR) << "Unable to find PDB file for module: "
1165    :                   << image_file_.path().value();
1166  i :        return false;
1167    :      }
1168    :    }
1169  E :    DCHECK(!pdb_path_.empty());
1170    :  
1171  E :    if (!file_util::PathExists(pdb_path_)) {
1172  E :      LOG(ERROR) << "Path not found: " << pdb_path_.value();
1173  E :      return false;
1174    :    }
1175    :  
1176  E :    if (!pe::PeAndPdbAreMatched(image_file_.path(), pdb_path_)) {
1177  i :      LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
1178    :                 << "module \"" << image_file_.path().value() << "\".";
1179  i :      return false;
1180    :    }
1181    :  
1182  E :    return true;
1183  E :  }
1184    :  
1185    :  bool NewDecomposer::LoadBlockGraphFromPdbStream(
1186    :      const PEFile& image_file,
1187    :      pdb::PdbStream* block_graph_stream,
1188  E :      ImageLayout* image_layout) {
1189  E :    DCHECK(block_graph_stream != NULL);
1190  E :    DCHECK(image_layout != NULL);
1191  E :    LOG(INFO) << "Reading block-graph and image layout from the PDB.";
1192    :  
1193    :    // Initialize an input archive pointing to the stream.
1194  E :    scoped_refptr<pdb::PdbByteStream> byte_stream = new pdb::PdbByteStream();
1195  E :    if (!byte_stream->Init(block_graph_stream))
1196  i :      return false;
1197  E :    DCHECK(byte_stream.get() != NULL);
1198    :  
1199  E :    core::ScopedInStreamPtr pdb_in_stream;
1200    :    pdb_in_stream.reset(core::CreateByteInStream(
1201  E :        byte_stream->data(), byte_stream->data() + byte_stream->length()));
1202    :  
1203    :    // Read the header.
1204  E :    uint32 stream_version = 0;
1205  E :    unsigned char compressed = 0;
1206    :    if (!pdb_in_stream->Read(sizeof(stream_version),
1207    :                             reinterpret_cast<core::Byte*>(&stream_version)) ||
1208    :        !pdb_in_stream->Read(sizeof(compressed),
1209  E :                             reinterpret_cast<core::Byte*>(&compressed))) {
1210  i :      LOG(ERROR) << "Failed to read existing Syzygy block-graph stream header.";
1211  i :      return false;
1212    :    }
1213    :  
1214    :    // Check the stream version.
1215  E :    if (stream_version != pdb::kSyzygyBlockGraphStreamVersion) {
1216  E :      LOG(ERROR) << "PDB contains an unsupported Syzygy block-graph stream"
1217    :                 << " version (got " << stream_version << ", expected "
1218    :                 << pdb::kSyzygyBlockGraphStreamVersion << ").";
1219  E :      return false;
1220    :    }
1221    :  
1222    :    // If the stream is compressed insert the decompression filter.
1223  E :    core::InStream* in_stream = pdb_in_stream.get();
1224  E :    scoped_ptr<core::ZInStream> zip_in_stream;
1225  E :    if (compressed != 0) {
1226  E :      zip_in_stream.reset(new core::ZInStream(in_stream));
1227  E :      if (!zip_in_stream->Init()) {
1228  i :        LOG(ERROR) << "Unable to initialize ZInStream.";
1229  i :        return false;
1230    :      }
1231  E :      in_stream = zip_in_stream.get();
1232    :    }
1233    :  
1234    :    // Deserialize the image-layout.
1235  E :    core::NativeBinaryInArchive in_archive(in_stream);
1236  E :    block_graph::BlockGraphSerializer::Attributes attributes = 0;
1237    :    if (!LoadBlockGraphAndImageLayout(
1238  E :        image_file, &attributes, image_layout, &in_archive)) {
1239  i :      LOG(ERROR) << "Failed to deserialize block-graph and image layout.";
1240  i :      return false;
1241    :    }
1242    :  
1243  E :    return true;
1244  E :  }
1245    :  
1246    :  bool NewDecomposer::LoadBlockGraphFromPdb(const FilePath& pdb_path,
1247    :                                            const PEFile& image_file,
1248    :                                            ImageLayout* image_layout,
1249  E :                                            bool* stream_exists) {
1250  E :    DCHECK(image_layout != NULL);
1251  E :    DCHECK(stream_exists != NULL);
1252    :  
1253  E :    pdb::PdbFile pdb_file;
1254  E :    pdb::PdbReader pdb_reader;
1255  E :    if (!pdb_reader.Read(pdb_path, &pdb_file)) {
1256  i :      LOG(ERROR) << "Unable to read the PDB named \"" << pdb_path.value()
1257    :                 << "\".";
1258  i :      return NULL;
1259    :    }
1260    :  
1261    :    // Try to get the block-graph stream from the PDB.
1262  E :    scoped_refptr<pdb::PdbStream> block_graph_stream;
1263    :    if (!pdb::LoadNamedStreamFromPdbFile(pdb::kSyzygyBlockGraphStreamName,
1264    :                                         &pdb_file,
1265    :                                         &block_graph_stream) ||
1266  E :        block_graph_stream.get() == NULL) {
1267  E :      *stream_exists = false;
1268  E :      return false;
1269    :    }
1270  E :    if (block_graph_stream->length() == 0) {
1271  i :      *stream_exists = false;
1272  i :      LOG(WARNING) << "The block-graph stream is empty, ignoring it.";
1273  i :      return false;
1274    :    }
1275    :  
1276    :    // The PDB contains a block-graph stream, the block-graph and the image layout
1277    :    // will be read from this stream.
1278  E :    *stream_exists = true;
1279    :    if (!LoadBlockGraphFromPdbStream(image_file, block_graph_stream.get(),
1280  E :                                     image_layout)) {
1281  i :      return false;
1282    :    }
1283    :  
1284  E :    return true;
1285  E :  }
1286    :  
1287  E :  bool NewDecomposer::DecomposeImpl() {
1288    :    // Instantiate and initialize our Debug Interface Access session. This logs
1289    :    // verbosely for us.
1290  E :    ScopedComPtr<IDiaDataSource> dia_source;
1291  E :    ScopedComPtr<IDiaSession> dia_session;
1292  E :    ScopedComPtr<IDiaSymbol> global;
1293    :    if (!InitializeDia(image_file_, pdb_path_, dia_source.Receive(),
1294  E :                       dia_session.Receive(), global.Receive())) {
1295  i :      return false;
1296    :    }
1297    :  
1298    :    // Copy the image headers to the layout.
1299    :    CopySectionHeadersToImageLayout(
1300    :        image_file_.nt_headers()->FileHeader.NumberOfSections,
1301    :        image_file_.section_headers(),
1302  E :        &(image_layout_->sections));
1303    :  
1304    :    // Create the sections in the underlying block-graph.
1305  E :    if (!CreateBlockGraphSections())
1306  i :      return false;
1307    :  
1308    :    // We scope the first few operations so that we don't keep the intermediate
1309    :    // references around any longer than we have to.
1310    :    {
1311  E :      IntermediateReferences references;
1312    :  
1313    :      // First we parse out the PE blocks.
1314  E :      if (!CreatePEImageBlocksAndReferences(&references))
1315  i :        return false;
1316    :  
1317    :      // Now we parse the COFF group symbols from the linker's symbol stream.
1318    :      // These indicate things like static initializers, which must stay together
1319    :      // in a single block.
1320  E :      if (!CreateBlocksFromCoffGroups())
1321  i :        return false;
1322    :  
1323    :      // Next we parse out section contributions. Some of these may coincide with
1324    :      // existing PE parsed blocks, but when they do we expect them to be exact
1325    :      // collisions.
1326  E :      if (!CreateBlocksFromSectionContribs(dia_session.get()))
1327  i :        return false;
1328    :  
1329    :      // Flesh out the rest of the image with gap blocks.
1330  E :      if (!CreateGapBlocks())
1331  i :        return false;
1332    :  
1333    :      // Finalize the PE-parsed intermediate references.
1334  E :      if (!FinalizeIntermediateReferences(references))
1335  i :        return false;
1336  E :    }
1337    :  
1338    :    // Parse the fixups and use them to create references.
1339  E :    if (!CreateReferencesFromFixups(dia_session.get()))
1340  i :      return false;
1341    :  
1342    :    // Disassemble code blocks and use the results to infer case and jump tables.
1343  E :    if (!DisassembleCodeBlocksAndLabelData())
1344  i :      return false;
1345    :  
1346    :    // Annotate the block-graph with symbol information.
1347  E :    if (parse_debug_info_ && !ProcessSymbols(global.get()))
1348  i :      return false;
1349    :  
1350  E :    return true;
1351  E :  }
1352    :  
1353  E :  bool NewDecomposer::CreateBlockGraphSections() {
1354    :    // Iterate through the image sections, and create sections in the BlockGraph.
1355  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1356  E :    for (size_t i = 0; i < num_sections; ++i) {
1357  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1358  E :      std::string name = pe::PEFile::GetSectionName(*header);
1359    :      BlockGraph::Section* section = image_->graph()->AddSection(
1360  E :          name, header->Characteristics);
1361  E :      DCHECK(section != NULL);
1362    :  
1363    :      // For now, we expect them to have been created with the same IDs as those
1364    :      // in the original image.
1365  E :      if (section->id() != i) {
1366  i :        LOG(ERROR) << "Unexpected section ID.";
1367  i :        return false;
1368    :      }
1369  E :    }
1370    :  
1371  E :    return true;
1372  E :  }
1373    :  
1374    :  bool NewDecomposer::CreatePEImageBlocksAndReferences(
1375  E :      IntermediateReferences* references) {
1376  E :    DCHECK(references != NULL);
1377    :  
1378    :    PEFileParser::AddReferenceCallback add_reference(
1379  E :        base::Bind(&AddIntermediateReference, base::Unretained(references)));
1380  E :    PEFileParser parser(image_file_, image_, add_reference);
1381  E :    PEFileParser::PEHeader header;
1382  E :    if (!parser.ParseImage(&header)) {
1383  i :      LOG(ERROR) << "Unable to parse PE image.";
1384  i :      return false;
1385    :    }
1386    :  
1387  E :    return true;
1388  E :  }
1389    :  // Reads the PDB at |pdb_path_| and runs VisitLinkerSymbol over its symbols.
1390  E :  bool NewDecomposer::CreateBlocksFromCoffGroups() {
1391  E :    pdb::PdbFile pdb_file;
1392  E :    pdb::PdbReader pdb_reader;
1393  E :    if (!pdb_reader.Read(pdb_path_, &pdb_file)) {
1394  i :      LOG(ERROR) << "Failed to load PDB: " << pdb_path_.value();
1395  i :      return false;
1396    :    }
1397    :  
1398  E :    scoped_refptr<pdb::PdbStream> symbols = GetLinkerSymbolStream(pdb_file);
1399    :    // NOTE(review): |symbols| is dereferenced unchecked below; can it be NULL?
1400    :    // Process the symbols in the linker module symbol stream.
1401  E :    VisitLinkerSymbolContext context;
1402    :    pdb::VisitSymbolsCallback callback = base::Bind(
1403    :        &NewDecomposer::VisitLinkerSymbol,
1404    :        base::Unretained(this),
1405  E :        base::Unretained(&context));
1406  E :    if (!pdb::VisitSymbols(callback, symbols->length(), true, symbols.get()))
1407  i :      return false;
1408    :  
1409    :    // Bail if we did not encounter a closing bracketing symbol where one was
1410    :    // expected. VisitLinkerSymbol resets the index to -1 when a group closes.
1411  E :    if (context.current_group_index != -1) {
1412  i :      LOG(ERROR) << "Unable to close bracketed COFF group \""
1413    :                 << context.current_group_prefix << "\".";
1414  i :      return false;
1415    :    }
1416    :  
1417  E :    return true;
1418  E :  }
1419    :  // Creates (or finds a covering PE) block for each DIA section contribution.
1420  E :  bool NewDecomposer::CreateBlocksFromSectionContribs(IDiaSession* session) {
1421  E :    ScopedComPtr<IDiaEnumSectionContribs> section_contribs;
1422    :    SearchResult search_result = FindDiaTable(session,
1423  E :                                              section_contribs.Receive());
1424  E :    if (search_result != kSearchSucceeded) {
1425  i :      if (search_result == kSearchFailed)  // Other failures are not logged here.
1426  i :        LOG(ERROR) << "No section contribution table found.";
1427  i :      return false;
1428    :    }
1429    :    // Contributions belonging to this section index are skipped in the loop.
1430  E :    size_t rsrc_id = image_file_.GetSectionIndex(kResourceSectionName);
1431    :  
1432  E :    LONG count = 0;
1433  E :    if (section_contribs->get_Count(&count) != S_OK) {
1434  i :      LOG(ERROR) << "Failed to get section contributions enumeration length.";
1435  i :      return false;
1436    :    }
1437    :  
1438  E :    for (LONG visited = 0; visited < count; ++visited) {
1439  E :      ScopedComPtr<IDiaSectionContrib> section_contrib;
1440  E :      ULONG fetched = 0;
1441  E :      HRESULT hr = section_contribs->Next(1, section_contrib.Receive(), &fetched);
1442    :      // The standard way to end an enumeration (according to the docs) is by
1443    :      // returning S_FALSE and setting fetched to 0. We don't actually see this,
1444    :      // but it wouldn't be an error if we did.
1445  E :      if (hr == S_FALSE && fetched == 0)
1446  i :        break;
1447  E :      if (hr != S_OK) {
1448  i :        LOG(ERROR) << "Failed to get DIA section contribution: "
1449    :                   << com::LogHr(hr) << ".";
1450  i :        return false;
1451    :      }
1452    :      // We actually end up seeing S_OK and fetched == 0 when the enumeration
1453    :      // terminates, which goes against the published documentation.
1454  E :      if (fetched == 0)
1455  i :        break;
1456    :  
1457  E :      DWORD rva = 0;
1458  E :      DWORD length = 0;
1459  E :      DWORD section_id = 0;
1460  E :      BOOL code = FALSE;
1461  E :      ScopedComPtr<IDiaSymbol> compiland;
1462  E :      ScopedBstr bstr_name;
1463    :      if ((hr = section_contrib->get_relativeVirtualAddress(&rva)) != S_OK ||
1464    :          (hr = section_contrib->get_length(&length)) != S_OK ||
1465    :          (hr = section_contrib->get_addressSection(&section_id)) != S_OK ||
1466    :          (hr = section_contrib->get_code(&code)) != S_OK ||
1467    :          (hr = section_contrib->get_compiland(compiland.Receive())) != S_OK ||
1468  E :          (hr = compiland->get_name(bstr_name.Receive())) != S_OK) {
1469  i :        LOG(ERROR) << "Failed to get section contribution properties: "
1470    :                   << com::LogHr(hr) << ".";
1471  i :        return false;
1472    :      }
1473    :  
1474    :      // Determine if this function was built by a supported compiler.
1475    :      bool is_built_by_supported_compiler =
1476  E :          IsBuiltBySupportedCompiler(compiland.get());
1477    :  
1478    :      // DIA numbers sections from 1 to n, while we do 0 to n - 1.
1479  E :      DCHECK_LT(0u, section_id);
1480  E :      --section_id;
1481    :  
1482    :      // We don't parse the resource section, as it is parsed by the PEFileParser.
1483  E :      if (section_id == rsrc_id)
1484  E :        continue;
1485    :  
1486  E :      std::string name;  // UTF-8 compiland name; becomes the block name below.
1487  E :      if (!WideToUTF8(bstr_name, bstr_name.Length(), &name)) {
1488  i :        LOG(ERROR) << "Failed to convert compiland name to UTF8.";
1489  i :        return false;
1490    :      }
1491    :  
1492    :      // TODO(chrisha): We see special section contributions with the name
1493    :      //     "* CIL *". These are concatenations of data symbols and can very
1494    :      //     likely be chunked using symbols directly. A cursory visual inspection
1495    :      //     of symbol names hints that these might be related to WPO.
1496    :  
1497    :      // Create the block. DIA's |code| flag selects a code or a data block.
1498    :      BlockType block_type =
1499  E :          code ? BlockGraph::CODE_BLOCK : BlockGraph::DATA_BLOCK;
1500    :      Block* block = CreateBlockOrFindCoveringPeBlock(
1501  E :          block_type, RelativeAddress(rva), length, name);
1502  E :      if (block == NULL) {
1503  i :        LOG(ERROR) << "Unable to create block for compiland \"" << name << "\".";
1504  i :        return false;
1505    :      }
1506    :  
1507    :      // Set the block attributes.
1508  E :      block->set_attribute(BlockGraph::SECTION_CONTRIB);
1509  E :      if (!is_built_by_supported_compiler)
1510  E :        block->set_attribute(BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER);
1511  E :    }
1512    :  
1513  E :    return true;
1514  E :  }
1515    :  // Calls CreateSectionGapBlocks on every code and data section of the image.
1516  E :  bool NewDecomposer::CreateGapBlocks() {
1517  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1518    :  
1519    :    // Iterate through all the image sections.
1520  E :    for (size_t i = 0; i < num_sections; ++i) {
1521  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1522  E :      DCHECK(header != NULL);
1523    :  
1524  E :      BlockType type = BlockGraph::CODE_BLOCK;
1525  E :      const char* section_type = NULL;  // Only used in the error message below.
1526  E :      switch (GetSectionType(header)) {
1527    :        case kSectionCode:
1528  E :          type = BlockGraph::CODE_BLOCK;
1529  E :          section_type = "code";
1530  E :          break;
1531    :  
1532    :        case kSectionData:
1533  E :          type = BlockGraph::DATA_BLOCK;
1534  E :          section_type = "data";
1535  E :          break;
1536    :  
1537    :        default:
1538  i :          continue;  // Sections that are neither code nor data are skipped.
1539    :      }
1540    :      // Log via GetSectionName: header->Name may lack a terminating NUL.
1541  E :      if (!CreateSectionGapBlocks(header, type)) {
1542  i :        LOG(ERROR) << "Unable to create gap blocks for " << section_type
1543    :                   << " section \"" << PEFile::GetSectionName(*header) << "\".";
1544  i :        return false;
1545    :      }
1546  E :    }
1547    :  
1548  E :    return true;
1549  E :  }
1550    :  // Calls CreateReference for each entry of |references|; stops on failure.
1551    :  bool NewDecomposer::FinalizeIntermediateReferences(
1552  E :      const IntermediateReferences& references) {
1553  E :    for (size_t i = 0; i < references.size(); ++i) {
1554    :      // This logs verbosely for us.
1555    :      if (!CreateReference(references[i].src_addr,
1556    :                           references[i].size,
1557    :                           references[i].type,
1558    :                           references[i].dst_addr,  // NOTE(review): presumably base ==
1559    :                           references[i].dst_addr,  // destination here; confirm.
1560  E :                           image_)) {
1561  i :        return false;
1562    :      }
1563  E :    }
1564  E :    return true;
1565  E :  }
1566    :  // Runs DisassembleCodeBlockAndLabelData on every code block of the image.
1567  E :  bool NewDecomposer::DisassembleCodeBlocksAndLabelData() {
1568  E :    DCHECK(image_ != NULL);
1569    :    // The block at RVA 0 is taken to be the DOS header block.
1570    :    const BlockGraph::Block* dos_header_block =
1571  E :        image_->GetBlockByAddress(RelativeAddress(0));
1572  E :    DCHECK(dos_header_block != NULL);
1573    :  
1574    :    const BlockGraph::Block* nt_headers_block =
1575  E :        GetNtHeadersBlockFromDosHeaderBlock(dos_header_block);
1576  E :    if (nt_headers_block == NULL) {
1577  i :      LOG(ERROR) << "Unable to get NT headers block for image.";
1578  i :      return false;
1579    :    }
1580    :  
1581    :    // GetNtHeadersBlockFromDosHeaderBlock sanity checks things so we can cast
1582    :    // with impunity.
1583    :    const IMAGE_NT_HEADERS* nt_headers =
1584  E :        reinterpret_cast<const IMAGE_NT_HEADERS*>(nt_headers_block->data());
1585  E :    core::AbsoluteAddress image_base(nt_headers->OptionalHeader.ImageBase);
1586    :  
1587    :    // Walk through the blocks and disassemble each one of them.
1588  E :    BlockGraph::AddressSpace::RangeMapConstIter it = image_->begin();
1589  E :    for (; it != image_->end(); ++it) {
1590  E :      BlockGraph::Block* block = it->second;
1591    :  
1592  E :      if (block->type() != BlockGraph::CODE_BLOCK)
1593  E :        continue;  // Only code blocks are disassembled.
1594    :      // Absolute address of the block: image base plus the block's start RVA.
1595  E :      core::AbsoluteAddress abs_addr(image_base + it->first.start().value());
1596    :      if (!DisassembleCodeBlockAndLabelData(
1597  E :          parse_debug_info_, image_base, abs_addr, image_, block)) {
1598  i :        return false;
1599    :      }
1600  E :    }
1601    :  
1602  E :    return true;
1603  E :  }
1604    :  // Creates references from the PDB fixups and cross-checks them with relocs.
1605  E :  bool NewDecomposer::CreateReferencesFromFixups(IDiaSession* session) {
1606  E :    DCHECK(session != NULL);
1607    :  
1608  E :    PEFile::RelocSet reloc_set;
1609  E :    if (!image_file_.DecodeRelocs(&reloc_set))
1610  i :      return false;
1611    :  
1612  E :    OMAPs omap_from;
1613  E :    PdbFixups fixups;
1614  E :    if (!LoadDebugStreams(session, &fixups, &omap_from))
1615  i :      return false;
1616    :  
1617    :    // While creating references from the fixups this removes the
1618    :    // corresponding reference data from the relocs. We use this as a kind of
1619    :    // double-entry bookkeeping to ensure all is well and right in the world.
1620    :    if (!CreateReferencesFromFixupsImpl(image_file_, fixups, omap_from,
1621  E :                                        &reloc_set, image_)) {
1622  i :      return false;
1623    :    }
1624    :    // Anything left in |reloc_set| is a reloc that no fixup accounted for.
1625  E :    if (!reloc_set.empty()) {
1626  i :      LOG(ERROR) << "Found reloc entries without matching FIXUP entries.";
1627  i :      return false;
1628    :    }
1629    :  
1630  E :    return true;
1631  E :  }
1632    :  // Registers the symbol patterns and their callbacks, then browses |root|.
1633  E :  bool NewDecomposer::ProcessSymbols(IDiaSymbol* root) {
1634  E :    DCHECK(root != NULL);
1635    :    // Each callback below is a member function bound to |this| (unretained).
1636    :    DiaBrowser::MatchCallback on_push_function_or_thunk_symbol(
1637    :        base::Bind(&NewDecomposer::OnPushFunctionOrThunkSymbol,
1638  E :                   base::Unretained(this)));
1639    :    DiaBrowser::MatchCallback on_pop_function_or_thunk_symbol(
1640    :        base::Bind(&NewDecomposer::OnPopFunctionOrThunkSymbol,
1641  E :                   base::Unretained(this)));
1642    :    DiaBrowser::MatchCallback on_function_child_symbol(
1643    :        base::Bind(&NewDecomposer::OnFunctionChildSymbol,
1644  E :                   base::Unretained(this)));
1645    :    DiaBrowser::MatchCallback on_data_symbol(
1646  E :        base::Bind(&NewDecomposer::OnDataSymbol, base::Unretained(this)));
1647    :    DiaBrowser::MatchCallback on_public_symbol(
1648  E :        base::Bind(&NewDecomposer::OnPublicSymbol, base::Unretained(this)));
1649    :    DiaBrowser::MatchCallback on_label_symbol(
1650  E :        base::Bind(&NewDecomposer::OnLabelSymbol, base::Unretained(this)));
1651    :  
1652  E :    DiaBrowser dia_browser;
1653    :  
1654    :    // Find thunks. The same push/pop callbacks are used as for functions.
1655    :    dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagThunk),
1656    :                           on_push_function_or_thunk_symbol,
1657  E :                           on_pop_function_or_thunk_symbol);
1658    :  
1659    :    // Find functions and all data, labels, callsites, debug start/end and block
1660    :    // symbols below them. This is done in one single pattern so that the
1661    :    // function pushes/pops happen in the right order.
1662    :    dia_browser.AddPattern(
1663    :        Seq(Opt(SymTagCompiland),
1664    :            Callback(Or(SymTagFunction, SymTagThunk),
1665    :                     on_push_function_or_thunk_symbol,
1666    :                     on_pop_function_or_thunk_symbol),
1667    :            Star(SymTagBlock),
1668    :            Or(SymTagData,
1669    :               SymTagLabel,
1670    :               SymTagBlock,
1671    :               SymTagFuncDebugStart,
1672    :               SymTagFuncDebugEnd,
1673    :               SymTagCallSite)),
1674  E :        on_function_child_symbol);
1675    :  
1676    :    // Global data and code label symbols.
1677    :    dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagLabel),
1678  E :                           on_label_symbol);
1679    :    dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagData),
1680  E :                           on_data_symbol);
1681    :  
1682    :    // Public symbols. These provide decorated names without any type info, but
1683    :    // are useful for debugging.
1684  E :    dia_browser.AddPattern(SymTagPublicSymbol, on_public_symbol);
1685    :  
1686  E :    return dia_browser.Browse(root);
1687  E :  }
1688    :  // pdb::VisitSymbols callback: creates a data block per bracketed COFF group.
1689    :  bool NewDecomposer::VisitLinkerSymbol(VisitLinkerSymbolContext* context,
1690    :                                        uint16 symbol_length,
1691    :                                        uint16 symbol_type,
1692  E :                                        pdb::PdbStream* stream) {
1693  E :    DCHECK(context != NULL);
1694  E :    DCHECK(stream != NULL);
1695    :    // Every symbol type other than S_COFFGROUP is ignored.
1696  E :    if (symbol_type != cci::S_COFFGROUP)
1697  E :      return true;
1698    :  
1699  E :    std::vector<uint8> buffer;
1700    :    const cci::CoffGroupSym* coffgroup =
1701  E :        ParseSymbol<cci::CoffGroupSym>(symbol_length, stream, &buffer);
1702  E :    if (coffgroup == NULL)
1703  i :      return false;
1704    :  
1705    :    // The PDB numbers sections starting at index 1 but we use index 0.
1706    :    RelativeAddress rva(image_layout_->sections[coffgroup->seg - 1].addr +
1707  E :        coffgroup->off);  // NOTE(review): |seg| is not range-checked; verify.
1708    :  
1709    :    // We are looking for an opening symbol.
1710  E :    if (context->current_group_index == -1) {
1711  E :      for (size_t i = 0; i < context->bracketing_groups.size(); ++i) {
1712  E :        std::string prefix;
1713  E :        if (context->bracketing_groups[i].FullMatch(coffgroup->name, &prefix)) {
1714  E :          context->current_group_index = i;
1715  E :          context->current_group_prefix = prefix;
1716  E :          context->current_group_start = rva;
1717  E :          return true;
1718    :        }
1719  E :      }
1720    :  
1721    :      // No opening symbol was encountered. We can safely ignore this
1722    :      // COFF group symbol.
1723  E :      return true;
1724    :    }
1725    :  
1726    :    // If we get here we've found an opening symbol and we're looking for the
1727    :    // matching closing symbol. Groups not matching the open pattern are skipped.
1728  E :    std::string prefix;
1729    :    if (!context->bracketing_groups[context->current_group_index].FullMatch(
1730  E :            coffgroup->name, &prefix)) {
1731  E :      return true;
1732    :    }
1733    :  
1734  E :    if (prefix != context->current_group_prefix) {
1735    :      // We see another symbol open/close while already in an opened symbol.
1736    :      // This indicates nested bracketing information, which we've never seen
1737    :      // before.
1738  i :      LOG(ERROR) << "Encountered nested bracket symbol \"" << prefix
1739    :                 << "\" while in \"" << context->current_group_prefix << "\".";
1740  i :      return false;
1741    :    }
1742    :    // The block spans from the opening group's start to this group's end.
1743  E :    RelativeAddress end = rva + coffgroup->cb;
1744  E :    DCHECK_LT(context->current_group_start, end);
1745    :  
1746    :    // Create a block for this bracketed COFF group.
1747    :    BlockGraph::Block* block = CreateBlock(
1748    :        BlockGraph::DATA_BLOCK,
1749    :        context->current_group_start,
1750    :        end - context->current_group_start,
1751  E :        base::StringPrintf("Bracketed COFF group: %s", prefix.c_str()));
1752  E :    if (block == NULL) {
1753  i :      LOG(ERROR) << "Failed to create bracketed COFF group \""
1754    :                 << prefix << "\".";
1755  i :      return false;
1756    :    }
1757  E :    block->set_attribute(BlockGraph::COFF_GROUP);
1758    :  
1759    :    // Indicate that this block is closed and we're looking for another opening
1760    :    // bracket symbol.
1761  E :    context->current_group_index = -1;
1762  E :    context->current_group_prefix.clear();
1763  E :    context->current_group_start = RelativeAddress(0);
1764    :  
1765  E :    return true;
1766  E :  }
1767    :  // Labels the block holding a function/thunk and makes it the current block.
1768    :  DiaBrowser::BrowserDirective NewDecomposer::OnPushFunctionOrThunkSymbol(
1769    :      const DiaBrowser& dia_browser,
1770    :      const DiaBrowser::SymTagVector& sym_tags,
1771  E :      const DiaBrowser::SymbolPtrVector& symbols) {
1772  E :    DCHECK(!symbols.empty());
1773  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1774  E :    DiaBrowser::SymbolPtr symbol = symbols.back();
1775    :    // No function may be in progress: the per-function state must be clear.
1776  E :    DCHECK(current_block_ == NULL);
1777  E :    DCHECK_EQ(current_address_, RelativeAddress(0));
1778  E :    DCHECK_EQ(0u, current_scope_count_);
1779    :  
1780  E :    HRESULT hr = E_FAIL;
1781  E :    DWORD location_type = LocIsNull;
1782  E :    DWORD rva = 0;
1783  E :    ULONGLONG length = 0;
1784  E :    ScopedBstr name_bstr;
1785    :    if (FAILED(hr = symbol->get_locationType(&location_type)) ||
1786    :        FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
1787    :        FAILED(hr = symbol->get_length(&length)) ||
1788  E :        FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
1789  i :      LOG(ERROR) << "Failed to get function/thunk properties: " << com::LogHr(hr)
1790    :                 << ".";
1791  i :      return DiaBrowser::kBrowserAbort;
1792    :    }
1793    :  
1794    :    // We only care about functions with static storage. We can stop looking at
1795    :    // things below this node, as we won't be able to resolve them either.
1796  E :    if (location_type != LocIsStatic)
1797  i :      return DiaBrowser::kBrowserTerminatePath;
1798    :    // Look up the block containing the symbol; a missing block is fatal (CHECK).
1799  E :    RelativeAddress addr(rva);
1800  E :    Block* block = image_->GetBlockByAddress(addr);
1801  E :    CHECK(block != NULL);
1802  E :    RelativeAddress block_addr;
1803  E :    CHECK(image_->GetAddressOf(block, &block_addr));
1804  E :    DCHECK(InRange(addr, block_addr, block->size()));
1805    :  
1806  E :    std::string name;
1807  E :    if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1808  i :      LOG(ERROR) << "Failed to convert function/thunk name to UTF8.";
1809  i :      return DiaBrowser::kBrowserAbort;
1810    :    }
1811    :  
1812    :    // We know the function starts in this block but we need to make sure its
1813    :    // end does not extend past the end of the block.
1814  E :    if (addr + length > block_addr + block->size()) {
1815  i :      LOG(ERROR) << "Got function/thunk \"" << name << "\" that is not contained "
1816    :                 << "by section contribution \"" << block->name() << "\".";
1817  i :      return DiaBrowser::kBrowserAbort;
1818    :    }
1819    :    // Label the function's entry point at its offset within the block.
1820  E :    Offset offset = addr - block_addr;
1821  E :    if (!AddLabelToBlock(offset, name, BlockGraph::CODE_LABEL, block))
1822  i :      return DiaBrowser::kBrowserAbort;
1823    :  
1824    :    // Keep track of the generated block. We will use this when parsing symbols
1825    :    // that belong to this function. This prevents us from having to do repeated
1826    :    // lookups and also allows us to associate labels outside of the block to the
1827    :    // correct block.
1828  E :    current_block_ = block;
1829  E :    current_address_ = block_addr;
1830    :  
1831    :    // Certain properties are not defined on all blocks, so the following calls
1832    :    // may return S_FALSE. Anything but S_OK is treated as "property not set".
1833  E :    BOOL no_return = FALSE;
1834  E :    if (symbol->get_noReturn(&no_return) != S_OK)
1835  E :      no_return = FALSE;
1836    :  
1837  E :    BOOL has_inl_asm = FALSE;
1838  E :    if (symbol->get_hasInlAsm(&has_inl_asm) != S_OK)
1839  E :      has_inl_asm = FALSE;
1840    :  
1841  E :    BOOL has_eh = FALSE;
1842  E :    if (symbol->get_hasEH(&has_eh) != S_OK)
1843  E :      has_eh = FALSE;
1844    :  
1845  E :    BOOL has_seh = FALSE;
1846  E :    if (symbol->get_hasSEH(&has_seh) != S_OK)
1847  E :      has_seh = FALSE;
1848    :  
1849    :    // Set the block attributes.
1850  E :    if (no_return == TRUE)
1851  E :      block->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
1852  E :    if (has_inl_asm == TRUE)
1853  E :      block->set_attribute(BlockGraph::HAS_INLINE_ASSEMBLY);
1854  E :    if (has_eh || has_seh)
1855  E :      block->set_attribute(BlockGraph::HAS_EXCEPTION_HANDLING);
1856  E :    if (IsSymTag(symbol, SymTagThunk))
1857  E :      block->set_attribute(BlockGraph::THUNK);
1858    :  
1859  E :    return DiaBrowser::kBrowserContinue;
1860  E :  }
1861    :  // Clears the per-function state set up by OnPushFunctionOrThunkSymbol.
1862    :  DiaBrowser::BrowserDirective NewDecomposer::OnPopFunctionOrThunkSymbol(
1863    :      const DiaBrowser& dia_browser,
1864    :      const DiaBrowser::SymTagVector& sym_tags,
1865  E :      const DiaBrowser::SymbolPtrVector& symbols) {
1866    :    // Simply clean up the current function block and address. None of the
1867  E :    current_block_ = NULL;  // parameters are used.
1868  E :    current_address_ = RelativeAddress(0);
1869  E :    current_scope_count_ = 0;
1870  E :    return DiaBrowser::kBrowserContinue;
1871  E :  }
1872    :  // Dispatches a symbol found below a function to the handler for its tag.
1873    :  DiaBrowser::BrowserDirective NewDecomposer::OnFunctionChildSymbol(
1874    :        const DiaBrowser& dia_browser,
1875    :        const DiaBrowser::SymTagVector& sym_tags,
1876  E :        const DiaBrowser::SymbolPtrVector& symbols) {
1877  E :    DCHECK(!symbols.empty());
1878  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1879    :  
1880    :    // This can only be called from the context of a function, so we expect the
1881    :    // parent function block to be set and remembered.
1882  E :    DCHECK(current_block_ != NULL);
1883    :  
1884    :    // The set of sym tags here should match the pattern used in the DiaBrowser
1885    :    // instance set up in ProcessSymbols.
1886  E :    switch (sym_tags.back()) {
1887    :      case SymTagData:
1888  E :        return OnDataSymbol(dia_browser, sym_tags, symbols);
1889    :  
1890    :      case SymTagLabel:
1891  E :        return OnLabelSymbol(dia_browser, sym_tags, symbols);
1892    :  
1893    :      case SymTagBlock:
1894    :      case SymTagFuncDebugStart:
1895    :      case SymTagFuncDebugEnd:
1896  E :        return OnScopeSymbol(sym_tags.back(), symbols.back());
1897    :  
1898    :      case SymTagCallSite:
1899  E :        return OnCallSiteSymbol(symbols.back());
1900    :  
1901    :      default:
1902    :        break;
1903    :    }
1904    :    // Any other tag is unexpected here and aborts the browse.
1905  i :    LOG(ERROR) << "Unhandled function child symbol: " << sym_tags.back() << ".";
1906  i :    return DiaBrowser::kBrowserAbort;
1907  E :  }
1908    :  // Adds a data label for a statically located data symbol to its block.
1909    :  DiaBrowser::BrowserDirective NewDecomposer::OnDataSymbol(
1910    :      const DiaBrowser& dia_browser,
1911    :      const DiaBrowser::SymTagVector& sym_tags,
1912  E :      const DiaBrowser::SymbolPtrVector& symbols) {
1913  E :    DCHECK(!symbols.empty());
1914  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1915  E :    DiaBrowser::SymbolPtr symbol = symbols.back();
1916    :  
1917  E :    HRESULT hr = E_FAIL;
1918  E :    DWORD location_type = LocIsNull;
1919  E :    DWORD rva = 0;
1920  E :    ScopedBstr name_bstr;
1921    :    if (FAILED(hr = symbol->get_locationType(&location_type)) ||
1922    :        FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
1923  E :        FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
1924  i :      LOG(ERROR) << "Failed to get data properties: " << com::LogHr(hr) << ".";
1925  i :      return DiaBrowser::kBrowserAbort;
1926    :    }
1927    :  
1928    :    // Symbols with an address of zero are essentially invalid. They appear to
1929    :    // have been optimized away by the compiler, but they are still reported.
1930  E :    if (rva == 0)
1931  E :      return DiaBrowser::kBrowserTerminatePath;
1932    :  
1933    :    // We only care about data with static storage. We can stop looking at
1934    :    // things below this node, as we won't be able to resolve them either.
1935  E :    if (location_type != LocIsStatic)
1936  i :      return DiaBrowser::kBrowserTerminatePath;
1937    :  
1938    :    // Get the size of this datum from its type info.
1939  E :    size_t length = 0;
1940  E :    if (!GetDataSymbolSize(symbol, &length))
1941  i :      return DiaBrowser::kBrowserAbort;
1942    :  
1943    :    // Reuse the parent function block if we can. This acts as small lookup
1944    :    // cache. Otherwise look the block up by address; a miss is fatal (CHECK).
1945  E :    RelativeAddress addr(rva);
1946  E :    Block* block = current_block_;
1947  E :    RelativeAddress block_addr(current_address_);
1948  E :    if (block == NULL || !InRange(addr, block_addr, block->size())) {
1949  E :      block = image_->GetBlockByAddress(addr);
1950  E :      CHECK(block != NULL);
1951  E :      CHECK(image_->GetAddressOf(block, &block_addr));
1952  E :      DCHECK(InRange(addr, block_addr, block->size()));
1953    :    }
1954    :  
1955  E :    std::string name;
1956  E :    if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1957  i :      LOG(ERROR) << "Failed to convert label name to UTF8.";
1958  i :      return DiaBrowser::kBrowserAbort;
1959    :    }
1960    :  
1961    :    // Zero-length data symbols mark case/jump tables, or are forward declares.
1962  E :    BlockGraph::LabelAttributes attr = BlockGraph::DATA_LABEL;
1963  E :    Offset offset = addr - block_addr;
1964  E :    if (length == 0) {
1965    :      // Jump and case tables come in as data symbols with no name. Jump tables
1966    :      // are always an array of pointers, thus they coincide exactly with a
1967    :      // reference. Case tables are simple arrays of integer values (themselves
1968    :      // indices into a jump table), thus do not coincide with a reference.
1969  E :      if (name.empty() && block->type() == BlockGraph::CODE_BLOCK) {
1970  E :        if (block->references().find(offset) != block->references().end()) {
1971  E :          name = kJumpTable;
1972  E :          attr |= BlockGraph::JUMP_TABLE_LABEL;
1973  E :        } else {
1974  E :          name = kCaseTable;
1975  E :          attr |= BlockGraph::CASE_TABLE_LABEL;
1976    :        }
1977    :  
1978    :        // We expect jump and case tables to already have been discovered by
1979    :        // the disassembly operation. If this is not the case then our decoding
1980    :        // step is in error and its results can't be trusted.
1981  E :        if (!JumpAndCaseTableAlreadyLabelled(block, offset, attr))
1982  i :          return DiaBrowser::kBrowserAbort;
1983  E :      } else {
1984    :        // Zero-length data symbols act as 'forward declares' in some sense. They
1985    :        // are always followed by a non-zero length data symbol with the same name
1986    :        // and location.
1987  E :        return DiaBrowser::kBrowserTerminatePath;
1988    :      }
1989    :    }
1990    :  
1991    :    // Verify that the data symbol does not exceed the size of the block.
1992  E :    if (addr + length > block_addr + block->size()) {
1993    :      // The data symbol can exceed the size of the block in the case of data
1994    :      // imports. For some reason the toolchain emits a global data symbol with
1995    :      // type information equal to the type of the data *pointed* to by the import
1996    :      // entry rather than the type of the entry itself. Thus, if the data type
1997    :      // is bigger than the entire IAT this symbol will exceed it. To complicate
1998    :      // matters even more, a poorly written module can import its own export in
1999    :      // which case a linker generated pseudo-import-entry block will be
2000    :      // generated. This won't be part of the IAT, so we can't even filter based
2001    :      // on that. Instead, we simply ignore global data symbols that exceed the
2002    :      // block size. A symbol path of length one with an "_imp_" name qualifies.
2003  E :      base::StringPiece spname(name);
2004  E :      if (sym_tags.size() == 1 && spname.starts_with("_imp_")) {
2005  E :        VLOG(1) << "Encountered an imported data symbol \"" << name << "\" that "
2006    :                << "extends past its parent block \"" << block->name() << "\".";
2007  E :      } else {
2008  i :        LOG(ERROR) << "Received data symbol \"" << name << "\" that extends past "
2009    :                   << "its parent block \"" << block->name() << "\".";
2010  i :        return DiaBrowser::kBrowserAbort;
2011    :      }
2012    :    }
2013    :  
2014  E :    if (!AddLabelToBlock(offset, name, attr, block))
2015  i :      return DiaBrowser::kBrowserAbort;
2016    :  
2017  E :    return DiaBrowser::kBrowserContinue;
2018  E :  }
2019    :  // Adds a PUBLIC_SYMBOL_LABEL for a public symbol to the block containing it.
2020    :  DiaBrowser::BrowserDirective NewDecomposer::OnPublicSymbol(
2021    :      const DiaBrowser& dia_browser,
2022    :      const DiaBrowser::SymTagVector& sym_tags,
2023  E :      const DiaBrowser::SymbolPtrVector& symbols) {
2024  E :    DCHECK(!symbols.empty());
2025  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
2026  E :    DCHECK(current_block_ == NULL);
2027  E :    DiaBrowser::SymbolPtr symbol = symbols.back();
2028    :  
2029  E :    HRESULT hr = E_FAIL;
2030  E :    DWORD rva = 0;
2031  E :    ScopedBstr name_bstr;
2032    :    if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
2033  E :        FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
2034  i :      LOG(ERROR) << "Failed to get public symbol properties: " << com::LogHr(hr)
2035    :                 << ".";
2036  i :      return DiaBrowser::kBrowserAbort;
2037    :    }
2038    :    // Find the block containing the symbol; a missing block is fatal (CHECK).
2039  E :    RelativeAddress addr(rva);
2040  E :    Block* block = image_->GetBlockByAddress(addr);
2041  E :    CHECK(block != NULL);
2042  E :    RelativeAddress block_addr;
2043  E :    CHECK(image_->GetAddressOf(block, &block_addr));
2044  E :    DCHECK(InRange(addr, block_addr, block->size()));
2045    :  
2046  E :    std::string name;  // The conversion result is not checked: may stay empty.
2047  E :    WideToUTF8(name_bstr, name_bstr.Length(), &name);
2048    :  
2049    :    // Public symbol names are mangled. Remove leading '_' as per
2050    :    // http://msdn.microsoft.com/en-us/library/00kh39zz(v=vs.80).aspx
2051  E :    if (!name.empty() && name[0] == '_')  // Never index an empty string.
2052  E :      name = name.substr(1);
2053    :  
2054  E :    Offset offset = addr - block_addr;
2055  E :    if (!AddLabelToBlock(offset, name, BlockGraph::PUBLIC_SYMBOL_LABEL, block))
2056  i :      return DiaBrowser::kBrowserAbort;
2057    :  
2058  E :    return DiaBrowser::kBrowserContinue;
2059  E :  }
2060    :  // Adds a CODE_LABEL for a label symbol, using the current block if one is set.
2061    :  DiaBrowser::BrowserDirective NewDecomposer::OnLabelSymbol(
2062    :      const DiaBrowser& dia_browser,
2063    :      const DiaBrowser::SymTagVector& sym_tags,
2064  E :      const DiaBrowser::SymbolPtrVector& symbols) {
2065  E :    DCHECK(!symbols.empty());
2066  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
2067  E :    DiaBrowser::SymbolPtr symbol = symbols.back();
2068    :  
2069  E :    HRESULT hr = E_FAIL;
2070  E :    DWORD rva = 0;
2071  E :    ScopedBstr name_bstr;
2072    :    if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
2073  E :        FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
2074  i :      LOG(ERROR) << "Failed to get label symbol properties: " << com::LogHr(hr)
2075    :                 << ".";
2076  i :      return DiaBrowser::kBrowserAbort;
2077    :    }
2078    :  
2079    :    // If we have a current_block_ the label should lie within its scope.
2080  E :    RelativeAddress addr(rva);
2081  E :    Block* block = current_block_;
2082  E :    RelativeAddress block_addr(current_address_);
2083  E :    if (block != NULL) {  // NOTE(review): InRangeIncl presumably admits the end.
2084  E :      if (!InRangeIncl(addr, current_address_, current_block_->size())) {
2085  i :        LOG(ERROR) << "Label falls outside of current block \""
2086    :                   << current_block_->name() << "\".";
2087  i :        return DiaBrowser::kBrowserAbort;
2088    :      }
2089  E :    } else {
2090    :      // If there is no current block this is a compiland scope label.
2091  E :      block = image_->GetBlockByAddress(addr);
2092  E :      CHECK(block != NULL);
2093  E :      CHECK(image_->GetAddressOf(block, &block_addr));
2094  E :      DCHECK(InRange(addr, block_addr, block->size()));
2095    :  
2096    :      // TODO(chrisha): This label is in compiland scope, so we should be
2097    :      //     finding the block whose section contribution shares the same
2098    :      //     compiland.
2099    :    }
2100    :    // NOTE(review): the conversion result is ignored, unlike in OnDataSymbol.
2101  E :    std::string name;
2102  E :    WideToUTF8(name_bstr, name_bstr.Length(), &name);
2103    :  
2104  E :    Offset offset = addr - block_addr;
2105  E :    if (!AddLabelToBlock(offset, name, BlockGraph::CODE_LABEL, block))
2106  i :      return DiaBrowser::kBrowserAbort;
2107    :  
2108  E :    return DiaBrowser::kBrowserContinue;
2109  E :  }
2110    :  
2111    :  DiaBrowser::BrowserDirective NewDecomposer::OnScopeSymbol(
2112  E :      enum SymTagEnum type, DiaBrowser::SymbolPtr symbol) {
             // Adds a label to current_block_ at the address of |symbol|, with the name
             // and attributes that ScopeSymTagToLabelProperties produces for |type|.
             // For SymTagBlock symbols a matching SCOPE_END_LABEL is added as well and
             // current_scope_count_ is advanced. Returns kBrowserAbort on failure and
             // kBrowserContinue otherwise.
             //
2113    :    // We should only get here via the successful exploration of a SymTagFunction,
2114    :    // so current_block_ should be set.
2115  E :    DCHECK(current_block_ != NULL);
2116    :  
2117  E :    HRESULT hr = E_FAIL;
2118  E :    DWORD rva = 0;
2119  E :    if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva))) {
2120  i :      LOG(ERROR) << "Failed to get scope symbol properties: " << com::LogHr(hr)
2121    :                 << ".";
2122  i :      return DiaBrowser::kBrowserAbort;
2123    :    }
2124    :  
2125    :    // The label may potentially lie at the first byte past the function.
             // Hence the upper bound checked below is inclusive. Both bounds are only
             // DCHECKed here; no kBrowserAbort is returned for an out-of-range address.
2126  E :    RelativeAddress addr(rva);
2127  E :    DCHECK_LE(current_address_, addr);
2128  E :    DCHECK_LE(addr, current_address_ + current_block_->size());
2129    :  
2130    :    // Get the attributes for this label.
2131  E :    BlockGraph::LabelAttributes attr = 0;
2132  E :    std::string name;
2133  E :    CHECK(ScopeSymTagToLabelProperties(type, current_scope_count_, &attr, &name));
2134    :  
2135    :    // Add the label.
2136  E :    Offset offset = addr - current_address_;
2137  E :    if (!AddLabelToBlock(offset, name, attr, current_block_))
2138  i :      return DiaBrowser::kBrowserAbort;
2139    :  
2140    :    // If this is a scope we extract the length and explicitly add a corresponding
2141    :    // end label.
2142  E :    if (type == SymTagBlock) {
2143  E :      ULONGLONG length = 0;
               // Any result other than S_OK, including other success codes, is treated
               // as a failure to obtain the length.
2144  E :      if (symbol->get_length(&length) != S_OK) {
2145  i :        LOG(ERROR) << "Failed to extract code scope length for block \""
2146    :                    << current_block_->name() << "\".";
2147  i :        return DiaBrowser::kBrowserAbort;
2148    :      }
               // NOTE(review): |length| is a ULONGLONG, so |offset + length| is computed
               // in 64 bits and then narrowed to whatever offset type AddLabelToBlock
               // takes - confirm this cannot truncate for valid input.
2149  E :      DCHECK_LE(static_cast<size_t>(offset + length), current_block_->size());
               // The end label's name uses the scope count before it is incremented.
2150  E :      name = base::StringPrintf("<scope-end-%d>", current_scope_count_);
2151  E :      ++current_scope_count_;
2152    :      if (!AddLabelToBlock(offset + length, name,
2153  E :                           BlockGraph::SCOPE_END_LABEL, current_block_)) {
2154  i :        return DiaBrowser::kBrowserAbort;
2155    :      }
2156    :    }
2157    :  
2158  E :    return DiaBrowser::kBrowserContinue;
2159  E :  }
2160    :  
2161    :  DiaBrowser::BrowserDirective NewDecomposer::OnCallSiteSymbol(
2162  E :      DiaBrowser::SymbolPtr symbol) {
             // Adds a "<call-site>" label with the CALL_SITE_LABEL attribute to
             // current_block_ at the address of |symbol|. Returns kBrowserAbort if the
             // RVA cannot be read, if it falls outside the current block, or if the
             // label cannot be added; returns kBrowserContinue otherwise.
             //
2163    :    // We should only get here via the successful exploration of a SymTagFunction,
2164    :    // so current_block_ should be set.
2165  E :    DCHECK(current_block_ != NULL);
2166    :  
2167  E :    HRESULT hr = E_FAIL;
2168  E :    DWORD rva = 0;
2169  E :    if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva))) {
2170  i :      LOG(ERROR) << "Failed to get call site symbol properties: "
2171    :                 << com::LogHr(hr) << ".";
2172  i :      return DiaBrowser::kBrowserAbort;
2173    :    }
2174    :  
             // NOTE(review): this uses InRange where OnLabelSymbol uses InRangeIncl;
             // presumably a call site is never expected one past the end of the block -
             // confirm against the definitions of the two helpers.
2175  E :    RelativeAddress addr(rva);
2176  E :    if (!InRange(addr, current_address_, current_block_->size())) {
2177  i :      LOG(ERROR) << "Call site falls outside of current block \""
2178    :                 << current_block_->name() << "\".";
2179  i :      return DiaBrowser::kBrowserAbort;
2180    :    }
2181    :  
             // The label is positioned relative to the start of the current block.
2182  E :    Offset offset = addr - current_address_;
2183    :    if (!AddLabelToBlock(offset, "<call-site>", BlockGraph::CALL_SITE_LABEL,
2184  E :                         current_block_)) {
2185  i :      return DiaBrowser::kBrowserAbort;
2186    :    }
2187    :  
2188  E :    return DiaBrowser::kBrowserContinue;
2189  E :  }
2190    :  
2191    :  Block* NewDecomposer::CreateBlock(BlockType type,
2192    :                                    RelativeAddress address,
2193    :                                    BlockGraph::Size size,
2194  E :                                    const base::StringPiece& name) {
             // Adds a block of |type| named |name| to image_ at |address| with |size|,
             // records its source range, assigns it to the section containing it and
             // attaches the image data when available. Returns the new block, or NULL
             // if the block could not be added or lies outside of all sections.
2195  E :    Block* block = image_->AddBlock(type, address, size, name);
2196  E :    if (block == NULL) {
2197  i :      LOG(ERROR) << "Unable to add block \"" << name.as_string() << "\" at "
2198    :                 << address << " with size " << size << ".";
2199  i :      return NULL;
2200    :    }
2201    :  
2202    :    // Mark the source range from whence this block originates. This is assuming
2203    :    // an untransformed image. To handle transformed images we'd have to use the
2204    :    // OMAP information to do this properly.
             // |pushed| is only consumed by the DCHECK below.
2205    :    bool pushed = block->source_ranges().Push(
2206    :        Block::DataRange(0, size),
2207  E :        Block::SourceRange(address, size));
2208  E :    DCHECK(pushed);
2209    :  
             // NOTE(review): on the failure path below the block has already been added
             // to image_ and is not removed before NULL is returned - confirm that
             // callers treat this as fatal for the whole decomposition.
2210  E :    BlockGraph::SectionId section = image_file_.GetSectionIndex(address, size);
2211  E :    if (section == BlockGraph::kInvalidSectionId) {
2212  i :      LOG(ERROR) << "Block \"" << name.as_string() << "\" at " << address
2213    :                 << " with size " << size << " lies outside of all sections.";
2214  i :      return NULL;
2215    :    }
2216  E :    block->set_section(section);
2217    :  
             // GetImageData may return NULL, in which case no data is set on the block
             // (presumably for ranges with no backing bytes in the image - verify).
2218  E :    const uint8* data = image_file_.GetImageData(address, size);
2219  E :    if (data != NULL)
2220  E :      block->SetData(data, size);
2221    :  
2222  E :    return block;
2223  E :  }
2224    :  
2225    :  Block* NewDecomposer::CreateBlockOrFindCoveringPeBlock(
2226    :      BlockType type,
2227    :      RelativeAddress addr,
2228    :      BlockGraph::Size size,
2229  E :      const base::StringPiece& name) {
             // If a block already exists at |addr|, returns it provided that it carries
             // the PE_PARSED or COFF_GROUP attribute and contains the requested range;
             // any other existing block is a conflict, which is logged and reported by
             // returning NULL. If no block exists at |addr| a new one is created via
             // CreateBlock.
2230  E :    Block* block = image_->GetBlockByAddress(addr);
2231  E :    if (block != NULL) {
2232  E :      RelativeAddress block_addr;
2233  E :      CHECK(image_->GetAddressOf(block, &block_addr));
2234    :  
2235  E :      RelativeRange existing_block(block_addr, block->size());
2236    :  
2237    :      // If this is not a PE parsed or COFF group block that covers us entirely,
2238    :      // then this is an error.
2239    :      static const BlockGraph::BlockAttributes kCoveringAttributes =
2240    :          BlockGraph::PE_PARSED | BlockGraph::COFF_GROUP;
2241    :      if ((block->attributes() & kCoveringAttributes) == 0 ||
2242  E :          !existing_block.Contains(addr, size)) {
                 // Both block names are wrapped in a balanced pair of quotes.
2243  i :        LOG(ERROR) << "Trying to create block \"" << name.as_string() << "\" at "
2244    :                   << addr.value() << " with size " << size << " that conflicts "
2245    :                   << "with existing block \"" << block->name() << "\" at "
2246    :                   << block_addr << " with size " << block->size() << ".";
2247  i :        return NULL;
2248    :      }
2249    :  
               // The existing block covers the requested range; reuse it.
2250  E :      return block;
2251    :    }
2252  E :    DCHECK(block == NULL);
2253    :  
2254  E :    return CreateBlock(type, addr, size, name);
2255  E :  }
2256    :  
2257    :  bool NewDecomposer::CreateGapBlock(BlockType block_type,
2258    :                                     RelativeAddress address,
2259  E :                                     BlockGraph::Size size) {
             // Creates a block of |block_type| at |address| with |size| through
             // CreateBlock, names it "Gap Block 0x<address as 8 hex digits>" and marks
             // it with the GAP_BLOCK attribute. Returns false if the block could not be
             // created, true otherwise.
2260    :    Block* block = CreateBlock(block_type, address, size,
2261  E :        StringPrintf("Gap Block 0x%08X", address.value()).c_str());
2262  E :    if (block == NULL) {
2263  i :      LOG(ERROR) << "Unable to create gap block.";
2264  i :      return false;
2265    :    }
2266  E :    block->set_attribute(BlockGraph::GAP_BLOCK);
2267    :  
2268  E :    return true;
2269  E :  }
2270    :  
2271    :  bool NewDecomposer::CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
2272  E :                                             BlockType block_type) {
             // Covers every part of the section described by |header| that no existing
             // block occupies with gap blocks of |block_type|. The section spans from
             // header->VirtualAddress for header->Misc.VirtualSize. Returns false as
             // soon as a gap block cannot be created, true otherwise.
2273  E :    RelativeAddress section_begin(header->VirtualAddress);
2274  E :    RelativeAddress section_end(section_begin + header->Misc.VirtualSize);
2275    :    RelativeAddress image_end(
2276  E :        image_file_.nt_headers()->OptionalHeader.SizeOfImage);
2277    :  
2278    :    // Search for the first and last blocks intersecting from the start and end
2279    :    // of the section to the end of the image.
             // |it| is looked up over the range from section_begin to image_end and
             // |end| over the range from section_end to image_end, so |end| serves as
             // the past-the-end iterator for the blocks of this section.
2280    :    BlockGraph::AddressSpace::RangeMap::const_iterator it(
2281    :        image_->address_space_impl().FindFirstIntersection(
2282    :            BlockGraph::AddressSpace::Range(section_begin,
2283  E :                                            image_end - section_begin)));
2284    :    BlockGraph::AddressSpace::RangeMap::const_iterator end(
2285    :        image_->address_space_impl().FindFirstIntersection(
2286    :            BlockGraph::AddressSpace::Range(section_end,
2287  E :                                            image_end - section_end)));
2288    :  
2289    :    // The whole section is missing. Cover it with one gap block.
2290  E :    if (it == end)
2291    :      return CreateGapBlock(
2292  i :          block_type, section_begin, section_end - section_begin);
2293    :  
2294    :    // Create the head gap block if need be.
             // This covers the space between the section start and the first block.
2295  E :    if (section_begin < it->first.start()) {
2296    :      if (!CreateGapBlock(
2297  i :          block_type, section_begin, it->first.start() - section_begin)) {
2298  i :        return false;
2299    :      }
2300    :    }
2301    :  
2302    :    // Now iterate the blocks and fill in gaps.
2303  E :    for (; it != end; ++it) {
2304  E :      const Block* block = it->second;
2305  E :      DCHECK(block != NULL);
               // A block that reaches or passes the end of the section leaves no tail
               // gap to fill, so the walk stops here.
2306  E :      RelativeAddress block_end = it->first.start() + block->size();
2307  E :      if (block_end >= section_end)
2308  E :        break;
2309    :  
2310    :      // Walk to the next address in turn.
2311  E :      BlockGraph::AddressSpace::RangeMap::const_iterator next = it;
2312  E :      ++next;
2313  E :      if (next == end) {
2314    :        // We're at the end of the list. Create the tail gap block.
2315  E :        DCHECK_GT(section_end, block_end);
2316  E :        if (!CreateGapBlock(block_type, block_end, section_end - block_end))
2317  i :          return false;
2318  E :        break;
2319    :      }
2320    :  
2321    :      // Create the interstitial gap block.
               // Only needed when the next block does not start right at |block_end|.
2322  E :      if (block_end < next->first.start())
2323    :        if (!CreateGapBlock(
2324  E :            block_type, block_end, next->first.start() - block_end)) {
2325  i :          return false;
2326    :        }
2327  E :    }
2328    :  
2329  E :    return true;
2330  E :  }
2331    :  
2332    :  }  // namespace pe

Coverage information generated Thu Mar 14 11:53:36 2013.