Coverage for /Syzygy/pe/new_decomposer.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
79.2% | 870 / 1099 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/pe/new_decomposer.h"
  16    :  
  17    :  #include "pcrecpp.h"  // NOLINT
  18    :  #include "base/bind.h"
  19    :  #include "base/stringprintf.h"
  20    :  #include "base/utf_string_conversions.h"
  21    :  #include "base/strings/string_split.h"
  22    :  #include "base/win/scoped_bstr.h"
  23    :  #include "base/win/scoped_comptr.h"
  24    :  #include "syzygy/core/disassembler_util.h"
  25    :  #include "syzygy/core/zstream.h"
  26    :  #include "syzygy/pdb/omap.h"
  27    :  #include "syzygy/pdb/pdb_byte_stream.h"
  28    :  #include "syzygy/pdb/pdb_constants.h"
  29    :  #include "syzygy/pdb/pdb_dbi_stream.h"
  30    :  #include "syzygy/pdb/pdb_file.h"
  31    :  #include "syzygy/pdb/pdb_reader.h"
  32    :  #include "syzygy/pdb/pdb_symbol_record.h"
  33    :  #include "syzygy/pdb/pdb_util.h"
  34    :  #include "syzygy/pe/dia_util.h"
  35    :  #include "syzygy/pe/find.h"
  36    :  #include "syzygy/pe/pe_file_parser.h"
  37    :  #include "syzygy/pe/pe_utils.h"
  38    :  #include "syzygy/pe/serialization.h"
  39    :  #include "third_party/cci/Files/CvInfo.h"
  40    :  
  41    :  namespace cci = Microsoft_Cci_Pdb;
  42    :  
  43    :  namespace {
  44    :  
  45    :  // A small helper struct for dumping block information to log messages.
           // Bundles a block pointer with at most one address; |type| records which
           // address member (if any) was supplied at construction.
  46    :  // TODO(chrisha): Move this to block_graph and reuse it everywhere!
  47    :  struct BlockInfo {
             // Tags which of the address members below holds the supplied address.
  48    :    enum AddressType {
  49    :      kNoAddress,
  50    :      kAbsoluteAddress,
  51    :      kFileOffsetAddress,
  52    :      kRelativeAddress,
  53    :    };
  54    :  
             // Wraps |block| without an address. |block| must not be NULL (DCHECKed).
  55  i :    explicit BlockInfo(const block_graph::BlockGraph::Block* block)
  56    :        : block(block), type(kNoAddress) {
  57  i :      DCHECK(block != NULL);
  58  i :    }
  59    :  
             // The remaining constructors wrap |block| together with an address of
             // the given kind and set |type| to match. |block| must not be NULL
             // (DCHECKed).
  60  i :    BlockInfo(const block_graph::BlockGraph::Block* block,
  61    :              core::AbsoluteAddress address)
  62    :        : block(block), type(kAbsoluteAddress), abs_addr(address) {
  63  i :      DCHECK(block != NULL);
  64  i :    }
  65    :    BlockInfo(const block_graph::BlockGraph::Block* block,
  66    :              core::FileOffsetAddress address)
  67    :        : block(block), type(kFileOffsetAddress), file_addr(address) {
  68    :      DCHECK(block != NULL);
  69    :    }
  70    :    BlockInfo(const block_graph::BlockGraph::Block* block,
  71    :              core::RelativeAddress address)
  72    :        : block(block), type(kRelativeAddress), rel_addr(address) {
  73    :      DCHECK(block != NULL);
  74    :    }
  75    :  
             // The wrapped block, and the kind of address stored alongside it.
  76    :    const block_graph::BlockGraph::Block* block;
  77    :    AddressType type;
  78    :  
  79    :    // Ideally these would be in a union but because they have non-trivial
  80    :    // constructors they are not allowed.
             // Only the member selected by |type| is set by the constructors; the
             // others are left default-constructed.
  81    :    core::AbsoluteAddress abs_addr;
  82    :    core::FileOffsetAddress file_addr;
  83    :    core::RelativeAddress rel_addr;
  84    :  
  85    :   private:
  86    :    DISALLOW_COPY_AND_ASSIGN(BlockInfo);
  87    :  };
  88    :  
  89    :  }  // anonymous namespace
  90    :  
  91    :  // Pretty prints a BlockInfo to an ostream. This has to be outside of any
  92    :  // namespaces so that operator<< is found properly.
           // The output has the form Block(id=<id>, name="<name>", size=<size>), with
           // ", address=<address>" inserted before the closing parenthesis when the
           // BlockInfo carries an address. Returns |os| to allow chaining.
  93  i :  std::ostream& operator<<(std::ostream& os, const BlockInfo& bi) {
  94    :    os << "Block(id=" << bi.block->id() << ", name=\"" << bi.block->name()
  95  i :       << "\", size=" << bi.block->size();
  96  i :    if (bi.type != BlockInfo::kNoAddress) {
  97  i :      os << ", address=";
               // Stream whichever address member |type| selects.
  98  i :      switch (bi.type) {
  99    :        case BlockInfo::kAbsoluteAddress: {
 100  i :          os << bi.abs_addr;
 101  i :          break;
 102    :        }
 103    :        case BlockInfo::kFileOffsetAddress: {
 104  i :          os << bi.file_addr;
 105  i :          break;
 106    :        }
 107    :        case BlockInfo::kRelativeAddress: {
 108  i :          os << bi.rel_addr;
 109    :          break;
 110    :        }
                 // kNoAddress is excluded by the enclosing if; nothing to print.
 111    :        default: break;
 112    :      }
 113    :    }
 114  i :    os << ")";
 115  i :    return os;
 116  i :  }
 117    :  
 118    :  namespace pe {
 119    :  
 120    :  // An intermediate reference representation used while parsing PE blocks.
 121    :  // This is necessary because at that point we haven't yet chunked the whole
 122    :  // image into blocks thus some references cannot be resolved.
           // Both endpoints are therefore stored as image-relative addresses rather
           // than as block/offset pairs.
 123    :  struct NewDecomposer::IntermediateReference {
             // Address at which the reference originates.
 124    :    RelativeAddress src_addr;
             // Kind and size of the reference.
 125    :    BlockGraph::ReferenceType type;
 126    :    BlockGraph::Size size;
             // Address the reference points to.
 127    :    RelativeAddress dst_addr;
 128    :  };
 129    :  
 130    :  namespace {
 131    :  
 132    :  using base::win::ScopedBstr;
 133    :  using base::win::ScopedComPtr;
 134    :  using block_graph::BlockGraph;
 135    :  using builder::Callback;
 136    :  using builder::Opt;
 137    :  using builder::Or;
 138    :  using builder::Seq;
 139    :  using builder::Star;
 140    :  using core::AbsoluteAddress;
 141    :  using core::RelativeAddress;
 142    :  
 143    :  typedef BlockGraph::Block Block;
 144    :  typedef BlockGraph::BlockType BlockType;
 145    :  typedef BlockGraph::Offset Offset;
 146    :  typedef BlockGraph::Reference Reference;
 147    :  typedef BlockGraph::ReferenceType ReferenceType;
 148    :  typedef core::AddressRange<RelativeAddress, size_t> RelativeRange;
 149    :  typedef NewDecomposer::IntermediateReference IntermediateReference;
 150    :  typedef NewDecomposer::IntermediateReferences IntermediateReferences;
 151    :  typedef pcrecpp::RE RE;
 152    :  typedef std::vector<OMAP> OMAPs;
 153    :  typedef std::vector<pdb::PdbFixup> PdbFixups;
 154    :  
 155    :  const char kJumpTable[] = "<jump-table>";
 156    :  const char kCaseTable[] = "<case-table>";
 157    :  
 158    :  // Some helper functions for testing ranges.
           //
           // InRange returns true if |value| lies in the half-open range
           // [lower_bound_incl, lower_bound_incl + length_excl). The lower bound is
           // converted to T1 so that all arithmetic and comparisons happen in the
           // type of |value|.
 159    :  template<typename T1, typename T2, typename T3>
 160  E :  bool InRange(T1 value, T2 lower_bound_incl, T3 length_excl) {
 161  E :    T1 upper_bound_excl = static_cast<T1>(lower_bound_incl) + length_excl;
             // Compare against the T1 upper bound directly. Casting it to T2 could
             // truncate it or change its signedness when T2 differs from T1.
 162    :    return static_cast<T1>(lower_bound_incl) <= value &&
 163  E :        value < upper_bound_excl;
 164  E :  }
           // InRangeIncl returns true if |value| lies in the closed range
           // [lower_bound_incl, lower_bound_incl + length_incl], again evaluated in
           // the type of |value|.
 165    :  template<typename T1, typename T2, typename T3>
 166  E :  bool InRangeIncl(T1 value, T2 lower_bound_incl, T3 length_incl) {
 167  E :    T1 upper_bound_incl = static_cast<T1>(lower_bound_incl) + length_incl;
 168    :    return static_cast<T1>(lower_bound_incl) <= value &&
 169  E :        value <= upper_bound_incl;
 170  E :  }
 171    :  
           // Creates a DIA data source and a DIA session on the PDB at |pdb_path|,
           // then fetches the session's global scope symbol into |global|. Each of
           // the three output parameters must point to a NULL interface pointer on
           // entry (DCHECKed). Returns true on success and false if any step fails.
           // NOTE: |image_file| is not referenced in the body of this function.
 172    :  bool InitializeDia(const PEFile& image_file,
 173    :                     const base::FilePath& pdb_path,
 174    :                     IDiaDataSource** dia_source,
 175    :                     IDiaSession** dia_session,
 176  E :                     IDiaSymbol** global) {
 177  E :    DCHECK(*dia_source == NULL);
 178  E :    DCHECK(*dia_session == NULL);
 179  E :    DCHECK(*global == NULL);
 180    :  
 181  E :    if (!CreateDiaSource(dia_source))
 182  i :      return false;
 183  E :    DCHECK(*dia_source != NULL);
 184    :  
 185    :    // We create the session using the PDB file directly, as we've already
 186    :    // validated that it matches the module.
 187  E :    if (!CreateDiaSession(pdb_path, *dia_source, dia_session))
 188  i :      return false;
 189  E :    DCHECK(*dia_session != NULL);
 190    :  
             // Anything other than S_OK is treated as a failure and logged.
 191  E :    HRESULT hr = (*dia_session)->get_globalScope(global);
 192  E :    if (hr != S_OK) {
 193  i :      LOG(ERROR) << "Failed to get the DIA global scope: "
 194    :                 << com::LogHr(hr) << ".";
 195  i :      return false;
 196    :    }
 197    :  
 198  E :    return true;
 199  E :  }
 200    :  
           // Coarse classification of a PE section, as returned by GetSectionType.
 201    :  enum SectionType {
 202    :    kSectionCode,
 203    :    kSectionData,
 204    :    kSectionUnknown
 205    :  };
 206    :  
 207    :  // Determines the type of a section based on its attributes. This is used to
 208    :  // tag blocks with an appropriate type.
           // The code flag is tested first, so a section whose characteristics match
           // both tests is reported as kSectionCode. A section matching neither
           // IMAGE_SCN_CNT_CODE nor kReadOnlyDataCharacteristics yields
           // kSectionUnknown. |header| must not be NULL (DCHECKed).
 209  E :  SectionType GetSectionType(const IMAGE_SECTION_HEADER* header) {
 210  E :    DCHECK(header != NULL);
 211  E :    if ((header->Characteristics & IMAGE_SCN_CNT_CODE) != 0)
 212  E :      return kSectionCode;
 213  E :    if ((header->Characteristics & kReadOnlyDataCharacteristics) != 0)
 214  E :      return kSectionData;
 215  i :    return kSectionUnknown;
 216  E :  }
 217    :  
 218    :  // Given a compiland, returns its compiland details.
           // |compiland| must be a SymTagCompiland symbol and |*compiland_details| must
           // be NULL on entry (both DCHECKed). Returns false, without logging, when
           // the compiland has no compiland details children; otherwise stores the
           // first one in |*compiland_details| and returns true. DIA call results are
           // only verified via DCHECK.
 219    :  bool GetCompilandDetailsForCompiland(IDiaSymbol* compiland,
 220  E :                                       IDiaSymbol** compiland_details) {
 221  E :    DCHECK(compiland != NULL);
 222  E :    DCHECK(compiland_details != NULL);
 223  E :    DCHECK(IsSymTag(compiland, SymTagCompiland));
 224  E :    DCHECK(*compiland_details == NULL);
 225    :  
 226    :    // Get the enumeration of compiland details.
 227  E :    ScopedComPtr<IDiaEnumSymbols> enum_symbols;
 228    :    HRESULT hr = compiland->findChildren(SymTagCompilandDetails, NULL, 0,
 229  E :                                         enum_symbols.Receive());
 230  E :    DCHECK_EQ(S_OK, hr);
 231    :  
 232    :    // We expect there to be compiland details. For compilands built by
 233    :    // non-standard toolchains, there usually aren't any.
 234  E :    LONG count = 0;
 235  E :    hr = enum_symbols->get_Count(&count);
 236  E :    DCHECK_EQ(S_OK, hr);
 237  E :    if (count == 0) {
 238    :      // We don't log here because we see this quite often.
 239  i :      return false;
 240    :    }
 241    :  
 242    :    // We do sometimes encounter more than one compiland detail. In fact, for
 243    :    // import and export tables we get one compiland detail per table entry.
 244    :    // They are all marked as having been generated by the linker, so using the
 245    :    // first one is sufficient.
 246    :  
 247    :    // Get the compiland details.
 248  E :    ULONG fetched = 0;
 249  E :    hr = enum_symbols->Next(1, compiland_details, &fetched);
 250  E :    DCHECK_EQ(S_OK, hr);
 251  E :    DCHECK_EQ(1u, fetched);
 252    :  
 253  E :    return true;
 254  E :  }
 255    :  
 256    :  // Stores information regarding known compilers.
 257    :  struct KnownCompilerInfo {
             // Name that is compared against the compiler name reported by DIA. It
             // points at a string literal, hence the pointee is const.
 258    :    const wchar_t* compiler_name;
             // Whether compilands built by this compiler are supported.
 259    :    bool supported;
 260    :  };
 261    :  
 262    :  // A list of known compilers, and their status as being supported or not.
           // This is read-only lookup data, so the table itself is const.
 263    :  const KnownCompilerInfo kKnownCompilerInfos[] = {
 264    :    { L"Microsoft (R) Macro Assembler", false },
 265    :    { L"Microsoft (R) Optimizing Compiler", true },
 266    :    { L"Microsoft (R) LINK", false }
 267    :  };
 268    :  
 269    :  // Given a compiland, determines whether the compiler used is one of those that
 270    :  // we whitelist.
           // Returns the |supported| flag of the first kKnownCompilerInfos entry whose
           // name exactly matches the compiler name reported by the compiland details.
           // Returns false when the compiland has no details or the compiler name is
           // not in the table. |compiland| must be a SymTagCompiland symbol (DCHECKed).
 271  E :  bool IsBuiltBySupportedCompiler(IDiaSymbol* compiland) {
 272  E :    DCHECK(compiland != NULL);
 273  E :    DCHECK(IsSymTag(compiland, SymTagCompiland));
 274    :  
 275  E :    ScopedComPtr<IDiaSymbol> compiland_details;
 276    :    if (!GetCompilandDetailsForCompiland(compiland,
 277  E :                                         compiland_details.Receive())) {
 278    :      // If the compiland has no compiland details we assume the compiler is not
 279    :      // supported.
 280  i :      ScopedBstr compiland_name;
 281  i :      if (compiland->get_name(compiland_name.Receive()) == S_OK) {
 282  i :        VLOG(1) << "Compiland has no compiland details: "
 283    :                << com::ToString(compiland_name);
 284    :      }
 285  i :      return false;
 286    :    }
 287  E :    DCHECK(compiland_details.get() != NULL);
 288    :  
 289    :    // Get the compiler name.
             // The result of the call is only verified via DCHECK.
 290  E :    ScopedBstr compiler_name;
 291  E :    HRESULT hr = compiland_details->get_compilerName(compiler_name.Receive());
 292  E :    DCHECK_EQ(S_OK, hr);
 293    :  
 294    :    // Check the compiler name against the list of known compilers.
 295  E :    for (size_t i = 0; i < arraysize(kKnownCompilerInfos); ++i) {
 296  E :      if (::wcscmp(kKnownCompilerInfos[i].compiler_name, compiler_name) == 0) {
 297  E :        return kKnownCompilerInfos[i].supported;
 298    :      }
 299  E :    }
 300    :  
 301    :    // Anything we don't explicitly know about is not supported.
 302  E :    VLOG(1) << "Encountered unknown compiler: " << compiler_name;
 303  E :    return false;
 304  E :  }
 305    :  
 306    :  // Adds an intermediate reference to the provided vector. The vector is
 307    :  // specified as the first parameter (in slight violation of our coding
 308    :  // standards) because this function is intended to be used by Bind.
           // The reference is appended as-is; no validation or de-duplication is done.
           // Always returns true. |references| must not be NULL (DCHECKed).
 309    :  bool AddIntermediateReference(IntermediateReferences* references,
 310    :                                RelativeAddress src_addr,
 311    :                                ReferenceType type,
 312    :                                BlockGraph::Size size,
 313  E :                                RelativeAddress dst_addr) {
 314  E :    DCHECK(references != NULL);
 315  E :    IntermediateReference ref = { src_addr, type, size, dst_addr };
 316  E :    references->push_back(ref);
 317  E :    return true;
 318  E :  }
 319    :  
 320    :  // Create a reference as specified. Ignores existing references if they are of
 321    :  // the exact same type.
           // @param src_addr address at which the reference originates; it must lie in
           //     a block of |image|, and |ref_size| bytes must fit within that block.
           // @param ref_size size of the reference.
           // @param ref_type type of the reference.
           // @param base_addr address used to look up the destination block; it is
           //     stored in the reference as a base offset within that block.
           // @param dst_addr address pointed to; it is stored in the reference as an
           //     offset relative to the start of the destination block.
           // @param image address space in which the blocks are looked up.
           // @returns true if the reference was created or an identical one already
           //     existed; false (after logging) if a block could not be found, the
           //     reference does not fit in its source block, or a different
           //     reference already exists at the same source offset.
 322    :  bool CreateReference(RelativeAddress src_addr,
 323    :                       BlockGraph::Size ref_size,
 324    :                       ReferenceType ref_type,
 325    :                       RelativeAddress base_addr,
 326    :                       RelativeAddress dst_addr,
 327  E :                       BlockGraph::AddressSpace* image) {
 328  E :    DCHECK(image != NULL);
 329    :  
 330    :    // Get the source block and offset, and ensure that the reference fits
 331    :    // within it.
 332  E :    Block* src_block = image->GetBlockByAddress(src_addr);
 333  E :    if (src_block == NULL) {
 334  i :      LOG(ERROR) << "Unable to find block for reference originating at "
 335    :                 << src_addr << ".";
 336  i :      return false;
 337    :    }
 338  E :    RelativeAddress src_block_addr;
 339  E :    CHECK(image->GetAddressOf(src_block, &src_block_addr));
 340  E :    Offset src_block_offset = src_addr - src_block_addr;
 341  E :    if (src_block_offset + ref_size > src_block->size()) {
 342  i :      LOG(ERROR) << "Reference originating at " << src_addr
 343    :                 << " extends beyond block \"" << src_block->name() << "\".";
 344  i :      return false;
 345    :    }
 346    :  
 347    :    // Get the destination block and offset.
             // Note that the block is looked up by |base_addr|, not |dst_addr|.
 348  E :    Block* dst_block = image->GetBlockByAddress(base_addr);
 349  E :    if (dst_block == NULL) {
 350  i :      LOG(ERROR) << "Unable to find block for reference pointing at "
 351    :                  << base_addr << ".";
 352  i :      return false;
 353    :    }
 354  E :    RelativeAddress dst_block_addr;
 355  E :    CHECK(image->GetAddressOf(dst_block, &dst_block_addr));
 356  E :    Offset base = base_addr - dst_block_addr;
 357  E :    Offset offset = dst_addr - dst_block_addr;
 358    :  
 359  E :    Reference ref(ref_type, ref_size, dst_block, offset, base);
 360    :  
 361    :    // Check if a reference already exists at this offset.
 362    :    Block::ReferenceMap::const_iterator ref_it =
 363  E :        src_block->references().find(src_block_offset);
 364  E :    if (ref_it != src_block->references().end()) {
 365    :      // If an identical reference already exists then we're done.
 366  E :      if (ref == ref_it->second)
 367  E :        return true;
 368  i :      LOG(ERROR) << "Block \"" << src_block->name() << "\" has a conflicting "
 369    :                  << "reference at offset " << src_block_offset << ".";
 370  i :      return false;
 371    :    }
 372    :  
 373  E :    CHECK(src_block->SetReference(src_block_offset, ref));
 374    :  
 375  E :    return true;
 376  E :  }
 377    :  
 378    :  // Loads FIXUP and OMAP_FROM debug streams.
           // The FIXUP stream is mandatory: anything other than a successful load
           // makes this return false, and an error is logged when the stream is
           // absent. The OMAP_FROM stream is optional: only an error while reading
           // it causes failure. All pointer arguments must be non-NULL (DCHECKed).
 379    :  bool LoadDebugStreams(IDiaSession* dia_session,
 380    :                        PdbFixups* pdb_fixups,
 381  E :                        OMAPs* omap_from) {
 382  E :    DCHECK(dia_session != NULL);
 383  E :    DCHECK(pdb_fixups != NULL);
 384  E :    DCHECK(omap_from != NULL);
 385    :  
 386    :    // Load the fixups. These must exist.
 387    :    SearchResult search_result = FindAndLoadDiaDebugStreamByName(
 388  E :        kFixupDiaDebugStreamName, dia_session, pdb_fixups);
 389  E :    if (search_result != kSearchSucceeded) {
               // Only the "not found" case is logged here.
 390  i :      if (search_result == kSearchFailed) {
 391  i :        LOG(ERROR) << "PDB file does not contain a FIXUP stream. Module must be "
 392    :                      "linked with '/PROFILE' or '/DEBUGINFO:FIXUP' flag.";
 393    :      }
 394  i :      return false;
 395    :    }
 396    :  
 397    :    // Load the omap_from table. It is not necessary that one exist.
 398    :    search_result = FindAndLoadDiaDebugStreamByName(
 399  E :        kOmapFromDiaDebugStreamName, dia_session, omap_from);
 400  E :    if (search_result == kSearchErrored) {
 401  i :      LOG(ERROR) << "Error trying to read " << kOmapFromDiaDebugStreamName
 402    :                 << " stream.";
 403  i :      return false;
 404    :    }
 405    :  
 406  E :    return true;
 407  E :  }
 408    :  
           // Computes the destination address and reference type of |fixup|.
           // The 32-bit value stored in |image_file| at the fixup's location is read
           // and interpreted according to the fixup type:
           //   - absolute: converted via PEFile::AbsToRelDisplacement.
           //   - PC-relative: added to the address just past the 32-bit value.
           //   - relative: used as a relative address as-is.
           // On success |dst_addr| and |ref_type| are populated and true is returned.
           // Returns false (after logging) if the image cannot be read at the fixup
           // location or the fixup type is not one of the above.
 409    :  bool GetFixupDestinationAndType(const PEFile& image_file,
 410    :                                  const pdb::PdbFixup& fixup,
 411    :                                  RelativeAddress* dst_addr,
 412  E :                                  ReferenceType* ref_type) {
 413  E :    DCHECK(dst_addr != NULL);
 414  E :    DCHECK(ref_type != NULL);
 415    :  
 416  E :    RelativeAddress src_addr(fixup.rva_location);
 417    :  
 418    :    // Get the destination displacement from the actual image itself. We only see
 419    :    // fixups for 32-bit references.
 420  E :    uint32 data = 0;
 421  E :    if (!image_file.ReadImage(src_addr, &data, sizeof(data))) {
 422  i :      LOG(ERROR) << "Unable to read image data for fixup with source address "
 423    :                  << "at " << src_addr << ".";
 424  i :      return false;
 425    :    }
 426    :  
 427    :    // Translate this to a relative displacement value.
 428  E :    switch (fixup.type) {
 429    :      case pdb::PdbFixup::TYPE_ABSOLUTE: {
 430  E :        *ref_type = BlockGraph::ABSOLUTE_REF;
 431  E :        *dst_addr = RelativeAddress(image_file.AbsToRelDisplacement(data));
 432  E :        break;
 433    :      }
 434    :  
 435    :      case pdb::PdbFixup::TYPE_PC_RELATIVE: {
 436  E :        *ref_type = BlockGraph::PC_RELATIVE_REF;
 437  E :        *dst_addr = RelativeAddress(fixup.rva_location) + sizeof(data) + data;
 438  E :        break;
 439    :      }
 440    :  
 441    :      case pdb::PdbFixup::TYPE_RELATIVE: {
 442  E :        *ref_type = BlockGraph::RELATIVE_REF;
 443  E :        *dst_addr = RelativeAddress(data);
 444  E :        break;
 445    :      }
 446    :  
 447    :      default: {
 448  i :        LOG(ERROR) << "Unexpected fixup type (" << fixup.type << ").";
 449  i :        return false;
 450    :      }
 451    :    }
 452    :  
 453  E :    return true;
 454  E :  }
 455    :  
 456    :  // Creates references from the @p pdb_fixups (translating them via the
 457    :  // provided @p omap_from information if it is not empty), all while removing the
 458    :  // corresponding entries from @p reloc_set. If @p reloc_set is not empty after
 459    :  // this then the PDB fixups are out of sync with the image and we are unable to
 460    :  // safely decompose.
 461    :  //
 462    :  // @note This function deliberately ignores fixup information for the resource
 463    :  //     section. This is because chrome.dll gets modified by a manifest tool
 464    :  //     which doesn't update the FIXUPs in the corresponding PDB. They are thus
 465    :  //     out of sync. Even if they were in sync this doesn't harm us as we have no
 466    :  //     need to reach in and modify resource data.
           //
           // @returns true if every fixup was either skipped or turned into a
           //     reference; false (after logging) on an invalid fixup header, a fixup
           //     originating beyond the resource section, a failure to compute or
           //     create a reference, or a reloc that matches a non-absolute fixup.
           //     This function does not itself check that @p reloc_set ends up empty.
 467    :  bool CreateReferencesFromFixupsImpl(
 468    :      const PEFile& image_file,
 469    :      const PdbFixups& pdb_fixups,
 470    :      const OMAPs& omap_from,
 471    :      PEFile::RelocSet* reloc_set,
 472  E :      BlockGraph::AddressSpace* image) {
 473  E :    DCHECK(reloc_set != NULL);
 474  E :    DCHECK(image != NULL);
 475    :  
 476  E :    bool have_omap = !omap_from.empty();
 477  E :    size_t fixups_used = 0;
 478    :  
 479    :    // The resource section in Chrome is modified post-link by a tool that adds a
 480    :    // manifest to it. This causes all of the fixups in the resource section (and
 481    :    // anything beyond it) to be invalid. As long as the resource section is the
 482    :    // last section in the image, this is not a problem (we can safely ignore the
 483    :    // .rsrc fixups, which we know how to parse without them). However, if there
 484    :    // is a section after the resource section, things will have been shifted
 485    :    // and potentially crucial fixups will be invalid.
 486    :    const IMAGE_SECTION_HEADER* rsrc_header = image_file.GetSectionHeader(
 487  E :        kResourceSectionName);
             // Without a resource section both bounds stay at 0xffffffff.
 488  E :    RelativeAddress rsrc_start(0xffffffff);
 489  E :    RelativeAddress rsrc_end(0xffffffff);
 490  E :    if (rsrc_header != NULL) {
 491  E :      rsrc_start = RelativeAddress(rsrc_header->VirtualAddress);
 492  E :      rsrc_end = rsrc_start + rsrc_header->Misc.VirtualSize;
 493    :    }
 494    :  
 495    :    // Ensure the fixups are all valid.
 496  E :    for (size_t i = 0; i < pdb_fixups.size(); ++i) {
 497  E :      if (!pdb_fixups[i].ValidHeader()) {
 498  i :        LOG(ERROR) << "Unknown fixup header: "
 499    :                   << base::StringPrintf("0x%08X.", pdb_fixups[i].header);
 500  i :        return false;
 501    :      }
 502    :  
 503    :      // For now, we skip any offset fixups. We've only seen this in the context
 504    :      // of TLS data access, and we don't mess with TLS structures.
 505  E :      if (pdb_fixups[i].is_offset())
 506  E :        continue;
 507    :  
 508    :      // All fixups we handle should be full size pointers.
 509  E :      DCHECK_EQ(Reference::kMaximumSize, pdb_fixups[i].size());
 510    :  
 511    :      // Get the original addresses, and map them through OMAP information.
 512    :      // Normally DIA takes care of this for us, but there is no API for
 513    :      // getting DIA to give us FIXUP information, so we have to do it manually.
 514  E :      RelativeAddress src_addr(pdb_fixups[i].rva_location);
 515  E :      RelativeAddress base_addr(pdb_fixups[i].rva_base);
 516  E :      if (have_omap) {
 517  i :        src_addr = pdb::TranslateAddressViaOmap(omap_from, src_addr);
 518  i :        base_addr = pdb::TranslateAddressViaOmap(omap_from, base_addr);
 519    :      }
 520    :  
 521    :      // If the reference originates beyond the .rsrc section then we can't
 522    :      // trust it.
 523  E :      if (src_addr >= rsrc_end) {
 524  i :        LOG(ERROR) << "Found fixup originating beyond .rsrc section.";
 525  i :        return false;
 526    :      }
 527    :  
 528    :      // If the reference originates from a part of the .rsrc section, ignore it.
 529  E :      if (src_addr >= rsrc_start)
 530  E :        continue;
 531    :  
 532    :      // Get the relative address/displacement of the fixup. This logs on failure.
 533  E :      RelativeAddress dst_addr;
 534  E :      ReferenceType type = BlockGraph::RELATIVE_REF;
 535    :      if (!GetFixupDestinationAndType(image_file, pdb_fixups[i], &dst_addr,
 536  E :                                      &type)) {
 537  i :        return false;
 538    :      }
 539    :  
 540    :      // Finally, create the reference. This logs verbosely for us on failure.
 541    :      if (!CreateReference(src_addr, Reference::kMaximumSize, type, base_addr,
 542  E :                           dst_addr, image)) {
 543  i :        return false;
 544    :      }
 545    :  
 546    :      // Remove this reference from the relocs.
 547  E :      PEFile::RelocSet::iterator reloc_it = reloc_set->find(src_addr);
 548  E :      if (reloc_it != reloc_set->end()) {
 549    :        // We should only find a reloc if the fixup was of absolute type.
 550  E :        if (type != BlockGraph::ABSOLUTE_REF) {
 551  i :          LOG(ERROR) << "Found a reloc corresponding to a non-absolute fixup.";
 552  i :          return false;
 553    :        }
 554    :  
 555  E :        reloc_set->erase(reloc_it);
 556    :      }
 557    :  
               // Skipped fixups (offset fixups and .rsrc fixups) are not counted.
 558  E :      ++fixups_used;
 559  E :    }
 560    :  
 561  E :    LOG(INFO) << "Used " << fixups_used << " of " << pdb_fixups.size() << ".";
 562    :  
 563  E :    return true;
 564  E :  }
 565    :  
           // Retrieves the length of the type associated with |symbol|.
           // |*length| is set to 0 up front and is left at 0 (with a true return)
           // when the symbol has no type information. Returns false (after logging)
           // if the type or its length cannot be retrieved. The 64-bit length reported
           // by DIA is narrowed to size_t; it is expected to fit in 32 bits (DCHECKed).
 566  E :  bool GetDataSymbolSize(IDiaSymbol* symbol, size_t* length) {
 567  E :    DCHECK(symbol != NULL);
 568  E :    DCHECK(length != NULL);
 569    :  
 570  E :    *length = 0;
 571  E :    ScopedComPtr<IDiaSymbol> type;
 572  E :    HRESULT hr = symbol->get_type(type.Receive());
 573    :    // This happens if the symbol has no type information.
 574  E :    if (hr == S_FALSE)
 575  E :      return true;
 576  E :    if (hr != S_OK) {
 577  i :      LOG(ERROR) << "Failed to get type symbol: " << com::LogHr(hr) << ".";
 578  i :      return false;
 579    :    }
 580    :  
 581  E :    ULONGLONG ull_length = 0;
 582  E :    hr = type->get_length(&ull_length);
 583  E :    if (hr != S_OK) {
 584  i :      LOG(ERROR) << "Failed to retrieve type length properties: "
 585    :                 << com::LogHr(hr) << ".";
 586  i :      return false;
 587    :    }
 588  E :    DCHECK_LE(ull_length, 0xFFFFFFFF);
 589  E :    *length = static_cast<size_t>(ull_length);
 590    :  
 591  E :    return true;
 592  E :  }
 593    :  
           // Maps a scope-related symbol tag to the attributes and name of the label
           // that represents it:
           //   - SymTagFuncDebugStart -> DEBUG_START_LABEL, "<debug-start>"
           //   - SymTagFuncDebugEnd   -> DEBUG_END_LABEL, "<debug-end>"
           //   - SymTagBlock          -> SCOPE_START_LABEL, "<scope-start-N>", where
           //                             N is |scope_count|.
           // Returns true and populates |attr| and |name| for these tags. Returns
           // false, leaving the outputs untouched, for any other tag.
 594    :  bool ScopeSymTagToLabelProperties(enum SymTagEnum sym_tag,
 595    :                                    size_t scope_count,
 596    :                                    BlockGraph::LabelAttributes* attr,
 597  E :                                    std::string* name) {
 598  E :    DCHECK(attr != NULL);
 599  E :    DCHECK(name != NULL);
 600    :  
 601  E :    switch (sym_tag) {
 602    :      case SymTagFuncDebugStart: {
 603  E :        *attr = BlockGraph::DEBUG_START_LABEL;
 604  E :        *name = "<debug-start>";
 605  E :        return true;
 606    :      }
 607    :      case SymTagFuncDebugEnd: {
 608  E :        *attr = BlockGraph::DEBUG_END_LABEL;
 609  E :        *name = "<debug-end>";
 610  E :        return true;
 611    :      }
 612    :      case SymTagBlock: {
 613  E :        *attr = BlockGraph::SCOPE_START_LABEL;
                 // |scope_count| is a size_t; cast it so that the argument type
                 // matches the %d conversion.
 614  E :        *name = base::StringPrintf("<scope-start-%d>",
                                            static_cast<int>(scope_count));
 615  E :        return true;
 616    :      }
 617    :      default:
 618  i :        return false;
 619    :    }
 620  i :    return false;
 621  E :  }
 622    :  
           // Attaches a label with the given |name| and |label_attributes| to |block|
           // at |offset|. If a label already exists at that offset the two are merged:
           // |name| is appended to the existing name, separated by
           // NewDecomposer::kLabelNameSep, unless it is already one of the names in
           // the existing label, and the attributes are OR-ed together. Always
           // returns true; internal failures are CHECKed.
 623    :  bool AddLabelToBlock(Offset offset,
 624    :                       const base::StringPiece& name,
 625    :                       BlockGraph::LabelAttributes label_attributes,
 626  E :                       Block* block) {
 627  E :    DCHECK(block != NULL);
 628    :  
 629    :    // It is possible for labels to be attached to the first byte past a block
 630    :    // (things like debug end, scope end, etc). It is up to the caller to be more
 631    :    // strict about the offset if need be.
 632  E :    DCHECK_LE(0, offset);
 633  E :    DCHECK_LE(offset, static_cast<Offset>(block->size()));
 634    :  
 635    :    // Try to create the label.
 636  E :    if (block->SetLabel(offset, name, label_attributes))
 637  E :      return true;
 638    :  
 639    :    // If we get here there's an already existing label. Update it.
 640  E :    BlockGraph::Label label;
 641  E :    CHECK(block->GetLabel(offset, &label));
 642    :  
 643    :    // Merge the names if this isn't a repeated name.
             // The existing name is split on the separator so that |name| is compared
             // against each previously merged name individually.
 644  E :    std::string name_str = name.as_string();
 645  E :    std::string new_name = label.name();
 646  E :    std::vector<std::string> names;
 647    :    base::SplitStringUsingSubstr(label.name(), NewDecomposer::kLabelNameSep,
 648  E :                                 &names);
 649  E :    if (std::find(names.begin(), names.end(), name_str) == names.end()) {
 650  E :      names.push_back(name_str);
 651  E :      new_name.append(NewDecomposer::kLabelNameSep);
 652  E :      new_name.append(name_str);
 653    :    }
 654    :  
 655    :    // Merge the attributes.
 656    :    BlockGraph::LabelAttributes new_label_attr = label.attributes() |
 657  E :        label_attributes;
 658    :  
 659    :    // Update the label.
             // The old label is removed first and the merged one set in its place.
 660  E :    label = BlockGraph::Label(new_name, new_label_attr);
 661  E :    CHECK(block->RemoveLabel(offset));
 662  E :    CHECK(block->SetLabel(offset, label));
 663    :  
 664  E :    return true;
 665  E :  }
 666    :  
 667    :  // Reads the linker module symbol stream from the given PDB file. This should
 668    :  // always exist as the last module.
           // Returns the symbol stream of the linker module on success. Returns a NULL
           // reference (after logging) if the DBI stream is missing or cannot be
           // parsed, if it contains no modules, if the last module is not named
           // "* Linker *", or if its symbol stream cannot be opened.
 669    :  scoped_refptr<pdb::PdbStream> GetLinkerSymbolStream(
 670  E :      const pdb::PdbFile& pdb_file) {
 671    :    static const char kLinkerModuleName[] = "* Linker *";
 672    :  
 673    :    scoped_refptr<pdb::PdbStream> dbi_stream =
 674  E :        pdb_file.GetStream(pdb::kDbiStream);
 675  E :    if (dbi_stream.get() == NULL) {
 676  i :      LOG(ERROR) << "PDB does not contain a DBI stream.";
 677  i :      return NULL;
 678    :    }
 679    :  
 680  E :    pdb::DbiStream dbi;
 681  E :    if (!dbi.Read(dbi_stream.get())) {
 682  i :      LOG(ERROR) << "Unable to parse DBI stream.";
 683  i :      return NULL;
 684    :    }
 685    :  
 686  E :    if (dbi.modules().empty()) {
 687  i :      LOG(ERROR) << "DBI stream contains no modules.";
 688  i :      return NULL;
 689    :    }
 690    :  
 691    :    // The last module has always been observed to be the linker module.
 692  E :    const pdb::DbiModuleInfo& linker = dbi.modules().back();
 693  E :    if (linker.module_name() != kLinkerModuleName) {
 694  i :      LOG(ERROR) << "Last module is not the linker module.";
 695  i :      return NULL;
 696    :    }
 697    :  
 698    :    scoped_refptr<pdb::PdbStream> symbols = pdb_file.GetStream(
 699  E :        linker.module_info_base().stream);
 700  E :    if (symbols.get() == NULL) {
 701  i :      LOG(ERROR) << "Unable to open linker symbol stream.";
 702  i :      return NULL;
 703    :    }
 704    :  
 705  E :    return symbols;
 706  E :  }
 707    :  
 708    :  // Parses a symbol from a PDB symbol stream. The @p buffer is populated with the
 709    :  // data and upon success this returns the symbol directly cast onto the
 710    :  // @p buffer data. On failure this returns NULL.
           // The returned pointer aliases the storage of @p buffer, so it is only
           // usable while @p buffer is alive and unmodified. @p buffer is cleared
           // before anything else happens. Fails if @p symbol_length is smaller than
           // sizeof(SymbolType) or if the read from @p stream fails.
 711    :  template<typename SymbolType>
 712    :  const SymbolType* ParseSymbol(uint16 symbol_length,
 713    :                                pdb::PdbStream* stream,
 714  E :                                std::vector<uint8>* buffer) {
 715  E :    DCHECK(stream != NULL);
 716  E :    DCHECK(buffer != NULL);
 717    :  
 718  E :    buffer->clear();
 719    :  
             // The record must be at least as large as the struct it is viewed as.
 720  E :    if (symbol_length < sizeof(SymbolType)) {
 721  i :      LOG(ERROR) << "Symbol too small for casting.";
 722  i :      return NULL;
 723    :    }
 724    :  
 725  E :    if (!stream->Read(buffer, symbol_length)) {
 726  i :      LOG(ERROR) << "Failed to read symbol.";
 727  i :      return NULL;
 728    :    }
 729    :  
 730  E :    return reinterpret_cast<const SymbolType*>(buffer->data());
 731  E :  }
 732    :  
 733    :  bool VisitNonControlFlowInstruction(const _DInst& instr,
 734    :                                      AbsoluteAddress block_addr,
 735    :                                      AbsoluteAddress instr_addr,
 736  E :                                      Block* block) {
 737  E :    DCHECK_NE(0u, block_addr.value());
 738  E :    DCHECK_NE(0u, instr_addr.value());
 739  E :    DCHECK_LE(block_addr, instr_addr);
 740  E :    DCHECK(block != NULL);
 741    :  
 742    :    // TODO(chrisha): We could walk the operands and follow references
 743    :    //     explicitly. If any of them are of reference type and there's no
 744    :    //     matching reference, this would be cause to blow up and die (we
 745    :    //     should get all of these as relocs and/or fixups).
 746    :  
 747  E :    Offset instr_offset = instr_addr - block_addr;
 748    :    Block::ReferenceMap::const_iterator ref_it =
 749  E :        block->references().upper_bound(instr_offset);
 750    :    Block::ReferenceMap::const_iterator ref_end =
 751  E :        block->references().lower_bound(instr_offset + instr.size);
 752    :  
 753  E :    for (; ref_it != ref_end; ++ref_it) {
 754  E :      const Block* ref_block = ref_it->second.referenced();
 755    :  
 756    :      // We only care about inter-block references.
 757  E :      if (ref_block == block)
 758  E :        continue;
 759    :  
 760    :      // There should be no cross-block references to the middle of other
 761    :      // code blocks (to the top is fine, as we could be passing around a
 762    :      // function pointer). The exception is if the remote block is not
 763    :      // generated by cl.exe. In this case, there could be arbitrary labels
 764    :      // that act like functions within the body of that block, and referring
 765    :      // to them is perfectly fine.
 766    :      if (ref_block->type() == BlockGraph::CODE_BLOCK &&
 767    :          ref_it->second.base() != 0 &&
 768  E :          (block->attributes() & BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER)) {
 769  i :        block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
 770  i :        LOG(WARNING) << "Found a non-control-flow code-block to "
 771    :                     << "middle-of-code-block reference from "
 772    :                     << BlockInfo(block, block_addr) << " to "
 773    :                     << BlockInfo(ref_block) << ".";
 774  i :        return true;
 775    :      }
 776  E :    }
 777    :  
 778  E :    return true;
 779  E :  }
 780    :  
 781    :  bool VisitPcRelativeControlFlowInstruction(bool create_missing_refs,
 782    :                                             const _DInst& instr,
 783    :                                             AbsoluteAddress image_addr,
 784    :                                             AbsoluteAddress block_addr,
 785    :                                             AbsoluteAddress instr_addr,
 786    :                                             BlockGraph::AddressSpace* image,
 787  E :                                             Block* block) {
 788  E :    DCHECK_NE(0u, image_addr.value());
 789  E :    DCHECK_NE(0u, block_addr.value());
 790  E :    DCHECK_NE(0u, instr_addr.value());
 791  E :    DCHECK_LT(image_addr, block_addr);
 792  E :    DCHECK_LE(block_addr, instr_addr);
 793  E :    DCHECK(image != NULL);
 794  E :    DCHECK(block != NULL);
 795    :  
 796  E :    int fc = META_GET_FC(instr.meta);
 797  E :    DCHECK(fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH);
 798  E :    DCHECK_EQ(O_PC, instr.ops[0].type);
 799  E :    DCHECK_EQ(O_NONE, instr.ops[1].type);
 800  E :    DCHECK_EQ(O_NONE, instr.ops[2].type);
 801  E :    DCHECK_EQ(O_NONE, instr.ops[3].type);
 802    :    DCHECK(instr.ops[0].size == 8 ||
 803    :           instr.ops[0].size == 16 ||
 804  E :           instr.ops[0].size == 32);
 805    :  
 806    :    // Distorm gives us size in bits, we want bytes.
 807  E :    BlockGraph::Size size = instr.ops[0].size / 8;
 808    :  
 809    :    // Get the reference's address. Note we assume it's in the instruction's
 810    :    // tail end - I don't know of a case where a PC-relative offset in a branch
 811    :    // or call is not the very last thing in an x86 instruction.
 812  E :    AbsoluteAddress abs_src = instr_addr + instr.size - size;
 813    :    AbsoluteAddress abs_dst = instr_addr + instr.size +
 814  E :        static_cast<size_t>(instr.imm.addr);
 815  E :    RelativeAddress rel_dst(abs_dst.value() - image_addr.value());
 816  E :    Offset offset_src = abs_src - block_addr;
 817    :  
 818  E :    Block* dst_block = block;
 819  E :    RelativeAddress dst_block_addr(block_addr.value() - image_addr.value());
 820    :  
 821    :    // Is the reference to something outside this block?
 822  E :    if (abs_dst < block_addr || abs_dst >= block_addr + block->size()) {
 823    :      // Short PC-relative references should be to this block, otherwise this
 824    :      // block is not MSVC-like.
 825  E :      if (size < Reference::kMaximumSize) {
 826  i :        block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
 827  i :        Offset offset_instr = instr_addr - block_addr;
 828  i :        LOG(WARNING) << "Found a " << size << "-byte PC-relative instruction to "
 829    :                     << "an external " << abs_dst << " at offset "
 830    :                     << offset_instr << " of " << BlockInfo(block, block_addr)
 831    :                     << ".";
 832  i :        return true;
 833  i :      } else {
 834    :        // Long PC-relative references to other blocks should have been given to
 835    :        // us via FIXUPs, otherwise we risk breaking the world when moving blocks
 836    :        // around!
 837  E :        if (block->references().find(offset_src) == block->references().end()) {
 838  i :          LOG(ERROR) << "Missing fixup for a " << size << "-byte PC-relative "
 839    :                     << "reference to " << abs_dst << " at offset "
 840    :                     << offset_src << " of " << BlockInfo(block, block_addr)
 841    :                     << ".";
 842  i :          return false;
 843    :        }
 844    :      }
 845    :  
 846    :      // Find the destination block and its address.
 847  E :      dst_block = image->GetContainingBlock(rel_dst, 1);
 848  E :      CHECK(image->GetAddressOf(dst_block, &dst_block_addr));
 849  E :      if (dst_block == NULL) {
 850  i :        LOG(ERROR) << "Found a " << size << "-byte PC-relative reference to a "
 851    :                   << abs_dst << " outside of the image at offset "
 852    :                   << offset_src << " of " << BlockInfo(block, block_addr) << ".";
 853  i :        return false;
 854    :      }
 855    :    }
 856    :  
 857    :    // Create the missing reference if need be. These are found by basic-block
 858    :    // disassembly so aren't strictly needed, but are useful debug information.
 859  E :    if (!create_missing_refs)
 860  E :      return true;
 861    :  
 862  E :    Offset offset_dst = rel_dst - dst_block_addr;
 863    :    Reference ref(BlockGraph::PC_RELATIVE_REF, size, dst_block, offset_dst,
 864  E :                  offset_dst);
 865  E :    block->SetReference(offset_src, ref);
 866    :  
 867  E :    return true;
 868  E :  }
 869    :  
 870    :  bool VisitInstruction(bool create_missing_refs,
 871    :                        const _DInst& instr,
 872    :                        AbsoluteAddress image_addr,
 873    :                        AbsoluteAddress block_addr,
 874    :                        AbsoluteAddress instr_addr,
 875    :                        BlockGraph::AddressSpace* image,
 876  E :                        Block* block) {
 877  E :    DCHECK_NE(0u, image_addr.value());
 878  E :    DCHECK_NE(0u, block_addr.value());
 879  E :    DCHECK_NE(0u, instr_addr.value());
 880  E :    DCHECK_LT(image_addr, block_addr);
 881  E :    DCHECK_LE(block_addr, instr_addr);
 882  E :    DCHECK(image != NULL);
 883  E :    DCHECK(block != NULL);
 884    :  
 885  E :    int fc = META_GET_FC(instr.meta);
 886    :  
 887  E :    if (fc == FC_NONE) {
 888    :      return VisitNonControlFlowInstruction(
 889  E :          instr, block_addr, instr_addr, block);
 890    :    }
 891    :  
 892    :    if ((fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH) &&
 893  E :        instr.ops[0].type == O_PC) {
 894    :      return VisitPcRelativeControlFlowInstruction(create_missing_refs,
 895  E :          instr, image_addr, block_addr, instr_addr, image, block);
 896    :    }
 897    :  
 898  E :    return true;
 899  E :  }
 900    :  
 901    :  bool DisassembleCodeBlockAndLabelData(bool create_missing_refs,
 902    :                                        AbsoluteAddress image_addr,
 903    :                                        AbsoluteAddress block_addr,
 904    :                                        BlockGraph::AddressSpace* image,
 905  E :                                        Block* block) {
 906  E :    DCHECK(image != NULL);
 907  E :    DCHECK(block != NULL);
 908  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
 909    :  
 910    :    // We simultaneously walk through the block's references while disassembling
 911    :    // instructions. This is used to determine when (if) data starts. MSVC
 912    :    // always places jump tables first, which consist of absolute references.
 913  E :    const Block::ReferenceMap& ref_map(block->references());
 914  E :    Block::ReferenceMap::const_iterator ref_it = ref_map.begin();
 915    :  
 916    :    // We keep track of any self-references. If the block contains data these
 917    :    // are used as beginning points of tables. We rely on the sorted nature of
 918    :    // std::set when using these later on.
 919  E :    std::set<Offset> self_refs;
 920    :  
 921  E :    const uint8* data = block->data();
 922  E :    const uint8* data_end = block->data() + block->data_size();
 923    :  
 924    :    // If some of the data in this block is implicit then make it explicit for
 925    :    // ease of decoding.
 926  E :    std::vector<uint8> data_copy;
 927  E :    if (block->data_size() < block->size()) {
 928  i :      data_copy.resize(block->size(), 0);
 929  i :      ::memcpy(data_copy.data(), block->data(), block->data_size());
 930  i :      data = data_copy.data();
 931  i :      data_end = data + data_copy.size();
 932    :    }
 933    :  
 934    :    // Decode instructions one by one.
 935  E :    AbsoluteAddress addr(block_addr);
 936  E :    Offset offset = 0;
 937  E :    while (true) {
 938    :      // Stop the disassembly if we're at the end of the data.
 939  E :      if (data == data_end)
 940  E :        return true;
 941    :  
 942  E :      if (ref_it != ref_map.end()) {
 943    :        // Step past any references.
 944  E :        while (ref_it != ref_map.end() && ref_it->first < offset)
 945  E :          ++ref_it;
 946    :  
 947    :        // Stop the disassembly if the next byte is data. Namely, it coincides
 948    :        // with a reference.
 949  E :        if (ref_it->first == offset)
 950  E :          break;
 951    :      }
 952    :  
 953    :      // If we can't decode an instruction then we mark the block as not safe
 954    :      // for disassembly.
 955  E :      _DInst inst = { 0 };
 956    :      if (!core::DecodeOneInstruction(addr.value(), data, data_end - data,
 957  E :                                      &inst)) {
 958  i :        block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
 959  i :        VLOG(1) << "Unable to decode instruction at offset " << offset
 960    :                << " of " << BlockInfo(block, block_addr) << ".";
 961  i :        return true;
 962    :      }
 963    :  
 964    :      // Visit the instruction itself. This validates that the instruction is of
 965    :      // a type we expect to encounter, and may also cause internal references to
 966    :      // be created.
 967    :      if (!VisitInstruction(create_missing_refs, inst, image_addr, block_addr,
 968  E :                            addr, image, block)) {
 969  i :        return false;
 970    :      }
 971    :  
 972    :      // Step past the instruction.
 973  E :      addr += inst.size;
 974  E :      data += inst.size;
 975  E :      offset += inst.size;
 976    :  
 977    :      // References to data are by absolute pointer, for which we always receive
 978    :      // a reloc/fixup, thus no need to parse the instruction. Moreover, ref_it
 979    :      // points to the first reference after the beginning of the instruction at
 980    :      // this point.
 981  E :      if (ref_it != ref_map.end() && ref_it->first < offset) {
 982    :        // The reference should be wholly contained in the instruction.
 983  E :        if (static_cast<Offset>(ref_it->first + ref_it->second.size()) > offset) {
 984  i :          block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
 985  i :          VLOG(1) << "Unexpected reference in instruction at offset "
 986    :                  << ref_it->first << " of " << BlockInfo(block, block_addr)
 987    :                  << ".";
 988  i :          return true;
 989    :        }
 990    :  
 991    :        // Store self-references to locations beyond our current cursor.
 992    :        if (ref_it->second.referenced() == block &&
 993  E :            ref_it->second.offset() > offset) {
 994  E :          self_refs.insert(ref_it->second.offset());
 995    :        }
 996    :  
 997  E :        ++ref_it;
 998    :      }
 999  E :    }
1000    :  
1001    :    // If we get here then we've encountered data. We need to label data
1002    :    // sections as appropriate.
1003    :  
1004  E :    bool data_label_added = false;
1005  E :    Offset end_of_code_offset = offset;
1006    :  
1007  E :    std::set<Offset>::const_iterator off_it = self_refs.begin();
1008  E :    for (; off_it != self_refs.end(); ++off_it) {
1009  E :      Offset referred_offset = *off_it;
1010    :  
1011    :      // References to data must be beyond the decoded instructions.
1012  E :      if (referred_offset < end_of_code_offset)
1013  E :        continue;
1014    :  
1015    :      // Determine if this offset points at another reference.
1016  E :      bool ref_at_offset = false;
1017  E :      if (ref_it != ref_map.end()) {
1018    :        // Step past any references.
1019  E :        while (ref_it != ref_map.end() && ref_it->first < referred_offset)
1020  E :          ++ref_it;
1021    :  
1022    :        // Stop the disassembly if the next byte is data. Namely, it coincides
1023    :        // with a reference.
1024  E :        if (ref_it->first == referred_offset)
1025  E :          ref_at_offset = true;
1026    :      }
1027    :  
1028    :      // Build and set the data label.
1029  E :      BlockGraph::LabelAttributes attr = BlockGraph::DATA_LABEL;
1030  E :      const char* name = NULL;
1031  E :      if (ref_at_offset) {
1032  E :        name = kJumpTable;
1033  E :        attr |= BlockGraph::JUMP_TABLE_LABEL;
1034  E :      } else {
1035  E :        name = kCaseTable;
1036  E :        attr |= BlockGraph::CASE_TABLE_LABEL;
1037    :      }
1038  E :      if (!AddLabelToBlock(referred_offset, name, attr, block))
1039  i :        return false;
1040  E :      data_label_added = true;
1041  E :    }
1042    :  
1043  E :    if (!data_label_added) {
1044  i :      block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
1045  i :      VLOG(1) << "Disassembled into data but found no references to it for "
1046    :              << BlockInfo(block, block_addr) << ".";
1047  i :      return true;
1048    :    }
1049    :  
1050  E :    return true;
1051  E :  }
1052    :  
1053    :  bool JumpAndCaseTableAlreadyLabelled(const Block* block,
1054    :                                       Offset offset,
1055  E :                                       BlockGraph::LabelAttributes attr) {
1056  E :    DCHECK(block != NULL);
1057    :  
1058    :    // We can't say anything about blocks that we were not able to disassemble.
1059  E :    if (block->attributes() & BlockGraph::ERRORED_DISASSEMBLY)
1060  i :      return true;
1061    :  
1062  E :    BlockGraph::Label label;
1063  E :    if (!block->GetLabel(offset, &label)) {
1064  i :      LOG(ERROR) << "Expected data label at offset " << offset << " of "
1065    :                 << BlockInfo(block) << ".";
1066  i :      return false;
1067    :    }
1068    :  
1069  E :    if ((label.attributes() & attr) == attr)
1070  E :      return true;
1071    :  
1072  i :    LOG(ERROR) << "Label at offset " << offset << " of " << BlockInfo(block)
1073    :               << " has attributes "
1074    :               << BlockGraph::BlockAttributesToString(block->attributes())
1075    :               << " but expected at least "
1076    :               << BlockGraph::BlockAttributesToString(attr) << ".";
1077    :  
1078  i :    return false;
1079  E :  }
1080    :  
1081    :  }  // namespace
1082    :  
1083    :  // Separator placed between symbol names. Commas are sometimes seen inside
1084    :  // symbol names but whitespace is not, so the two-character sequence ", "
1085    :  // is a useful separator that is also friendly for humans to read.
1086    :  const char NewDecomposer::kLabelNameSep[] = ", ";
1087    :  
1088    :  // This is by CreateBlocksFromCoffGroups to communicate shared state to
1089    :  // VisitLinkerSymbol via the VisitSymbols helper function.
1090    :  struct NewDecomposer::VisitLinkerSymbolContext {
1091    :    int current_group_index;
1092    :    std::string current_group_prefix;
1093    :    RelativeAddress current_group_start;
1094    :  
1095    :    // These are the set of patterns that indicate bracketing groups. They
1096    :    // should match both the opening and the closing symbol, and have at least
1097    :    // one match group returning the common prefix.
1098    :    std::vector<RE> bracketing_groups;
1099    :  
1100  E :    VisitLinkerSymbolContext() : current_group_index(-1) {
1101    :      // Matches groups like: .CRT$XCA -> .CRT$XCZ
1102  E :      bracketing_groups.push_back(RE("(\\.CRT\\$X.)[AZ]"));
1103    :      // Matches groups like: .rtc$IAA -> .rtc$IZZ
1104  E :      bracketing_groups.push_back(RE("(\\.rtc\\$.*)(AA|ZZ)"));
1105    :      // Matches exactly: ATL$__a -> ATL$__z
1106  E :      bracketing_groups.push_back(RE("(ATL\\$__)[az]"));
1107    :      // Matches exactly: .tls -> .tls$ZZZ
1108  E :      bracketing_groups.push_back(RE("(\\.tls)(\\$ZZZ)?"));
1109  E :    }
1110    :  
1111    :   private:
1112    :    DISALLOW_COPY_AND_ASSIGN(VisitLinkerSymbolContext);
1113    :  };
1114    :  
1115    :  NewDecomposer::NewDecomposer(const PEFile& image_file)
1116    :      : image_file_(image_file), parse_debug_info_(true), image_layout_(NULL),
1117  E :        image_(NULL), current_block_(NULL), current_scope_count_(0) {
1118  E :  }
1119    :  
1120  E :  bool NewDecomposer::Decompose(ImageLayout* image_layout) {
1121  E :    DCHECK(image_layout != NULL);
1122    :  
1123    :    // The temporaries should be NULL.
1124  E :    DCHECK(image_layout_ == NULL);
1125  E :    DCHECK(image_ == NULL);
1126    :  
1127    :    // We start by finding the PDB path.
1128  E :    if (!FindAndValidatePdbPath())
1129  E :      return false;
1130  E :    DCHECK(!pdb_path_.empty());
1131    :  
1132    :    // Load the serialized block-graph from the PDB if it exists. This allows
1133    :    // round-trip decomposition.
1134  E :    bool stream_exists = false;
1135    :    if (LoadBlockGraphFromPdb(
1136  E :            pdb_path_, image_file_, image_layout, &stream_exists)) {
1137  E :      return true;
1138  E :    } else if (stream_exists) {
1139    :      // If the stream exists but hasn't been loaded we return an error. At this
1140    :      // point an error message has already been logged if there was one.
1141  i :      return false;
1142    :    }
1143    :  
1144    :    // At this point a full decomposition needs to be performed.
1145  E :    image_layout_ = image_layout;
1146  E :    image_ = &(image_layout->blocks);
1147  E :    bool success = DecomposeImpl();
1148  E :    image_layout_ = NULL;
1149  E :    image_ = NULL;
1150    :  
1151  E :    return success;
1152  E :  }
1153    :  
1154  E :  bool NewDecomposer::FindAndValidatePdbPath() {
1155    :    // Manually find the PDB path if it is not specified.
1156  E :    if (pdb_path_.empty()) {
1157    :      if (!FindPdbForModule(image_file_.path(), &pdb_path_) ||
1158  E :          pdb_path_.empty()) {
1159  i :        LOG(ERROR) << "Unable to find PDB file for module: "
1160    :                   << image_file_.path().value();
1161  i :        return false;
1162    :      }
1163    :    }
1164  E :    DCHECK(!pdb_path_.empty());
1165    :  
1166  E :    if (!file_util::PathExists(pdb_path_)) {
1167  E :      LOG(ERROR) << "Path not found: " << pdb_path_.value();
1168  E :      return false;
1169    :    }
1170    :  
1171  E :    if (!pe::PeAndPdbAreMatched(image_file_.path(), pdb_path_)) {
1172  i :      LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
1173    :                 << "module \"" << image_file_.path().value() << "\".";
1174  i :      return false;
1175    :    }
1176    :  
1177  E :    return true;
1178  E :  }
1179    :  
1180    :  bool NewDecomposer::LoadBlockGraphFromPdbStream(
1181    :      const PEFile& image_file,
1182    :      pdb::PdbStream* block_graph_stream,
1183  E :      ImageLayout* image_layout) {
1184  E :    DCHECK(block_graph_stream != NULL);
1185  E :    DCHECK(image_layout != NULL);
1186  E :    LOG(INFO) << "Reading block-graph and image layout from the PDB.";
1187    :  
1188    :    // Initialize an input archive pointing to the stream.
1189  E :    scoped_refptr<pdb::PdbByteStream> byte_stream = new pdb::PdbByteStream();
1190  E :    if (!byte_stream->Init(block_graph_stream))
1191  i :      return false;
1192  E :    DCHECK(byte_stream.get() != NULL);
1193    :  
1194  E :    core::ScopedInStreamPtr pdb_in_stream;
1195    :    pdb_in_stream.reset(core::CreateByteInStream(
1196  E :        byte_stream->data(), byte_stream->data() + byte_stream->length()));
1197    :  
1198    :    // Read the header.
1199  E :    uint32 stream_version = 0;
1200  E :    unsigned char compressed = 0;
1201    :    if (!pdb_in_stream->Read(sizeof(stream_version),
1202    :                             reinterpret_cast<core::Byte*>(&stream_version)) ||
1203    :        !pdb_in_stream->Read(sizeof(compressed),
1204  E :                             reinterpret_cast<core::Byte*>(&compressed))) {
1205  i :      LOG(ERROR) << "Failed to read existing Syzygy block-graph stream header.";
1206  i :      return false;
1207    :    }
1208    :  
1209    :    // Check the stream version.
1210  E :    if (stream_version != pdb::kSyzygyBlockGraphStreamVersion) {
1211  E :      LOG(ERROR) << "PDB contains an unsupported Syzygy block-graph stream"
1212    :                 << " version (got " << stream_version << ", expected "
1213    :                 << pdb::kSyzygyBlockGraphStreamVersion << ").";
1214  E :      return false;
1215    :    }
1216    :  
1217    :    // If the stream is compressed insert the decompression filter.
1218  E :    core::InStream* in_stream = pdb_in_stream.get();
1219  E :    scoped_ptr<core::ZInStream> zip_in_stream;
1220  E :    if (compressed != 0) {
1221  E :      zip_in_stream.reset(new core::ZInStream(in_stream));
1222  E :      if (!zip_in_stream->Init()) {
1223  i :        LOG(ERROR) << "Unable to initialize ZInStream.";
1224  i :        return false;
1225    :      }
1226  E :      in_stream = zip_in_stream.get();
1227    :    }
1228    :  
1229    :    // Deserialize the image-layout.
1230  E :    core::NativeBinaryInArchive in_archive(in_stream);
1231  E :    block_graph::BlockGraphSerializer::Attributes attributes = 0;
1232    :    if (!LoadBlockGraphAndImageLayout(
1233  E :        image_file, &attributes, image_layout, &in_archive)) {
1234  i :      LOG(ERROR) << "Failed to deserialize block-graph and image layout.";
1235  i :      return false;
1236    :    }
1237    :  
1238  E :    return true;
1239  E :  }
1240    :  
1241    :  bool NewDecomposer::LoadBlockGraphFromPdb(const base::FilePath& pdb_path,
1242    :                                            const PEFile& image_file,
1243    :                                            ImageLayout* image_layout,
1244  E :                                            bool* stream_exists) {
1245  E :    DCHECK(image_layout != NULL);
1246  E :    DCHECK(stream_exists != NULL);
1247    :  
1248  E :    pdb::PdbFile pdb_file;
1249  E :    pdb::PdbReader pdb_reader;
1250  E :    if (!pdb_reader.Read(pdb_path, &pdb_file)) {
1251  i :      LOG(ERROR) << "Unable to read the PDB named \"" << pdb_path.value()
1252    :                 << "\".";
1253  i :      return NULL;
1254    :    }
1255    :  
1256    :    // Try to get the block-graph stream from the PDB.
1257  E :    scoped_refptr<pdb::PdbStream> block_graph_stream;
1258    :    if (!pdb::LoadNamedStreamFromPdbFile(pdb::kSyzygyBlockGraphStreamName,
1259    :                                         &pdb_file,
1260    :                                         &block_graph_stream) ||
1261  E :        block_graph_stream.get() == NULL) {
1262  E :      *stream_exists = false;
1263  E :      return false;
1264    :    }
1265  E :    if (block_graph_stream->length() == 0) {
1266  i :      *stream_exists = false;
1267  i :      LOG(WARNING) << "The block-graph stream is empty, ignoring it.";
1268  i :      return false;
1269    :    }
1270    :  
1271    :    // The PDB contains a block-graph stream, the block-graph and the image layout
1272    :    // will be read from this stream.
1273  E :    *stream_exists = true;
1274    :    if (!LoadBlockGraphFromPdbStream(image_file, block_graph_stream.get(),
1275  E :                                     image_layout)) {
1276  i :      return false;
1277    :    }
1278    :  
1279  E :    return true;
1280  E :  }
1281    :  
1282  E :  bool NewDecomposer::DecomposeImpl() {
1283    :    // Instantiate and initialize our Debug Interface Access session. This logs
1284    :    // verbosely for us.
1285  E :    ScopedComPtr<IDiaDataSource> dia_source;
1286  E :    ScopedComPtr<IDiaSession> dia_session;
1287  E :    ScopedComPtr<IDiaSymbol> global;
1288    :    if (!InitializeDia(image_file_, pdb_path_, dia_source.Receive(),
1289  E :                       dia_session.Receive(), global.Receive())) {
1290  i :      return false;
1291    :    }
1292    :  
1293    :    // Copy the image headers to the layout.
1294    :    CopySectionHeadersToImageLayout(
1295    :        image_file_.nt_headers()->FileHeader.NumberOfSections,
1296    :        image_file_.section_headers(),
1297  E :        &(image_layout_->sections));
1298    :  
1299    :    // Create the sections in the underlying block-graph.
1300  E :    if (!CreateBlockGraphSections())
1301  i :      return false;
1302    :  
1303    :    // We scope the first few operations so that we don't keep the intermediate
1304    :    // references around any longer than we have to.
1305    :    {
1306  E :      IntermediateReferences references;
1307    :  
1308    :      // First we parse out the PE blocks.
1309  E :      if (!CreatePEImageBlocksAndReferences(&references))
1310  i :        return false;
1311    :  
1312    :      // Now we parse the COFF group symbols from the linker's symbol stream.
1313    :      // These indicate things like static initializers, which must stay together
1314    :      // in a single block.
1315  E :      if (!CreateBlocksFromCoffGroups())
1316  i :        return false;
1317    :  
1318    :      // Next we parse out section contributions. Some of these may coincide with
1319    :      // existing PE parsed blocks, but when they do we expect them to be exact
1320    :      // collisions.
1321  E :      if (!CreateBlocksFromSectionContribs(dia_session.get()))
1322  i :        return false;
1323    :  
1324    :      // Flesh out the rest of the image with gap blocks.
1325  E :      if (!CreateGapBlocks())
1326  i :        return false;
1327    :  
1328    :      // Finalize the PE-parsed intermediate references.
1329  E :      if (!FinalizeIntermediateReferences(references))
1330  i :        return false;
1331  E :    }
1332    :  
1333    :    // Parse the fixups and use them to create references.
1334  E :    if (!CreateReferencesFromFixups(dia_session.get()))
1335  i :      return false;
1336    :  
1337    :    // Disassemble code blocks and use the results to infer case and jump tables.
1338  E :    if (!DisassembleCodeBlocksAndLabelData())
1339  i :      return false;
1340    :  
1341    :    // Annotate the block-graph with symbol information.
1342  E :    if (parse_debug_info_ && !ProcessSymbols(global.get()))
1343  i :      return false;
1344    :  
1345  E :    return true;
1346  E :  }
1347    :  
1348  E :  bool NewDecomposer::CreateBlockGraphSections() {
1349    :    // Iterate through the image sections, and create sections in the BlockGraph.
1350  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1351  E :    for (size_t i = 0; i < num_sections; ++i) {
1352  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1353  E :      std::string name = pe::PEFile::GetSectionName(*header);
1354    :      BlockGraph::Section* section = image_->graph()->AddSection(
1355  E :          name, header->Characteristics);
1356  E :      DCHECK(section != NULL);
1357    :  
1358    :      // For now, we expect them to have been created with the same IDs as those
1359    :      // in the original image.
1360  E :      if (section->id() != i) {
1361  i :        LOG(ERROR) << "Unexpected section ID.";
1362  i :        return false;
1363    :      }
1364  E :    }
1365    :  
1366  E :    return true;
1367  E :  }
1368    :  
1369    :  bool NewDecomposer::CreatePEImageBlocksAndReferences(
1370  E :      IntermediateReferences* references) {
1371  E :    DCHECK(references != NULL);
1372    :  
1373    :    PEFileParser::AddReferenceCallback add_reference(
1374  E :        base::Bind(&AddIntermediateReference, base::Unretained(references)));
1375  E :    PEFileParser parser(image_file_, image_, add_reference);
1376  E :    PEFileParser::PEHeader header;
1377  E :    if (!parser.ParseImage(&header)) {
1378  i :      LOG(ERROR) << "Unable to parse PE image.";
1379  i :      return false;
1380    :    }
1381    :  
1382  E :    return true;
1383  E :  }
1384    :  // Reads the PDB at pdb_path_ and runs VisitLinkerSymbol over its linker symbol stream. Returns false if the PDB cannot be read, the visit fails, or a bracketed COFF group is still open at the end.
1385  E :  bool NewDecomposer::CreateBlocksFromCoffGroups() {
1386  E :    pdb::PdbFile pdb_file;
1387  E :    pdb::PdbReader pdb_reader;
1388  E :    if (!pdb_reader.Read(pdb_path_, &pdb_file)) {
1389  i :      LOG(ERROR) << "Failed to load PDB: " << pdb_path_.value();
1390  i :      return false;
1391    :    }
1392    :  
1393  E :    scoped_refptr<pdb::PdbStream> symbols = GetLinkerSymbolStream(pdb_file);
1394    :    // NOTE(review): |symbols| is dereferenced below without a NULL check - confirm GetLinkerSymbolStream cannot return NULL.
1395    :    // Process the symbols in the linker module symbol stream.
1396  E :    VisitLinkerSymbolContext context;
1397    :    pdb::VisitSymbolsCallback callback = base::Bind(
1398    :        &NewDecomposer::VisitLinkerSymbol,
1399    :        base::Unretained(this),
1400  E :        base::Unretained(&context));
1401  E :    if (!pdb::VisitSymbols(callback, symbols->length(), true, symbols.get()))
1402  i :      return false;
1403    :  
1404    :    // Bail if we did not encounter a closing bracketing symbol where one was
1405    :    // expected.
1406  E :    if (context.current_group_index != -1) {
1407  i :      LOG(ERROR) << "Unable to close bracketed COFF group \""
1408    :                 << context.current_group_prefix << "\".";
1409  i :      return false;
1410    :    }
1411    :  
1412  E :    return true;
1413  E :  }
1414    :  // Enumerates the DIA section contribution table and, for each contribution outside the resource section, obtains a code or data block via CreateBlockOrFindCoveringPeBlock and tags it SECTION_CONTRIB. Returns false on any failure.
1415  E :  bool NewDecomposer::CreateBlocksFromSectionContribs(IDiaSession* session) {
1416  E :    ScopedComPtr<IDiaEnumSectionContribs> section_contribs;
1417    :    SearchResult search_result = FindDiaTable(session,
1418  E :                                              section_contribs.Receive());
1419  E :    if (search_result != kSearchSucceeded) {
1420  i :      if (search_result == kSearchFailed)
1421  i :        LOG(ERROR) << "No section contribution table found.";
1422  i :      return false;
1423    :    }
1424    :    // Index of the resource section; contributions that land in it are skipped below.
1425  E :    size_t rsrc_id = image_file_.GetSectionIndex(kResourceSectionName);
1426    :  
1427  E :    LONG count = 0;
1428  E :    if (section_contribs->get_Count(&count) != S_OK) {
1429  i :      LOG(ERROR) << "Failed to get section contributions enumeration length.";
1430  i :      return false;
1431    :    }
1432    :  
1433  E :    for (LONG visited = 0; visited < count; ++visited) {
1434  E :      ScopedComPtr<IDiaSectionContrib> section_contrib;
1435  E :      ULONG fetched = 0;
1436  E :      HRESULT hr = section_contribs->Next(1, section_contrib.Receive(), &fetched);
1437    :      // The standard way to end an enumeration (according to the docs) is by
1438    :      // returning S_FALSE and setting fetched to 0. We don't actually see this,
1439    :      // but it wouldn't be an error if we did.
1440  E :      if (hr == S_FALSE && fetched == 0)
1441  i :        break;
1442  E :      if (hr != S_OK) {
1443  i :        LOG(ERROR) << "Failed to get DIA section contribution: "
1444    :                   << com::LogHr(hr) << ".";
1445  i :        return false;
1446    :      }
1447    :      // We actually end up seeing S_OK and fetched == 0 when the enumeration
1448    :      // terminates, which goes against the published documentation.
1449  E :      if (fetched == 0)
1450  i :        break;
1451    :      // Read the contribution's properties; on failure |hr| holds the first result that was not S_OK.
1452  E :      DWORD rva = 0;
1453  E :      DWORD length = 0;
1454  E :      DWORD section_id = 0;
1455  E :      BOOL code = FALSE;
1456  E :      ScopedComPtr<IDiaSymbol> compiland;
1457  E :      ScopedBstr bstr_name;
1458    :      if ((hr = section_contrib->get_relativeVirtualAddress(&rva)) != S_OK ||
1459    :          (hr = section_contrib->get_length(&length)) != S_OK ||
1460    :          (hr = section_contrib->get_addressSection(&section_id)) != S_OK ||
1461    :          (hr = section_contrib->get_code(&code)) != S_OK ||
1462    :          (hr = section_contrib->get_compiland(compiland.Receive())) != S_OK ||
1463  E :          (hr = compiland->get_name(bstr_name.Receive())) != S_OK) {
1464  i :        LOG(ERROR) << "Failed to get section contribution properties: "
1465    :                   << com::LogHr(hr) << ".";
1466  i :        return false;
1467    :      }
1468    :  
1469    :      // Determine if this contribution's compiland was built by a supported compiler.
1470    :      bool is_built_by_supported_compiler =
1471  E :          IsBuiltBySupportedCompiler(compiland.get());
1472    :  
1473    :      // DIA numbers sections from 1 to n, while we do 0 to n - 1.
1474  E :      DCHECK_LT(0u, section_id);
1475  E :      --section_id;
1476    :  
1477    :      // We don't parse the resource section, as it is parsed by the PEFileParser.
1478  E :      if (section_id == rsrc_id)
1479  E :        continue;
1480    :      // The compiland name becomes the block name.
1481  E :      std::string name;
1482  E :      if (!WideToUTF8(bstr_name, bstr_name.Length(), &name)) {
1483  i :        LOG(ERROR) << "Failed to convert compiland name to UTF8.";
1484  i :        return false;
1485    :      }
1486    :  
1487    :      // TODO(chrisha): We see special section contributions with the name
1488    :      //     "* CIL *". These are concatenations of data symbols and can very
1489    :      //     likely be chunked using symbols directly. A cursory visual inspection
1490    :      //     of symbol names hints that these might be related to WPO.
1491    :  
1492    :      // Create the block.
1493    :      BlockType block_type =
1494  E :          code ? BlockGraph::CODE_BLOCK : BlockGraph::DATA_BLOCK;
1495    :      Block* block = CreateBlockOrFindCoveringPeBlock(
1496  E :          block_type, RelativeAddress(rva), length, name);
1497  E :      if (block == NULL) {
1498  i :        LOG(ERROR) << "Unable to create block for compiland \"" << name << "\".";
1499  i :        return false;
1500    :      }
1501    :  
1502    :      // Set the block attributes.
1503  E :      block->set_attribute(BlockGraph::SECTION_CONTRIB);
1504  E :      if (!is_built_by_supported_compiler)
1505  E :        block->set_attribute(BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER);
1506  E :    }
1507    :  
1508  E :    return true;
1509  E :  }
1510    :  // Calls CreateSectionGapBlocks for every code and data section of the image; sections of any other type are skipped. Returns false on the first failure.
1511  E :  bool NewDecomposer::CreateGapBlocks() {
1512  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1513    :  
1514    :    // Iterate through all the image sections.
1515  E :    for (size_t i = 0; i < num_sections; ++i) {
1516  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1517  E :      DCHECK(header != NULL);
1518    :      // Pick the block type for the section; |section_type| is only used in the error message below.
1519  E :      BlockType type = BlockGraph::CODE_BLOCK;
1520  E :      const char* section_type = NULL;
1521  E :      switch (GetSectionType(header)) {
1522    :        case kSectionCode:
1523  E :          type = BlockGraph::CODE_BLOCK;
1524  E :          section_type = "code";
1525  E :          break;
1526    :  
1527    :        case kSectionData:
1528  E :          type = BlockGraph::DATA_BLOCK;
1529  E :          section_type = "data";
1530  E :          break;
1531    :  
1532    :        default:
1533  i :          continue;
1534    :      }
1535    :      // NOTE(review): header->Name is a fixed-size field that need not be NUL-terminated (GetSectionName exists for this) - confirm streaming it directly below is safe.
1536  E :      if (!CreateSectionGapBlocks(header, type)) {
1537  i :        LOG(ERROR) << "Unable to create gap blocks for " << section_type
1538    :                   << " section \"" << header->Name << "\".";
1539  i :        return false;
1540    :      }
1541  E :    }
1542    :  
1543  E :    return true;
1544  E :  }
1545    :  // Passes every entry of |references| to CreateReference against image_, in order. Returns false at the first entry for which CreateReference fails.
1546    :  bool NewDecomposer::FinalizeIntermediateReferences(
1547  E :      const IntermediateReferences& references) {
1548  E :    for (size_t i = 0; i < references.size(); ++i) {
1549    :      // This logs verbosely for us. NOTE(review): dst_addr is passed twice (presumably as both base and destination address) - confirm against CreateReference.
1550    :      if (!CreateReference(references[i].src_addr,
1551    :                           references[i].size,
1552    :                           references[i].type,
1553    :                           references[i].dst_addr,
1554    :                           references[i].dst_addr,
1555  E :                           image_)) {
1556  i :        return false;
1557    :      }
1558  E :    }
1559  E :    return true;
1560  E :  }
1561    :  // Runs DisassembleCodeBlockAndLabelData on every CODE_BLOCK in image_, using the image base read from the NT headers block. Returns false on the first failure.
1562  E :  bool NewDecomposer::DisassembleCodeBlocksAndLabelData() {
1563  E :    DCHECK(image_ != NULL);
1564    :    // The DOS header block is looked up at RVA 0; the NT headers block is derived from it.
1565    :    const BlockGraph::Block* dos_header_block =
1566  E :        image_->GetBlockByAddress(RelativeAddress(0));
1567  E :    DCHECK(dos_header_block != NULL);
1568    :  
1569    :    const BlockGraph::Block* nt_headers_block =
1570  E :        GetNtHeadersBlockFromDosHeaderBlock(dos_header_block);
1571  E :    if (nt_headers_block == NULL) {
1572  i :      LOG(ERROR) << "Unable to get NT headers block for image.";
1573  i :      return false;
1574    :    }
1575    :  
1576    :    // GetNtHeadersBlockFromDosHeaderBlock sanity checks things so we can cast
1577    :    // with impunity.
1578    :    const IMAGE_NT_HEADERS* nt_headers =
1579  E :        reinterpret_cast<const IMAGE_NT_HEADERS*>(nt_headers_block->data());
1580  E :    core::AbsoluteAddress image_base(nt_headers->OptionalHeader.ImageBase);
1581    :  
1582    :    // Walk through the blocks and disassemble each one of them.
1583  E :    BlockGraph::AddressSpace::RangeMapConstIter it = image_->begin();
1584  E :    for (; it != image_->end(); ++it) {
1585  E :      BlockGraph::Block* block = it->second;
1586    :      // Only code blocks are disassembled.
1587  E :      if (block->type() != BlockGraph::CODE_BLOCK)
1588  E :        continue;
1589    :      // Absolute address of the block: the image base plus the start of the block's address range.
1590  E :      core::AbsoluteAddress abs_addr(image_base + it->first.start().value());
1591    :      if (!DisassembleCodeBlockAndLabelData(
1592  E :          parse_debug_info_, image_base, abs_addr, image_, block)) {
1593  i :        return false;
1594    :      }
1595  E :    }
1596    :  
1597  E :    return true;
1598  E :  }
1599    :  // Creates references in image_ from the fixups loaded via LoadDebugStreams and cross-checks them against the image's decoded relocs. Fails if any step fails or if a reloc is left without a matching fixup.
1600  E :  bool NewDecomposer::CreateReferencesFromFixups(IDiaSession* session) {
1601  E :    DCHECK(session != NULL);
1602    :    // Decode the image's relocs; matched entries are removed from this set further down.
1603  E :    PEFile::RelocSet reloc_set;
1604  E :    if (!image_file_.DecodeRelocs(&reloc_set))
1605  i :      return false;
1606    :    // Load the fixups and the |omap_from| data through the DIA session.
1607  E :    OMAPs omap_from;
1608  E :    PdbFixups fixups;
1609  E :    if (!LoadDebugStreams(session, &fixups, &omap_from))
1610  i :      return false;
1611    :  
1612    :    // While creating references from the fixups this removes the
1613    :    // corresponding reference data from the relocs. We use this as a kind of
1614    :    // double-entry bookkeeping to ensure all is well and right in the world.
1615    :    if (!CreateReferencesFromFixupsImpl(image_file_, fixups, omap_from,
1616  E :                                        &reloc_set, image_)) {
1617  i :      return false;
1618    :    }
1619    :    // Anything still in |reloc_set| is a reloc that no fixup accounted for.
1620  E :    if (!reloc_set.empty()) {
1621  i :      LOG(ERROR) << "Found reloc entries without matching FIXUP entries.";
1622  i :      return false;
1623    :    }
1624    :  
1625  E :    return true;
1626  E :  }
1627    :  // Browses the DIA symbols under |root| with a DiaBrowser whose patterns route thunks, functions and their children, labels, data and public symbols to the On*Symbol handlers. Returns the result of Browse.
1628  E :  bool NewDecomposer::ProcessSymbols(IDiaSymbol* root) {
1629  E :    DCHECK(root != NULL);
1630    :    // Bind the handlers. base::Unretained stores a raw |this|; the callbacks are only given to the local |dia_browser|.
1631    :    DiaBrowser::MatchCallback on_push_function_or_thunk_symbol(
1632    :        base::Bind(&NewDecomposer::OnPushFunctionOrThunkSymbol,
1633  E :                   base::Unretained(this)));
1634    :    DiaBrowser::MatchCallback on_pop_function_or_thunk_symbol(
1635    :        base::Bind(&NewDecomposer::OnPopFunctionOrThunkSymbol,
1636  E :                   base::Unretained(this)));
1637    :    DiaBrowser::MatchCallback on_function_child_symbol(
1638    :        base::Bind(&NewDecomposer::OnFunctionChildSymbol,
1639  E :                   base::Unretained(this)));
1640    :    DiaBrowser::MatchCallback on_data_symbol(
1641  E :        base::Bind(&NewDecomposer::OnDataSymbol, base::Unretained(this)));
1642    :    DiaBrowser::MatchCallback on_public_symbol(
1643  E :        base::Bind(&NewDecomposer::OnPublicSymbol, base::Unretained(this)));
1644    :    DiaBrowser::MatchCallback on_label_symbol(
1645  E :        base::Bind(&NewDecomposer::OnLabelSymbol, base::Unretained(this)));
1646    :  
1647  E :    DiaBrowser dia_browser;
1648    :  
1649    :    // Find thunks.
1650    :    dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagThunk),
1651    :                           on_push_function_or_thunk_symbol,
1652  E :                           on_pop_function_or_thunk_symbol);
1653    :  
1654    :    // Find functions and all data, labels, callsites, debug start/end and block
1655    :    // symbols below them. This is done in one single pattern so that the
1656    :    // function pushes/pops happen in the right order.
1657    :    dia_browser.AddPattern(
1658    :        Seq(Opt(SymTagCompiland),
1659    :            Callback(Or(SymTagFunction, SymTagThunk),
1660    :                     on_push_function_or_thunk_symbol,
1661    :                     on_pop_function_or_thunk_symbol),
1662    :            Star(SymTagBlock),
1663    :            Or(SymTagData,
1664    :               SymTagLabel,
1665    :               SymTagBlock,
1666    :               SymTagFuncDebugStart,
1667    :               SymTagFuncDebugEnd,
1668    :               SymTagCallSite)),
1669  E :        on_function_child_symbol);
1670    :  
1671    :    // Global data and code label symbols.
1672    :    dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagLabel),
1673  E :                           on_label_symbol);
1674    :    dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagData),
1675  E :                           on_data_symbol);
1676    :  
1677    :    // Public symbols. These provide decorated names without any type info, but
1678    :    // are useful for debugging.
1679  E :    dia_browser.AddPattern(SymTagPublicSymbol, on_public_symbol);
1680    :  
1681  E :    return dia_browser.Browse(root);
1682  E :  }
1683    :  // Symbol visitor bound in CreateBlocksFromCoffGroups: skips everything but S_COFFGROUP records, and turns each opening/closing pair matched by context->bracketing_groups into one DATA_BLOCK tagged COFF_GROUP. Returning false aborts the visit.
1684    :  bool NewDecomposer::VisitLinkerSymbol(VisitLinkerSymbolContext* context,
1685    :                                        uint16 symbol_length,
1686    :                                        uint16 symbol_type,
1687  E :                                        pdb::PdbStream* stream) {
1688  E :    DCHECK(context != NULL);
1689  E :    DCHECK(stream != NULL);
1690    :    // Symbols of any other type are accepted without being read.
1691  E :    if (symbol_type != cci::S_COFFGROUP)
1692  E :      return true;
1693    :    // |buffer| is presumably the backing storage for |coffgroup| - TODO confirm against ParseSymbol.
1694  E :    std::vector<uint8> buffer;
1695    :    const cci::CoffGroupSym* coffgroup =
1696  E :        ParseSymbol<cci::CoffGroupSym>(symbol_length, stream, &buffer);
1697  E :    if (coffgroup == NULL)
1698  i :      return false;
1699    :    // NOTE(review): coffgroup->seg is not range-checked before it indexes image_layout_->sections - confirm it always lies in [1, sections.size()].
1700    :    // The PDB numbers sections starting at index 1 but we use index 0.
1701    :    RelativeAddress rva(image_layout_->sections[coffgroup->seg - 1].addr +
1702  E :        coffgroup->off);
1703    :  
1704    :    // We are looking for an opening symbol.
1705  E :    if (context->current_group_index == -1) {
1706  E :      for (size_t i = 0; i < context->bracketing_groups.size(); ++i) {
1707  E :        std::string prefix;
1708  E :        if (context->bracketing_groups[i].FullMatch(coffgroup->name, &prefix)) {
1709  E :          context->current_group_index = i;
1710  E :          context->current_group_prefix = prefix;
1711  E :          context->current_group_start = rva;
1712  E :          return true;
1713    :        }
1714  E :      }
1715    :  
1716    :      // No opening symbol was encountered. We can safely ignore this
1717    :      // COFF group symbol.
1718  E :      return true;
1719    :    }
1720    :  
1721    :    // If we get here we've found an opening symbol and we're looking for the
1722    :    // matching closing symbol.
1723  E :    std::string prefix;
1724    :    if (!context->bracketing_groups[context->current_group_index].FullMatch(
1725  E :            coffgroup->name, &prefix)) {
1726  E :      return true;
1727    :    }
1728    :  
1729  E :    if (prefix != context->current_group_prefix) {
1730    :      // We see another symbol open/close while already in an opened symbol.
1731    :      // This indicates nested bracketing information, which we've never seen
1732    :      // before.
1733  i :      LOG(ERROR) << "Encountered nested bracket symbol \"" << prefix
1734    :                 << "\" while in \"" << context->current_group_prefix << "\".";
1735  i :      return false;
1736    :    }
1737    :    // The bracketed range runs from the opening group's start to the end of this closing group.
1738  E :    RelativeAddress end = rva + coffgroup->cb;
1739  E :    DCHECK_LT(context->current_group_start, end);
1740    :  
1741    :    // Create a block for this bracketed COFF group.
1742    :    BlockGraph::Block* block = CreateBlock(
1743    :        BlockGraph::DATA_BLOCK,
1744    :        context->current_group_start,
1745    :        end - context->current_group_start,
1746  E :        base::StringPrintf("Bracketed COFF group: %s", prefix.c_str()));
1747  E :    if (block == NULL) {
1748  i :      LOG(ERROR) << "Failed to create bracketed COFF group \""
1749    :                 << prefix << "\".";
1750  i :      return false;
1751    :    }
1752  E :    block->set_attribute(BlockGraph::COFF_GROUP);
1753    :  
1754    :    // Indicate that this block is closed and we're looking for another opening
1755    :    // bracket symbol.
1756  E :    context->current_group_index = -1;
1757  E :    context->current_group_prefix.clear();
1758  E :    context->current_group_start = RelativeAddress(0);
1759    :  
1760  E :    return true;
1761  E :  }
1762    :  // Push handler for function and thunk symbols: adds a CODE_LABEL for the symbol to the block containing it, records that block in current_block_/current_address_, and sets block attributes from the symbol's properties.
1763    :  DiaBrowser::BrowserDirective NewDecomposer::OnPushFunctionOrThunkSymbol(
1764    :      const DiaBrowser& dia_browser,
1765    :      const DiaBrowser::SymTagVector& sym_tags,
1766  E :      const DiaBrowser::SymbolPtrVector& symbols) {
1767  E :    DCHECK(!symbols.empty());
1768  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1769  E :    DiaBrowser::SymbolPtr symbol = symbols.back();
1770    :    // No function may be in progress here; OnPopFunctionOrThunkSymbol resets this state.
1771  E :    DCHECK(current_block_ == NULL);
1772  E :    DCHECK_EQ(current_address_, RelativeAddress(0));
1773  E :    DCHECK_EQ(0u, current_scope_count_);
1774    :  
1775  E :    HRESULT hr = E_FAIL;
1776  E :    DWORD location_type = LocIsNull;
1777  E :    DWORD rva = 0;
1778  E :    ULONGLONG length = 0;
1779  E :    ScopedBstr name_bstr;
1780    :    if (FAILED(hr = symbol->get_locationType(&location_type)) ||
1781    :        FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
1782    :        FAILED(hr = symbol->get_length(&length)) ||
1783  E :        FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
1784  i :      LOG(ERROR) << "Failed to get function/thunk properties: " << com::LogHr(hr)
1785    :                 << ".";
1786  i :      return DiaBrowser::kBrowserAbort;
1787    :    }
1788    :  
1789    :    // We only care about functions with static storage. We can stop looking at
1790    :    // things below this node, as we won't be able to resolve them either.
1791  E :    if (location_type != LocIsStatic)
1792  i :      return DiaBrowser::kBrowserTerminatePath;
1793    :    // Find the block containing the symbol's start address; a missing block is fatal (CHECK).
1794  E :    RelativeAddress addr(rva);
1795  E :    Block* block = image_->GetBlockByAddress(addr);
1796  E :    CHECK(block != NULL);
1797  E :    RelativeAddress block_addr;
1798  E :    CHECK(image_->GetAddressOf(block, &block_addr));
1799  E :    DCHECK(InRange(addr, block_addr, block->size()));
1800    :  
1801  E :    std::string name;
1802  E :    if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1803  i :      LOG(ERROR) << "Failed to convert function/thunk name to UTF8.";
1804  i :      return DiaBrowser::kBrowserAbort;
1805    :    }
1806    :  
1807    :    // We know the function starts in this block but we need to make sure its
1808    :    // end does not extend past the end of the block.
1809  E :    if (addr + length > block_addr + block->size()) {
1810  i :      LOG(ERROR) << "Got function/thunk \"" << name << "\" that is not contained "
1811    :                 << "by section contribution \"" << block->name() << "\".";
1812  i :      return DiaBrowser::kBrowserAbort;
1813    :    }
1814    :    // Label the symbol's start offset within the block.
1815  E :    Offset offset = addr - block_addr;
1816  E :    if (!AddLabelToBlock(offset, name, BlockGraph::CODE_LABEL, block))
1817  i :      return DiaBrowser::kBrowserAbort;
1818    :  
1819    :    // Keep track of the generated block. We will use this when parsing symbols
1820    :    // that belong to this function. This prevents us from having to do repeated
1821    :    // lookups and also allows us to associate labels outside of the block to the
1822    :    // correct block.
1823  E :    current_block_ = block;
1824  E :    current_address_ = block_addr;
1825    :  
1826    :    // Certain properties are not defined on all blocks, so the following calls
1827    :    // may return S_FALSE.
1828  E :    BOOL no_return = FALSE;
1829  E :    if (symbol->get_noReturn(&no_return) != S_OK)
1830  E :      no_return = FALSE;
1831    :  
1832  E :    BOOL has_inl_asm = FALSE;
1833  E :    if (symbol->get_hasInlAsm(&has_inl_asm) != S_OK)
1834  E :      has_inl_asm = FALSE;
1835    :  
1836  E :    BOOL has_eh = FALSE;
1837  E :    if (symbol->get_hasEH(&has_eh) != S_OK)
1838  E :      has_eh = FALSE;
1839    :  
1840  E :    BOOL has_seh = FALSE;
1841  E :    if (symbol->get_hasSEH(&has_seh) != S_OK)
1842  E :      has_seh = FALSE;
1843    :  
1844    :    // Set the block attributes.
1845  E :    if (no_return == TRUE)
1846  E :      block->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
1847  E :    if (has_inl_asm == TRUE)
1848  E :      block->set_attribute(BlockGraph::HAS_INLINE_ASSEMBLY);
1849  E :    if (has_eh || has_seh)
1850  E :      block->set_attribute(BlockGraph::HAS_EXCEPTION_HANDLING);
1851  E :    if (IsSymTag(symbol, SymTagThunk))
1852  E :      block->set_attribute(BlockGraph::THUNK);
1853    :  
1854  E :    return DiaBrowser::kBrowserContinue;
1855  E :  }
1856    :  // Pop handler for function and thunk symbols. None of the arguments are used; always returns kBrowserContinue.
1857    :  DiaBrowser::BrowserDirective NewDecomposer::OnPopFunctionOrThunkSymbol(
1858    :      const DiaBrowser& dia_browser,
1859    :      const DiaBrowser::SymTagVector& sym_tags,
1860  E :      const DiaBrowser::SymbolPtrVector& symbols) {
1861    :    // Simply clean up the current function block, address and scope count.
1862  E :    current_block_ = NULL;
1863  E :    current_address_ = RelativeAddress(0);
1864  E :    current_scope_count_ = 0;
1865  E :    return DiaBrowser::kBrowserContinue;
1866  E :  }
1867    :  // Dispatches a symbol found beneath a function to the handler for its sym tag (the last entry of |sym_tags|). Returns kBrowserAbort for a tag with no handler.
1868    :  DiaBrowser::BrowserDirective NewDecomposer::OnFunctionChildSymbol(
1869    :        const DiaBrowser& dia_browser,
1870    :        const DiaBrowser::SymTagVector& sym_tags,
1871  E :        const DiaBrowser::SymbolPtrVector& symbols) {
1872  E :    DCHECK(!symbols.empty());
1873  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1874    :  
1875    :    // This can only be called from the context of a function, so we expect the
1876    :    // parent function block to be set and remembered.
1877  E :    DCHECK(current_block_ != NULL);
1878    :  
1879    :    // The set of sym tags here should match the pattern used in the DiaBrowser
1880    :    // instance set up in ProcessSymbols.
1881  E :    switch (sym_tags.back()) {
1882    :      case SymTagData:
1883  E :        return OnDataSymbol(dia_browser, sym_tags, symbols);
1884    :  
1885    :      case SymTagLabel:
1886  E :        return OnLabelSymbol(dia_browser, sym_tags, symbols);
1887    :  
1888    :      case SymTagBlock:
1889    :      case SymTagFuncDebugStart:
1890    :      case SymTagFuncDebugEnd:
1891  E :        return OnScopeSymbol(sym_tags.back(), symbols.back());
1892    :  
1893    :      case SymTagCallSite:
1894  E :        return OnCallSiteSymbol(symbols.back());
1895    :  
1896    :      default:
1897    :        break;
1898    :    }
1899    :    // Only reached for a tag that none of the cases above handle.
1900  i :    LOG(ERROR) << "Unhandled function child symbol: " << sym_tags.back() << ".";
1901  i :    return DiaBrowser::kBrowserAbort;
1902  E :  }
1903    :  // Handles a data symbol, at global scope or beneath a function: adds a DATA_LABEL (with a jump/case table attribute for unnamed zero-length data in code blocks) at the symbol's offset in the block containing it.
1904    :  DiaBrowser::BrowserDirective NewDecomposer::OnDataSymbol(
1905    :      const DiaBrowser& dia_browser,
1906    :      const DiaBrowser::SymTagVector& sym_tags,
1907  E :      const DiaBrowser::SymbolPtrVector& symbols) {
1908  E :    DCHECK(!symbols.empty());
1909  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1910  E :    DiaBrowser::SymbolPtr symbol = symbols.back();
1911    :  
1912  E :    HRESULT hr = E_FAIL;
1913  E :    DWORD location_type = LocIsNull;
1914  E :    DWORD rva = 0;
1915  E :    ScopedBstr name_bstr;
1916    :    if (FAILED(hr = symbol->get_locationType(&location_type)) ||
1917    :        FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
1918  E :        FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
1919  i :      LOG(ERROR) << "Failed to get data properties: " << com::LogHr(hr) << ".";
1920  i :      return DiaBrowser::kBrowserAbort;
1921    :    }
1922    :  
1923    :    // Symbols with an address of zero are essentially invalid. They appear to
1924    :    // have been optimized away by the compiler, but they are still reported.
1925  E :    if (rva == 0)
1926  E :      return DiaBrowser::kBrowserTerminatePath;
1927    :  
1928    :    // We only care about data symbols with static storage. We can stop looking
1929    :    // at things below this node, as we won't be able to resolve them either.
1930  E :    if (location_type != LocIsStatic)
1931  i :      return DiaBrowser::kBrowserTerminatePath;
1932    :  
1933    :    // Get the size of this datum from its type info.
1934  E :    size_t length = 0;
1935  E :    if (!GetDataSymbolSize(symbol, &length))
1936  i :      return DiaBrowser::kBrowserAbort;
1937    :  
1938    :    // Reuse the parent function block if we can. This acts as small lookup
1939    :    // cache.
1940  E :    RelativeAddress addr(rva);
1941  E :    Block* block = current_block_;
1942  E :    RelativeAddress block_addr(current_address_);
1943  E :    if (block == NULL || !InRange(addr, block_addr, block->size())) {
1944  E :      block = image_->GetBlockByAddress(addr);
1945  E :      CHECK(block != NULL);
1946  E :      CHECK(image_->GetAddressOf(block, &block_addr));
1947  E :      DCHECK(InRange(addr, block_addr, block->size()));
1948    :    }
1949    :  
1950  E :    std::string name;
1951  E :    if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1952  i :      LOG(ERROR) << "Failed to convert label name to UTF8.";
1953  i :      return DiaBrowser::kBrowserAbort;
1954    :    }
1955    :  
1956    :    // Zero-length data symbols mark case/jump tables, or are forward declares.
1957  E :    BlockGraph::LabelAttributes attr = BlockGraph::DATA_LABEL;
1958  E :    Offset offset = addr - block_addr;
1959  E :    if (length == 0) {
1960    :      // Jump and case tables come in as data symbols with no name. Jump tables
1961    :      // are always an array of pointers, thus they coincide exactly with a
1962    :      // reference. Case tables are simple arrays of integer values (themselves
1963    :      // indices into a jump table), thus do not coincide with a reference.
1964  E :      if (name.empty() && block->type() == BlockGraph::CODE_BLOCK) {
1965  E :        if (block->references().find(offset) != block->references().end()) {
1966  E :          name = kJumpTable;
1967  E :          attr |= BlockGraph::JUMP_TABLE_LABEL;
1968  E :        } else {
1969  E :          name = kCaseTable;
1970  E :          attr |= BlockGraph::CASE_TABLE_LABEL;
1971    :        }
1972    :  
1973    :        // We expect jump and case tables to already have been discovered by
1974    :        // the disassembly operation. If this is not the case then our decoding
1975    :        // step is in error and its results can't be trusted.
1976  E :        if (!JumpAndCaseTableAlreadyLabelled(block, offset, attr))
1977  i :          return DiaBrowser::kBrowserAbort;
1978  E :      } else {
1979    :        // Zero-length data symbols act as 'forward declares' in some sense. They
1980    :        // are always followed by a non-zero length data symbol with the same name
1981    :        // and location.
1982  E :        return DiaBrowser::kBrowserTerminatePath;
1983    :      }
1984    :    }
1985    :  
1986    :    // Verify that the data symbol does not exceed the size of the block.
1987  E :    if (addr + length > block_addr + block->size()) {
1988    :      // The data symbol can exceed the size of the block in the case of data
1989    :      // imports. For some reason the toolchain emits a global data symbol with
1990    :      // type information equal to the type of the data *pointed* to by the import
1991    :      // entry rather than the type of the entry itself. Thus, if the data type
1992    :      // is bigger than the entire IAT this symbol will exceed it. To complicate
1993    :      // matters even more, a poorly written module can import its own export in
1994    :      // which case a linker generated pseudo-import-entry block will be
1995    :      // generated. This won't be part of the IAT, so we can't even filter based
1996    :      // on that. Instead, we simply ignore global data symbols that exceed the
1997    :      // block size.
1998  E :      base::StringPiece spname(name);
1999  E :      if (sym_tags.size() == 1 && spname.starts_with("_imp_")) {
2000  E :        VLOG(1) << "Encountered an imported data symbol \"" << name << "\" that "
2001    :                << "extends past its parent block \"" << block->name() << "\".";
2002  E :      } else {
2003  i :        LOG(ERROR) << "Received data symbol \"" << name << "\" that extends past "
2004    :                   << "its parent block \"" << block->name() << "\".";
2005  i :        return DiaBrowser::kBrowserAbort;
2006    :      }
2007    :    }
2008    :    // An oversized "_imp_" symbol (the VLOG branch above) still falls through to here and gets its label.
2009  E :    if (!AddLabelToBlock(offset, name, attr, block))
2010  i :      return DiaBrowser::kBrowserAbort;
2011    :  
2012  E :    return DiaBrowser::kBrowserContinue;
2013  E :  }
2014    :  // Adds a PUBLIC_SYMBOL_LABEL named after the public symbol (with one leading '_' removed) at the symbol's offset in the block containing its address.
2015    :  DiaBrowser::BrowserDirective NewDecomposer::OnPublicSymbol(
2016    :      const DiaBrowser& dia_browser,
2017    :      const DiaBrowser::SymTagVector& sym_tags,
2018  E :      const DiaBrowser::SymbolPtrVector& symbols) {
2019  E :    DCHECK(!symbols.empty());
2020  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
2021  E :    DCHECK(current_block_ == NULL);
2022  E :    DiaBrowser::SymbolPtr symbol = symbols.back();
2023    :  
2024  E :    HRESULT hr = E_FAIL;
2025  E :    DWORD rva = 0;
2026  E :    ScopedBstr name_bstr;
2027    :    if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
2028  E :        FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
2029  i :      LOG(ERROR) << "Failed to get public symbol properties: " << com::LogHr(hr)
2030    :                 << ".";
2031  i :      return DiaBrowser::kBrowserAbort;
2032    :    }
2033    :    // Find the block containing the symbol; a missing block is fatal (CHECK).
2034  E :    RelativeAddress addr(rva);
2035  E :    Block* block = image_->GetBlockByAddress(addr);
2036  E :    CHECK(block != NULL);
2037  E :    RelativeAddress block_addr;
2038  E :    CHECK(image_->GetAddressOf(block, &block_addr));
2039  E :    DCHECK(InRange(addr, block_addr, block->size()));
2040    :    // NOTE(review): the WideToUTF8 result is ignored here, unlike in OnDataSymbol, so a failed conversion goes unreported.
2041  E :    std::string name;
2042  E :    WideToUTF8(name_bstr, name_bstr.Length(), &name);
2043    :  
2044    :    // Public symbol names are mangled. Remove leading '_' as per
2045    :    // http://msdn.microsoft.com/en-us/library/00kh39zz(v=vs.80).aspx
2046  E :    if (name[0] == '_')
2047  E :      name = name.substr(1);
2048    :  
2049  E :    Offset offset = addr - block_addr;
2050  E :    if (!AddLabelToBlock(offset, name, BlockGraph::PUBLIC_SYMBOL_LABEL, block))
2051  i :      return DiaBrowser::kBrowserAbort;
2052    :  
2053  E :    return DiaBrowser::kBrowserContinue;
2054  E :  }
2055    :  // Adds a CODE_LABEL for a label symbol: to current_block_ when a function is in progress, otherwise to the block found at the label's address.
2056    :  DiaBrowser::BrowserDirective NewDecomposer::OnLabelSymbol(
2057    :      const DiaBrowser& dia_browser,
2058    :      const DiaBrowser::SymTagVector& sym_tags,
2059  E :      const DiaBrowser::SymbolPtrVector& symbols) {
2060  E :    DCHECK(!symbols.empty());
2061  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
2062  E :    DiaBrowser::SymbolPtr symbol = symbols.back();
2063    :  
2064  E :    HRESULT hr = E_FAIL;
2065  E :    DWORD rva = 0;
2066  E :    ScopedBstr name_bstr;
2067    :    if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
2068  E :        FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
2069  i :      LOG(ERROR) << "Failed to get label symbol properties: " << com::LogHr(hr)
2070    :                 << ".";
2071  i :      return DiaBrowser::kBrowserAbort;
2072    :    }
2073    :  
2074    :    // If we have a current_block_ the label should lie within its scope (InRangeIncl presumably also admits the address one past the block's end - confirm).
2075  E :    RelativeAddress addr(rva);
2076  E :    Block* block = current_block_;
2077  E :    RelativeAddress block_addr(current_address_);
2078  E :    if (block != NULL) {
2079  E :      if (!InRangeIncl(addr, current_address_, current_block_->size())) {
2080  i :        LOG(ERROR) << "Label falls outside of current block \""
2081    :                   << current_block_->name() << "\".";
2082  i :        return DiaBrowser::kBrowserAbort;
2083    :      }
2084  E :    } else {
2085    :      // If there is no current block this is a compiland scope label.
2086  E :      block = image_->GetBlockByAddress(addr);
2087  E :      CHECK(block != NULL);
2088  E :      CHECK(image_->GetAddressOf(block, &block_addr));
2089  E :      DCHECK(InRange(addr, block_addr, block->size()));
2090    :  
2091    :      // TODO(chrisha): This label is in compiland scope, so we should be
2092    :      //     finding the block whose section contribution shares the same
2093    :      //     compiland.
2094    :    }
2095    :    // NOTE(review): the WideToUTF8 result is ignored here, unlike in OnDataSymbol, so a failed conversion goes unreported.
2096  E :    std::string name;
2097  E :    WideToUTF8(name_bstr, name_bstr.Length(), &name);
2098    :  
2099  E :    Offset offset = addr - block_addr;
2100  E :    if (!AddLabelToBlock(offset, name, BlockGraph::CODE_LABEL, block))
2101  i :      return DiaBrowser::kBrowserAbort;
2102    :  
2103  E :    return DiaBrowser::kBrowserContinue;
2104  E :  }
2105    :  // Adds a label for a scope symbol of tag |type| to current_block_; a SymTagBlock scope also gets a matching SCOPE_END_LABEL.
2106    :  DiaBrowser::BrowserDirective NewDecomposer::OnScopeSymbol(
2107  E :      enum SymTagEnum type, DiaBrowser::SymbolPtr symbol) {
2108    :    // We should only get here via the successful exploration of a SymTagFunction,
2109    :    // so current_block_ should be set.
2110  E :    DCHECK(current_block_ != NULL);
2111    :  
2112  E :    HRESULT hr = E_FAIL;
2113  E :    DWORD rva = 0;
2114  E :    if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva))) {
2115  i :      LOG(ERROR) << "Failed to get scope symbol properties: " << com::LogHr(hr)
2116    :                 << ".";
2117  i :      return DiaBrowser::kBrowserAbort;
2118    :    }
2119    :  
2120    :    // The label may potentially lie at the first byte past the function, hence the inclusive upper bound below.
2121  E :    RelativeAddress addr(rva);
2122  E :    DCHECK_LE(current_address_, addr);
2123  E :    DCHECK_LE(addr, current_address_ + current_block_->size());
2124    :  
2125    :    // Get the attributes and name for this label; this CHECK-fails if ScopeSymTagToLabelProperties returns false.
2126  E :    BlockGraph::LabelAttributes attr = 0;
2127  E :    std::string name;
2128  E :    CHECK(ScopeSymTagToLabelProperties(type, current_scope_count_, &attr, &name));
2129    :  
2130    :    // Add the label at its offset from current_address_.
2131  E :    Offset offset = addr - current_address_;
2132  E :    if (!AddLabelToBlock(offset, name, attr, current_block_))
2133  i :      return DiaBrowser::kBrowserAbort;
2134    :  
2135    :    // If this is a scope we extract the length and explicitly add a corresponding
2136    :    // end label. The scope counter is only advanced once the end label name has been built.
2137  E :    if (type == SymTagBlock) {
2138  E :      ULONGLONG length = 0;
2139  E :      if (symbol->get_length(&length) != S_OK) {
2140  i :        LOG(ERROR) << "Failed to extract code scope length for block \""
2141    :                    << current_block_->name() << "\".";
2142  i :        return DiaBrowser::kBrowserAbort;
2143    :      }
2144  E :      DCHECK_LE(static_cast<size_t>(offset + length), current_block_->size());
2145  E :      name = base::StringPrintf("<scope-end-%d>", current_scope_count_);
2146  E :      ++current_scope_count_;
2147    :      if (!AddLabelToBlock(offset + length, name,
2148  E :                           BlockGraph::SCOPE_END_LABEL, current_block_)) {
2149  i :        return DiaBrowser::kBrowserAbort;
2150    :      }
2151    :    }
2152    :  
2153  E :    return DiaBrowser::kBrowserContinue;
2154  E :  }
2155    :  // Adds a "<call-site>" CALL_SITE_LABEL to current_block_ at the RVA of |symbol|; returns kBrowserAbort on any failure.
2156    :  DiaBrowser::BrowserDirective NewDecomposer::OnCallSiteSymbol(
2157  E :      DiaBrowser::SymbolPtr symbol) {
2158    :    // We should only get here via the successful exploration of a SymTagFunction,
2159    :    // so current_block_ should be set.
2160  E :    DCHECK(current_block_ != NULL);
2161    :  
2162  E :    HRESULT hr = E_FAIL;
2163  E :    DWORD rva = 0;
2164  E :    if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva))) {
2165  i :      LOG(ERROR) << "Failed to get call site symbol properties: "
2166    :                 << com::LogHr(hr) << ".";
2167  i :      return DiaBrowser::kBrowserAbort;
2168    :    }
2169    :    // The call site must fall within current_block_ (InRange is presumably end-exclusive - TODO confirm).
2170  E :    RelativeAddress addr(rva);
2171  E :    if (!InRange(addr, current_address_, current_block_->size())) {
2172  i :      LOG(ERROR) << "Call site falls outside of current block \""
2173    :                 << current_block_->name() << "\".";
2174  i :      return DiaBrowser::kBrowserAbort;
2175    :    }
2176    :    // The label offset is expressed relative to current_address_.
2177  E :    Offset offset = addr - current_address_;
2178    :    if (!AddLabelToBlock(offset, "<call-site>", BlockGraph::CALL_SITE_LABEL,
2179  E :                         current_block_)) {
2180  i :      return DiaBrowser::kBrowserAbort;
2181    :    }
2182    :  
2183  E :    return DiaBrowser::kBrowserContinue;
2184  E :  }
2185    :  // Adds a block to image_, then sets its source range, section and (when available) image data; returns NULL on failure.
2186    :  Block* NewDecomposer::CreateBlock(BlockType type,
2187    :                                    RelativeAddress address,
2188    :                                    BlockGraph::Size size,
2189  E :                                    const base::StringPiece& name) {
2190  E :    Block* block = image_->AddBlock(type, address, size, name);
2191  E :    if (block == NULL) {
2192  i :      LOG(ERROR) << "Unable to add block \"" << name.as_string() << "\" at "
2193    :                 << address << " with size " << size << ".";
2194  i :      return NULL;
2195    :    }
2196    :  
2197    :    // Mark the source range from which this block originates. This is assuming
2198    :    // an untransformed image. To handle transformed images we'd have to use the
2199    :    // OMAP information to do this properly.
2200    :    bool pushed = block->source_ranges().Push(
2201    :        Block::DataRange(0, size),
2202  E :        Block::SourceRange(address, size));
2203  E :    DCHECK(pushed);
2204    :    // NOTE(review): if the section lookup below fails we return NULL, but the block added above is not removed from image_ here - confirm callers treat NULL as fatal.
2205  E :    BlockGraph::SectionId section = image_file_.GetSectionIndex(address, size);
2206  E :    if (section == BlockGraph::kInvalidSectionId) {
2207  i :      LOG(ERROR) << "Block \"" << name.as_string() << "\" at " << address
2208    :                 << " with size " << size << " lies outside of all sections.";
2209  i :      return NULL;
2210    :    }
2211  E :    block->set_section(section);
2212    :    // Attach the image bytes only when the image file returns data for this range; otherwise the block is left without data.
2213  E :    const uint8* data = image_file_.GetImageData(address, size);
2214  E :    if (data != NULL)
2215  E :      block->SetData(data, size);
2216    :  
2217  E :    return block;
2218  E :  }
2219    :  // Returns the existing PE_PARSED or COFF_GROUP block that entirely covers [addr, addr + size), or creates a new block if no block exists at |addr|; returns NULL on conflict.
2220    :  Block* NewDecomposer::CreateBlockOrFindCoveringPeBlock(
2221    :      BlockType type,
2222    :      RelativeAddress addr,
2223    :      BlockGraph::Size size,
2224  E :      const base::StringPiece& name) {
2225  E :    Block* block = image_->GetBlockByAddress(addr);
2226  E :    if (block != NULL) {
2227  E :      RelativeAddress block_addr;
2228  E :      CHECK(image_->GetAddressOf(block, &block_addr));
2229    :  
2230  E :      RelativeRange existing_block(block_addr, block->size());
2231    :  
2232    :      // If this is not a PE parsed or COFF group block that covers us entirely,
2233    :      // then this is an error.
2234    :      static const BlockGraph::BlockAttributes kCoveringAttributes =
2235    :          BlockGraph::PE_PARSED | BlockGraph::COFF_GROUP;
2236    :      if ((block->attributes() & kCoveringAttributes) == 0 ||
2237  E :          !existing_block.Contains(addr, size)) {
2238  i :        LOG(ERROR) << "Trying to create block \"" << name.as_string() << "\" at "
2239    :                   << addr.value() << " with size " << size << " that conflicts "
2240    :                   << "with existing block \"" << block->name() << "\" at "
2241    :                   << block_addr << " with size " << block->size() << ".";
2242  i :        return NULL;
2243    :      }
2244    :      // The existing block covers the requested range, so it is reused and no new block is created.
2245  E :      return block;
2246    :    }
2247  E :    DCHECK(block == NULL);
2248    :    // Nothing lives at |addr| yet, so create a fresh block there.
2249  E :    return CreateBlock(type, addr, size, name);
2250  E :  }
2251    :  // Creates a block named "Gap Block 0x<address>" via CreateBlock and flags it GAP_BLOCK; returns false if the block cannot be created.
2252    :  bool NewDecomposer::CreateGapBlock(BlockType block_type,
2253    :                                     RelativeAddress address,
2254  E :                                     BlockGraph::Size size) {
2255    :    Block* block = CreateBlock(block_type, address, size,
2256  E :        base::StringPrintf("Gap Block 0x%08X", address.value()).c_str());
2257  E :    if (block == NULL) {
2258  i :      LOG(ERROR) << "Unable to create gap block.";
2259  i :      return false;
2260    :    }
2261  E :    block->set_attribute(BlockGraph::GAP_BLOCK);
2262    :  
2263  E :    return true;
2264  E :  }
2265    :  // Creates gap blocks of |block_type| over the address ranges of the section described by |header| that no existing block covers; returns false if a gap block cannot be created.
2266    :  bool NewDecomposer::CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
2267  E :                                             BlockType block_type) {
2268  E :    RelativeAddress section_begin(header->VirtualAddress);
2269  E :    RelativeAddress section_end(section_begin + header->Misc.VirtualSize);
2270    :    RelativeAddress image_end(
2271  E :        image_file_.nt_headers()->OptionalHeader.SizeOfImage);
2272    :  
2273    :    // Find the first block intersecting the range from the start of the section
2274    :    // to the end of the image; iteration over the section's blocks starts there.
2275    :    BlockGraph::AddressSpace::RangeMap::const_iterator it(
2276    :        image_->address_space_impl().FindFirstIntersection(
2277    :            BlockGraph::AddressSpace::Range(section_begin,
2278  E :                                            image_end - section_begin)));
2279    :    // |end| is the first block intersecting the range from the end of the section to the end of the image, or the container end when the section reaches the end of the image.
2280    :    BlockGraph::AddressSpace::RangeMap::const_iterator end =
2281  E :        image_->address_space_impl().end();
2282  E :    if (section_end < image_end) {
2283    :      end = image_->address_space_impl().FindFirstIntersection(
2284    :          BlockGraph::AddressSpace::Range(section_end,
2285  E :                                          image_end - section_end));
2286    :    }
2287    :  
2288    :    // The whole section is missing. Cover it with one gap block.
2289  E :    if (it == end)
2290    :      return CreateGapBlock(
2291  i :          block_type, section_begin, section_end - section_begin);
2292    :  
2293    :    // Create the head gap block if need be.
2294  E :    if (section_begin < it->first.start()) {
2295    :      if (!CreateGapBlock(
2296  i :          block_type, section_begin, it->first.start() - section_begin)) {
2297  i :        return false;
2298    :      }
2299    :    }
2300    :  
2301    :    // Now iterate the blocks and fill in gaps. A block ending at or past the end of the section terminates the walk.
2302  E :    for (; it != end; ++it) {
2303  E :      const Block* block = it->second;
2304  E :      DCHECK(block != NULL);
2305  E :      RelativeAddress block_end = it->first.start() + block->size();
2306  E :      if (block_end >= section_end)
2307  E :        break;
2308    :  
2309    :      // Walk to the next address in turn.
2310  E :      BlockGraph::AddressSpace::RangeMap::const_iterator next = it;
2311  E :      ++next;
2312  E :      if (next == end) {
2313    :        // We're at the end of the list. Create the tail gap block.
2314  E :        DCHECK_GT(section_end, block_end);
2315  E :        if (!CreateGapBlock(block_type, block_end, section_end - block_end))
2316  i :          return false;
2317  E :        break;
2318    :      }
2319    :  
2320    :      // Create the interstitial gap block, if this block and the next are not contiguous.
2321  E :      if (block_end < next->first.start())
2322    :        if (!CreateGapBlock(
2323  E :            block_type, block_end, next->first.start() - block_end)) {
2324  i :          return false;
2325    :        }
2326  E :    }
2327    :  
2328  E :    return true;
2329  E :  }
2330    :  
2331    :  }  // namespace pe

Coverage information generated Thu Jul 04 09:34:53 2013.