Coverage for /Syzygy/pe/decomposer.cc

Coverage | Lines executed / instrumented / missing | exe / inst / miss | Language | Group
75.9% | 1132 / 1491 / 0 | . | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/pe/decomposer.h"
  16    :  
  17    :  #include <cvconst.h>
  18    :  #include <algorithm>
  19    :  
  20    :  #include "base/bind.h"
  21    :  #include "base/logging.h"
  22    :  #include "base/path_service.h"
  23    :  #include "base/string_util.h"
  24    :  #include "base/stringprintf.h"
  25    :  #include "base/utf_string_conversions.h"
  26    :  #include "base/files/file_path.h"
  27    :  #include "base/memory/scoped_ptr.h"
  28    :  #include "base/win/scoped_bstr.h"
  29    :  #include "base/win/scoped_comptr.h"
  30    :  #include "sawbuck/common/com_utils.h"
  31    :  #include "sawbuck/sym_util/types.h"
  32    :  #include "syzygy/block_graph/block_util.h"
  33    :  #include "syzygy/block_graph/typed_block.h"
  34    :  #include "syzygy/core/disassembler_util.h"
  35    :  #include "syzygy/core/zstream.h"
  36    :  #include "syzygy/pdb/omap.h"
  37    :  #include "syzygy/pdb/pdb_byte_stream.h"
  38    :  #include "syzygy/pdb/pdb_util.h"
  39    :  #include "syzygy/pe/dia_util.h"
  40    :  #include "syzygy/pe/find.h"
  41    :  #include "syzygy/pe/metadata.h"
  42    :  #include "syzygy/pe/pdb_info.h"
  43    :  #include "syzygy/pe/pe_file_parser.h"
  44    :  #include "syzygy/pe/pe_transform_policy.h"
  45    :  #include "syzygy/pe/pe_utils.h"
  46    :  #include "syzygy/pe/serialization.h"
  47    :  
  48    :  namespace pe {
  49    :  namespace {
  50    :  
  51    :  using base::win::ScopedBstr;
  52    :  using base::win::ScopedComPtr;
  53    :  using block_graph::BlockGraph;
  54    :  using block_graph::ConstTypedBlock;
  55    :  using builder::Opt;
  56    :  using builder::Seq;
  57    :  using builder::Star;
  58    :  using core::AbsoluteAddress;
  59    :  using core::Disassembler;
  60    :  using core::RelativeAddress;
  61    :  
  62    :  typedef Disassembler::CallbackDirective CallbackDirective;
  63    :  
  64    :  const size_t kPointerSize = sizeof(AbsoluteAddress);
  65    :  
   66    :  // Maps a pdb::PdbFixup::Type onto the corresponding BlockGraph::ReferenceType. Any unrecognized value hits NOTREACHED() and falls back to ABSOLUTE_REF.
   67    :  BlockGraph::ReferenceType PdbFixupTypeToReferenceType(
   68  E :      pdb::PdbFixup::Type type) {
   69  E :    switch (type) {
   70    :      case pdb::PdbFixup::TYPE_ABSOLUTE:
   71  E :        return BlockGraph::ABSOLUTE_REF;
   72    :  
   73    :      case pdb::PdbFixup::TYPE_RELATIVE:
   74  E :        return BlockGraph::RELATIVE_REF;
   75    :  
   76    :      case pdb::PdbFixup::TYPE_PC_RELATIVE:
   77  E :        return BlockGraph::PC_RELATIVE_REF;
   78    :  
   79    :      default:
   80  i :        NOTREACHED() << "Invalid PdbFixup::Type.";
   81    :        // The value returned here is meaningless; it only satisfies the signature.
   82  i :        return BlockGraph::ABSOLUTE_REF;
   83    :    }
   84  E :  }
  85    :  
   86    :  // Adds a reference to the provided intermediate reference map. If one already
   87    :  // exists at @p src_addr, validates that its type, size, base and offset all match; on a mismatch logs an error and returns false. Returns true otherwise.
   88    :  bool AddReference(RelativeAddress src_addr,
   89    :                    BlockGraph::ReferenceType type,
   90    :                    BlockGraph::Size size,
   91    :                    RelativeAddress dst_base,
   92    :                    BlockGraph::Offset dst_offset,
   93  E :                    Decomposer::IntermediateReferenceMap* references) {
   94  E :    DCHECK(references != NULL);
   95    :  
   96    :    // If we get an iterator to a reference and it has the same source address
   97    :    // then ensure that we are consistent with it.
   98    :    Decomposer::IntermediateReferenceMap::iterator it =
   99  E :        references->lower_bound(src_addr);
  100  E :    if (it != references->end() && it->first == src_addr) {
  101    :      if (type != it->second.type || size != it->second.size ||
  102  E :          dst_base != it->second.base || dst_offset != it->second.offset) {
  103  i :        LOG(ERROR) << "Trying to insert inconsistent and colliding intermediate "
  104    :                      "references.";
  105  i :        return false;
  106    :      }
  107    :    }
  108    :  
  109  E :    Decomposer::IntermediateReference ref = { type,
  110  E :                                              size,
  111  E :                                              dst_base,
  112  E :                                              dst_offset };
  113    :  
  114    :    // Since we used lower_bound above, we can use it as a hint for the
  115    :    // insertion. This saves us from incurring the lookup cost twice.
  116  E :    references->insert(it, std::make_pair(src_addr, ref));  // NOTE(review): also reached when a consistent entry already exists; presumably a no-op for a map-like container -- confirm.
  117  E :    return true;
  118  E :  }
 119    :  
  120    :  // Validates the given reference against the given fixup map entry: the types must match and @p size must equal kPointerSize. If they
  121    :  // are consistent, marks the fixup as having been visited and returns true; otherwise logs an error and returns false.
  122    :  bool ValidateReference(RelativeAddress src_addr,
  123    :                         BlockGraph::ReferenceType type,
  124    :                         BlockGraph::Size size,
  125  E :                         Decomposer::FixupMap::iterator fixup_it) {  // @p fixup_it is dereferenced unconditionally, so it must be dereferenceable.
  126  E :    if (type != fixup_it->second.type || size != kPointerSize) {
  127  i :      LOG(ERROR) << "Reference at " << src_addr
  128    :                 << " not consistent with corresponding fixup.";
  129  i :      return false;
  130    :    }
  131    :  
  132    :    // Mark this fixup as having been visited.
  133  E :    fixup_it->second.visited = true;
  134    :  
  135  E :    return true;
  136  E :  }
 137    :  
  138    :  enum ValidateOrAddReferenceMode {  // Selects how ValidateOrAddReference treats a fixup found at the source address.
  139    :    // Look for an existing fixup. If we find one, validate against it;
  140    :    // whether or not one exists, create a new intermediate reference.
  141    :    FIXUP_MAY_EXIST,
  142    :    // Compare against an existing fixup, failing if there is none. Does not
  143    :    // create a new intermediate reference.
  144    :    FIXUP_MUST_EXIST,
  145    :    // Look for an existing fixup, and fail if one exists. Otherwise, create
  146    :    // a new intermediate reference.
  147    :    FIXUP_MUST_NOT_EXIST
  148    :  };
  149    :  bool ValidateOrAddReference(ValidateOrAddReferenceMode mode,  // Looks up @p src_addr in @p fixup_map, then validates and/or records the reference as dictated by @p mode. Returns false on any inconsistency.
  150    :                              RelativeAddress src_addr,
  151    :                              BlockGraph::ReferenceType type,
  152    :                              BlockGraph::Size size,
  153    :                              RelativeAddress dst_base,
  154    :                              BlockGraph::Offset dst_offset,
  155    :                              Decomposer::FixupMap* fixup_map,
  156  E :                              Decomposer::IntermediateReferenceMap* references) {
  157  E :    DCHECK(fixup_map != NULL);
  158  E :    DCHECK(references != NULL);
  159    :  
  160  E :    Decomposer::FixupMap::iterator it = fixup_map->find(src_addr);  // Equals end() when no fixup is recorded at this address.
  161    :  
  162  E :    switch (mode) {
  163    :      case FIXUP_MAY_EXIST: {  // Validate only if a fixup was found, then always add the reference.
  164    :        if (it != fixup_map->end() &&
  165  E :            !ValidateReference(src_addr, type, size, it))
  166  i :          return false;
  167    :        return AddReference(src_addr, type, size, dst_base, dst_offset,
  168  E :                            references);
  169    :      }
  170    :  
  171    :      case FIXUP_MUST_EXIST: {  // A missing fixup is an error; @p references is left untouched.
  172  E :        if (it == fixup_map->end()) {
  173  i :          LOG(ERROR) << "Reference at " << src_addr << " has no matching fixup.";
  174  i :          return false;
  175    :        }
  176  E :        if (!ValidateReference(src_addr, type, size, it))
  177  i :          return false;
  178    :        // Do not create a new intermediate reference.
  179  E :        return true;
  180    :      }
  181    :  
  182    :      case FIXUP_MUST_NOT_EXIST: {  // A fixup being present is an error; otherwise add the reference.
  183  E :        if (it != fixup_map->end()) {
  184  i :          LOG(ERROR) << "Reference at " << src_addr
  185    :                     << " collides with an existing fixup.";
  186  i :          return false;
  187    :        }
  188    :        return AddReference(src_addr, type, size, dst_base, dst_offset,
  189  E :                            references);
  190    :      }
  191    :  
  192    :      default: {
  193  i :        NOTREACHED() << "Invalid ValidateOrAddReferenceMode.";
  194  i :        return false;
  195    :      }
  196    :    }
  197  E :  }
 198    :  
  199  E :  bool GetTypeInfo(IDiaSymbol* symbol, size_t* length) {  // Writes the length reported by the type of @p symbol to @p length (0 if the symbol has no type). Returns false if a DIA call fails.
  200  E :    DCHECK(symbol != NULL);
  201  E :    DCHECK(length != NULL);
  202    :  
  203  E :    *length = 0;
  204  E :    ScopedComPtr<IDiaSymbol> type;
  205  E :    HRESULT hr = symbol->get_type(type.Receive());
  206    :    // S_FALSE happens if the symbol has no type information; *length stays 0 and this still counts as success.
  207  E :    if (hr == S_FALSE)
  208  E :      return true;
  209  E :    if (hr != S_OK) {
  210  i :      LOG(ERROR) << "Failed to get type symbol: " << com::LogHr(hr) << ".";
  211  i :      return false;
  212    :    }
  213    :  
  214  E :    ULONGLONG ull_length = 0;
  215  E :    hr = type->get_length(&ull_length);
  216  E :    if (hr != S_OK) {
  217  i :      LOG(ERROR) << "Failed to retrieve type length properties: "
  218    :                 << com::LogHr(hr) << ".";
  219  i :      return false;
  220    :    }
  221  E :    *length = ull_length;  // NOTE(review): ULONGLONG is narrowed to size_t here without a range check.
  222    :  
  223  E :    return true;
  224  E :  }
 225    :  
  226  E :  SectionType GetSectionType(const IMAGE_SECTION_HEADER* header) {  // Classifies a section from its Characteristics flags.
  227  E :    DCHECK(header != NULL);
  228  E :    if ((header->Characteristics & IMAGE_SCN_CNT_CODE) != 0)  // The code flag is tested first, so it wins over the data test below.
  229  E :      return kSectionCode;
  230  E :    if ((header->Characteristics & kReadOnlyDataCharacteristics) != 0)  // Any bit of the mask being set is enough.
  231  E :      return kSectionData;
  232  i :    return kSectionUnknown;
  233  E :  }
 234    :  
  235  E :  void GuessDataBlockAlignment(BlockGraph::Block* block, uint32 max_alignment) {  // Sets the alignment of @p block from the alignment of its address, limited to @p max_alignment.
  236  E :    DCHECK(block != NULL);
  237  E :    uint32 alignment = block->addr().GetAlignment();
  238    :    // Cap the alignment at the caller-supplied maximum.
  239  E :    if (alignment > max_alignment)
  240  E :      alignment = max_alignment;
  241  E :    block->set_alignment(alignment);
  242  E :  }
 243    :  
  244    :  bool AreMatchedBlockAndLabelAttributes(  // Returns true if a code block is paired with a CODE_LABEL attribute, or a data block with a DATA_LABEL attribute.
  245    :      BlockGraph::BlockType bt,
  246    :      BlockGraph::LabelAttributes la) {
  247    :    return (bt == BlockGraph::CODE_BLOCK && (la & BlockGraph::CODE_LABEL) != 0) ||
  248    :        (bt == BlockGraph::DATA_BLOCK && (la & BlockGraph::DATA_LABEL) != 0);  // Any other block type never matches.
  249    :  }
 250    :  
  251  E :  BlockGraph::LabelAttributes SymTagToLabelAttributes(enum SymTagEnum sym_tag) {  // Maps a DIA symbol tag to the label attribute used for it; only the six tags listed below are handled.
  252  E :    switch (sym_tag) {
  253    :      case SymTagData:
  254  E :        return BlockGraph::DATA_LABEL;
  255    :      case SymTagLabel:
  256  E :        return BlockGraph::CODE_LABEL;
  257    :      case SymTagFuncDebugStart:
  258  E :        return BlockGraph::DEBUG_START_LABEL;
  259    :      case SymTagFuncDebugEnd:
  260  E :        return BlockGraph::DEBUG_END_LABEL;
  261    :      case SymTagBlock:
  262  E :        return BlockGraph::SCOPE_START_LABEL;
  263    :      case SymTagCallSite:
  264  E :        return BlockGraph::CALL_SITE_LABEL;
  265    :    }
  266    :  
  267  i :    NOTREACHED();  // Any other tag is a caller error.
  268  i :    return 0;  // No attributes; only reached for an unhandled tag.
  269  E :  }
 270    :  
 271    :  bool AddLabelToBlock(RelativeAddress addr,
 272    :                       const base::StringPiece& name,
 273    :                       BlockGraph::LabelAttributes label_attributes,
 274  E :                       BlockGraph::Block* block) {
 275  E :    DCHECK(block != NULL);
 276  E :    DCHECK_LE(block->addr(), addr);
 277  E :    DCHECK_GT(block->addr() + block->size(), addr);
 278    :  
 279  E :    BlockGraph::Offset offset = addr - block->addr();
 280    :  
 281    :    // Try to create the label.
 282  E :    if (block->SetLabel(offset, name, label_attributes)) {
 283    :      // If there was no label at offset 0, then this block has not yet been
 284    :      // renamed, and still has its section contribution as a name. Update it to
 285    :      // the first symbol we get for it. We parse symbols from most useful
 286    :      // (undecorated function names) to least useful (mangled public symbols), so
 287    :      // this ensures a block has the most useful name.
 288  E :      if (offset == 0)
 289  E :        block->set_name(name);
 290    :  
 291  E :      return true;
 292    :    }
 293    :  
 294    :    // If we get here there's an already existing label. Update it.
 295  E :    BlockGraph::Label label;
 296  E :    CHECK(block->GetLabel(offset, &label));
 297    :  
 298    :    // It is conceivable that there could be more than one scope with either the
 299    :    // same beginning or the same ending. However, this doesn't appear to happen
 300    :    // in any version of Chrome up to 20. We add this check so that we'd at least
 301    :    // be made aware of the situation. (We don't rely on these labels, so we
 302    :    // merely output a warning rather than an error.)
 303    :    {
 304    :      const BlockGraph::LabelAttributes kScopeAttributes =
 305    :          BlockGraph::SCOPE_START_LABEL |
 306  E :          BlockGraph::SCOPE_END_LABEL;
 307    :      BlockGraph::LabelAttributes scope_attributes =
 308  E :          label_attributes & kScopeAttributes;
 309  E :      if (scope_attributes != 0) {
 310  E :        if (label.has_any_attributes(scope_attributes)) {
 311  i :          LOG(WARNING) << "Detected colliding scope labels at offset "
 312    :                       << offset << " of block \"" << block->name() << "\".";
 313    :        }
 314    :      }
 315    :    }
 316    :  
 317    :    // Merge the names if this isn't a repeated name.
 318  E :    std::string new_name = label.name();
 319  E :    if (new_name.find(name.data()) == new_name.npos) {
 320  E :      new_name.append(", ");
 321  E :      name.AppendToString(&new_name);
 322    :    }
 323    :  
 324    :    // Merge the attributes.
 325    :    BlockGraph::LabelAttributes new_label_attr = label.attributes() |
 326  E :        label_attributes;
 327  E :    if (!BlockGraph::Label::AreValidAttributes(new_label_attr)) {
 328    :      // It's not clear which attributes should be the winner here, so we log an
 329    :      // error.
 330  i :      LOG(ERROR) << "Trying to merge conflicting label attributes \""
 331    :                 << BlockGraph::LabelAttributesToString(label_attributes)
 332    :                 << "\" for label \"" << label.ToString() << "\" at offset "
 333    :                 << offset << " of block \"" << block->name() << "\".";
 334  i :      return false;
 335    :    }
 336    :  
 337    :    // Update the label.
 338  E :    label = BlockGraph::Label(new_name, new_label_attr);
 339  E :    CHECK(block->RemoveLabel(offset));
 340  E :    CHECK(block->SetLabel(offset, label));
 341    :  
 342  E :    return true;
 343  E :  }
 344    :  
  345    :  // The MS linker pads between code blocks with int3s; 0xCC is the one-byte int3 opcode.
  346    :  static const uint8 kInt3 = 0xCC;
 347    :  
 348    :  // If the given run of bytes consists of a single value repeated, returns that
 349    :  // value. Otherwise, returns -1.
 350  E :  int RepeatedValue(const uint8* data, size_t size) {
 351  E :    DCHECK(data != NULL);
 352  E :    const uint8* data_end = data + size;
 353  E :    uint8 value = *(data++);
 354  E :    for (; data < data_end; ++data) {
 355  E :      if (*data != value)
 356  i :        return -1;
 357  E :    }
 358  E :    return value;
 359  E :  }
 360    :  
  361    :  const BlockGraph::BlockId kNullBlockId(-1);  // Block id constructed from -1; presumably a "no block" sentinel -- confirm against its users.
 362    :  
  363    :  void GetDisassemblyStartingPoints(  // Replaces the contents of @p addresses with the absolute address of every CODE_LABEL in @p block.
  364    :      const BlockGraph::Block* block,
  365    :      AbsoluteAddress abs_block_addr,
  366    :      const PEFile::RelocSet& reloc_set,
  367  E :      Disassembler::AddressSet* addresses) {
  368  E :    DCHECK(block != NULL);
  369  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
  370  E :    DCHECK(addresses != NULL);
  371    :  
  372  E :    addresses->clear();  // Anything the caller left in the set is discarded.
  373    :  
  374    :    // Use code labels as starting points.
  375  E :    BlockGraph::Block::LabelMap::const_iterator it(block->labels().begin());
  376  E :    for (; it != block->labels().end(); ++it) {
  377  E :      BlockGraph::Offset offset = it->first;
  378  E :      DCHECK_LE(0, offset);
  379  E :      DCHECK_GT(block->size(), static_cast<size_t>(offset));
  380    :  
  381  E :      if (it->second.has_attributes(BlockGraph::CODE_LABEL)) {
  382    :        // We sometimes receive code labels that land on lookup tables; we can
  383    :        // detect these because the label will point directly to a reloc. These
  384    :        // should have already been marked as data by now. DCHECK to validate.
  385    :        // TODO(chrisha): Get rid of this DCHECK, and allow mixed CODE and DATA
  386    :        //     labels. Simply only use ones that are DATA only.
  387  E :        DCHECK_EQ(0u, reloc_set.count(block->addr() + offset));  // @p reloc_set is used only for this check.
  388    :  
  389  E :        addresses->insert(abs_block_addr + offset);
  390    :      }
  391  E :    }
  392  E :  }
 393    :  
  394    :  // Determines if the provided code block has the expected layout of code first,
  395    :  // data second. Returns true if so, false otherwise. On success @p code_size holds the size of
  396    :  // the code portion of the block: data_size() trimmed back to the lowest offset of the trailing data labels.
  397    :  bool BlockHasExpectedCodeDataLayout(const BlockGraph::Block* block,
  398  E :                                      size_t* code_size) {
  399  E :    DCHECK(block != NULL);
  400  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
  401  E :    DCHECK(code_size != NULL);
  402    :  
  403  E :    *code_size = block->data_size();  // Starting value, used as-is when there are no data labels.
  404    :  
  405    :    BlockGraph::Block::LabelMap::const_reverse_iterator label_it =
  406  E :        block->labels().rbegin();
  407    :    BlockGraph::Block::LabelMap::const_reverse_iterator label_end =
  408  E :        block->labels().rend();
  409    :  
  410  E :    bool seen_non_data = false;  // Set once a label without DATA_LABEL has been visited.
  411    :  
  412    :    // Walk through the labels in reverse order (by decreasing offset). Trim
  413    :    // any data labels from this block's data_size.
  414  E :    for (; label_it != label_end; ++label_it) {
  415  E :      if (label_it->second.has_attributes(BlockGraph::DATA_LABEL)) {
  416    :        // We've encountered data not strictly at the end of the block. This
  417    :        // violates assumptions about code generated by cl.exe.
  418  E :        if (seen_non_data)
  419  i :          return false;  // @p code_size may already have been trimmed at this point.
  420    :  
  421    :        // Otherwise, we're still in a run of data labels at the tail of the
  422    :        // block. Keep trimming the code size.
  423  E :        size_t offset = static_cast<size_t>(label_it->first);
  424  E :        if (offset < *code_size)
  425  E :          *code_size = offset;
  426  E :      } else {
  427  E :        seen_non_data = true;
  428    :      }
  429  E :    }
  430    :  
  431  E :    return true;
  432  E :  }
 433    :  
 434    :  // Given a compiland, returns its compiland details.
 435    :  bool GetCompilandDetailsForCompiland(IDiaSymbol* compiland,
 436  E :                                       IDiaSymbol** compiland_details) {
 437  E :    DCHECK(compiland != NULL);
 438  E :    DCHECK(compiland_details != NULL);
 439  E :    DCHECK(IsSymTag(compiland, SymTagCompiland));
 440    :  
 441  E :    *compiland_details = NULL;
 442    :  
 443    :    // Get the enumeration of compiland details.
 444  E :    ScopedComPtr<IDiaEnumSymbols> enum_symbols;
 445    :    HRESULT hr = compiland->findChildren(SymTagCompilandDetails, NULL, 0,
 446  E :                                         enum_symbols.Receive());
 447  E :    DCHECK_EQ(S_OK, hr);
 448    :  
 449    :    // We expect there to be compiland details. For compilands built by
 450    :    // non-standard toolchains, there usually aren't any.
 451  E :    LONG count = 0;
 452  E :    hr = enum_symbols->get_Count(&count);
 453  E :    DCHECK_EQ(S_OK, hr);
 454  E :    if (count == 0)
 455  i :      return false;
 456    :  
 457    :    // Get the compiland details.
 458  E :    ULONG fetched = 0;
 459  E :    hr = enum_symbols->Next(1, compiland_details, &fetched);
 460  E :    DCHECK_EQ(S_OK, hr);
 461  E :    DCHECK_EQ(1u, fetched);
 462  E :    return true;
 463  E :  }
 464    :  
 465    :  // Stores information regarding known compilers.
 466    :  struct KnownCompilerInfo {
 467    :    wchar_t* compiler_name;
 468    :    bool supported;
 469    :  };
 470    :  
 471    :  // A list of known compilers, and their status as being supported or not.
 472    :  KnownCompilerInfo kKnownCompilerInfos[] = {
 473    :    { L"Microsoft (R) Macro Assembler", false },
 474    :    { L"Microsoft (R) Optimizing Compiler", true },
 475    :    { L"Microsoft (R) LINK", false }
 476    :  };
 477    :  
 478    :  // Given a compiland, determines whether the compiler used is one of those that
 479    :  // we whitelist.
 480  E :  bool IsBuiltBySupportedCompiler(IDiaSymbol* compiland) {
 481  E :    DCHECK(compiland != NULL);
 482  E :    DCHECK(IsSymTag(compiland, SymTagCompiland));
 483    :  
 484  E :    ScopedComPtr<IDiaSymbol> compiland_details;
 485    :    if (!GetCompilandDetailsForCompiland(compiland,
 486  E :                                         compiland_details.Receive())) {
 487    :      // If the compiland has no compiland details we assume the compiler is not
 488    :      // supported.
 489  i :      ScopedBstr compiland_name;
 490  i :      if (compiland->get_name(compiland_name.Receive()) == S_OK) {
 491  i :        VLOG(1) << "Compiland has no compiland details: "
 492    :                << com::ToString(compiland_name);
 493    :      }
 494  i :      return false;
 495    :    }
 496  E :    DCHECK(compiland_details.get() != NULL);
 497    :  
 498    :    // Get the compiler name.
 499  E :    ScopedBstr compiler_name;
 500  E :    HRESULT hr = compiland_details->get_compilerName(compiler_name.Receive());
 501  E :    DCHECK_EQ(S_OK, hr);
 502    :  
 503    :    // Check the compiler name against the list of known compilers.
 504  E :    for (size_t i = 0; i < arraysize(kKnownCompilerInfos); ++i) {
 505  E :      if (::wcscmp(kKnownCompilerInfos[i].compiler_name, compiler_name) == 0) {
 506  E :        return kKnownCompilerInfos[i].supported;
 507    :      }
 508  E :    }
 509    :  
 510    :    // Anything we don't explicitly know about is not supported.
 511  E :    VLOG(1) << "Encountered unknown compiler: " << compiler_name;
 512  E :    return false;
 513  E :  }
 514    :  
 515    :  // Logs an error if @p error is true, a verbose logging message otherwise.
 516    :  #define LOG_ERROR_OR_VLOG1(error) LAZY_STREAM( \
 517    :      ::logging::LogMessage(__FILE__, \
 518    :                            __LINE__, \
 519    :                            (error) ? ::logging::LOG_ERROR : -1).stream(), \
 520    :      (error ? LOG_IS_ON(ERROR) : VLOG_IS_ON(1)))
 521    :  
 522    :  // Logs a warning if @p warn is true, a verbose logging message otherwise.
 523    :  #define LOG_WARNING_OR_VLOG1(warn) LAZY_STREAM( \
 524    :      ::logging::LogMessage(__FILE__, \
 525    :                            __LINE__, \
 526    :                            (warn) ? ::logging::LOG_WARNING : -1).stream(), \
 527    :      (warn ? LOG_IS_ON(WARNING) : VLOG_IS_ON(1)))
 528    :  
  529    :  // Returns the abort directive (kDirectiveAbort) if @p strict is true, otherwise
  530    :  // returns the early-termination directive (kDirectiveTerminateWalk).
  531  i :  CallbackDirective AbortOrTerminateDisassembly(bool strict) {
  532  i :    if (strict)
  533  i :      return Disassembler::kDirectiveAbort;
  534  i :    else
  535  i :      return Disassembler::kDirectiveTerminateWalk;
  536  i :  }
 537    :  
  538    :  // Returns true if the callback-directive is an early termination that should be
  539    :  // returned immediately (kDirectiveTerminateWalk or kDirectiveAbort), false for kDirectiveContinue and kDirectiveTerminatePath.
  540  E :  bool IsFatalCallbackDirective(CallbackDirective directive) {
  541  E :    switch (directive) {
  542    :      case Disassembler::kDirectiveContinue:
  543    :      case Disassembler::kDirectiveTerminatePath:
  544  E :        return false;
  545    :  
  546    :      case Disassembler::kDirectiveTerminateWalk:
  547    :      case Disassembler::kDirectiveAbort:
  548  i :        return true;
  549    :  
  550    :      default:
  551  i :        NOTREACHED();
  552    :    }
  553    :  
  554  i :    return true;  // Only reached for an unknown directive, which is treated as fatal.
  555  E :  }
 556    :  
  557    :  // Combines two callback directives by returning the larger of the two. Higher codes supersede lower ones.
  558    :  CallbackDirective CombineCallbackDirectives(CallbackDirective d1,
  559  E :                                              CallbackDirective d2) {
  560    :    // This ensures that this logic remains valid. This should prevent people
  561    :    // from tinkering with CallbackDirective and breaking this code: the build fails unless Continue < TerminatePath < TerminateWalk < Abort.
  562    :    COMPILE_ASSERT(Disassembler::kDirectiveContinue <
  563    :                       Disassembler::kDirectiveTerminatePath &&
  564    :                   Disassembler::kDirectiveTerminatePath <
  565    :                       Disassembler::kDirectiveTerminateWalk &&
  566    :                   Disassembler::kDirectiveTerminateWalk <
  567    :                       Disassembler::kDirectiveAbort,
  568    :                   callback_directive_enum_is_not_sorted);
  569  E :    return std::max(d1, d2);
  570  E :  }
 571    :  
  572    :  // Determines if the given block has a label with the DATA_LABEL attribute in the half-open byte range [offset, offset + size).
  573    :  bool HasDataLabelInRange(const BlockGraph::Block* block,
  574    :                           BlockGraph::Offset offset,
  575  i :                           BlockGraph::Size size) {
  576    :    BlockGraph::Block::LabelMap::const_iterator it =
  577  i :        block->labels().lower_bound(offset);  // First label at or after the start of the range.
  578    :    BlockGraph::Block::LabelMap::const_iterator end =
  579  i :        block->labels().lower_bound(offset + size);  // First label at or after the end; it is not itself examined.
  580    :  
  581  i :    for (; it != end; ++it) {
  582  i :      if (it->second.has_attributes(BlockGraph::DATA_LABEL))
  583  i :        return true;
  584  i :    }
  585    :  
  586  i :    return false;
  587  i :  }
 588    :  
  589    :  void ReportPotentialNonReturningFunction(  // Logs a warning naming the target of the call whose reference lies at @p call_ref_offset in @p block, unless that target is already marked NON_RETURN_FUNCTION.
  590    :      const Decomposer::IntermediateReferenceMap& refs,
  591    :      const BlockGraph::AddressSpace& image,
  592    :      const BlockGraph::Block* block,
  593    :      BlockGraph::Offset call_ref_offset,
  594  i :      const char* reason) {  // @p reason is text inserted after "followed by" in the warning.
  595    :    typedef Decomposer::IntermediateReferenceMap::const_iterator RefIter;
  596    :  
  597    :    // Try and track down the block being pointed at by the call. If this is a
  598    :    // computed address there will be no reference.
  599  i :    RefIter ref_it = refs.find(block->addr() + call_ref_offset);
  600  i :    if (ref_it == refs.end()) {
  601  i :      LOG(WARNING) << "Suspected non-returning function call from offset "
  602    :                   << call_ref_offset << " (followed by " << reason
  603    :                   << ") of block \"" << block->name()
  604    :                   << "\", but target can not be tracked down.";
  605  i :      return;
  606    :    }
  607    :  
  608  i :    BlockGraph::Block* target = image.GetBlockByAddress(ref_it->second.base);
  609  i :    DCHECK(target != NULL);  // NOTE(review): only a DCHECK guards the dereferences of |target| below.
  610    :  
  611    :    // If this was marked as non-returning, then it's not suspicious.
  612  i :    if ((target->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
  613  i :      return;
  614    :  
  615    :    // If the target is a code block then this is a direct call.
  616  i :    if (target->type() == BlockGraph::CODE_BLOCK) {
  617  i :      LOG(WARNING) << "Suspected non-returning call from offset "
  618    :                   << call_ref_offset << " (followed by " << reason
  619    :                   << ") of block \"" << block->name() << "\" to code block \""
  620    :                   << target->name() << "\".";
  621  i :      return;
  622    :    }
  623    :    // Otherwise the target is a data block and this is a memory indirect call
  624    :    // to a thunk.
  625  i :    DCHECK_EQ(BlockGraph::DATA_BLOCK, target->type());
  626    :  
  627    :    // Track down the import thunk by following the reference stored at the call's destination address.
  628  i :    RefIter thunk_ref_it = refs.find(ref_it->second.base);
  629  i :    DCHECK(thunk_ref_it != refs.end());  // NOTE(review): only a DCHECK guards the dereference of |thunk_ref_it| on the next line.
  630  i :    BlockGraph::Block* thunk = image.GetBlockByAddress(thunk_ref_it->second.base);  // NOTE(review): |thunk| is dereferenced below without any NULL check -- confirm it can not be NULL.
  631    :  
  632    :    // If this was marked as non-returning, then it's not suspicious.
  633  i :    if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
  634  i :      return;
  635    :  
  636    :    // Otherwise, this is an import thunk. Report it by block name.
  637  i :    LOG(WARNING) << "Suspected non-returning call from offset "
  638    :                 << call_ref_offset << " (followed by " << reason
  639    :                 << ") of block \"" << block->name() << "\" to import thunk \""
  640    :                 << thunk->name() << "\".";
  641  i :  }
 642    :  
  643    :  void LookForNonReturningFunctions(  // Walks the instructions visited by @p disasm and reports calls that are followed by data, by int3, or by the end of a block disassembled past its end.
  644    :      const Decomposer::IntermediateReferenceMap& refs,
  645    :      const BlockGraph::AddressSpace& image,
  646    :      const BlockGraph::Block* block,
  647  i :      const Disassembler& disasm) {
  648  i :    bool saw_call = false;  // The previous instruction was a call.
  649  i :    bool saw_call_then_nop = false;  // A call followed by one or more nops has just been seen.
  650  i :    BlockGraph::Offset call_ref_offset = 0;  // Offset of the most recent call's operand.
  651    :  
  652  i :    AbsoluteAddress end_of_last_inst;  // Default-constructed until the first instruction has been processed.
  653    :    Disassembler::VisitedSpace::const_iterator inst_it =
  654  i :        disasm.visited().begin();
  655  i :    for (; inst_it != disasm.visited().end(); ++inst_it) {
  656    :      // Not contiguous with the last instruction? Then we're spanning a gap. If
  657    :      // it's an instruction then we didn't parse it; thus, we already know that
  658    :      // if the last instruction is a call it's to a non-returning function. So,
  659    :      // we only need to check for data.
  660  i :      if (inst_it->first.start() != end_of_last_inst) {
  661  i :        if (saw_call || saw_call_then_nop) {
  662  i :          BlockGraph::Offset offset = end_of_last_inst - disasm.code_addr();
  663  i :          BlockGraph::Size size = inst_it->first.start() - end_of_last_inst;
  664  i :          if (HasDataLabelInRange(block, offset, size))
  665    :            // We do not expect this to ever occur in cl.exe generated code.
  666    :            // However, it is entirely possible in hand-written assembly.
  667    :            ReportPotentialNonReturningFunction(
  668    :                refs, image, block, call_ref_offset,
  669  i :                saw_call ? "data" : "nop(s) and data");
  670    :        }
  671    :  
  672  i :        saw_call = false;  // A gap resets the pattern being tracked.
  673  i :        saw_call_then_nop = false;
  674    :      }
  675    :  
  676  i :      _DInst inst = { 0 };
  677  i :      BlockGraph::Offset offset = inst_it->first.start() - disasm.code_addr();
  678  i :      const uint8* code = disasm.code() + offset;
  679  i :      CHECK(core::DecodeOneInstruction(code, inst_it->first.size(), &inst));  // Re-decodes the visited range; a decode failure is fatal.
  680    :  
  681    :      // Previous instruction was a call?
  682  i :      if (saw_call) {
  683  i :        if (core::IsNop(inst)) {
  684  i :          saw_call_then_nop = true;
  685  i :        } else if (core::IsDebugInterrupt(inst)) {
  686    :          ReportPotentialNonReturningFunction(
  687  i :              refs, image, block, call_ref_offset, "int3");
  688    :        }
  689  i :        saw_call = false;
  690  i :      } else if (saw_call_then_nop) {
  691    :        // The previous instructions we've seen have been a call followed by
  692    :        // arbitrarily many nops. Look for another nop to continue the pattern.
  693  i :        saw_call_then_nop = core::IsNop(inst);
  694  i :      } else {
  695    :        // The previous instruction was not a call, so we're looking for one.
  696    :        // If this instruction is a call, remember that fact and also remember
  697    :        // the offset of its operand (the call target).
  698  i :        if (core::IsCall(inst)) {
  699  i :          saw_call = true;
  700    :          call_ref_offset = offset + inst_it->first.size() -
  701  i :              BlockGraph::Reference::kMaximumSize;  // Assumes the operand occupies the last kMaximumSize bytes of the instruction.
  702    :        }
  703    :      }
  704    :  
  705    :      // Remember the end of the last instruction we processed.
  706  i :      end_of_last_inst = inst_it->first.end();
  707  i :    }
  708    :  
  709    :    // If the last instruction was a call and we've marked that we've disassembled
  710    :    // past the end, then this is also a suspected non-returning function.
  711    :    if ((saw_call || saw_call_then_nop) &&
  712  i :        (block->attributes() & BlockGraph::DISASSEMBLED_PAST_END) != 0) {
  713  i :      const char* reason = saw_call ? "end of block" : "nop(s) and end of block";
  714    :      ReportPotentialNonReturningFunction(
  715  i :          refs, image, block, call_ref_offset, reason);
  716    :    }
  717  i :  }
 718    :  
           // Reports whether |block| has jump tables and all of them are aligned.
           //
           // Walks the labels of |block| and looks at those carrying the
           // JUMP_TABLE_LABEL attribute. Returns true only if at least one such
           // label exists and the key of every one of them (treated here as the
           // label's position in the block) is a multiple of kPointerSize. Returns
           // false if the block has no jump table labels at all, or as soon as a
           // misaligned one is encountered.
           //
           // |block| must be non-NULL and of type CODE_BLOCK (both are DCHECKed).
 719  E :  bool CodeBlockHasAlignedJumpTables(const BlockGraph::Block* block) {
 720  E :    DCHECK(block != NULL);
 721  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
 722    :  
 723    :    // Iterate over the labels of this block looking for jump tables.
 724  E :    bool has_jump_tables = false;
 725    :    BlockGraph::Block::LabelMap::const_iterator label_it =
 726  E :        block->labels().begin();
 727  E :    for (; label_it != block->labels().end(); ++label_it) {
 728  E :      if (!label_it->second.has_attributes(BlockGraph::JUMP_TABLE_LABEL))
 729  E :        continue;
 730    :  
 731  E :      has_jump_tables = true;
 732    :  
 733    :      // If the jump table is misaligned we can return false immediately.
 734  E :      if (label_it->first % kPointerSize != 0)
 735  i :        return false;
 736  E :    }
 737    :  
             // Every jump table seen (if any) was aligned; the result is therefore
             // simply whether we saw one.
 738  E :    return has_jump_tables;
 739  E :  }
 740    :  
           // Raises the explicit alignment of code blocks that contain aligned jump
           // tables.
           //
           // Visits every block in |image_layout->blocks|. A block is a candidate if
           // it is a code block, its current alignment is less than kPointerSize and
           // its start address is already a multiple of kPointerSize. Candidates for
           // which CodeBlockHasAlignedJumpTables() returns true get their alignment
           // set to kPointerSize.
           //
           // |image_layout| must be non-NULL (DCHECKed). Always returns true.
 741  E :  bool AlignCodeBlocksWithJumpTables(ImageLayout* image_layout) {
 742  E :    DCHECK(image_layout != NULL);
 743    :  
 744    :    BlockGraph::AddressSpace::RangeMapConstIter block_it =
 745  E :        image_layout->blocks.begin();
 746  E :    for (; block_it != image_layout->blocks.end(); ++block_it) {
 747  E :      BlockGraph::Block* block = block_it->second;
 748    :  
 749    :      // We only care about code blocks that are already aligned 0 mod 4 but
 750    :      // whose explicit alignment is currently less than that.
 751  E :      if (block->type() != BlockGraph::CODE_BLOCK)
 752  E :        continue;
 753  E :      if (block->alignment() >= kPointerSize)
 754  i :        continue;
 755  E :      if (block_it->first.start().value() % kPointerSize != 0)
 756  E :        continue;
 757    :  
 758    :      // Inspect them to see if they have aligned jump tables. If they do,
 759    :      // set the alignment of the block itself.
 760  E :      if (CodeBlockHasAlignedJumpTables(block_it->second))
 761  E :        block->set_alignment(kPointerSize);
 762  E :    }
 763    :  
 764  E :    return true;
 765  E :  }
 766    :  
 767    :  }  // namespace
 768    :  
           // Constructs a decomposer for |image_file|.
           //
           // Besides initializing members, the constructor registers the built-in
           // static initializer patterns, the functions that are forced to be
           // treated as non-returning, and the known non-returning imports. Each
           // registration is wrapped in a CHECK.
           //
           // NOTE(review): each pattern pair looks like a (begin, end) regular
           // expression pair sharing one capture group; confirm against
           // RegisterStaticInitializerPatterns.
 769    :  Decomposer::Decomposer(const PEFile& image_file)
 770    :      : image_(NULL),
 771    :        image_file_(image_file),
 772    :        current_block_(NULL),
 773  E :        be_strict_with_current_block_(true) {
 774    :    // Register static initializer patterns that we know are always present.
 775    :    // CRT C/C++/etc initializers.
 776  E :    CHECK(RegisterStaticInitializerPatterns("(__x.*)_a", "(__x.*)_z"));
 777    :    // RTC (run-time checks) initializers (part of CRT).
 778  E :    CHECK(RegisterStaticInitializerPatterns("(__rtc_[it])aa", "(__rtc_[it])zz"));
 779    :    // ATL object map initializers.
 780    :    CHECK(RegisterStaticInitializerPatterns("(__pobjMapEntry)First",
 781  E :                                            "(__pobjMapEntry)Last"));
 782    :    // Thread-local storage template.
 783  E :    CHECK(RegisterStaticInitializerPatterns("(_tls_)start", "(_tls_)end"));
 784    :  
 785    :    // Register non-returning functions that for some reason the symbols lie to
 786    :    // us about.
 787  E :    CHECK(RegisterNonReturningFunction("_CxxThrowException"));
 788  E :    CHECK(RegisterNonReturningFunction("_longjmp"));
 789    :  
 790    :    // Register non-returning imports that we know about.
 791  E :    CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitProcess"));
 792  E :    CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitThread"));
 793  E :  }
 794    :  
           // Decomposes the image into |image_layout|.
           //
           // Overview of the steps, in order:
           //   1. Locate and validate the PDB (FindAndValidatePdbPath).
           //   2. Try to load an already-serialized block graph from the PDB. If
           //      that succeeds we are done; if the stream exists but could not be
           //      loaded we fail.
           //   3. Create a DIA source and session for the PDB, set the load address
           //      to the image base and fetch the global scope symbol.
           //   4. Run the decomposition passes below. Every pass is guarded by
           //      |success|, so after the first failing pass all remaining passes
           //      are skipped.
           //
           // |image_| points at |image_layout->blocks| while the passes run and is
           // reset to NULL before returning from that phase. The early returns in
           // steps 1-3 happen before |image_| is assigned.
           //
           // Returns true on success, false otherwise.
 795  E :  bool Decomposer::Decompose(ImageLayout* image_layout) {
 796    :    // We start by finding the PDB path.
 797  E :    if (!FindAndValidatePdbPath())
 798  E :      return false;
 799  E :    DCHECK(!pdb_path_.empty());
 800    :  
 801    :    // Check if the block-graph has already been serialized into the PDB and load
 802    :    // it from here in this case. This allows round-trip decomposition.
 803  E :    bool stream_exists = false;
 804    :    if (LoadBlockGraphFromPdb(pdb_path_, image_file_, image_layout,
 805  E :                              &stream_exists)) {
 806  E :      return true;
 807  i :    } else {
 808    :      // If the stream exists but hasn't been loaded we return an error. At this
 809    :      // point an error message has already been logged if there was one.
 810  E :      if (stream_exists)
 811  i :        return false;
 812    :    }
 813    :  
 814    :    // Move on to instantiating and initializing our Debug Interface Access
 815    :    // session.
 816  E :    ScopedComPtr<IDiaDataSource> dia_source;
 817  E :    if (!CreateDiaSource(dia_source.Receive()))
 818  i :      return false;
 819    :  
 820    :    // We create the session using the PDB file directly, as we've already
 821    :    // validated that it matches the module.
 822  E :    ScopedComPtr<IDiaSession> dia_session;
 823    :    if (!CreateDiaSession(pdb_path_,
 824    :                          dia_source.get(),
 825  E :                          dia_session.Receive())) {
 826  i :      return false;
 827    :    }
 828    :  
             // Tell DIA where the image is based, using the ImageBase from the PE
             // optional header.
 829    :    HRESULT hr = dia_session->put_loadAddress(
 830  E :        image_file_.nt_headers()->OptionalHeader.ImageBase);
 831  E :    if (hr != S_OK) {
 832  i :      LOG(ERROR) << "Failed to set the DIA load address: "
 833    :                 << com::LogHr(hr) << ".";
 834  i :      return false;
 835    :    }
 836    :  
             // The global scope symbol is the root handed to the symbol-processing
             // passes below.
 837  E :    ScopedComPtr<IDiaSymbol> global;
 838  E :    hr = dia_session->get_globalScope(global.Receive());
 839  E :    if (hr != S_OK) {
 840  i :      LOG(ERROR) << "Failed to get the DIA global scope: "
 841    :                 << com::LogHr(hr) << ".";
 842  i :      return false;
 843    :    }
 844    :  
             // From here on the member functions operate on the layout's address
             // space through |image_|.
 845  E :    image_ = &image_layout->blocks;
 846    :  
 847    :    // Create the sections for the image.
 848  E :    bool success = CreateSections();
 849    :  
 850    :    // Load FIXUP information from the PDB file. We do this early on so that we
 851    :    // can do accounting with references that are created later on.
 852  E :    if (success)
 853  E :      success = LoadDebugStreams(dia_session);
 854    :  
 855    :    // Create intermediate references for each fixup entry.
 856  E :    if (success)
 857  E :      success = CreateReferencesFromFixups();
 858    :  
 859    :    // Chunk out important PE image structures, like the headers and such.
 860  E :    PEFileParser::PEHeader header;
 861  E :    if (success)
 862  E :      success = CreatePEImageBlocksAndReferences(&header);
 863    :  
 864    :    // Parse and validate the relocation entries.
 865  E :    if (success)
 866  E :      success = ParseRelocs();
 867    :  
 868    :    // Our first round of parsing is using section contributions. This creates
 869    :    // both code and data blocks.
 870  E :    if (success)
 871  E :      success = CreateBlocksFromSectionContribs(dia_session);
 872    :  
 873    :    // Process the function and thunk symbols in the image. This does not create
 874    :    // any blocks, as all functions are covered by section contributions.
 875  E :    if (success)
 876  E :      success = ProcessCodeSymbols(global);
 877    :  
 878    :    // Process data symbols. This can cause the creation of some blocks as the
 879    :    // data sections are not fully covered by section contributions.
 880  E :    if (success)
 881  E :      success = ProcessDataSymbols(global);
 882    :  
 883    :    // Create labels in code blocks.
 884  E :    if (success)
 885  E :      success = CreateGlobalLabels(global);
 886    :  
 887    :    // Create gap blocks. This ensures that we have complete coverage of the
 888    :    // entire image.
 889  E :    if (success)
 890  E :      success = CreateGapBlocks();
 891    :  
 892    :    // Parse public symbols, augmenting code and data labels where possible.
 893    :    // Some public symbols land on gap blocks, so they need to have been parsed
 894    :    // already.
 895  E :    if (success)
 896  E :      success = ProcessPublicSymbols(global);
 897    :  
 898    :    // Parse initialization bracketing symbols. This needs to happen after
 899    :    // PublicSymbols have been parsed.
 900  E :    if (success)
 901  E :      success = ProcessStaticInitializers();
 902    :  
 903    :    // We know that some data blocks need to have alignment precisely preserved.
 904    :    // For now, we very conservatively (guaranteed to be correct, but causes many
 905    :    // blocks to be aligned that don't strictly need alignment) guess alignment
 906    :    // for each block. This must be run after static initializers have been
 907    :    // parsed.
 908  E :    if (success)
 909  E :      success = GuessDataBlockAlignments();
 910    :  
 911    :    // Disassemble code blocks and create PC-relative references
 912  E :    if (success)
 913  E :      success = CreateCodeReferences();
 914    :  
 915    :    // Turn the address->address format references we've created into
 916    :    // block->block references on the blocks in the image.
 917  E :    if (success)
 918  E :      success = FinalizeIntermediateReferences();
 919    :  
 920    :    // Everything called after this point requires the references to have been
 921    :    // finalized.
 922    :  
 923    :    // One way of ensuring full coverage is to check that all of the fixups
 924    :    // were visited during decomposition.
 925  E :    if (success)
 926  E :      success = ConfirmFixupsVisited();
 927    :  
 928    :    // Now, find and label any padding blocks.
 929  E :    if (success)
 930  E :      success = FindPaddingBlocks();
 931    :  
 932    :    // Copy the image headers over to the layout.
 933  E :    if (success)
 934  E :      success = CopyHeaderToImageLayout(header.nt_headers, image_layout);
 935    :  
 936    :    // Set the alignment on code blocks with jump tables. This ensures that the
 937    :    // jump tables remain aligned post-transform.
 938  E :    if (success)
 939  E :      success = AlignCodeBlocksWithJumpTables(image_layout);
 940    :  
             // Drop the pointer into the caller's layout regardless of the outcome.
 941  E :    image_ = NULL;
 942    :  
 943  E :    return success;
 944  E :  }
 945    :  
           // Ensures |pdb_path_| names an existing PDB file that matches the image.
           //
           // If |pdb_path_| is empty it is first looked up with FindPdbForModule()
           // using the image's path. The path must then exist on disk, its PDB info
           // header must be readable, the PDB info embedded in the PE file must be
           // readable, and the two must be consistent with each other.
           //
           // Returns true if all of the above hold; otherwise logs an error and
           // returns false.
 946  E :  bool Decomposer::FindAndValidatePdbPath() {
 947    :    // Manually find the PDB path if it is not specified.
 948  E :    if (pdb_path_.empty()) {
               // A successful lookup that still leaves the path empty is treated as
               // a failure too.
 949    :      if (!FindPdbForModule(image_file_.path(), &pdb_path_) ||
 950  E :          pdb_path_.empty()) {
 951  i :        LOG(ERROR) << "Unable to find PDB file for module: "
 952    :                   << image_file_.path().value();
 953  i :        return false;
 954    :      }
 955    :    }
 956  E :    DCHECK(!pdb_path_.empty());
 957    :  
 958  E :    if (!file_util::PathExists(pdb_path_)) {
 959  E :      LOG(ERROR) << "Path not found: " << pdb_path_.value();
 960  E :      return false;
 961    :    }
 962    :  
 963    :    // Get the PDB info from the PDB file.
 964    :    pdb::PdbInfoHeader70 pdb_info_header;
 965  E :    if (!pdb::ReadPdbHeader(pdb_path_, &pdb_info_header)) {
 966  i :      LOG(ERROR) << "Unable to read PDB info header from PDB file: "
 967    :                 << pdb_path_.value();
 968  i :      return false;
 969    :    }
 970    :  
 971    :    // Get the PDB info from the module.
 972  E :    PdbInfo pdb_info;
 973  E :    if (!pdb_info.Init(image_file_)) {
 974  i :      LOG(ERROR) << "Unable to read PDB info from PE file: "
 975    :                 << image_file_.path().value();
 976  i :      return false;
 977    :    }
 978    :  
 979    :    // Ensure that they are consistent.
 980  E :    if (!pdb_info.IsConsistent(pdb_info_header)) {
 981  i :      LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
 982    :                 << "module \"" << image_file_.path().value() << "\".";
 983  i :      return false;
 984    :    }
 985    :  
 986  E :    return true;
 987  E :  }
 988    :  
           // Processes the code symbols reachable from the |global| scope symbol:
           // first the function symbols, then the thunk symbols. Stops at the first
           // pass that fails. Returns true if both passes succeed.
 989  E :  bool Decomposer::ProcessCodeSymbols(IDiaSymbol* global) {
 990  E :    if (!ProcessFunctionSymbols(global))
 991  i :      return false;
 992  E :    if (!ProcessThunkSymbols(global))
 993  i :      return false;
 994    :  
 995  E :    return true;
 996  E :  }
 997    :  
           // Enumerates the SymTagFunction children of |global| and hands each one
           // to ProcessFunctionOrThunkSymbol().
           //
           // |global| is expected to be the SymTagExe (global scope) symbol; this is
           // DCHECKed. Returns false if the enumerator cannot be obtained, its
           // length cannot be read, enumeration fails, or processing of any function
           // fails; returns true otherwise.
 998  E :  bool Decomposer::ProcessFunctionSymbols(IDiaSymbol* global) {
 999  E :    DCHECK(IsSymTag(global, SymTagExe));
1000    :  
1001    :    // Enumerate the function symbols that are children of the global scope.
1002  E :    ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
1003    :    HRESULT hr = global->findChildren(SymTagFunction,
1004    :                                      NULL,
1005    :                                      nsNone,
1006  E :                                      dia_enum_symbols.Receive());
1007  E :    if (hr != S_OK) {
1008  i :      LOG(ERROR) << "Failed to get the DIA function enumerator: "
1009    :                 << com::LogHr(hr) << ".";
1010  i :      return false;
1011    :    }
1012    :  
1013  E :    LONG count = 0;
1014  E :    if (dia_enum_symbols->get_Count(&count) != S_OK) {
1015  i :      LOG(ERROR) << "Failed to get function enumeration length.";
1016  i :      return false;
1017    :    }
1018    :  
             // The loop is bounded by the reported count. Within that bound any
             // result other than S_OK from Next() (including S_FALSE) is treated as
             // an error.
1019  E :    for (LONG visited = 0; visited < count; ++visited) {
1020  E :      ScopedComPtr<IDiaSymbol> function;
1021  E :      ULONG fetched = 0;
1022  E :      hr = dia_enum_symbols->Next(1, function.Receive(), &fetched);
1023  E :      if (hr != S_OK) {
1024  i :        LOG(ERROR) << "Failed to enumerate functions: " << com::LogHr(hr) << ".";
1025  i :        return false;
1026    :      }
               // Defensive: stop quietly if nothing was fetched despite S_OK.
1027  E :      if (fetched == 0)
1028  i :        break;
1029    :  
1030    :      // Create the block representing the function.
1031  E :      DCHECK(IsSymTag(function, SymTagFunction));
1032  E :      if (!ProcessFunctionOrThunkSymbol(function))
1033  i :        return false;
1034  E :    }
1035    :  
1036  E :    return true;
1037  E :  }
1038    :  
           // Annotates the block that contains a function or thunk symbol.
           //
           // |function| must be a SymTagFunction or SymTagThunk symbol (DCHECKed).
           // Symbols whose location type is not LocIsStatic are skipped and count as
           // success. For the others the symbol's RVA, length and name are read, the
           // block at that RVA is looked up in |image_| and must extend at least to
           // the end of the symbol. The block then receives:
           //   - a CODE_LABEL with the symbol's name at the symbol's address,
           //   - NON_RETURN_FUNCTION, HAS_INLINE_ASSEMBLY, HAS_EXCEPTION_HANDLING and
           //     THUNK attributes as applicable,
           //   - labels for the symbol's children (CreateLabelsForFunction).
           //
           // Returns false, after logging, if any required property cannot be read,
           // no suitable block is found or label creation fails.
1039  E :  bool Decomposer::ProcessFunctionOrThunkSymbol(IDiaSymbol* function) {
1040  E :    DCHECK(IsSymTag(function, SymTagFunction) || IsSymTag(function, SymTagThunk));
1041    :  
1042  E :    DWORD location_type = LocIsNull;
1043  E :    HRESULT hr = E_FAIL;
1044  E :    if (FAILED(hr = function->get_locationType(&location_type))) {
1045  i :      LOG(ERROR) << "Failed to retrieve function address type: "
1046    :                 << com::LogHr(hr) << ".";
1047  i :      return false;
1048    :    }
             // Only statically located symbols are processed. The only other
             // location type expected here is LocIsNull, and such symbols are
             // silently ignored.
1049  E :    if (location_type != LocIsStatic) {
1050  i :      DCHECK_EQ(static_cast<DWORD>(LocIsNull), location_type);
1051  i :      return true;
1052    :    }
1053    :  
             // These three properties are mandatory; anything other than S_OK is an
             // error. |hr| holds the result of the first call that did not return
             // S_OK.
1054  E :    DWORD rva = 0;
1055  E :    ULONGLONG length = 0;
1056  E :    ScopedBstr name;
1057    :    if ((hr = function->get_relativeVirtualAddress(&rva)) != S_OK ||
1058    :        (hr = function->get_length(&length)) != S_OK ||
1059  E :        (hr = function->get_name(name.Receive())) != S_OK) {
1060  i :      LOG(ERROR) << "Failed to retrieve function information: "
1061    :                 << com::LogHr(hr) << ".";
1062  i :      return false;
1063    :    }
1064    :  
1065    :    // Certain properties are not defined on all blocks, so the following calls
1066    :    // may return S_FALSE.
             // In that case (or on failure) the flag is forced back to FALSE.
1067  E :    BOOL no_return = FALSE;
1068  E :    if (function->get_noReturn(&no_return) != S_OK)
1069  E :      no_return = FALSE;
1070    :  
1071  E :    BOOL has_inl_asm = FALSE;
1072  E :    if (function->get_hasInlAsm(&has_inl_asm) != S_OK)
1073  E :      has_inl_asm = FALSE;
1074    :  
1075  E :    BOOL has_eh = FALSE;
1076  E :    if (function->get_hasEH(&has_eh) != S_OK)
1077  E :      has_eh = FALSE;
1078    :  
1079  E :    BOOL has_seh = FALSE;
1080  E :    if (function->get_hasSEH(&has_seh) != S_OK)
1081  E :      has_seh = FALSE;
1082    :  
1083  E :    std::string block_name;
1084  E :    if (!WideToUTF8(name, name.Length(), &block_name)) {
1085  i :      LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1086  i :      return false;
1087    :    }
1088    :  
1089    :    // Find the block to which this symbol maps, and ensure it fully covers the
1090    :    // symbol.
1091  E :    RelativeAddress block_addr(rva);
1092  E :    BlockGraph::Block* block = image_->GetBlockByAddress(block_addr);
1093  E :    if (block == NULL) {
1094  i :      LOG(ERROR) << "No block found for function/thunk symbol \""
1095    :                 << block_name << "\".";
1096  i :      return false;
1097    :    }
             // Note that |block_addr| is the symbol's address, which need not be
             // the start of the block; only the end of the symbol is checked.
1098  E :    if (block->addr() + block->size() < block_addr + length) {
1099  i :      LOG(ERROR) << "Section contribution \"" << block->name() << "\" does not "
1100    :                 << "fully cover function/thunk symbol \"" << block_name << "\".";
1101  i :      return false;
1102    :    }
1103    :  
1104    :    // Annotate the block with a label, as this is an entry point to it. This is
1105    :    // the routine that adds labels, so there should never be any collisions.
1106  E :    CHECK(AddLabelToBlock(block_addr, block_name, BlockGraph::CODE_LABEL, block));
1107    :  
1108    :    // If we didn't get an explicit no-return flag from the symbols check our
1109    :    // list of exceptions.
             // NOTE(review): the lookup key is the block's name, not the symbol
             // name read above; confirm that this is intended.
1110  E :    if (no_return == FALSE && non_returning_functions_.count(block->name()) > 0) {
1111  E :      VLOG(1) << "Forcing non-returning attribute on function \""
1112    :              << block->name() << "\".";
1113  E :      no_return = TRUE;
1114    :    }
1115    :  
1116    :    // Set the block attributes.
1117  E :    if (no_return == TRUE)
1118  E :      block->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
1119  E :    if (has_inl_asm == TRUE)
1120  E :      block->set_attribute(BlockGraph::HAS_INLINE_ASSEMBLY);
1121  E :    if (has_eh || has_seh)
1122  E :      block->set_attribute(BlockGraph::HAS_EXCEPTION_HANDLING);
1123  E :    if (IsSymTag(function, SymTagThunk))
1124  E :      block->set_attribute(BlockGraph::THUNK);
1125    :  
1126  E :    if (!CreateLabelsForFunction(function, block)) {
1127  i :      LOG(ERROR) << "Failed to create labels for '" << block->name() << "'.";
1128  i :      return false;
1129    :    }
1130    :  
1131  E :    return true;
1132  E :  }
1133    :  
           // Adds labels to |block| for the child symbols of |function|.
           //
           // All children of |function| are enumerated. A child is skipped if it has
           // no RVA, if it is a SymTagInlineSite (when built with VS 2012 or later),
           // or if its RVA lies outside of |block|. For the remaining children a
           // label is added at the child's address with attributes derived from its
           // symbol tag; unnamed children get a synthesized name. SymTagBlock
           // children additionally get a "<scope-end>" label at their start address
           // plus their length.
           //
           // Both |function| and |block| must be non-NULL (DCHECKed), and |block|
           // must have an address in |image_|. Returns false, after logging, on any
           // failure; returns true otherwise.
1134    :  bool Decomposer::CreateLabelsForFunction(IDiaSymbol* function,
1135  E :                                           BlockGraph::Block* block) {
1136  E :    DCHECK(function != NULL);
1137  E :    DCHECK(block != NULL);
1138    :  
1139    :    // Lookup the block address.
1140  E :    RelativeAddress block_addr;
1141  E :    if (!image_->GetAddressOf(block, &block_addr)) {
1142  i :      NOTREACHED() << "Block " << block->name() << " has no address.";
1143  i :      return false;
1144    :    }
1145    :  
1146    :    // Enumerate all symbols which are children of function.
1147  E :    ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
1148    :    HRESULT hr = function->findChildren(SymTagNull,
1149    :                                        NULL,
1150    :                                        nsNone,
1151  E :                                        dia_enum_symbols.Receive());
1152  E :    if (FAILED(hr)) {
1153  i :      LOG(ERROR) << "Failed to get the DIA label enumerator: "
1154    :                 << com::LogHr(hr) << ".";
1155  i :      return false;
1156    :    }
1157    :  
1158  E :    while (true) {
1159  E :      ScopedComPtr<IDiaSymbol> symbol;
1160  E :      ULONG fetched = 0;
1161  E :      hr = dia_enum_symbols->Next(1, symbol.Receive(), &fetched);
1162  E :      if (FAILED(hr)) {
1163  i :        LOG(ERROR) << "Failed to enumerate the DIA symbol: "
1164    :                   << com::LogHr(hr) << ".";
1165  i :        return false;
1166    :      }
               // A non-failure result other than S_OK, or an empty fetch, ends the
               // enumeration.
1167  E :      if (hr != S_OK || fetched == 0)
1168  E :        break;
1169    :  
1170    :      // If it doesn't have an RVA then it's not interesting to us.
1171  E :      DWORD temp_rva = 0;
1172  E :      if (symbol->get_relativeVirtualAddress(&temp_rva) != S_OK)
1173  E :        continue;
1174    :  
1175    :      // Get the type of symbol we're looking at.
1176  E :      DWORD temp_sym_tag = 0;
1177  E :      if (symbol->get_symTag(&temp_sym_tag) != S_OK) {
1178  i :        LOG(ERROR) << "Failed to retrieve label information.";
1179  i :        return false;
1180    :      }
1181    :  
1182  E :      enum SymTagEnum sym_tag = static_cast<enum SymTagEnum>(temp_sym_tag);
1183    :  
1184    :  #if defined(_MSC_VER) && (_MSC_VER >= 1700)
1185    :      // Since VS 2012 there's some new symbols exposed by DIA which are not
1186    :      // handled at the moment.
1187    :      // TODO(sebmarchand): Handle those symbols.
1188    :      if (sym_tag == SymTagInlineSite)
1189    :        continue;
1190    :  #endif
1191    :  
1192  E :      BlockGraph::LabelAttributes label_attr = SymTagToLabelAttributes(sym_tag);
1193    :  
1194    :      // TODO(rogerm): Add a flag to include/exclude the symbol types that are
1195    :      //     interesting for debugging purposes, but not actually needed for
1196    :      //     decomposition: FuncDebugStart/End, Block, etc.
1197    :  
1198    :      // We ignore labels that fall outside of the code block. We sometimes
1199    :      // get labels at the end of a code block, and if the binary has any OMAP
1200    :      // information these follow the original successor block, and they can
1201    :      // end up most anywhere in the binary.
1202  E :      RelativeAddress label_rva(temp_rva);
1203  E :      if (label_rva < block_addr || label_rva >= block_addr + block->size())
1204  E :        continue;
1205    :  
1206    :      // Extract the symbol's name.
               // If get_name() does not return S_OK the name is simply left empty
               // and a placeholder is generated below.
1207  E :      std::string label_name;
1208    :      {
1209  E :        ScopedBstr temp_name;
1210    :        if (symbol->get_name(temp_name.Receive()) == S_OK &&
1211  E :            !WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1212  i :          LOG(ERROR) << "Failed to convert label name to UTF8.";
1213  i :          return false;
1214    :        }
1215  E :      }
1216    :  
1217    :      // Not all symbols have a name, if we've found one without a name, make
1218    :      // one up.
1219  E :      BlockGraph::Offset offset = label_rva - block_addr;
1220  E :      if (label_name.empty()) {
1221  E :        switch (sym_tag) {
1222    :          case SymTagFuncDebugStart: {
1223  E :            label_name = "<debug-start>";
1224  E :            break;
1225    :          }
1226    :  
1227    :          case SymTagFuncDebugEnd: {
1228  E :            label_name = "<debug-end>";
1229  E :            break;
1230    :          }
1231    :  
                   // Unnamed data inside a function is classified by whether a
                   // reloc exists at its address: with a reloc it is labeled as a
                   // jump table, otherwise as a case table.
1232    :          case SymTagData: {
1233  E :            if (reloc_set_.count(label_rva)) {
1234  E :              label_name = base::StringPrintf("<jump-table-%d>", offset);
1235  E :              label_attr |= BlockGraph::JUMP_TABLE_LABEL;
1236  E :            } else {
1237  E :              label_name = base::StringPrintf("<case-table-%d>", offset);
1238  E :              label_attr |= BlockGraph::CASE_TABLE_LABEL;
1239    :            }
1240  E :            break;
1241    :          }
1242    :  
1243    :          case SymTagBlock: {
1244  E :            label_name = "<scope-start>";
1245  E :            break;
1246    :          }
1247    :  
1248    :          // The DIA SDK shipping with MSVS 2010 includes additional symbol types.
1249    :          case SymTagCallSite: {
1250  E :            label_name = "<call-site>";
1251  E :            break;
1252    :          }
1253    :  
                   // Any other unnamed symbol is still labeled, with a name built
                   // from its numeric tag, after logging a warning.
1254    :          default: {
1255  i :            LOG(WARNING) << "Unexpected symbol type " << sym_tag << " in "
1256    :                         << block->name() << " at "
1257    :                         << base::StringPrintf("0x%08X.", label_rva.value());
1258  i :            label_name = base::StringPrintf("<anonymous-%d>", sym_tag);
1259    :          }
1260    :        }
1261    :      }
1262    :  
1263    :      // We expect that we'll never see a code label that refers to a reloc.
1264    :      // This happens sometimes, however, as we generally get a code label for
1265    :      // the first byte after a switch statement. This can sometimes land on the
1266    :      // following jump table.
1267  E :      if ((label_attr & BlockGraph::CODE_LABEL) && reloc_set_.count(label_rva)) {
1268  E :        VLOG(1) << "Collision between reloc and code label in "
1269    :                << block->name() << " at " << label_name
1270    :                << base::StringPrintf(" (0x%08X).", label_rva.value())
1271    :                << " Falling back to data label.";
                 // The attributes derived from the symbol tag are replaced
                 // wholesale, not merged.
1272  E :        label_attr = BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL;
1273  E :        DCHECK_EQ(block_addr, block->addr());
                 // Drop any existing non-data label at this offset before the
                 // label is added below.
1274  E :        BlockGraph::Label label;
1275    :        if (block->GetLabel(offset, &label) &&
1276  E :            !label.has_attributes(BlockGraph::DATA_LABEL)) {
1277  i :          VLOG(1) << block->name() << ": Replacing label " << label.name()
1278    :                  << " ("
1279    :                  << BlockGraph::LabelAttributesToString(label.attributes())
1280    :                  << ") at offset " << offset << ".";
1281  i :          block->RemoveLabel(offset);
1282    :        }
1283  E :      }
1284    :  
1285    :      // Add the label to the block.
1286  E :      if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1287  i :        LOG(ERROR) << "Failed to add label to code block.";
1288  i :        return false;
1289    :      }
1290    :  
1291    :      // Is this a scope? Then it also has a length. Use it to create the matching
1292    :      // scope end.
               // NOTE(review): the range check above only covered the start
               // address. The end address computed here is not re-checked against
               // the block bounds; presumably AddLabelToBlock copes with that.
               // Confirm.
1293  E :      if (sym_tag == SymTagBlock) {
1294  E :        ULONGLONG length = 0;
1295  E :        if (symbol->get_length(&length) != S_OK) {
1296  i :          LOG(ERROR) << "Failed to extract code scope length for "
1297    :                     << block->name();
1298  i :          return false;
1299    :        }
1300  E :        label_rva += length;
1301  E :        label_name = "<scope-end>";
1302  E :        label_attr = BlockGraph::SCOPE_END_LABEL;
1303  E :        if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1304  i :          LOG(ERROR) << "Failed to add label to code block.";
1305  i :          return false;
1306    :        }
1307    :      }
1308  E :    }
1309    :  
1310  E :    return true;
1311  E :  }
1312    :  
           // Processes every thunk symbol found under the compilands of |globals|.
           //
           // Thunks are looked up per compiland: the SymTagCompiland children of
           // |globals| are enumerated, and for each compiland its SymTagThunk
           // children are enumerated and passed to ProcessFunctionOrThunkSymbol().
           //
           // Returns false, after logging, if an enumerator cannot be obtained or
           // advanced, or if processing of any thunk fails; returns true otherwise.
1313  E :  bool Decomposer::ProcessThunkSymbols(IDiaSymbol* globals) {
1314  E :    ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1315    :    HRESULT hr = globals->findChildren(SymTagCompiland,
1316    :                                       NULL,
1317    :                                       nsNone,
1318  E :                                       enum_compilands.Receive());
1319  E :    if (FAILED(hr)) {
1320  i :      LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1321    :                 << com::LogHr(hr) << ".";
1322  i :      return false;
1323    :    }
1324    :  
             // Outer loop: one iteration per compiland.
1325  E :    while (true) {
1326  E :      ScopedComPtr<IDiaSymbol> compiland;
1327  E :      ULONG fetched = 0;
1328  E :      hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1329  E :      if (FAILED(hr)) {
1330  i :        LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1331    :                   << com::LogHr(hr) << ".";
1332  i :        return false;
1333    :      }
               // A non-failure result other than S_OK, or an empty fetch, means
               // there are no more compilands.
1334  E :      if (hr != S_OK || fetched == 0)
1335  E :        break;
1336    :  
1337  E :      ScopedComPtr<IDiaEnumSymbols> enum_thunks;
1338    :      hr = compiland->findChildren(SymTagThunk,
1339    :                                   NULL,
1340    :                                   nsNone,
1341  E :                                   enum_thunks.Receive());
1342  E :      if (FAILED(hr)) {
1343  i :        LOG(ERROR) << "Failed to retrieve thunk enumerator: "
1344    :                   << com::LogHr(hr) << ".";
1345  i :        return false;
1346    :      }
1347    :  
               // Inner loop: one iteration per thunk in this compiland. |fetched|
               // is reused from the outer loop.
1348  E :      while (true) {
1349  E :        ScopedComPtr<IDiaSymbol> thunk;
1350  E :        hr = enum_thunks->Next(1, thunk.Receive(), &fetched);
1351  E :        if (FAILED(hr)) {
1352  i :          LOG(ERROR) << "Failed to enumerate thunk enumerator: "
1353    :                     << com::LogHr(hr) << ".";
1354  i :          return false;
1355    :        }
1356  E :        if (hr != S_OK || fetched == 0)
1357  E :          break;
1358    :  
1359  E :        DCHECK(IsSymTag(thunk, SymTagThunk));
1360    :  
1361  E :        if (!ProcessFunctionOrThunkSymbol(thunk))
1362  i :          return false;
1363  E :      }
1364  E :    }
1365    :  
1366  E :    return true;
1367  E :  }
1368    :  
           // Adds a CODE_LABEL for every SymTagLabel symbol found under the
           // compilands of |globals|.
           //
           // The SymTagCompiland children of |globals| are enumerated, and for each
           // compiland its SymTagLabel children. Every label must have both an RVA
           // and a name, and its RVA must fall inside a block known to |image_|;
           // the label is then added to that block.
           //
           // Returns false, after logging, if an enumerator cannot be obtained or
           // advanced, a label's address or name cannot be read or converted, no
           // block contains the label, or the label cannot be added. Returns true
           // otherwise.
1369  E :  bool Decomposer::CreateGlobalLabels(IDiaSymbol* globals) {
1370  E :    ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1371    :    HRESULT hr = globals->findChildren(SymTagCompiland,
1372    :                                       NULL,
1373    :                                       nsNone,
1374  E :                                       enum_compilands.Receive());
1375  E :    if (FAILED(hr)) {
1376  i :      LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1377    :                 << com::LogHr(hr) << ".";
1378  i :      return false;
1379    :    }
1380    :  
             // Outer loop: one iteration per compiland.
1381  E :    while (true) {
1382  E :      ScopedComPtr<IDiaSymbol> compiland;
1383  E :      ULONG fetched = 0;
1384  E :      hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1385  E :      if (FAILED(hr)) {
1386  i :        LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1387    :                   << com::LogHr(hr) << ".";
1388  i :        return false;
1389    :      }
               // A non-failure result other than S_OK, or an empty fetch, means
               // there are no more compilands.
1390  E :      if (hr != S_OK || fetched == 0)
1391  E :        break;
1392    :  
1393  E :      ScopedComPtr<IDiaEnumSymbols> enum_labels;
1394    :      hr = compiland->findChildren(SymTagLabel,
1395    :                                   NULL,
1396    :                                   nsNone,
1397  E :                                   enum_labels.Receive());
1398  E :      if (FAILED(hr)) {
1399  i :        LOG(ERROR) << "Failed to retrieve label enumerator: "
1400    :                   << com::LogHr(hr) << ".";
1401  i :        return false;
1402    :      }
1403    :  
               // Inner loop: one iteration per label in this compiland. |fetched|
               // is reused from the outer loop.
1404  E :      while (true) {
1405  E :        ScopedComPtr<IDiaSymbol> label;
1406  E :        hr = enum_labels->Next(1, label.Receive(), &fetched);
1407  E :        if (FAILED(hr)) {
1408  i :          LOG(ERROR) << "Failed to enumerate label enumerator: "
1409    :                     << com::LogHr(hr) << ".";
1410  i :          return false;
1411    :        }
1412  E :        if (hr != S_OK || fetched == 0)
1413  E :          break;
1414    :  
1415  E :        DCHECK(IsSymTag(label, SymTagLabel));
1416    :  
                 // Both the address and the name are required here; a label
                 // lacking either one is an error and is not skipped.
1417  E :        DWORD addr = 0;
1418  E :        ScopedBstr temp_name;
1419    :        if (label->get_relativeVirtualAddress(&addr) != S_OK ||
1420  E :            label->get_name(temp_name.Receive()) != S_OK) {
1421  i :          LOG(ERROR) << "Failed to retrieve label address or name.";
1422  i :          return false;
1423    :        }
1424    :  
1425  E :        std::string label_name;
1426  E :        if (!WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1427  i :          LOG(ERROR) << "Failed to convert label name to UTF8.";
1428  i :          return false;
1429    :        }
1430    :  
1431  E :        RelativeAddress label_addr(addr);
1432  E :        BlockGraph::Block* block = image_->GetBlockByAddress(label_addr);
1433  E :        if (block == NULL) {
1434  i :          LOG(ERROR) << "No block for label " << label_name << " at " << addr;
1435  i :          return false;
1436    :        }
1437    :  
1438    :        if (!AddLabelToBlock(label_addr,
1439    :                             label_name,
1440    :                             BlockGraph::CODE_LABEL,
1441  E :                             block)) {
1442  i :          LOG(ERROR) << "Failed to add label to code block.";
1443  i :          return false;
1444    :        }
1445  E :      }
1446  E :    }
1447    :  
1448  E :    return true;
1449  E :  }
1450    :  
1451    :  bool Decomposer::CreateGapBlock(BlockGraph::BlockType block_type,
1452    :                                  RelativeAddress address,
1453  E :                                  BlockGraph::Size size) {
             // Creates a block of |block_type| covering |size| bytes at |address|,
             // named "Gap Block 0x<address>", and marks it with the GAP_BLOCK
             // attribute. kExpectNoBlock is passed to FindOrCreateBlock, i.e. the
             // caller expects the range to be unoccupied.
             // Returns false (after logging) if no block is returned.
1454    :    BlockGraph::Block* block = FindOrCreateBlock(block_type, address, size,
1455    :        base::StringPrintf("Gap Block 0x%08X", address.value()).c_str(),
1456  E :        kExpectNoBlock);
1457  E :    if (block == NULL) {
1458  i :      LOG(ERROR) << "Unable to create gap block.";
1459  i :      return false;
1460    :    }
1461  E :    block->set_attribute(BlockGraph::GAP_BLOCK);
1462    :  
1463  E :    return true;
1464  E :  }
1465    :  
1466    :  bool Decomposer::CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
1467  E :                                          BlockGraph::BlockType block_type) {
             // Covers every address range of the section described by |header| that is
             // not already occupied by a block with a gap block of |block_type|.
             // The section spans [VirtualAddress, VirtualAddress + VirtualSize).
             // Returns false as soon as a gap block cannot be created.
1468  E :    RelativeAddress section_begin(header->VirtualAddress);
1469  E :    RelativeAddress section_end(section_begin + header->Misc.VirtualSize);
1470    :    RelativeAddress image_end(
1471  E :        image_file_.nt_headers()->OptionalHeader.SizeOfImage);
1472    :  
1473    :    // Find the first block intersecting [section_begin, image_end) and the
1474    :    // first block intersecting [section_end, image_end), if there is one.
1475    :    BlockGraph::AddressSpace::RangeMap::const_iterator it(
1476    :        image_->address_space_impl().FindFirstIntersection(
1477    :            BlockGraph::AddressSpace::Range(section_begin,
1478  E :                                            image_end - section_begin)));
1479    :  
               // |end| defaults to the end of the address space and is only narrowed
               // when the section stops short of the end of the image.
1480    :    BlockGraph::AddressSpace::RangeMap::const_iterator end =
1481  E :        image_->address_space_impl().end();
1482  E :    if (section_end < image_end) {
1483    :      end = image_->address_space_impl().FindFirstIntersection(
1484    :          BlockGraph::AddressSpace::Range(section_end,
1485  E :                                          image_end - section_end));
1486    :    }
1487    :  
1488    :    // The whole section is missing. Cover it with one gap block.
1489  E :    if (it == end)
1490    :      return CreateGapBlock(
1491  i :          block_type, section_begin, section_end - section_begin);
1492    :  
1493    :    // Create the head gap block if need be.
1494  E :    if (section_begin < it->first.start())
1495    :      if (!CreateGapBlock(
1496  i :          block_type, section_begin, it->first.start() - section_begin))
1497  i :        return false;
1498    :  
1499    :    // Now iterate the blocks and fill in gaps.
1500  E :    for (; it != end; ++it) {
1501  E :      const BlockGraph::Block* block = it->second;
1502  E :      DCHECK(block != NULL);
1503  E :      RelativeAddress block_end = it->first.start() + block->size();
               // This block reaches or passes the end of the section, so there is
               // nothing left to fill.
1504  E :      if (block_end >= section_end)
1505  E :        break;
1506    :  
1507    :      // Walk to the next address in turn.
1508  E :      BlockGraph::AddressSpace::RangeMap::const_iterator next = it;
1509  E :      ++next;
1510  E :      if (next == end) {
1511    :        // We're at the end of the list. Create the tail gap block.
1512  E :        DCHECK_GT(section_end, block_end);
1513  E :        if (!CreateGapBlock(block_type, block_end, section_end - block_end))
1514  i :          return false;
1515  E :        break;
1516    :      }
1517    :  
1518    :      // Create the interstitial gap block.
1519  E :      if (block_end < next->first.start())
1520    :        if (!CreateGapBlock(
1521  E :            block_type, block_end, next->first.start() - block_end))
1522  i :          return false;
1523  E :    }
1524    :  
1525  E :    return true;
1526  E :  }
1527    :  
1528  E :  bool Decomposer::CreateGapBlocks() {
             // Creates gap blocks for every code and data section of the image.
             // Sections whose type is neither kSectionCode nor kSectionData are
             // skipped. Returns false (after logging) on the first section whose gap
             // blocks cannot be created.
1529  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1530    :  
1531    :    // Iterate through all the image sections.
1532  E :    for (size_t i = 0; i < num_sections; ++i) {
1533  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1534  E :      DCHECK(header != NULL);
1535    :  
               // |type| selects the gap block type; |section_type| is only used in
               // the error message below.
1536  E :      BlockGraph::BlockType type = BlockGraph::CODE_BLOCK;
1537  E :      const char* section_type = NULL;
1538  E :      switch (GetSectionType(header)) {
1539    :        case kSectionCode:
1540  E :          type = BlockGraph::CODE_BLOCK;
1541  E :          section_type = "code";
1542  E :          break;
1543    :  
1544    :        case kSectionData:
1545  E :          type = BlockGraph::DATA_BLOCK;
1546  E :          section_type = "data";
1547  E :          break;
1548    :  
1549    :        default:
1550  i :          continue;
1551    :      }
1552    :  
               // NOTE(review): IMAGE_SECTION_HEADER::Name is a fixed 8-byte field
               // that is not guaranteed to be NUL-terminated; confirm that streaming
               // it into the log below cannot read past the field.
1553  E :      if (!CreateSectionGapBlocks(header, type)) {
1554  i :        LOG(ERROR) << "Unable to create gap blocks for " << section_type
1555    :                   << " section \"" << header->Name << "\".";
1556  i :        return false;
1557    :      }
1558  E :    }
1559    :  
1560  E :    return true;
1561  E :  }
1562    :  
1563    :  bool Decomposer::AddReferenceCallback(RelativeAddress src_addr,
1564    :                                        BlockGraph::ReferenceType type,
1565    :                                        BlockGraph::Size size,
1566  E :                                        RelativeAddress dst_addr) {
             // Records a reference of |type| and |size| from |src_addr| to |dst_addr|
             // (with a zero offset) by forwarding to ValidateOrAddReference, and
             // returns its result. FIXUP_MAY_EXIST is used because a matching fixup
             // is optional for these references.
1567    :    // This is only called by the PEFileParser, and it creates some references
1568    :    // for which there are no corresponding fixup entries.
1569    :    return ValidateOrAddReference(FIXUP_MAY_EXIST, src_addr, type, size, dst_addr,
1570  E :                                  0, &fixup_map_, &references_);
1571  E :  }
1572    :  
1573  E :  bool Decomposer::ParseRelocs() {
             // Decodes the image relocations into |reloc_set_|, reads them into a
             // local relocation map and validates that map with ValidateRelocs().
             // Returns false if any of the three steps fails; the first two log an
             // error here.
1574  E :    if (!image_file_.DecodeRelocs(&reloc_set_)) {
1575  i :      LOG(ERROR) << "Unable to decode image relocs.";
1576  i :      return false;
1577    :    }
1578    :  
1579  E :    PEFile::RelocMap reloc_map;
1580  E :    if (!image_file_.ReadRelocs(reloc_set_, &reloc_map)) {
1581  i :      LOG(ERROR) << "Unable to read image relocs.";
1582  i :      return false;
1583    :    }
1584    :  
1585    :    // Validate each relocation entry against the corresponding fixup entry.
1586  E :    if (!ValidateRelocs(reloc_map))
1587  i :      return false;
1588    :  
1589  E :    return true;
1590  E :  }
1591    :  
1592  E :  bool Decomposer::CreateReferencesFromFixups() {
             // Adds one reference to |references_| for every entry in |fixup_map_|.
             // For each fixup the 32-bit value stored in the image at the fixup
             // location is read, and the destination offset is computed relative to
             // the fixup's base according to the fixup type.
             // Returns false if the image data cannot be read, the fixup type is not
             // one of the three handled below, or AddReference fails.
1593  E :    FixupMap::const_iterator it(fixup_map_.begin());
1594  E :    for (; it != fixup_map_.end(); ++it) {
1595  E :      RelativeAddress src_addr(it->second.location);
1596  E :      uint32 data = 0;
1597  E :      if (!image_file_.ReadImage(src_addr, &data, sizeof(data))) {
1598  i :        LOG(ERROR) << "Unable to read image data for fixup with source at "
1599    :                   << src_addr;
1600  i :        return false;
1601    :      }
1602    :  
1603  E :      RelativeAddress dst_base(it->second.base);
1604  E :      BlockGraph::Offset dst_offset = 0;
1605  E :      switch (it->second.type) {
1606    :        case BlockGraph::PC_RELATIVE_REF: {
                   // The stored value is applied to the address just past the
                   // kPointerSize-byte operand.
1607  E :          dst_offset = src_addr + kPointerSize + data - dst_base;
1608  E :          break;
1609    :        }
1610    :  
1611    :        case BlockGraph::ABSOLUTE_REF: {
                   // The stored value is converted with AbsToRelDisplacement before
                   // the base is subtracted.
1612  E :          dst_offset = image_file_.AbsToRelDisplacement(data) - dst_base.value();
1613  E :          break;
1614    :        }
1615    :  
1616    :        case BlockGraph::RELATIVE_REF: {
                   // The stored value is used as is; only the base is subtracted.
1617  E :          dst_offset = data - dst_base.value();
1618  E :          break;
1619    :        }
1620    :  
1621    :        default: {
1622  i :          NOTREACHED() << "Invalid reference type.";
1623  i :          return false;
1624    :        }
1625    :      }
1626    :  
               // All references created from fixups are kPointerSize bytes wide.
1627    :      if (!AddReference(src_addr, it->second.type, kPointerSize, dst_base,
1628  E :                        dst_offset, &references_)) {
1629  i :        return false;
1630    :      }
1631  E :    }
1632    :  
1633  E :    return true;
1634  E :  }
1635    :  
1636  E :  bool Decomposer::ValidateRelocs(const PEFile::RelocMap& reloc_map) {
             // Calls ValidateOrAddReference with FIXUP_MUST_EXIST for the source
             // address of every entry in |reloc_map|. Only the key (source address)
             // of each entry is used. Returns false on the first entry that fails.
1637  E :    PEFile::RelocMap::const_iterator it(reloc_map.begin());
1638  E :    PEFile::RelocMap::const_iterator end(reloc_map.end());
1639  E :    for (; it != end; ++it) {
1640  E :      RelativeAddress src(it->first);
               // |dummy| is a default-constructed destination address.
               // NOTE(review): sizeof(dummy) doubles as the reference size --
               // presumably this equals the pointer size; confirm.
1641  E :      RelativeAddress dummy;
1642    :  
1643    :      if (!ValidateOrAddReference(
1644    :              FIXUP_MUST_EXIST, src, BlockGraph::ABSOLUTE_REF,
1645  E :              sizeof(dummy), dummy, 0, &fixup_map_, &references_)) {
1646  i :        return false;
1647    :      }
1648  E :    }
1649    :  
1650  E :    return true;
1651  E :  }
1652    :  
1653  E :  bool Decomposer::CreateBlocksFromSectionContribs(IDiaSession* session) {
             // Creates one block per entry of the DIA section contribution table of
             // |session|, skipping contributions that lie in the resource section.
             // Each block is named after, and tagged with, the name of the compiland
             // that contributed it, and carries the SECTION_CONTRIB attribute.
             // Returns false if the table cannot be found, a DIA call fails, a name
             // cannot be converted to UTF8 or a block cannot be created.
1654  E :    ScopedComPtr<IDiaEnumSectionContribs> section_contribs;
1655    :    SearchResult search_result = FindDiaTable(session,
1656  E :                                              section_contribs.Receive());
             // Only kSearchFailed is logged here; any other unsuccessful result
             // returns false without a message from this function.
1657  E :    if (search_result != kSearchSucceeded) {
1658  i :      if (search_result == kSearchFailed)
1659  i :        LOG(ERROR) << "No section contribution table found.";
1660  i :      return false;
1661    :    }
1662    :  
1663  E :    size_t rsrc_id = image_file_.GetSectionIndex(kResourceSectionName);
1664    :  
1665  E :    LONG count = 0;
1666  E :    if (section_contribs->get_Count(&count) != S_OK) {
1667  i :      LOG(ERROR) << "Failed to get section contributions enumeration length.";
1668  i :      return false;
1669    :    }
1670    :  
             // The loop is bounded by |count|, so every Next() call is expected to
             // return S_OK; anything else is treated as an error.
1671  E :    for (LONG visited = 0; visited < count; ++visited) {
1672  E :      ScopedComPtr<IDiaSectionContrib> section_contrib;
1673  E :      ULONG fetched = 0;
1674  E :      HRESULT hr = section_contribs->Next(1, section_contrib.Receive(), &fetched);
1675  E :      if (hr != S_OK) {
1676  i :        LOG(ERROR) << "Failed to get DIA section contribution: "
1677    :                   << com::LogHr(hr) << ".";
1678  i :        return false;
1679    :      }
1680  E :      if (fetched == 0)
1681  i :        break;
1682    :  
               // Fetch all required properties. The chain short-circuits on the
               // first getter that does not return S_OK, leaving its result in |hr|
               // for the log message; |compiland| is only dereferenced once
               // get_compiland has returned S_OK.
1683  E :      hr = E_FAIL;
1684  E :      DWORD rva = 0;
1685  E :      DWORD length = 0;
1686  E :      DWORD section_id = 0;
1687  E :      BOOL code = FALSE;
1688  E :      ScopedComPtr<IDiaSymbol> compiland;
1689  E :      ScopedBstr bstr_name;
1690    :      if ((hr = section_contrib->get_relativeVirtualAddress(&rva)) != S_OK ||
1691    :          (hr = section_contrib->get_length(&length)) != S_OK ||
1692    :          (hr = section_contrib->get_addressSection(&section_id)) != S_OK ||
1693    :          (hr = section_contrib->get_code(&code)) != S_OK ||
1694    :          (hr = section_contrib->get_compiland(compiland.Receive())) != S_OK ||
1695  E :          (hr = compiland->get_name(bstr_name.Receive())) != S_OK) {
1696  i :        LOG(ERROR) << "Failed to get section contribution properties: "
1697    :                   << com::LogHr(hr) << ".";
1698  i :        return false;
1699    :      }
1700    :  
1701    :      // Determine if this function was built by a supported compiler.
1702    :      bool is_built_by_supported_compiler =
1703  E :          IsBuiltBySupportedCompiler(compiland.get());
1704    :  
1705    :      // DIA numbers sections from 1 to n, while we do 0 to n - 1.
1706  E :      DCHECK_LT(0u, section_id);
1707  E :      --section_id;
1708    :  
1709    :      // We don't parse the resource section, as it is parsed by the PEFileParser.
1710  E :      if (section_id == rsrc_id)
1711  E :        continue;
1712    :  
1713  E :      std::string name;
1714  E :      if (!WideToUTF8(bstr_name, bstr_name.Length(), &name)) {
1715  i :        LOG(ERROR) << "Failed to convert compiland name to UTF8.";
1716  i :        return false;
1717    :      }
1718    :  
1719    :      // Create the block.
1720    :      BlockGraph::BlockType block_type =
1721  E :          code ? BlockGraph::CODE_BLOCK : BlockGraph::DATA_BLOCK;
1722    :      BlockGraph::Block* block = FindOrCreateBlock(block_type,
1723    :                                                   RelativeAddress(rva),
1724    :                                                   length,
1725    :                                                   name.c_str(),
1726  E :                                                   kExpectNoBlock);
1727  E :      if (block == NULL) {
1728  i :        LOG(ERROR) << "Unable to create block.";
1729  i :        return false;
1730    :      }
1731    :  
1732    :      // Set the block compiland name.
1733  E :      block->set_compiland_name(name);
1734    :  
1735    :      // Set the block attributes.
1736  E :      block->set_attribute(BlockGraph::SECTION_CONTRIB);
1737  E :      if (!is_built_by_supported_compiler)
1738  E :        block->set_attribute(BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER);
1739  E :    }
1740    :  
1741  E :    return true;
1742  E :  }
1743    :  
1744    :  DiaBrowser::BrowserDirective Decomposer::OnDataSymbol(
1745    :      const DiaBrowser& dia_browser,
1746    :      const DiaBrowser::SymTagVector& sym_tags,
1747  E :      const DiaBrowser::SymbolPtrVector& symbols) {
             // DiaBrowser match callback for SymTagData symbols. |sym_tags| and
             // |symbols| are parallel vectors whose last element is the data symbol
             // itself; |dia_browser| is not used by this function.
             // For a statically located symbol with a non-zero RVA and non-zero
             // length, finds or creates the data block covering it and labels the
             // block at the symbol's address.
             // Returns kBrowserContinue when the symbol is handled or deliberately
             // ignored, and kBrowserAbort on any failure.
1748  E :    DCHECK_LT(0u, sym_tags.size());
1749  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1750  E :    DCHECK_EQ(SymTagData, sym_tags.back());
1751    :  
1752  E :    const DiaBrowser::SymbolPtr& data(symbols.back());
1753    :  
             // Unlike other getters in this file, these are checked with FAILED(),
             // so a non-S_OK success code leaves the corresponding default value
             // (LocIsNull, 0 or an empty name) in place.
1754  E :    HRESULT hr = E_FAIL;
1755  E :    DWORD location_type = LocIsNull;
1756  E :    DWORD rva = 0;
1757  E :    ScopedBstr name_bstr;
1758    :    if (FAILED(hr = data->get_locationType(&location_type)) ||
1759    :        FAILED(hr = data->get_relativeVirtualAddress(&rva)) ||
1760  E :        FAILED(hr = data->get_name(name_bstr.Receive()))) {
1761  i :      LOG(ERROR) << "Failed to get data properties: " << com::LogHr(hr) << ".";
1762  i :      return DiaBrowser::kBrowserAbort;
1763    :    }
1764    :  
1765    :    // We only parse data symbols with static storage.
1766  E :    if (location_type != LocIsStatic)
1767  E :      return DiaBrowser::kBrowserContinue;
1768    :  
1769    :    // Symbols with an address of zero are essentially invalid. They appear to
1770    :    // have been optimized away by the compiler, but they are still reported.
1771  E :    if (rva == 0)
1772  E :      return DiaBrowser::kBrowserContinue;
1773    :  
1774    :    // TODO(chrisha): We eventually want to get alignment info from the type
1775    :    //     information. This is strictly a lower bound, however, as certain
1776    :    //     data may be used in instructions that impose stricter alignment
1777    :    //     requirements.
1778  E :    size_t length = 0;
1779  E :    if (!GetTypeInfo(data, &length)) {
1780  i :      return DiaBrowser::kBrowserAbort;
1781    :    }
1782    :    // Zero-length data symbols act as 'forward declares' in some sense. They
1783    :    // are always followed by a non-zero length data symbol with the same name
1784    :    // and location.
1785  E :    if (length == 0)
1786  E :      return DiaBrowser::kBrowserContinue;
1787    :  
1788  E :    RelativeAddress addr(rva);
1789  E :    std::string name;
1790  E :    if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1791  i :      LOG(ERROR) << "Failed to convert data symbol name to UTF8.";
1792  i :      return DiaBrowser::kBrowserAbort;
1793    :    }
1794    :  
1795    :    // In general we expect data symbols to be completely contained by a block.
1796    :    // The data symbol can exceed the size of the block in the case of data
1797    :    // imports. For some reason the toolchain emits a global data symbol with
1798    :    // type information equal to the type of the data *pointed* to by the import
1799    :    // entry rather than the type of the entry itself. Thus, if the data type
1800    :    // is bigger than the entire IAT this symbol will exceed it. To complicate
1801    :    // matters even more, a poorly written module can import its own export in
1802    :    // which case a linker generated pseudo-import-entry block will be
1803    :    // generated. This won't be part of the IAT, so we can't even filter based
1804    :    // on that. Instead, we simply ignore global data symbols that exceed the
1805    :    // block size.
1806  E :    FindOrCreateBlockDirective directive = kAllowCoveringBlock;
1807  E :    base::StringPiece spname(name);
1808  E :    if (sym_tags.size() == 1 && spname.starts_with("_imp_")) {
1809    :      // For global data symbols (no parent symbols) to imported data ("_imp_"
1810    :      // prefix) we allow partially covering blocks.
1811  E :      directive = kAllowPartialCoveringBlock;
1812    :    }
1813    :  
1814    :    BlockGraph::Block* block = FindOrCreateBlock(BlockGraph::DATA_BLOCK,
1815    :                                                 addr, length, spname,
1816  E :                                                 directive);
1817    :  
1818    :    // We've seen null blocks for some symbols in modules compiled using a custom
1819    :    // non-Microsoft toolchain.
1820  E :    if (block == NULL) {
1821  i :      LOG(ERROR) << "Failed to get a block for symbol named " << name << ".";
1822  i :      return DiaBrowser::kBrowserAbort;
1823    :    }
1824    :  
             // The returned block may be an existing code block that covers the
             // symbol; that case is special-cased below.
1825  E :    if (block->type() == BlockGraph::CODE_BLOCK) {
1826    :      // The NativeClient bits of chrome.dll consists of hand-written assembly
1827    :      // that is compiled using a custom non-Microsoft toolchain. Unfortunately
1828    :      // for us this toolchain emits 1-byte data symbols instead of code labels.
1829    :      static const char kNaClPrefix[] = "NaCl";
               // arraysize() counts the terminating NUL, hence the "- 1" to compare
               // only the prefix characters.
1830    :      if (length == 1 &&
1831  E :          name.compare(0, arraysize(kNaClPrefix) - 1, kNaClPrefix) == 0) {
1832  i :        if (!AddLabelToBlock(addr, name, BlockGraph::CODE_LABEL, block)) {
1833  i :          LOG(ERROR) << "Failed to add label to code block.";
1834  i :          return DiaBrowser::kBrowserAbort;
1835    :        }
1836    :  
1837  i :        return DiaBrowser::kBrowserContinue;
1838    :      }
1839    :    }
1840    :  
             // Everything else, including non-NaCl symbols that landed in a code
             // block, is recorded as a data label.
1841  E :    if (!AddLabelToBlock(addr, name, BlockGraph::DATA_LABEL, block)) {
1842  i :      LOG(ERROR) << "Failed to add data label to block.";
1843  i :      return DiaBrowser::kBrowserAbort;
1844    :    }
1845    :  
1846  E :    return DiaBrowser::kBrowserContinue;
1847  E :  }
1848    :  
1849    :  DiaBrowser::BrowserDirective Decomposer::OnPublicSymbol(
1850    :      const DiaBrowser& dia_browser,
1851    :      const DiaBrowser::SymTagVector& sym_tags,
1852  E :      const DiaBrowser::SymbolPtrVector& symbols) {
             // DiaBrowser match callback for SymTagPublicSymbol symbols. The last
             // element of |symbols| is the public symbol; |dia_browser| is not used
             // by this function.
             // Adds a PUBLIC_SYMBOL_LABEL, with a single leading '_' stripped from
             // the name, to the existing block containing the symbol's RVA.
             // Returns kBrowserContinue on success or when the symbol has no RVA,
             // and kBrowserAbort on any failure.
1853  E :    DCHECK_LT(0u, sym_tags.size());
1854  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1855  E :    DCHECK_EQ(SymTagPublicSymbol, sym_tags.back());
1856  E :    const DiaBrowser::SymbolPtr& symbol(symbols.back());
1857    :  
1858    :    // We don't care about symbols that don't have addresses.
1859  E :    DWORD rva = 0;
1860  E :    if (S_OK != symbol->get_relativeVirtualAddress(&rva))
1861  E :      return DiaBrowser::kBrowserContinue;
1862    :  
1863  E :    ScopedBstr name_bstr;
1864  E :    if (S_OK != symbol->get_name(name_bstr.Receive())) {
1865  i :      LOG(ERROR) << "Failed to get public symbol name.";
1866  i :      return DiaBrowser::kBrowserAbort;
1867    :    }
1868    :  
1869  E :    std::string name;
1870  E :    if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1871  i :      LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1872  i :      return DiaBrowser::kBrowserAbort;
1873    :    }
1874    :  
             // The block must already exist; the error message uses the name as
             // reported by DIA, before the leading '_' is stripped below.
1875  E :    RelativeAddress addr(rva);
1876  E :    BlockGraph::Block* block = image_->GetBlockByAddress(addr);
1877  E :    if (block == NULL) {
1878  i :      LOG(ERROR) << "No block found for public symbol \"" << name << "\".";
1879  i :      return DiaBrowser::kBrowserAbort;
1880    :    }
1881    :  
1882    :    // Public symbol names are mangled. Remove leading '_' as per
1883    :    // http://msdn.microsoft.com/en-us/library/00kh39zz(v=vs.80).aspx
             // NOTE(review): |name| is indexed without an emptiness check; confirm
             // that an empty public symbol name cannot occur here.
1884  E :    if (name[0] == '_')
1885  E :      name = name.substr(1);
1886    :  
1887  E :    if (!AddLabelToBlock(addr, name, BlockGraph::PUBLIC_SYMBOL_LABEL, block))
1888  i :      return DiaBrowser::kBrowserAbort;
1889    :  
1890  E :    return DiaBrowser::kBrowserContinue;
1891  E :  }
1892    :  
1893  E :  bool Decomposer::ProcessStaticInitializers() {
             // Finds pairs of data blocks whose names match the begin/end patterns in
             // |static_initializer_patterns_| and merges every block between each
             // pair into a single block named "Bracketed Initializers: <key>" with
             // the COFF_GROUP attribute.
             // Returns false (after logging) if a bracketing symbol is seen twice, if
             // either end of a pair is missing, or if a pair is out of order.
1894    :    typedef std::pair<RelativeAddress, RelativeAddress> AddressPair;
1895    :    typedef std::map<std::string, AddressPair> AddressPairMap;
1896    :  
             // Address 0 doubles as the "not seen yet" marker for both endpoints.
1897  E :    const RelativeAddress kNull(0);
1898    :  
1899    :    // This stores pairs of addresses, representing the beginning and the end
1900    :    // of each static initializer block. It is keyed with a string, which is
1901    :    // returned by the match group of the corresponding initializer pattern.
1902    :    // The key is necessary to correlate matching labels (as multiple pairs
1903    :    // of labels may match through a single pattern).
1904  E :    AddressPairMap addr_pair_map;
1905    :  
1906    :    // Used for keeping track of which label, if any, we matched.
1907    :    enum MatchType {
1908    :      kMatchNone,
1909    :      kMatchBeginLabel,
1910    :      kMatchEndLabel
1911    :    };
1912    :  
1913    :    // Iterate through all data blocks, looking for known initializer labels.
1914  E :    BlockGraph::AddressSpace::RangeMapConstIter block_it = image_->begin();
1915  E :    for (; block_it != image_->end(); ++block_it) {
1916  E :      const BlockGraph::Block* block = block_it->second;
1917    :      // Skip non-data blocks.
1918  E :      if (block->type() != BlockGraph::DATA_BLOCK)
1919  E :        continue;
1920    :  
1921    :      // Check the block name against each of the initializer patterns.
               // The first pattern pair that matches wins; |name| receives the
               // pattern's match group and is used as the map key below.
1922  E :      MatchType match = kMatchNone;
1923  E :      std::string block_name = block->name();
1924  E :      std::string name;
1925  E :      for (size_t i = 0; i < static_initializer_patterns_.size(); ++i) {
1926  E :        REPair& re_pair(static_initializer_patterns_[i]);
1927  E :        if (re_pair.first.FullMatch(block_name, &name))
1928  E :          match = kMatchBeginLabel;
1929  E :        else if (re_pair.second.FullMatch(block_name, &name))
1930  E :          match = kMatchEndLabel;
1931    :  
1932  E :        if (match != kMatchNone)
1933  E :          break;
1934  E :      }
1935    :  
1936    :      // No pattern matched this symbol? Continue to the next one.
1937  E :      if (match == kMatchNone)
1938  E :        continue;
1939    :  
1940    :      // Ensure this symbol exists in the map. Thankfully, addresses default
1941    :      // construct to NULL.
1942  E :      AddressPair& addr_pair = addr_pair_map[name];
1943    :  
1944    :      // Update the bracketing symbol endpoint. Make sure each symbol endpoint
1945    :      // is only seen once.
               // A begin label contributes the start of its block; an end label
               // contributes the address just past the end of its block.
1946  E :      RelativeAddress* addr = NULL;
1947  E :      RelativeAddress new_addr;
1948  E :      if (match == kMatchBeginLabel) {
1949  E :        addr = &addr_pair.first;
1950  E :        new_addr = block->addr();
1951  E :      } else {
1952  E :        addr = &addr_pair.second;
1953  E :        new_addr = block->addr() + block->size();
1954    :      }
1955  E :      if (*addr != kNull) {
1956  i :        LOG(ERROR) << "Bracketing symbol appears multiple times: "
1957    :                   << block_name;
1958  i :        return false;
1959    :      }
1960  E :      *addr = new_addr;
1961  E :    }
1962    :  
1963    :    // Use the bracketing symbols to make the initializers contiguous.
1964  E :    AddressPairMap::const_iterator init_it = addr_pair_map.begin();
1965  E :    for (; init_it != addr_pair_map.end(); ++init_it) {
1966  E :      RelativeAddress begin_addr = init_it->second.first;
1967  E :      if (begin_addr == kNull) {
1968  i :        LOG(ERROR) << "Bracketing start symbol missing: " << init_it->first;
1969  i :        return false;
1970    :      }
1971    :  
1972  E :      RelativeAddress end_addr = init_it->second.second;
1973  E :      if (end_addr == kNull) {
1974  i :        LOG(ERROR) << "Bracketing end symbol missing: " << init_it->first;
1975  i :        return false;
1976    :      }
1977    :  
1978  E :      if (begin_addr > end_addr) {
1979  i :        LOG(ERROR) << "Bracketing symbols out of order: " << init_it->first;
1980  i :        return false;
1981    :      }
1982    :  
1983    :      // Merge the initializers.
               // NOTE(review): |merged| is only guarded by a DCHECK before being
               // dereferenced; confirm MergeIntersectingBlocks cannot return NULL
               // for this range.
1984  E :      DataSpace::Range range(begin_addr, end_addr - begin_addr);
1985  E :      BlockGraph::Block* merged = image_->MergeIntersectingBlocks(range);
1986    :      std::string name = base::StringPrintf("Bracketed Initializers: %s",
1987  E :                                            init_it->first.c_str());
1988  E :      DCHECK(merged != NULL);
1989  E :      merged->set_name(name);
1990  E :      merged->set_attribute(BlockGraph::COFF_GROUP);
1991  E :    }
1992    :  
1993  E :    return true;
1994  E :  }
1995    :  
1996  E :  bool Decomposer::ProcessDataSymbols(IDiaSymbol* root) {
             // Browses the symbol tree under |root| and invokes OnDataSymbol for every
             // data symbol matching one of the two patterns registered below.
             // Returns the result of DiaBrowser::Browse.
             // The callback is bound to an unretained |this|; both the callback and
             // the browser are locals of this function.
1997    :    DiaBrowser::MatchCallback on_data_symbol(
1998  E :        base::Bind(&Decomposer::OnDataSymbol, base::Unretained(this)));
1999    :  
2000  E :    DiaBrowser dia_browser;
             // Pattern 1: a data symbol, optionally preceded by a compiland.
2001    :    dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagData),
2002  E :                           on_data_symbol);
             // Pattern 2: a data symbol under a compiland's function, nested inside
             // any number of block symbols.
2003    :    dia_browser.AddPattern(Seq(SymTagCompiland, SymTagFunction,
2004    :                               Star(SymTagBlock), SymTagData),
2005  E :                           on_data_symbol);
2006    :  
2007  E :    return dia_browser.Browse(root);
2008  E :  }
2009    :  
2010  E :  bool Decomposer::ProcessPublicSymbols(IDiaSymbol* root) {
             // Browses the symbol tree under |root| and invokes OnPublicSymbol for
             // every symbol matching the SymTagPublicSymbol pattern.
             // Returns the result of DiaBrowser::Browse.
             // The callback is bound to an unretained |this|; both the callback and
             // the browser are locals of this function.
2011    :    DiaBrowser::MatchCallback on_public_symbol(
2012  E :        base::Bind(&Decomposer::OnPublicSymbol, base::Unretained(this)));
2013    :  
2014  E :    DiaBrowser dia_browser;
2015  E :    dia_browser.AddPattern(SymTagPublicSymbol, on_public_symbol);
2016    :  
2017  E :    return dia_browser.Browse(root);
2018  E :  }
2019    :  
2020  E :  bool Decomposer::GuessDataBlockAlignments() {
             // Calls GuessDataBlockAlignment, passing the image's SectionAlignment,
             // on every block that intersects a data section. Sections of any other
             // type are skipped. This function always returns true.
2021  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2022    :    // Iterate through all the image sections.
2023  E :    for (size_t i = 0; i < num_sections; ++i) {
2024  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2025  E :      DCHECK(header != NULL);
2026    :  
2027    :      // Only iterate through data sections.
2028  E :      if (GetSectionType(header) != kSectionData)
2029  E :        continue;
2030    :  
2031  E :      RelativeAddress section_begin(header->VirtualAddress);
2032  E :      size_t section_length = header->Misc.VirtualSize;
2033    :  
2034    :      // Get the range of blocks in this section.
2035    :      BlockGraph::AddressSpace::RangeMapIterPair it_pair =
2036  E :          image_->GetIntersectingBlocks(section_begin, section_length);
2037    :  
2038    :      // Iterate through the blocks in the section, setting their alignment.
2039  E :      BlockGraph::AddressSpace::RangeMapIter it = it_pair.first;
2040  E :      for (; it != it_pair.second; ++it) {
2041  E :        BlockGraph::Block* block = it->second;
2042    :        GuessDataBlockAlignment(block,
2043  E :            image_file_.nt_headers()->OptionalHeader.SectionAlignment);
2044  E :      }
2045  E :    }
2046    :  
2047  E :    return true;
2048  E :  }
2049    :  
2050  E :  bool Decomposer::CreateCodeReferences() {
             // Calls CreateCodeReferencesForBlock on every code block in the block
             // graph that passes the CodeBlockAttributesAreBasicBlockSafe check.
             // Non-code blocks and blocks failing that check are skipped.
             // Returns false on the first block for which processing fails.
2051  E :    BlockGraph::BlockMap::iterator it(image_->graph()->blocks_mutable().begin());
2052  E :    BlockGraph::BlockMap::iterator end(image_->graph()->blocks_mutable().end());
2053  E :    for (; it != end; ++it) {
2054  E :      BlockGraph::Block* block = &it->second;
2055    :  
2056  E :      if (block->type() != BlockGraph::CODE_BLOCK)
2057  E :        continue;
2058    :  
2059    :      // We shouldn't attempt disassembly on unsafe blocks. The new decomposer
2060    :      // has this fixed, but this is a workaround here for now.
2061    :      if (!pe::PETransformPolicy::CodeBlockAttributesAreBasicBlockSafe(
2062  E :              block, false)) {
2063  E :        continue;
2064    :      }
2065    :  
2066  E :      if (!CreateCodeReferencesForBlock(block))
2067  i :        return false;
2068  E :    }
2069    :  
2070  E :    return true;
2071  E :  }
2072    :  
2073  E :  bool Decomposer::CreateCodeReferencesForBlock(BlockGraph::Block* block) {  // Disassembles |block| and tags it with the walk outcome; false on error.
2074  E :    DCHECK(current_block_ == NULL);
2075  E :    current_block_ = block;
2076    :  
2077  E :    RelativeAddress block_addr;
2078  E :    if (!image_->GetAddressOf(block, &block_addr)) {
2079  i :      LOG(ERROR) << "Block \"" << block->name() << "\" has no address.";
2080  i :      return false;  // NOTE(review): current_block_ is still set on this early return.
2081    :    }
2082    :  
2083  E :    AbsoluteAddress abs_block_addr;
2084  E :    if (!image_file_.Translate(block_addr, &abs_block_addr)) {
2085  i :      LOG(ERROR) << "Unable to get absolute address for " << block_addr;
2086  i :      return false;  // NOTE(review): current_block_ is still set on this early return.
2087    :    }
2088    :  
2089    :    Disassembler::InstructionCallback on_instruction(
2090  E :        base::Bind(&Decomposer::OnInstruction, base::Unretained(this)));
2091    :  
2092    :    // Use block labels and code references as starting points for disassembly.
2093  E :    Disassembler::AddressSet starting_points;
2094    :    GetDisassemblyStartingPoints(block, abs_block_addr, reloc_set_,
2095  E :                                 &starting_points);
2096    :  
2097    :    // If the block has no starting points, then it has no private symbols and
2098    :    // is not BB safe. We mark the block as not safe for basic-block disassembly.
2099    :    if (starting_points.empty() &&
2100  E :        (block->attributes() & BlockGraph::GAP_BLOCK) == 0) {
2101  E :      VLOG(1) << "Block \"" << block->name() << "\" has no private symbols.";
2102  E :      block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
2103    :    }
2104    :  
2105    :    // Determine whether or not we are being strict with disassembly.
2106    :    // NOTE: This is particularly ugly. Decomposer should not depend on the
2107    :    //     transform policy object. In fact, Decomposer should not even be doing
2108    :    //     disassembly. This all disappears in the new decomposer.
2109    :    bool strict = PETransformPolicy::CodeBlockAttributesAreBasicBlockSafe(
2110  E :        block, false);
2111  E :    be_strict_with_current_block_ = false;  // NOTE(review): strict mode is forced off; |strict| only feeds the DCHECK below.
2112    :  
2113    :    // Determine the length of the code portion of the block by trimming off any
2114    :    // known trailing data. Also, if we're in strict mode, ensure that our
2115    :    // assumption regarding code/data layout is met.
2116  E :    size_t code_size = 0;
2117    :    if (!BlockHasExpectedCodeDataLayout(block, &code_size) &&
2118  E :        be_strict_with_current_block_) {
2119  i :      LOG(ERROR) << "Block \"" << block->name() << "\" has unexpected code/data "
2120    :                 << "layout.";
2121  i :      return false;
2122    :    }
2123    :  
2124    :    // Disassemble the code portion of the block, invoking OnInstruction per instruction.
2125    :    Disassembler disasm(block->data(),
2126    :                        code_size,
2127    :                        abs_block_addr,
2128    :                        starting_points,
2129  E :                        on_instruction);
2130  E :    Disassembler::WalkResult result = disasm.Walk();
2131    :  
2132    :    // If we're strict (that is, we're confident that the block was produced by
2133    :    // cl.exe), then we can use that knowledge to look for calls that appear to be
2134    :    // to non-returning functions that we may not have symbol info for.
2135  E :    if (be_strict_with_current_block_)
2136  i :      LookForNonReturningFunctions(references_, *image_, current_block_, disasm);
2137    :  
2138  E :    DCHECK_EQ(block, current_block_);
2139  E :    current_block_ = NULL;
2140  E :    be_strict_with_current_block_ = true;
2141    :  
2142  E :    switch (result) {
2143    :      case Disassembler::kWalkIncomplete:
2144    :        // There were computed branches that couldn't be chased down.
2145  E :        block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2146  E :        return true;
2147    :  
2148    :      case Disassembler::kWalkTerminated:
2149    :        // This exit condition should only ever occur for non-strict disassembly.
2150    :        // If strict, we should always get kWalkError.
2151  i :        DCHECK(!strict);
2152    :        // This means that the code was malformed, or broke some expected
2153    :        // conventions. This code is not safe for basic block disassembly.
2154  i :        block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
2155  i :        return true;
2156    :  
2157    :      case Disassembler::kWalkSuccess:
2158    :        // Were any bytes in the block not accounted for? This generally means
2159    :        // unreachable code, which we see quite often, especially in debug builds.
2160  E :        if (disasm.code_size() != disasm.disassembled_bytes())
2161  E :          block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2162  E :        return true;
2163    :  
2164    :      case Disassembler::kWalkError:
2165  i :        return false;
2166    :  
2167    :      default:
2168  i :        NOTREACHED() << "Unhandled Disassembler WalkResult.";
2169  i :        return false;
2170    :    }
2171  E :  }
2172    :  
2173    :  BlockGraph::Block* Decomposer::CreateBlock(BlockGraph::BlockType type,
2174    :                                             RelativeAddress address,
2175    :                                             BlockGraph::Size size,
2176  E :                                             const base::StringPiece& name) {  // Adds a block to the image and sets its source range, section and data; NULL on failure.
2177  E :    BlockGraph::Block* block = image_->AddBlock(type, address, size, name);
2178  E :    if (block == NULL) {
2179  i :      LOG(ERROR) << "Unable to add block at " << address << " with size "
2180    :                 << size << ".";
2181  i :      return NULL;
2182    :    }
2183    :  
2184    :    // Record the source range this block originates from: the whole block maps 1:1 onto [address, address + size).
2185    :    bool pushed = block->source_ranges().Push(
2186    :        BlockGraph::Block::DataRange(0, size),
2187  E :        BlockGraph::Block::SourceRange(address, size));
2188  E :    DCHECK(pushed);
2189    :  
2190  E :    BlockGraph::SectionId section = image_file_.GetSectionIndex(address, size);
2191  E :    if (section == BlockGraph::kInvalidSectionId) {
2192  i :      LOG(ERROR) << "Block at " << address << " with size " << size
2193    :                 << " lies outside of all sections.";
2194  i :      return NULL;  // NOTE(review): the block added above is not removed from image_ on this path.
2195    :    }
2196  E :    block->set_section(section);
2197    :  
2198  E :    const uint8* data = image_file_.GetImageData(address, size);
2199  E :    if (data != NULL)  // No data is attached when the image file has none for this range.
2200  E :      block->SetData(data, size);
2201    :  
2202  E :    return block;
2203  E :  }
2204    :  
2205    :  BlockGraph::Block* Decomposer::FindOrCreateBlock(
2206    :      BlockGraph::BlockType type,
2207    :      RelativeAddress addr,
2208    :      BlockGraph::Size size,
2209    :      const base::StringPiece& name,
2210  E :      FindOrCreateBlockDirective directive) {  // Returns the existing block at |addr| if |directive| permits it, else creates one; NULL on collision or failure.
2211  E :    BlockGraph::Block* block = image_->GetBlockByAddress(addr);
2212  E :    if (block != NULL) {
2213    :      // If we got a block we're guaranteed that it at least partially covers
2214    :      // the query range, so we can immediately return it in that case.
2215  E :      if (directive == kAllowPartialCoveringBlock)
2216  E :        return block;
2217    :  
2218  E :      if (block->attributes() & BlockGraph::PE_PARSED) {
2219    :        // Always allow collisions where the new block is a proper subset of
2220    :        // an existing PE parsed block. The PE parser often knows more than we do
2221    :        // about blocks that need to stick together.
2222  E :        directive = kAllowCoveringBlock;
2223    :  
2224    :        // Allow PE-parsed blocks to be grown to reflect reality. For example,
2225    :        // in VS2013 the linker makes space for 2 debug directories rather than
2226    :        // just one, and the symbols reflect this. We parse the debug directory
2227    :        // with the size indicated in the PE header, which conflicts with that
2228    :        // indicated by the section contributions.
2229  E :        if (name == "* Linker *" && size > block->size()) {
2230  E :          if (!image_->ResizeBlock(block, size)) {
2231  i :            LOG(ERROR) << "Failed to extend PE parsed block with linker "
2232    :                       << "section contribution.";
2233  i :            return NULL;  // This function returns a pointer: report failure as NULL, like the other error paths.
2234    :          }
2235  E :          const uint8* data = image_file_.GetImageData(addr, size);
2236  E :          block->SetData(data, size);  // NOTE(review): unlike CreateBlock, |data| is not checked for NULL here.
2237    :        }
2238    :      }
2239    :  
2240  E :      bool collision = false;
2241  E :      switch (directive) {
2242    :        case kExpectNoBlock: {
2243  i :          collision = true;
2244  i :          break;
2245    :        }
2246    :        case kAllowIdenticalBlock: {
2247  i :          collision = (block->addr() != addr || block->size() != size);
2248  i :          break;
2249    :        }
2250    :        default: {
2251  E :          DCHECK(directive == kAllowCoveringBlock);
2252    :          collision = block->addr() > addr ||
2253  E :              (block->addr() + block->size()) < addr + size;
2254    :          break;
2255    :        }
2256    :      }
2257    :  
2258  E :      if (collision) {
2259  i :        LOG(ERROR) << "Block collision for \"" << name.as_string() << "\" at "
2260    :                   << addr << "(" << size << ") with existing block \""
2261    :                   << block->name() << "\" at " << block->addr() << " ("
2262    :                   << block->size() << ").";
2263  i :        return NULL;
2264    :      }
2265    :  
2266  E :      return block;
2267    :    }
2268  E :    DCHECK(block == NULL);
2269    :  
2270  E :    return CreateBlock(type, addr, size, name);
2271  E :  }
2272    :  
2273    :  CallbackDirective Decomposer::LookPastInstructionForData(
2274  E :      RelativeAddress instr_end) {  // Checks whether a reloc starts exactly at |instr_end|, i.e. the instruction is followed by data.
2275    :    // If this instruction terminates at a data boundary (i.e. the *next*
2276    :    // instruction will be data or a reloc), we can be certain that a new
2277    :    // lookup table is starting at this address.
2278  E :    if (reloc_set_.find(instr_end) == reloc_set_.end())
2279  E :      return Disassembler::kDirectiveContinue;
2280    :  
2281    :    // Find the block housing the reloc. We expect the reloc to be contained
2282    :    // completely within this block.
2283  E :    BlockGraph::Block* block = image_->GetContainingBlock(instr_end, 4);  // 4 is presumably the reloc size in bytes — confirm.
2284  E :    if (block != current_block_) {
2285  i :      CHECK(block != NULL);
2286  i :      LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2287    :          << "Found an instruction/data boundary between blocks: "
2288    :          << current_block_->name() << " and " << block->name();
2289  i :      return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2290    :    }
2291    :  
2292  E :    BlockGraph::Offset offset = instr_end - block->addr();
2293    :  
2294    :    // We expect there to be a jump-table data label already.
2295  E :    BlockGraph::Label label;
2296  E :    bool have_label = block->GetLabel(offset, &label);
2297    :    if (!have_label || !label.has_attributes(
2298  E :            BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)) {
2299  i :      LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2300    :          << "Expected there to be a data label marking the jump "
2301    :          << "table at " << block->name() << " + " << offset << ".";
2302    :  
2303    :      // If we're in strict mode, we're a block that obeys standard conventions.
2304    :      // Which means we should already be aware of any jump tables in this block.
2305  i :      if (be_strict_with_current_block_)
2306  i :        return Disassembler::kDirectiveAbort;
2307    :  
2308    :      // If we're not in strict mode, replace any existing label with a jump-table label.
2309  i :      if (have_label) {
2310  i :        CHECK(block->RemoveLabel(offset));
2311    :      }
2312    :  
2313    :      CHECK(block->SetLabel(offset, BlockGraph::Label(
2314    :          base::StringPrintf("<JUMP-TABLE-%d>", offset),
2315  i :          BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)));
2316    :    }
2317    :  
2318  E :    return Disassembler::kDirectiveTerminatePath;  // Data follows the instruction, so this path ends here.
2319  E :  }
2320    :  
2321  E :  void Decomposer::MarkDisassembledPastEnd() {  // Sets DISASSEMBLED_PAST_END on the block currently being disassembled.
2322  E :    DCHECK(current_block_ != NULL);
2323  E :    current_block_->set_attribute(BlockGraph::DISASSEMBLED_PAST_END);
2324    :    // TODO(chrisha): The entire "disassembled past end" and non-returning
2325    :    //     function infrastructure can be ripped out once we rework the BB
2326    :    //     disassembler to be straight path, and remove the disassembly phase
2327    :    //     from the decomposer (where it's no longer needed). In the meantime
2328    :    //     we simply crank down this log verbosity due to all of the false
2329    :    //     positives.
2330  E :    VLOG(1) << "Disassembled past end of block or into known data for block \""
2331    :            << current_block_->name() << "\" at " << current_block_->addr()
2332    :            << ".";
2333  E :  }
2334    :  
2335    :  CallbackDirective Decomposer::VisitNonFlowControlInstruction(
2336  E :      RelativeAddress instr_start, RelativeAddress instr_end) {  // Checks the known references keyed strictly between |instr_start| and |instr_end|.
2337    :    // TODO(chrisha): We could walk the operands and follow references
2338    :    //     explicitly. If any of them are of reference type and there's no
2339    :    //     matching reference, this would be cause to blow up and die (we
2340    :    //     should get all of these as relocs and/or fixups).
2341    :  
2342    :    IntermediateReferenceMap::const_iterator ref_it =
2343  E :        references_.upper_bound(instr_start);
2344    :    IntermediateReferenceMap::const_iterator ref_end =
2345  E :        references_.lower_bound(instr_end);
2346    :  
2347  E :    for (; ref_it != ref_end; ++ref_it) {
2348    :      BlockGraph::Block* ref_block = image_->GetContainingBlock(
2349  E :          ref_it->second.base, 1);
2350  E :      DCHECK(ref_block != NULL);
2351    :  
2352    :      // This is an inter-block reference.
2353  E :      if (ref_block != current_block_) {
2354    :        // There should be no cross-block references to the middle of other
2355    :        // code blocks (to the top is fine, as we could be passing around a
2356    :        // function pointer). The exception is if the remote block is not
2357    :        // generated by cl.exe. In this case, there could be arbitrary labels
2358    :        // that act like functions within the body of that block, and referring
2359    :        // to them is perfectly fine.
2360  E :        bool ref_attr_safe = true;
2361  E :        if (ref_block->type() == BlockGraph::CODE_BLOCK) {
2362    :          ref_attr_safe =
2363    :              PETransformPolicy::CodeBlockAttributesAreBasicBlockSafe(ref_block,
2364  E :                                                                      false);
2365    :        }
2366    :        if (ref_block->type() == BlockGraph::CODE_BLOCK &&
2367    :            ref_it->second.base != ref_block->addr() &&
2368  E :            ref_attr_safe) {
2369  i :          LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2370    :              << "Found a non-control-flow code-block to middle-of-code-block "
2371    :              << "reference from block \"" << current_block_->name()
2372    :              << "\" to block \"" << ref_block->name() << "\".";
2373  i :          return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2374    :        }
2375  E :      } else {
2376    :        // This is an intra-block reference.
2377    :        BlockGraph::Offset ref_offset =
2378  E :            ref_it->second.base - current_block_->addr();
2379    :  
2380    :        // If this is to offset zero, we assume we are taking a pointer to
2381    :        // ourselves, which is safe.
2382  E :        if (ref_offset != 0) {
2383    :          // If this is 'clean' code it should be to data, and there should be a
2384    :          // label.
2385  E :          BlockGraph::Label label;
2386  E :          if (!current_block_->GetLabel(ref_offset, &label)) {
2387  i :            LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2388    :                << "Found an intra-block data-reference with no label.";
2389  i :            return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2390  i :          } else {
2391    :            if (!label.has_attributes(BlockGraph::DATA_LABEL) ||
2392  E :                label.has_attributes(BlockGraph::CODE_LABEL)) {
2393  i :              LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2394    :                  << "Found an intra-block data-like reference to a non-data "
2395    :                  << "or code label in block \"" << current_block_->name()
2396    :                  << "\".";
2397  i :              return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2398    :            }
2399    :          }
2400  E :        }
2401    :      }
2402  E :    }
2403    :  
2404  E :    return Disassembler::kDirectiveContinue;
2405  E :  }
2406    :  
2407    :  CallbackDirective Decomposer::VisitPcRelativeFlowControlInstruction(
2408    :      AbsoluteAddress instr_abs,
2409    :      RelativeAddress instr_rel,
2410    :      const _DInst& instruction,
2411  E :      bool end_of_code) {  // Validates or records the PC-relative reference made by a branch or call instruction.
2412  E :    int fc = META_GET_FC(instruction.meta);
2413  E :    DCHECK(fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH);
2414  E :    DCHECK_EQ(O_PC, instruction.ops[0].type);
2415  E :    DCHECK_EQ(O_NONE, instruction.ops[1].type);
2416  E :    DCHECK_EQ(O_NONE, instruction.ops[2].type);
2417  E :    DCHECK_EQ(O_NONE, instruction.ops[3].type);
2418    :    DCHECK(instruction.ops[0].size == 8 ||
2419    :        instruction.ops[0].size == 16 ||
2420  E :        instruction.ops[0].size == 32);
2421    :    // Distorm gives us size in bits, we want bytes.
2422  E :    BlockGraph::Size size = instruction.ops[0].size / 8;
2423    :  
2424    :    // Get the reference's address. Note we assume it's in the instruction's
2425    :    // tail end - I don't know of a case where a PC-relative offset in a branch
2426    :    // or call is not the very last thing in an x86 instruction.
2427  E :    AbsoluteAddress abs_src = instr_abs + instruction.size - size;
2428    :    AbsoluteAddress abs_dst = instr_abs + instruction.size +
2429  E :        static_cast<size_t>(instruction.imm.addr);
2430    :  
2431  E :    RelativeAddress src, dst;
2432    :    if (!image_file_.Translate(abs_src, &src) ||
2433  E :        !image_file_.Translate(abs_dst, &dst)) {
2434  i :      LOG(ERROR) << "Unable to translate absolute to relative addresses.";
2435  i :      return Disassembler::kDirectiveAbort;
2436    :    }
2437    :  
2438    :    // Get the block associated with the destination address. It must exist
2439    :    // and be a code block.
2440  E :    BlockGraph::Block* block = image_->GetContainingBlock(dst, 1);
2441  E :    DCHECK(block != NULL);
2442  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
2443    :  
2444    :    // For short references, we should not see a fixup.
2445  E :    ValidateOrAddReferenceMode mode = FIXUP_MUST_NOT_EXIST;
2446  E :    if (size == kPointerSize) {
2447    :      // Long PC_RELATIVE reference within a single block? FIXUPs aren't
2448    :      // strictly necessary.
2449  E :      if (block->Contains(src, kPointerSize))
2450  E :        mode = FIXUP_MAY_EXIST;
2451  E :      else
2452    :        // But if they're between blocks (section contributions), we expect to
2453    :        // find them.
2454  E :        mode = FIXUP_MUST_EXIST;
2455  E :    } else {
2456    :      // Since we slice by section contributions we no longer see short
2457    :      // references across blocks. If we do, bail!
2458  E :      if (block != current_block_) {
2459  i :        LOG(ERROR) << "Found a short PC-relative reference out of block \""
2460    :                   << current_block_->name() << "\".";
2461  i :        return Disassembler::kDirectiveAbort;
2462    :      }
2463    :    }
2464    :  
2465    :    // Validate or create the reference, as necessary.
2466    :    if (!ValidateOrAddReference(mode, src, BlockGraph::PC_RELATIVE_REF, size,
2467  E :                                dst, 0, &fixup_map_, &references_)) {
2468  i :      LOG(ERROR) << "Failed to validate/create reference originating from "
2469    :                 << "block \"" << current_block_->name() << "\".";
2470  i :      return Disassembler::kDirectiveAbort;
2471    :    }
2472    :  
2473    :    // If this is a call and the destination is a non-returning function,
2474    :    // then indicate that we should terminate this disassembly path.
2475    :    if (fc == FC_CALL &&
2476  E :        (block->attributes() & BlockGraph::NON_RETURN_FUNCTION)) {
2477    :      // TODO(chrisha): For now, we enforce that the call be to the beginning
2478    :      //    of the function. This may not be necessary, but better safe than
2479    :      //    sorry for now.
2480  E :      if (block->addr() != dst) {
2481  i :        LOG(ERROR) << "Calling inside the body of a non-returning function: "
2482    :                   << block->name();
2483  i :        return Disassembler::kDirectiveAbort;
2484    :      }
2485    :  
2486  E :      return Disassembler::kDirectiveTerminatePath;
2487    :    }
2488    :  
2489    :    // If we get here, then we don't think it's a non-returning call. If it's
2490    :    // not an unconditional jump and we're at the end of the code for this block
2491    :    // then we consider this as disassembling past the end.
2492  E :    if (fc != FC_UNC_BRANCH && end_of_code)
2493  i :      MarkDisassembledPastEnd();
2494    :  
2495  E :    return Disassembler::kDirectiveContinue;
2496  E :  }
2497    :  
2498    :  CallbackDirective Decomposer::VisitIndirectMemoryCallInstruction(
2499  E :        const _DInst& instruction, bool end_of_code) {  // Handles a call through a memory displacement; ends the path if the resolved target is non-returning.
2500  E :    DCHECK_EQ(FC_CALL, META_GET_FC(instruction.meta));
2501  E :    DCHECK_EQ(O_DISP, instruction.ops[0].type);
2502    :  
2503    :    // TODO(rogerm): Consider changing to image_file_.AbsToRelDisplacement()
2504    :    //     instead of translate. In theory, the indexing into a function-table
2505    :    //     could be statically offset such that the displacement falls outside
2506    :    //     of the image's address space. But, we have never seen the compiler
2507    :    //     generate code like that. This is left to use Translate, which will
2508    :    //     trigger an error in such a case.
2509  E :    AbsoluteAddress disp_addr_abs(static_cast<uint32>(instruction.disp));
2510  E :    RelativeAddress disp_addr_rel;
2511  E :    if (!image_file_.Translate(disp_addr_abs, &disp_addr_rel)) {
2512  i :      LOG(ERROR) << "Unable to translate call address.";
2513  i :      return Disassembler::kDirectiveAbort;
2514    :    }
2515    :  
2516    :    // Try to dereference the address of the call instruction. This can fail
2517    :    // for blocks that are only initialized at runtime, so we don't fail if
2518    :    // we don't find a reference.
2519    :    IntermediateReferenceMap::const_iterator ref_it =
2520  E :        references_.find(disp_addr_rel);
2521  E :    if (ref_it == references_.end())
2522  E :      return Disassembler::kDirectiveContinue;
2523    :  
2524    :    // NOTE: This process derails for bound import tables. In this case the
2525    :    //     attempted dereference above will fail, but we could still actually
2526    :    //     find the import name thunk by inspecting the offset of the memory
2527    :    //     location.
2528    :  
2529    :    // The reference must be direct (zero offset) and of maximum reference size.
2530  E :    const IntermediateReference& ref = ref_it->second;
2531  E :    DCHECK_EQ(BlockGraph::Reference::kMaximumSize, ref.size);
2532  E :    DCHECK_EQ(0, ref.offset);
2533    :  
2534    :    // Look up the thunk this refers to.
2535  E :    BlockGraph::Block* thunk = image_->GetBlockByAddress(ref.base);
2536  E :    if (thunk == NULL) {
2537  i :      LOG(ERROR) << "Unable to dereference intermediate reference at "
2538    :                 << disp_addr_rel << " to " << ref.base << ".";
2539  i :      return Disassembler::kDirectiveAbort;
2540    :    }
2541    :  
2542  E :    if (ref.type == BlockGraph::RELATIVE_REF) {
2543    :      // If this is a relative reference it must be part of an import address
2544    :      // table (during runtime this address would be patched up with an absolute
2545    :      // reference). Thus we expect the referenced block to be data, an import
2546    :      // name thunk.
2547  E :      DCHECK_EQ(BlockGraph::DATA_BLOCK, thunk->type());
2548  E :    } else {
2549    :      // If this is an absolute address it should actually point directly to
2550    :      // code.
2551  E :      DCHECK_EQ(BlockGraph::ABSOLUTE_REF, ref.type);
2552  E :      DCHECK_EQ(BlockGraph::CODE_BLOCK, thunk->type());
2553    :    }
2554    :  
2555    :    // Either way, if the block is non-returning we terminate this path of
2556    :    // disassembly.
2557  E :    if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
2558  E :      return Disassembler::kDirectiveTerminatePath;
2559    :  
2560  E :    if (end_of_code)
2561  i :      MarkDisassembledPastEnd();
2562    :  
2563  E :    return Disassembler::kDirectiveContinue;
2564  E :  }
2565    :  
2566    :  CallbackDirective Decomposer::OnInstruction(const Disassembler& walker,
2567  E :                                              const _DInst& instruction) {  // Per-instruction disassembler callback; dispatches on the instruction's flow-control kind.
2568    :    // Get the relative address of this instruction.
2569  E :    AbsoluteAddress instr_abs(static_cast<uint32>(instruction.addr));
2570  E :    RelativeAddress instr_rel;
2571  E :    if (!image_file_.Translate(instr_abs, &instr_rel)) {
2572  i :      LOG(ERROR) << "Unable to translate instruction address.";
2573  i :      return Disassembler::kDirectiveAbort;
2574    :    }
2575  E :    RelativeAddress after_instr_rel = instr_rel + instruction.size;
2576    :  
2577    :  #ifndef NDEBUG
2578    :    // If we're in debug mode, it's helpful to have a pointer directly to the
2579    :    // beginning of this instruction in memory. It is not otherwise used below.
2580  E :    BlockGraph::Offset instr_offset = instr_rel - current_block_->addr();
2581  E :    const uint8* instr_data = current_block_->data() + instr_offset;
2582    :  #endif
2583    :  
2584    :    // TODO(chrisha): Certain instructions require aligned data (ie: MMX/SSE
2585    :    //     instructions). We need to follow the data that these instructions
2586    :    //     refer to, and set their alignment appropriately. For now, alignment
2587    :    //     is simply preserved from the original image.
2588    :  
2589  E :    CallbackDirective directive = LookPastInstructionForData(after_instr_rel);
2590  E :    if (IsFatalCallbackDirective(directive))
2591  i :      return directive;
2592    :  
2593    :    // We're at the end of code in this block if we encountered data, or this is
2594    :    // the last instruction to be processed.
2595  E :    RelativeAddress block_end(current_block_->addr() + current_block_->size());
2596    :    bool end_of_code = (directive == Disassembler::kDirectiveTerminatePath) ||
2597  E :        (after_instr_rel >= block_end);
2598    :  
2599  E :    int fc = META_GET_FC(instruction.meta);
2600    :  
2601  E :    if (fc == FC_NONE) {
2602    :      // There's no control flow and we're at the end of the block. Mark the
2603    :      // block as dirty.
2604  E :      if (end_of_code)
2605  i :        MarkDisassembledPastEnd();
2606    :  
2607    :      return CombineCallbackDirectives(directive,
2608  E :          VisitNonFlowControlInstruction(instr_rel, after_instr_rel));
2609    :    }
2610    :  
2611    :    if ((fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH) &&
2612  E :        instruction.ops[0].type == O_PC) {
2613    :      // For all branches, calls and conditional branches to PC-relative
2614    :      // addresses, record a PC-relative reference.
2615    :      return CombineCallbackDirectives(directive,
2616    :          VisitPcRelativeFlowControlInstruction(instr_abs,
2617    :                                                instr_rel,
2618    :                                                instruction,
2619  E :                                                end_of_code));
2620    :    }
2621    :  
2622    :    // We explicitly handle indirect memory call instructions. These can often
2623    :    // be tracked down as pointing to a block in this image, or to an import
2624    :    // name thunk from another module.
2625  E :    if (fc == FC_CALL && instruction.ops[0].type == O_DISP) {
2626    :      return CombineCallbackDirectives(directive,
2627  E :          VisitIndirectMemoryCallInstruction(instruction, end_of_code));
2628    :    }
2629    :  
2630    :    // Look out for blocks where disassembly seems to run off the end of the
2631    :    // block. We do not treat interrupts as flow control as execution can
2632    :    // continue past the interrupt.
2633  E :    if (fc != FC_RET && fc != FC_UNC_BRANCH && end_of_code)
2634  E :      MarkDisassembledPastEnd();
2635    :  
2636  E :    return directive;
2637  E :  }
2638    :  
2639    :  bool Decomposer::CreatePEImageBlocksAndReferences(
2640  E :      PEFileParser::PEHeader* header) {  // Runs PEFileParser over the image with this object's reference and import-thunk callbacks.
2641    :    PEFileParser::AddReferenceCallback add_reference(
2642  E :        base::Bind(&Decomposer::AddReferenceCallback, base::Unretained(this)));
2643  E :    PEFileParser parser(image_file_, image_, add_reference);
2644    :    parser.set_on_import_thunk(
2645  E :        base::Bind(&Decomposer::OnImportThunkCallback, base::Unretained(this)));
2646    :  
2647  E :    if (!parser.ParseImage(header)) {
2648  i :      LOG(ERROR) << "Unable to parse PE image.";
2649  i :      return false;
2650    :    }
2651    :  
2652  E :    return true;
2653  E :  }
2654    :  
2655  E :  bool Decomposer::FinalizeIntermediateReferences() {  // Turns every entry of references_ into a block-to-block reference, then clears the map.
2656  E :    IntermediateReferenceMap::const_iterator it(references_.begin());
2657  E :    IntermediateReferenceMap::const_iterator end(references_.end());
2658    :  
2659  E :    for (; it != end; ++it) {
2660  E :      RelativeAddress src_addr(it->first);
2661  E :      BlockGraph::Block* src = image_->GetBlockByAddress(src_addr);
2662  E :      RelativeAddress dst_base_addr(it->second.base);
2663  E :      RelativeAddress dst_addr(dst_base_addr + it->second.offset);
2664  E :      BlockGraph::Block* dst = image_->GetBlockByAddress(dst_base_addr);  // The destination block is looked up by base, not by base + offset.
2665    :  
2666  E :      if (src == NULL || dst == NULL) {
2667  i :        LOG(ERROR) << "Reference source or base destination address is out of "
2668    :                   << "range, src: " << src << ", dst: " << dst;
2669  i :        return false;
2670    :      }
2671    :  
2672  E :      RelativeAddress src_start = src->addr();
2673  E :      RelativeAddress dst_start = dst->addr();
2674    :  
2675    :      // Get the offset of the ultimate destination relative to the start of the
2676    :      // destination block.
2677  E :      BlockGraph::Offset dst_offset = dst_addr - dst_start;
2678    :  
2679    :      // Get the offset of the actual referenced object relative to the start of
2680    :      // the destination block.
2681  E :      BlockGraph::Offset dst_base = dst_base_addr - dst_start;
2682    :  
2683    :      BlockGraph::Reference ref(it->second.type,
2684    :                                it->second.size,
2685    :                                dst,
2686    :                                dst_offset,
2687  E :                                dst_base);
2688  E :      src->SetReference(src_addr - src_start, ref);
2689  E :    }
2690    :  
2691  E :    references_.clear();
2692    :  
2693  E :    return true;
2694  E :  }
2695    :  
2696  E :  bool Decomposer::ConfirmFixupsVisited() const {  // Returns false if any unvisited fixup is not a PC-relative fixup inside a code block.
2697  E :    bool success = true;
2698    :  
2699    :    // Ideally, all fixups should have been visited during decomposition.
2700    :    // TODO(chrisha): Address the root problems underlying the following
2701    :    //     temporary fix.
2702  E :    FixupMap::const_iterator fixup_it = fixup_map_.begin();
2703  E :    for (; fixup_it != fixup_map_.end(); ++fixup_it) {
2704  E :      if (fixup_it->second.visited)
2705  E :        continue;
2706    :  
2707    :      const BlockGraph::Block* block =
2708  E :          image_->GetContainingBlock(fixup_it->first, kPointerSize);
2709  E :      DCHECK(block != NULL);
2710    :  
2711    :      // We know that we currently do not have full disassembly coverage as there
2712    :      // are several orphaned pieces of apparently unreachable code in the CRT
2713    :      // that we do not disassemble, but which may contain jmp or call commands.
2714    :      // Thus, we expect that missed fixups are all PC-relative and lie within
2715    :      // code blocks.
2716    :      if (block->type() == BlockGraph::CODE_BLOCK &&
2717  E :          fixup_it->second.type == BlockGraph::PC_RELATIVE_REF)
2718  E :        continue;
2719    :  
2720  i :      success = false;  // Keep iterating so that every unexpected fixup gets logged.
2721  i :      LOG(ERROR) << "Unexpected unseen fixup at " << fixup_it->second.location;
2722  i :    }
2723    :  
2724  E :    return success;
2725  E :  }
2726    :  
2727  E :  bool Decomposer::FindPaddingBlocks() {  // Sets PADDING_BLOCK on gap blocks that carry no symbol information and only filler bytes; always returns true.
2728  E :    DCHECK(image_ != NULL);
2729  E :    DCHECK(image_->graph() != NULL);
2730    :  
2731    :    BlockGraph::BlockMap::iterator block_it =
2732  E :        image_->graph()->blocks_mutable().begin();
2733  E :    for (; block_it != image_->graph()->blocks_mutable().end(); ++block_it) {
2734  E :      BlockGraph::Block& block = block_it->second;
2735    :  
2736    :      // Padding blocks must not have any symbol information: no labels,
2737    :      // no references, no referrers, and they must be a gap block.
2738    :      if (block.labels().size() != 0 ||
2739    :          block.references().size() != 0 ||
2740    :          block.referrers().size() != 0 ||
2741  E :          (block.attributes() & BlockGraph::GAP_BLOCK) == 0)
2742  E :        continue;
2743    :  
2744  E :      switch (block.type()) {
2745    :        // Code blocks should be fully defined and consist of only int3s.
2746    :        case BlockGraph::CODE_BLOCK: {
2747    :          if (block.data_size() != block.size() ||
2748  E :              RepeatedValue(block.data(), block.data_size()) != kInt3)
2749  i :            continue;
2750  E :          break;
2751    :        }
2752    :  
2753    :        // Data blocks should be uninitialized or have fully defined data
2754    :        // consisting only of zeros.
2755    :        default: {
2756  E :          DCHECK_EQ(BlockGraph::DATA_BLOCK, block.type());
2757  E :          if (block.data_size() == 0)  // Uninitialized data blocks are padding.
2758  E :            break;
2759    :          if (block.data_size() != block.size() ||
2760  E :              RepeatedValue(block.data(), block.data_size()) != 0)
2761  i :            continue;
2762    :        }
2763    :      }
2764    :  
2765    :      // If we fall through to this point, then the block is a padding block.
2766  E :      block.set_attribute(BlockGraph::PADDING_BLOCK);
2767  E :    }
2768    :  
2769  E :    return true;
2770  E :  }
2771    :  
2772  E :  bool Decomposer::CreateSections() {
             // Adds one section to the image's BlockGraph for each section header of
             // the PE file, in header order, copying the name and characteristics.
             // Returns false if a newly added section's ID differs from the index of
             // its header; returns true otherwise.
2773    :    // Iterate through the image sections, and create sections in the BlockGraph.
2774  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2775  E :    for (size_t i = 0; i < num_sections; ++i) {
2776  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2777  E :      std::string name = pe::PEFile::GetSectionName(*header);
2778    :      BlockGraph::Section* section = image_->graph()->AddSection(
2779  E :          name, header->Characteristics);
2780  E :      DCHECK(section != NULL);
2781    :  
2782    :      // For now, we expect them to have been created with the same IDs as those
2783    :      // in the original image.
2784  E :      if (section->id() != i) {
2785  i :        LOG(ERROR) << "Unexpected section ID.";
2786  i :        return false;
2787    :      }
2788  E :    }
2789    :  
2790  E :    return true;
2791  E :  }
2792    :  
2793  E :  bool Decomposer::LoadDebugStreams(IDiaSession* dia_session) {
             // Loads the FIXUP debug stream (required) and the OMAP-from debug stream
             // (optional) through the given DIA session, then hands both to
             // OmapAndValidateFixups(). Returns false if the fixups could not be
             // loaded, if loading the OMAP table errored, or if validation failed.
2794  E :    DCHECK(dia_session != NULL);
2795    :  
2796    :    // Load the fixups. These must exist.
2797  E :    PdbFixups pdb_fixups;
2798    :    SearchResult search_result = FindAndLoadDiaDebugStreamByName(
2799  E :        kFixupDiaDebugStreamName, dia_session, &pdb_fixups);
2800  E :    if (search_result != kSearchSucceeded) {
               // Only the kSearchFailed outcome is reported here; any other
               // unsuccessful result returns false without logging at this level.
2801  i :      if (search_result == kSearchFailed) {
2802  i :        LOG(ERROR) << "PDB file does not contain a FIXUP stream. Module must be "
2803    :                      "linked with '/PROFILE' or '/DEBUGINFO:FIXUP' flag.";
2804    :      }
2805  i :      return false;
2806    :    }
2807    :  
2808    :    // Load the omap_from table. It is not necessary that one exist.
2809  E :    std::vector<OMAP> omap_from;
2810    :    search_result = FindAndLoadDiaDebugStreamByName(
2811  E :        kOmapFromDiaDebugStreamName, dia_session, &omap_from);
             // Only kSearchErrored is fatal here; other results proceed with whatever
             // omap_from holds (it is still empty unless the load populated it).
2812  E :    if (search_result == kSearchErrored)
2813  i :      return false;
2814    :  
2815    :    // Translate and validate fixups.
2816  E :    if (!OmapAndValidateFixups(omap_from, pdb_fixups))
2817  i :      return false;
2818    :  
2819  E :    return true;
2820  E :  }
2821    :  
2822    :  bool Decomposer::OmapAndValidateFixups(const std::vector<OMAP>& omap_from,
2823  E :                                         const PdbFixups& pdb_fixups) {
             // Validates the PDB fixups and records them in fixup_map_, keyed by the
             // fixup's location. When omap_from is non-empty both the location and
             // the base address of each fixup are first translated through it.
             // Offset fixups and fixups located at or beyond the start of the
             // resource section are skipped. Returns false on an invalid fixup
             // header, an address outside the image, two fixups at the same
             // location, or when the section-order check below fails.
2824  E :    bool have_omap = omap_from.size() != 0;
2825    :  
2826    :    // The resource section in Chrome is modified post-link by a tool that adds a
2827    :    // manifest to it. This causes all of the fixups in the resource section (and
2828    :    // anything beyond it) to be invalid. As long as the resource section is the
2829    :    // last section in the image, this is not a problem (we can safely ignore the
2830    :    // .rsrc fixups, which we know how to parse without them). However, if there
2831    :    // is a section after the resource section, things will have been shifted
2832    :    // and potentially crucial fixups will be invalid.
             // rsrc_start keeps the 0xffffffff sentinel when no section name matches
             // kResourceSectionName. max_start is default-constructed (presumably
             // zero - confirm against RelativeAddress).
             // NOTE(review): the loop breaks as soon as the resource section is
             // found, so sections listed after it never update max_start and are
             // not compared against rsrc_start - confirm this is intended.
2833  E :    RelativeAddress rsrc_start(0xffffffff), max_start;
2834  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2835  E :    for (size_t i = 0; i < num_sections; ++i) {
2836  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2837  E :      RelativeAddress start(header->VirtualAddress);
2838  E :      if (start > max_start)
2839  E :        max_start = start;
2840    :      if (strncmp(kResourceSectionName,
2841    :                  reinterpret_cast<const char*>(header->Name),
2842  E :                  IMAGE_SIZEOF_SHORT_NAME) == 0) {
2843  E :        rsrc_start = start;
2844  E :        break;
2845    :      }
2846  E :    }
2847    :  
2848    :    // Ensure there are no sections after the resource section.
2849  E :    if (max_start > rsrc_start) {
2850  i :      LOG(ERROR) << kResourceSectionName << " section is not the last section.";
2851  i :      return false;
2852    :    }
2853    :  
2854    :    // Ensure the fixups are all valid, and populate the fixup map.
2855  E :    for (size_t i = 0; i < pdb_fixups.size(); ++i) {
2856  E :      if (!pdb_fixups[i].ValidHeader()) {
2857  i :        LOG(ERROR) << "Unknown fixup header: "
2858    :                   << base::StringPrintf("0x%08X.", pdb_fixups[i].header);
2859  i :        return false;
2860    :      }
2861    :  
2862    :      // For now, we skip any offset fixups. We've only seen this in the context
2863    :      // of TLS data access, and we don't mess with TLS structures.
2864  E :      if (pdb_fixups[i].is_offset())
2865  E :        continue;
2866    :  
2867    :      // All fixups we handle should be full size pointers.
2868  E :      DCHECK_EQ(kPointerSize, pdb_fixups[i].size());
2869    :  
2870    :      // Get the original addresses, and map them through OMAP information.
2871    :      // Normally DIA takes care of this for us, but there is no API for
2872    :      // getting DIA to give us FIXUP information, so we have to do it manually.
2873  E :      RelativeAddress rva_location(pdb_fixups[i].rva_location);
2874  E :      RelativeAddress rva_base(pdb_fixups[i].rva_base);
2875  E :      if (have_omap) {
2876  i :        rva_location = pdb::TranslateAddressViaOmap(omap_from, rva_location);
2877  i :        rva_base = pdb::TranslateAddressViaOmap(omap_from, rva_base);
2878    :      }
2879    :  
2880    :      // If these are part of the .rsrc section, ignore them.
2881  E :      if (rva_location >= rsrc_start)
2882  E :        continue;
2883    :  
2884    :      // Ensure they live within the image, and refer to things within the
2885    :      // image.
2886    :      if (!image_file_.Contains(rva_location, kPointerSize) ||
2887  E :          !image_file_.Contains(rva_base, 1)) {
2888  i :        LOG(ERROR) << "Fixup refers to addresses outside of image.";
2889  i :        return false;
2890    :      }
2891    :  
2892    :      // Add the fix up, and ensure the source address is unique.
               // The literal 'false' is presumably the initial value of the fixup's
               // 'visited' flag - confirm against the field order of Fixup.
2893  E :      Fixup fixup = { PdbFixupTypeToReferenceType(pdb_fixups[i].type),
2894  E :                      pdb_fixups[i].refers_to_code(),
2895  E :                      pdb_fixups[i].is_data(),
2896  E :                      false,
2897  E :                      rva_location,
2898  E :                      rva_base };
2899  E :      bool added = fixup_map_.insert(std::make_pair(rva_location, fixup)).second;
2900  E :      if (!added) {
2901  i :        LOG(ERROR) << "Colliding fixups at " << rva_location;
2902  i :        return false;
2903    :      }
2904  E :    }
2905    :  
2906  E :    return true;
2907  E :  }
2908    :  
2909    :  bool Decomposer::RegisterStaticInitializerPatterns(
2910  E :      const base::StringPiece& begin, const base::StringPiece& end) {
             // Compiles 'begin' and 'end' into a pair of regular expressions and
             // appends the pair to static_initializer_patterns_. Returns false,
             // without registering anything, unless each pattern has exactly one
             // capturing group.
2911    :    // Ensuring the patterns each have exactly one capturing group.
2912    :    REPair re_pair = std::make_pair(RE(begin.as_string()),
2913  E :                                    RE(end.as_string()));
2914    :    if (re_pair.first.NumberOfCapturingGroups() != 1 ||
2915  E :        re_pair.second.NumberOfCapturingGroups() != 1)
2916  i :      return false;
2917    :  
2918  E :    static_initializer_patterns_.push_back(re_pair);
2919    :  
2920  E :    return true;
2921  E :  }
2922    :  
2923    :  bool Decomposer::RegisterNonReturningFunction(
2924  E :      const base::StringPiece& function_name) {
             // Inserts a copy of function_name into non_returning_functions_ and
             // returns the 'second' member of the insert result - presumably true
             // only when the name was not already present (std::set semantics).
2925  E :    return non_returning_functions_.insert(function_name.as_string()).second;
2926  E :  }
2927    :  
2928    :  bool Decomposer::RegisterNonReturningImport(
2929    :      const base::StringPiece& module_name,
2930  E :      const base::StringPiece& function_name) {
             // Records function_name in the string set stored under module_name in
             // non_returning_imports_. The subscript lookup presumably creates an
             // empty set for a module seen for the first time (std::map semantics).
             // Returns the 'second' member of the insert result - presumably true
             // only when the function was not already registered for that module.
2931  E :    StringSet& module_set = non_returning_imports_[module_name.as_string()];
2932  E :    return module_set.insert(function_name.as_string()).second;
2933  E :  }
2934    :  
2935    :  bool Decomposer::LoadBlockGraphFromPdbStream(const PEFile& image_file,
2936    :                                               pdb::PdbStream* block_graph_stream,
2937  E :                                               ImageLayout* image_layout) {
             // Deserializes a block-graph and image layout from block_graph_stream
             // into image_layout. The stream starts with a uint32 version followed by
             // a one-byte 'compressed' flag; when the flag is non-zero the rest of
             // the stream is read through a ZInStream. Returns false if the stream
             // cannot be read, the version differs from
             // pdb::kSyzygyBlockGraphStreamVersion, the decompressor cannot be
             // initialized, or deserialization fails.
2938  E :    DCHECK(block_graph_stream != NULL);
2939  E :    DCHECK(image_layout != NULL);
2940  E :    LOG(INFO) << "Reading block-graph and image layout from the PDB.";
2941    :  
2942    :    // Initialize an input archive pointing to the stream.
             // NOTE(review): the DCHECK on byte_stream below comes after the pointer
             // has already been dereferenced by the Init() call.
2943  E :    scoped_refptr<pdb::PdbByteStream> byte_stream = new pdb::PdbByteStream();
2944  E :    if (!byte_stream->Init(block_graph_stream))
2945  i :      return false;
2946  E :    DCHECK(byte_stream.get() != NULL);
2947    :  
2948  E :    core::ScopedInStreamPtr pdb_in_stream;
2949    :    pdb_in_stream.reset(core::CreateByteInStream(
2950  E :        byte_stream->data(), byte_stream->data() + byte_stream->length()));
2951    :  
2952    :    // Read the header.
             // Both fields are read as raw bytes straight into the local variables.
2953  E :    uint32 stream_version = 0;
2954  E :    unsigned char compressed = 0;
2955    :    if (!pdb_in_stream->Read(sizeof(stream_version),
2956    :                             reinterpret_cast<core::Byte*>(&stream_version)) ||
2957    :        !pdb_in_stream->Read(sizeof(compressed),
2958  E :                             reinterpret_cast<core::Byte*>(&compressed))) {
2959  i :      LOG(ERROR) << "Failed to read existing Syzygy block-graph stream header.";
2960  i :      return false;
2961    :    }
2962    :  
2963    :    // Check the stream version.
2964  E :    if (stream_version != pdb::kSyzygyBlockGraphStreamVersion) {
2965  E :      LOG(ERROR) << "PDB contains an unsupported Syzygy block-graph stream"
2966    :                 << " version (got " << stream_version << ", expected "
2967    :                 << pdb::kSyzygyBlockGraphStreamVersion << ").";
2968  E :      return false;
2969    :    }
2970    :  
2971    :    // If the stream is compressed insert the decompression filter.
             // in_stream ends up pointing at either the raw PDB byte stream or the
             // decompressing wrapper; zip_in_stream owns the wrapper for the rest of
             // this function.
2972  E :    core::InStream* in_stream = pdb_in_stream.get();
2973  E :    scoped_ptr<core::ZInStream> zip_in_stream;
2974  E :    if (compressed != 0) {
2975  E :      zip_in_stream.reset(new core::ZInStream(in_stream));
2976  E :      if (!zip_in_stream->Init()) {
2977  i :        LOG(ERROR) << "Unable to initialize ZInStream.";
2978  i :        return false;
2979    :      }
2980  E :      in_stream = zip_in_stream.get();
2981    :    }
2982    :  
2983    :    // Deserialize the image-layout.
             // 'attributes' is passed to the loader by address but is not otherwise
             // used in this function.
2984  E :    core::NativeBinaryInArchive in_archive(in_stream);
2985  E :    block_graph::BlockGraphSerializer::Attributes attributes = 0;
2986    :    if (!LoadBlockGraphAndImageLayout(
2987  E :        image_file, &attributes, image_layout, &in_archive)) {
2988  i :      LOG(ERROR) << "Failed to deserialize block-graph and image layout.";
2989  i :      return false;
2990    :    }
2991    :  
2992  E :    return true;
2993  E :  }
2994    :  
2995    :  bool Decomposer::LoadBlockGraphFromPdb(const base::FilePath& pdb_path,
2996    :                                         const PEFile& image_file,
2997    :                                         ImageLayout* image_layout,
2998  E :                                         bool* stream_exists) {
             // Reads the PDB at pdb_path and, if it contains a Syzygy block-graph
             // stream, deserializes the block-graph and image layout from it into
             // image_layout. *stream_exists is set to false when the PDB has no such
             // stream and to true when one was found. Note that it is left untouched
             // when the PDB itself cannot be read. Returns true only when the stream
             // was found and successfully loaded.
2999  E :    DCHECK(image_layout != NULL);
3000  E :    DCHECK(stream_exists != NULL);
3001    :  
3002  E :    pdb::PdbFile pdb_file;
3003  E :    pdb::PdbReader pdb_reader;
3004  E :    if (!pdb_reader.Read(pdb_path, &pdb_file)) {
3005  i :      LOG(ERROR) << "Unable to read the PDB named \"" << pdb_path.value()
3006    :                 << "\".";
               // This function returns bool, so report failure as false, not NULL.
3007  i :      return false;
3008    :    }
3009    :  
3010    :    // Try to get the block-graph stream from the PDB.
3011    :    scoped_refptr<pdb::PdbStream> block_graph_stream =
3012  E :        GetBlockGraphStreamFromPdb(&pdb_file);
3013  E :    if (block_graph_stream.get() == NULL) {
3014  E :      *stream_exists = false;
3015  E :      return false;
3016    :    }
3017    :  
3018    :    // The PDB contains a block-graph stream, the block-graph and the image layout
3019    :    // will be read from this stream.
3020  E :    *stream_exists = true;
3021    :    if (!LoadBlockGraphFromPdbStream(image_file, block_graph_stream.get(),
3022  E :                                     image_layout)) {
3023  i :      return false;
3024    :    }
3025    :  
3026  E :    return true;
3027  E :  }
3028    :  
3029    :  scoped_refptr<pdb::PdbStream> Decomposer::GetBlockGraphStreamFromPdb(
3030  E :      pdb::PdbFile* pdb_file) {
             // Looks up the stream named pdb::kSyzygyBlockGraphStreamName through the
             // name-stream map of the PDB header info stream and returns it.
             // Returns the default-constructed (presumably NULL) pointer when the
             // header cannot be read or the name is absent, and whatever GetStream()
             // yields otherwise.
             // NOTE(review): an empty stream is logged as an error but is still
             // returned as a non-NULL pointer - confirm callers expect this.
3031  E :    scoped_refptr<pdb::PdbStream> block_graph_stream;
3032    :    // Get the PDB header and try to get the block-graph ID stream from it.
3033  E :    pdb::PdbInfoHeader70 pdb_header = {0};
3034  E :    pdb::NameStreamMap name_stream_map;
3035    :    if (!ReadHeaderInfoStream(pdb_file->GetStream(pdb::kPdbHeaderInfoStream),
3036    :                             &pdb_header,
3037  E :                             &name_stream_map)) {
3038  i :      LOG(ERROR) << "Failed to read header info stream.";
3039  i :      return block_graph_stream;
3040    :    }
3041    :    pdb::NameStreamMap::const_iterator name_it = name_stream_map.find(
3042  E :        pdb::kSyzygyBlockGraphStreamName);
             // A missing name is not treated as an error: nothing is logged.
3043  E :    if (name_it == name_stream_map.end()) {
3044  E :      return block_graph_stream;
3045    :    }
3046    :  
3047    :    // Get the block-graph stream and ensure that it's not empty.
3048  E :    block_graph_stream = pdb_file->GetStream(name_it->second);
3049  E :    if (block_graph_stream.get() == NULL) {
3050  i :      LOG(ERROR) << "Failed to read the block-graph stream from the PDB.";
3051  i :      return block_graph_stream;
3052    :    }
3053  E :    if (block_graph_stream->length() == 0) {
3054  i :      LOG(ERROR) << "The block-graph stream is empty.";
3055  i :      return block_graph_stream;
3056    :    }
3057    :  
3058  E :    return block_graph_stream;
3059  E :  }
3060    :  
3061    :  bool Decomposer::OnImportThunkCallback(const char* module_name,
3062    :                                         const char* symbol_name,
3063  E :                                         BlockGraph::Block* thunk) {
             // Sets the NON_RETURN_FUNCTION attribute on 'thunk' when symbol_name is
             // registered under module_name in non_returning_imports_. Both lookups
             // use the strings exactly as given. Always returns true, whether or not
             // the thunk was decorated.
3064  E :    DCHECK(module_name != NULL);
3065  E :    DCHECK(symbol_name != NULL);
3066  E :    DCHECK(thunk != NULL);
3067    :  
3068    :    // Look for the module first.
3069    :    StringSetMap::const_iterator module_it =
3070  E :        non_returning_imports_.find(std::string(module_name));
3071  E :    if (module_it == non_returning_imports_.end())
3072  E :      return true;
3073    :  
3074    :    // Look for the symbol within the module.
3075  E :    if (module_it->second.count(std::string(symbol_name)) == 0)
3076  E :      return true;
3077    :  
3078    :    // If we get here then the imported symbol is found. Decorate the thunk.
3079  E :    thunk->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
3080  E :    VLOG(1) << "Forcing non-returning attribute on imported symbol \""
3081    :            << symbol_name << "\" from module \"" << module_name << "\".";
3082    :  
3083  E :    return true;
3084  E :  }
3085    :  
3086    :  }  // namespace pe

Coverage information generated Wed Dec 11 11:34:16 2013.