Coverage for /Syzygy/pe/decomposer.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
79.7% | 1179 / 1480 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/pe/decomposer.h"
  16    :  
  17    :  #include <cvconst.h>
  18    :  #include <algorithm>
  19    :  
  20    :  #include "base/bind.h"
  21    :  #include "base/logging.h"
  22    :  #include "base/path_service.h"
  23    :  #include "base/string_util.h"
  24    :  #include "base/stringprintf.h"
  25    :  #include "base/utf_string_conversions.h"
  26    :  #include "base/files/file_path.h"
  27    :  #include "base/memory/scoped_ptr.h"
  28    :  #include "base/win/scoped_bstr.h"
  29    :  #include "base/win/scoped_comptr.h"
  30    :  #include "sawbuck/common/com_utils.h"
  31    :  #include "sawbuck/sym_util/types.h"
  32    :  #include "syzygy/block_graph/block_util.h"
  33    :  #include "syzygy/block_graph/typed_block.h"
  34    :  #include "syzygy/core/disassembler_util.h"
  35    :  #include "syzygy/core/zstream.h"
  36    :  #include "syzygy/pdb/omap.h"
  37    :  #include "syzygy/pdb/pdb_byte_stream.h"
  38    :  #include "syzygy/pdb/pdb_util.h"
  39    :  #include "syzygy/pe/dia_util.h"
  40    :  #include "syzygy/pe/find.h"
  41    :  #include "syzygy/pe/metadata.h"
  42    :  #include "syzygy/pe/pdb_info.h"
  43    :  #include "syzygy/pe/pe_file_parser.h"
  44    :  #include "syzygy/pe/pe_utils.h"
  45    :  #include "syzygy/pe/serialization.h"
  46    :  
  47    :  namespace pe {
  48    :  namespace {
  49    :  
  50    :  using base::win::ScopedBstr;
  51    :  using base::win::ScopedComPtr;
  52    :  using block_graph::BlockGraph;
  53    :  using block_graph::ConstTypedBlock;
  54    :  using builder::Opt;
  55    :  using builder::Seq;
  56    :  using builder::Star;
  57    :  using core::AbsoluteAddress;
  58    :  using core::Disassembler;
  59    :  using core::RelativeAddress;
  60    :  
  61    :  typedef Disassembler::CallbackDirective CallbackDirective;
  62    :  
  63    :  const size_t kPointerSize = sizeof(AbsoluteAddress);
  64    :  
  65    :  // Maps a pdb::PdbFixup::Type onto the equivalent BlockGraph::ReferenceType.
  66    :  BlockGraph::ReferenceType PdbFixupTypeToReferenceType(
  67  E :      pdb::PdbFixup::Type type) {
  68  E :    switch (type) {
  69    :      case pdb::PdbFixup::TYPE_ABSOLUTE:
  70  E :        return BlockGraph::ABSOLUTE_REF;
  71    :  
  72    :      case pdb::PdbFixup::TYPE_RELATIVE:
  73  E :        return BlockGraph::RELATIVE_REF;
  74    :  
  75    :      case pdb::PdbFixup::TYPE_PC_RELATIVE:
  76  E :        return BlockGraph::PC_RELATIVE_REF;
  77    :  
  78    :      default:
  79  i :        NOTREACHED() << "Invalid PdbFixup::Type.";
  80    :        // Unknown types are a logic error; the value returned is arbitrary.
  81  i :        return BlockGraph::ABSOLUTE_REF;
  82    :    }
  83  E :  }
  84    :  
  85    :  // Records an intermediate reference for src_addr in the provided map. If one
  86    :  // already exists at src_addr it must match field-for-field, else fails.
  87    :  bool AddReference(RelativeAddress src_addr,
  88    :                    BlockGraph::ReferenceType type,
  89    :                    BlockGraph::Size size,
  90    :                    RelativeAddress dst_base,
  91    :                    BlockGraph::Offset dst_offset,
  92  E :                    Decomposer::IntermediateReferenceMap* references) {
  93  E :    DCHECK(references != NULL);
  94    :  
  95    :    // If we get an iterator to a reference and it has the same source address
  96    :    // then ensure that we are consistent with it.
  97    :    Decomposer::IntermediateReferenceMap::iterator it =
  98  E :        references->lower_bound(src_addr);
  99  E :    if (it != references->end() && it->first == src_addr) {
 100    :      if (type != it->second.type || size != it->second.size ||
 101  E :          dst_base != it->second.base || dst_offset != it->second.offset) {
 102  i :        LOG(ERROR) << "Trying to insert inconsistent and colliding intermediate "
 103    :                      "references.";
 104  i :        return false;
 105    :      }
 106    :    }
 107    :  
 108  E :    Decomposer::IntermediateReference ref = { type,
 109  E :                                              size,
 110  E :                                              dst_base,
 111  E :                                              dst_offset };
 112    :  
 113    :    // Since we used lower_bound above, we can use it as a hint for the
 114    :    // insertion. This saves us from incurring the lookup cost twice.
 115  E :    references->insert(it, std::make_pair(src_addr, ref));
 116  E :    return true;
 117  E :  }
 118    :  
 119    :  // Checks that the reference's type matches the fixup at fixup_it and that its
 120    :  // size equals kPointerSize. On success marks the fixup as visited.
 121    :  bool ValidateReference(RelativeAddress src_addr,
 122    :                         BlockGraph::ReferenceType type,
 123    :                         BlockGraph::Size size,
 124  E :                         Decomposer::FixupMap::iterator fixup_it) {
 125  E :    if (type != fixup_it->second.type || size != kPointerSize) {
 126  i :      LOG(ERROR) << "Reference at " << src_addr
 127    :                 << " not consistent with corresponding fixup.";
 128  i :      return false;
 129    :    }
 130    :  
 131    :    // Mark this fixup as having been visited.
 132  E :    fixup_it->second.visited = true;
 133    :  
 134  E :    return true;
 135  E :  }
 136    :  // Selects how ValidateOrAddReference treats a fixup found at the source address.
 137    :  enum ValidateOrAddReferenceMode {
 138    :    // Look for an existing fixup. If we find one, validate against it,
 139    :    // otherwise create a new intermediate reference.
 140    :    FIXUP_MAY_EXIST,
 141    :    // Compare against an existing fixup, bailing if there is none. Does not
 142    :    // create a new intermediate reference.
 143    :    FIXUP_MUST_EXIST,
 144    :    // Look for an existing fixup, and fail if one exists. Otherwise, create
 145    :    // a new intermediate reference.
 146    :    FIXUP_MUST_NOT_EXIST
 147    :  };
 148    :  bool ValidateOrAddReference(ValidateOrAddReferenceMode mode,
 149    :                              RelativeAddress src_addr,
 150    :                              BlockGraph::ReferenceType type,
 151    :                              BlockGraph::Size size,
 152    :                              RelativeAddress dst_base,
 153    :                              BlockGraph::Offset dst_offset,
 154    :                              Decomposer::FixupMap* fixup_map,
 155  E :                              Decomposer::IntermediateReferenceMap* references) {
 156  E :    DCHECK(fixup_map != NULL);
 157  E :    DCHECK(references != NULL);
 158    :  
 159  E :    Decomposer::FixupMap::iterator it = fixup_map->find(src_addr);  // end() means no fixup at src_addr.
 160    :  
 161  E :    switch (mode) {
 162    :      case FIXUP_MAY_EXIST: {
 163    :        if (it != fixup_map->end() &&
 164  E :            !ValidateReference(src_addr, type, size, it))
 165  i :          return false;
 166    :        return AddReference(src_addr, type, size, dst_base, dst_offset,
 167  E :                            references);
 168    :      }
 169    :  
 170    :      case FIXUP_MUST_EXIST: {
 171  E :        if (it == fixup_map->end()) {
 172  i :          LOG(ERROR) << "Reference at " << src_addr << " has no matching fixup.";
 173  i :          return false;
 174    :        }
 175  E :        if (!ValidateReference(src_addr, type, size, it))
 176  i :          return false;
 177    :        // Validation only: this mode never adds an intermediate reference.
 178  E :        return true;
 179    :      }
 180    :  
 181    :      case FIXUP_MUST_NOT_EXIST: {
 182  E :        if (it != fixup_map->end()) {
 183  i :          LOG(ERROR) << "Reference at " << src_addr
 184    :                     << " collides with an existing fixup.";
 185  i :          return false;
 186    :        }
 187    :        return AddReference(src_addr, type, size, dst_base, dst_offset,
 188  E :                            references);
 189    :      }
 190    :  
 191    :      default: {
 192  i :        NOTREACHED() << "Invalid ValidateOrAddReferenceMode.";
 193  i :        return false;
 194    :      }
 195    :    }
 196  E :  }
 197    :  // Stores the length of the symbol's type in *length; 0 if it has no type info.
 198  E :  bool GetTypeInfo(IDiaSymbol* symbol, size_t* length) {
 199  E :    DCHECK(symbol != NULL);
 200  E :    DCHECK(length != NULL);
 201    :  
 202  E :    *length = 0;
 203  E :    ScopedComPtr<IDiaSymbol> type;
 204  E :    HRESULT hr = symbol->get_type(type.Receive());
 205    :    // This happens if the symbol has no type information.
 206  E :    if (hr == S_FALSE)
 207  E :      return true;
 208  E :    if (hr != S_OK) {
 209  i :      LOG(ERROR) << "Failed to get type symbol: " << com::LogHr(hr) << ".";
 210  i :      return false;
 211    :    }
 212    :  
 213  E :    ULONGLONG ull_length = 0;
 214  E :    hr = type->get_length(&ull_length);
 215  E :    if (hr != S_OK) {
 216  i :      LOG(ERROR) << "Failed to retrieve type length properties: "
 217    :                 << com::LogHr(hr) << ".";
 218  i :      return false;
 219    :    }
 220  E :    *length = ull_length;  // NOTE(review): ULONGLONG is narrowed to size_t here.
 221    :  
 222  E :    return true;
 223  E :  }
 224    :  // Coarse classification of an image section, as returned by GetSectionType.
 225    :  enum SectionType {
 226    :    kSectionCode,
 227    :    kSectionData,
 228    :    kSectionUnknown
 229    :  };
 230    :  // Classifies a section by its Characteristics flags, testing for code first.
 231  E :  SectionType GetSectionType(const IMAGE_SECTION_HEADER* header) {
 232  E :    DCHECK(header != NULL);
 233  E :    if ((header->Characteristics & IMAGE_SCN_CNT_CODE) != 0)
 234  E :      return kSectionCode;
 235  E :    if ((header->Characteristics & kReadOnlyDataCharacteristics) != 0)
 236  E :      return kSectionData;
 237  i :    return kSectionUnknown;
 238  E :  }
 239    :  // Sets the block's alignment to that of its address, capped at max_alignment.
 240  E :  void GuessDataBlockAlignment(BlockGraph::Block* block, uint32 max_alignment) {
 241  E :    DCHECK(block != NULL);
 242  E :    uint32 alignment = block->addr().GetAlignment();
 243    :    // Cap the alignment.
 244  E :    if (alignment > max_alignment)
 245  E :      alignment = max_alignment;
 246  E :    block->set_alignment(alignment);
 247  E :  }
 248    :  // True if la has CODE_LABEL for a code block, or DATA_LABEL for a data block.
 249    :  bool AreMatchedBlockAndLabelAttributes(
 250    :      BlockGraph::BlockType bt,
 251    :      BlockGraph::LabelAttributes la) {
 252    :    return (bt == BlockGraph::CODE_BLOCK && (la & BlockGraph::CODE_LABEL) != 0) ||
 253    :        (bt == BlockGraph::DATA_BLOCK && (la & BlockGraph::DATA_LABEL) != 0);
 254    :  }
 255    :  // Maps sym_tag to its label attribute; other tags hit NOTREACHED and return 0.
 256  E :  BlockGraph::LabelAttributes SymTagToLabelAttributes(enum SymTagEnum sym_tag) {
 257  E :    switch (sym_tag) {
 258    :      case SymTagData:
 259  E :        return BlockGraph::DATA_LABEL;
 260    :      case SymTagLabel:
 261  E :        return BlockGraph::CODE_LABEL;
 262    :      case SymTagFuncDebugStart:
 263  E :        return BlockGraph::DEBUG_START_LABEL;
 264    :      case SymTagFuncDebugEnd:
 265  E :        return BlockGraph::DEBUG_END_LABEL;
 266    :      case SymTagBlock:
 267  E :        return BlockGraph::SCOPE_START_LABEL;
 268    :      case SymTagCallSite:
 269  E :        return BlockGraph::CALL_SITE_LABEL;
 270    :    }
 271    :  
 272  i :    NOTREACHED();
 273  i :    return 0;
 274  E :  }
 275    :  // Labels block at addr, merging the name and attributes into any existing label.
 276    :  bool AddLabelToBlock(RelativeAddress addr,
 277    :                       const base::StringPiece& name,
 278    :                       BlockGraph::LabelAttributes label_attributes,
 279  E :                       BlockGraph::Block* block) {
 280  E :    DCHECK(block != NULL);
 281  E :    DCHECK_LE(block->addr(), addr);
 282  E :    DCHECK_GT(block->addr() + block->size(), addr);
 283    :  
 284  E :    BlockGraph::Offset offset = addr - block->addr();
 285    :  
 286    :    // Try to create the label.
 287  E :    if (block->SetLabel(offset, name, label_attributes)) {
 288    :      // If there was no label at offset 0, then this block has not yet been
 289    :      // renamed, and still has its section contribution as a name. Update it to
 290    :      // the first symbol we get for it. We parse symbols from most useful
 291    :      // (undecorated function names) to least useful (mangled public symbols), so
 292    :      // this ensures a block has the most useful name.
 293  E :      if (offset == 0)
 294  E :        block->set_name(name);
 295    :  
 296  E :      return true;
 297    :    }
 298    :  
 299    :    // If we get here there's an already existing label. Update it.
 300  E :    BlockGraph::Label label;
 301  E :    CHECK(block->GetLabel(offset, &label));
 302    :  
 303    :    // It is conceivable that there could be more than one scope with either the
 304    :    // same beginning or the same ending. However, this doesn't appear to happen
 305    :    // in any version of Chrome up to 20. We add this check so that we'd at least
 306    :    // be made aware of the situation. (We don't rely on these labels, so we
 307    :    // merely output a warning rather than an error.)
 308    :    {
 309    :      const BlockGraph::LabelAttributes kScopeAttributes =
 310    :          BlockGraph::SCOPE_START_LABEL |
 311  E :          BlockGraph::SCOPE_END_LABEL;
 312    :      BlockGraph::LabelAttributes scope_attributes =
 313  E :          label_attributes & kScopeAttributes;
 314  E :      if (scope_attributes != 0) {
 315  E :        if (label.has_any_attributes(scope_attributes)) {
 316  i :          LOG(WARNING) << "Detected colliding scope labels at offset "
 317    :                       << offset << " of block \"" << block->name() << "\".";
 318    :        }
 319    :      }
 320    :    }
 321    :  
 322    :    // Append name unless already present. A StringPiece need not be NUL-terminated, so bound the search by its size.
 323  E :    std::string new_name = label.name();
 324  E :    if (new_name.find(name.data(), 0, name.size()) == new_name.npos) {
 325  E :      new_name.append(", ");
 326  E :      name.AppendToString(&new_name);
 327    :    }
 328    :  
 329    :    // Merge the attributes.
 330    :    BlockGraph::LabelAttributes new_label_attr = label.attributes() |
 331  E :        label_attributes;
 332  E :    if (!BlockGraph::Label::AreValidAttributes(new_label_attr)) {
 333    :      // It's not clear which attributes should be the winner here, so we log an
 334    :      // error.
 335  i :      LOG(ERROR) << "Trying to merge conflicting label attributes \""
 336    :                 << BlockGraph::LabelAttributesToString(label_attributes)
 337    :                 << "\" for label \"" << label.ToString() << "\" at offset "
 338    :                 << offset << " of block \"" << block->name() << "\".";
 339  i :      return false;
 340    :    }
 341    :  
 342    :    // Update the label.
 343  E :    label = BlockGraph::Label(new_name, new_label_attr);
 344  E :    CHECK(block->RemoveLabel(offset));
 345  E :    CHECK(block->SetLabel(offset, label));
 346    :  
 347  E :    return true;
 348  E :  }
 349    :  
 350    :  // The MS linker pads between code blocks with int3 (opcode 0xCC) bytes.
 351    :  static const uint8 kInt3 = 0xCC;
 352    :  
 353    :  // If all size bytes at data hold the same value, returns that value; otherwise
 354    :  // returns -1. Reads data[0] unconditionally, so size must be at least 1.
 355  E :  int RepeatedValue(const uint8* data, size_t size) {
 356  E :    DCHECK(data != NULL);
 357  E :    const uint8* data_end = data + size;
 358  E :    uint8 value = *(data++);
 359  E :    for (; data < data_end; ++data) {
 360  E :      if (*data != value)
 361  i :        return -1;
 362  E :    }
 363  E :    return value;
 364  E :  }
 365    :  // Block ID built from -1; presumably a "no block" sentinel (confirm at use sites).
 366    :  const BlockGraph::BlockId kNullBlockId(-1);
 367    :  // Replaces *addresses with the absolute address of every CODE_LABEL in block.
 368    :  void GetDisassemblyStartingPoints(
 369    :      const BlockGraph::Block* block,
 370    :      AbsoluteAddress abs_block_addr,
 371    :      const PEFile::RelocSet& reloc_set,
 372  E :      Disassembler::AddressSet* addresses) {
 373  E :    DCHECK(block != NULL);
 374  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
 375  E :    DCHECK(addresses != NULL);
 376    :  
 377  E :    addresses->clear();
 378    :  
 379    :    // Use code labels as starting points.
 380  E :    BlockGraph::Block::LabelMap::const_iterator it(block->labels().begin());
 381  E :    for (; it != block->labels().end(); ++it) {
 382  E :      BlockGraph::Offset offset = it->first;
 383  E :      DCHECK_LE(0, offset);
 384  E :      DCHECK_GT(block->size(), static_cast<size_t>(offset));
 385    :  
 386  E :      if (it->second.has_attributes(BlockGraph::CODE_LABEL)) {
 387    :        // We sometimes receive code labels that land on lookup tables; we can
 388    :        // detect these because the label will point directly to a reloc. These
 389    :        // should have already been marked as data by now. DCHECK to validate.
 390    :        // TODO(chrisha): Get rid of this DCHECK, and allow mixed CODE and DATA
 391    :        //     labels. Simply only use ones that are DATA only.
 392  E :        DCHECK_EQ(0u, reloc_set.count(block->addr() + offset));
 393    :  
 394  E :        addresses->insert(abs_block_addr + offset);
 395    :      }
 396  E :    }
 397  E :  }
 398    :  
 399    :  // Returns true if the code block is laid out as code first, data second, i.e.
 400    :  // every data label lies in a run at the tail. code_size receives data_size()
 401    :  // trimmed down to the lowest data-label offset visited before returning.
 402    :  bool BlockHasExpectedCodeDataLayout(const BlockGraph::Block* block,
 403  E :                                      size_t* code_size) {
 404  E :    DCHECK(block != NULL);
 405  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
 406  E :    DCHECK(code_size != NULL);
 407    :  
 408  E :    *code_size = block->data_size();
 409    :  
 410    :    BlockGraph::Block::LabelMap::const_reverse_iterator label_it =
 411  E :        block->labels().rbegin();
 412    :    BlockGraph::Block::LabelMap::const_reverse_iterator label_end =
 413  E :        block->labels().rend();
 414    :  
 415  E :    bool seen_non_data = false;
 416    :  
 417    :    // Walk through the labels in reverse order (by decreasing offset). Trim
 418    :    // any data labels from this block's data_size.
 419  E :    for (; label_it != label_end; ++label_it) {
 420  E :      if (label_it->second.has_attributes(BlockGraph::DATA_LABEL)) {
 421    :        // We've encountered data not strictly at the end of the block. This
 422    :        // violates assumptions about code generated by cl.exe.
 423  E :        if (seen_non_data)
 424  E :          return false;
 425    :  
 426    :        // Otherwise, we're still in a run of data labels at the tail of the
 427    :        // block. Keep trimming the code size.
 428  E :        size_t offset = static_cast<size_t>(label_it->first);
 429  E :        if (offset < *code_size)
 430  E :          *code_size = offset;
 431  E :      } else {
 432  E :        seen_non_data = true;
 433    :      }
 434  E :    }
 435    :  
 436  E :    return true;
 437  E :  }
 438    :  
 439    :  // Given a compiland, fetches its first compiland-details child; false if none.
 440    :  bool GetCompilandDetailsForCompiland(IDiaSymbol* compiland,
 441  E :                                       IDiaSymbol** compiland_details) {
 442  E :    DCHECK(compiland != NULL);
 443  E :    DCHECK(compiland_details != NULL);
 444  E :    DCHECK(IsSymTag(compiland, SymTagCompiland));
 445    :  
 446  E :    *compiland_details = NULL;
 447    :  
 448    :    // Get the enumeration of compiland details.
 449  E :    ScopedComPtr<IDiaEnumSymbols> enum_symbols;
 450    :    HRESULT hr = compiland->findChildren(SymTagCompilandDetails, NULL, 0,
 451  E :                                         enum_symbols.Receive());
 452  E :    DCHECK_EQ(S_OK, hr);  // NOTE(review): hr is only DCHECKed before enum_symbols is used.
 453    :  
 454    :    // We expect there to be compiland details. For compilands built by
 455    :    // non-standard toolchains, there usually aren't any.
 456  E :    LONG count = 0;
 457  E :    hr = enum_symbols->get_Count(&count);
 458  E :    DCHECK_EQ(S_OK, hr);
 459  E :    if (count == 0)
 460  i :      return false;
 461    :  
 462    :    // Get the compiland details.
 463  E :    ULONG fetched = 0;
 464  E :    hr = enum_symbols->Next(1, compiland_details, &fetched);
 465  E :    DCHECK_EQ(S_OK, hr);
 466  E :    DCHECK_EQ(1u, fetched);
 467  E :    return true;
 468  E :  }
 469    :  
 470    :  // Stores information regarding known compilers. The name points at a literal, hence const.
 471    :  struct KnownCompilerInfo {
 472    :    const wchar_t* compiler_name;
 473    :    bool supported;
 474    :  };
 475    :  
 476    :  // Known compilers and their support status; names are compared exactly (wcscmp).
 477    :  KnownCompilerInfo kKnownCompilerInfos[] = {
 478    :    { L"Microsoft (R) Macro Assembler", false },
 479    :    { L"Microsoft (R) Optimizing Compiler", true },
 480    :    { L"Microsoft (R) LINK", false }
 481    :  };
 482    :  
 483    :  // Returns true only if the compiland's compiler name exactly matches an entry
 484    :  // of kKnownCompilerInfos that is flagged as supported.
 485  E :  bool IsBuiltBySupportedCompiler(IDiaSymbol* compiland) {
 486  E :    DCHECK(compiland != NULL);
 487  E :    DCHECK(IsSymTag(compiland, SymTagCompiland));
 488    :  
 489  E :    ScopedComPtr<IDiaSymbol> compiland_details;
 490    :    if (!GetCompilandDetailsForCompiland(compiland,
 491  E :                                         compiland_details.Receive())) {
 492    :      // If the compiland has no compiland details we assume the compiler is not
 493    :      // supported.
 494  i :      ScopedBstr compiland_name;
 495  i :      if (compiland->get_name(compiland_name.Receive()) == S_OK) {
 496  i :        VLOG(1) << "Compiland has no compiland details: "
 497    :                << com::ToString(compiland_name);
 498    :      }
 499  i :      return false;
 500    :    }
 501  E :    DCHECK(compiland_details.get() != NULL);
 502    :  
 503    :    // Get the compiler name.
 504  E :    ScopedBstr compiler_name;
 505  E :    HRESULT hr = compiland_details->get_compilerName(compiler_name.Receive());
 506  E :    DCHECK_EQ(S_OK, hr);  // NOTE(review): hr is only DCHECKed before compiler_name is used.
 507    :  
 508    :    // Check the compiler name against the list of known compilers.
 509  E :    for (size_t i = 0; i < arraysize(kKnownCompilerInfos); ++i) {
 510  E :      if (::wcscmp(kKnownCompilerInfos[i].compiler_name, compiler_name) == 0) {
 511  E :        return kKnownCompilerInfos[i].supported;
 512    :      }
 513  E :    }
 514    :  
 515    :    // Anything we don't explicitly know about is not supported.
 516  E :    VLOG(1) << "Encountered unknown compiler: " << compiler_name;
 517  E :    return false;
 518  E :  }
 519    :  
 520    :  // Logs an error if @p error is true, a verbose logging message otherwise.
 521    :  #define LOG_ERROR_OR_VLOG1(error) LAZY_STREAM( \
 522    :      ::logging::LogMessage(__FILE__, \
 523    :                            __LINE__, \
 524    :                            (error) ? ::logging::LOG_ERROR : -1).stream(), \
 525    :      (error ? LOG_IS_ON(ERROR) : VLOG_IS_ON(1)))
 526    :  
 527    :  // Logs a warning if @p warn (expanded twice) is true, else a VLOG(1) message.
 528    :  #define LOG_WARNING_OR_VLOG1(warn) LAZY_STREAM( \
 529    :      ::logging::LogMessage(__FILE__, \
 530    :                            __LINE__, \
 531    :                            (warn) ? ::logging::LOG_WARNING : -1).stream(), \
 532    :      ((warn) ? LOG_IS_ON(WARNING) : VLOG_IS_ON(1)))
 533    :  
 534    :  // Returns Disassembler::kDirectiveAbort if @p strict is true, otherwise
 535    :  // Disassembler::kDirectiveTerminateWalk (an early termination).
 536  E :  CallbackDirective AbortOrTerminateDisassembly(bool strict) {
 537  E :    if (strict)
 538  i :      return Disassembler::kDirectiveAbort;
 539  i :    else
 540  E :      return Disassembler::kDirectiveTerminateWalk;
 541  E :  }
 542    :  
 543    :  // Returns true if @p directive is kDirectiveTerminateWalk or kDirectiveAbort,
 544    :  // i.e. should be returned immediately. Unrecognized values also yield true.
 545  E :  bool IsFatalCallbackDirective(CallbackDirective directive) {
 546  E :    switch (directive) {
 547    :      case Disassembler::kDirectiveContinue:
 548    :      case Disassembler::kDirectiveTerminatePath:
 549  E :        return false;
 550    :  
 551    :      case Disassembler::kDirectiveTerminateWalk:
 552    :      case Disassembler::kDirectiveAbort:
 553  i :        return true;
 554    :  
 555    :      default:
 556  i :        NOTREACHED();
 557    :    }
 558    :  
 559  i :    return true;
 560  E :  }
 561    :  
 562    :  // Combines two callback directives by returning the one with the larger code.
 563    :  CallbackDirective CombineCallbackDirectives(CallbackDirective d1,
 564  E :                                              CallbackDirective d2) {
 565    :    // This ensures that this logic remains valid. This should prevent people
 566    :    // from tinkering with CallbackDirective and breaking this code.
 567    :    COMPILE_ASSERT(Disassembler::kDirectiveContinue <
 568    :                       Disassembler::kDirectiveTerminatePath &&
 569    :                   Disassembler::kDirectiveTerminatePath <
 570    :                       Disassembler::kDirectiveTerminateWalk &&
 571    :                   Disassembler::kDirectiveTerminateWalk <
 572    :                       Disassembler::kDirectiveAbort,
 573    :                   callback_directive_enum_is_not_sorted);
 574  E :    return std::max(d1, d2);
 575  E :  }
 576    :  
 577    :  // True if the block has a DATA_LABEL at some offset in [offset, offset + size).
 578    :  bool HasDataLabelInRange(const BlockGraph::Block* block,
 579    :                           BlockGraph::Offset offset,
 580  E :                           BlockGraph::Size size) {
 581    :    BlockGraph::Block::LabelMap::const_iterator it =
 582  E :        block->labels().lower_bound(offset);
 583    :    BlockGraph::Block::LabelMap::const_iterator end =
 584  E :        block->labels().lower_bound(offset + size);
 585    :  
 586  E :    for (; it != end; ++it) {
 587  i :      if (it->second.has_attributes(BlockGraph::DATA_LABEL))
 588  i :        return true;
 589  i :    }
 590    :  
 591  E :    return false;
 592  E :  }
 593    :  // Warns about a suspected non-returning call unless the callee is marked as such.
 594    :  void ReportPotentialNonReturningFunction(
 595    :      const Decomposer::IntermediateReferenceMap& refs,
 596    :      const BlockGraph::AddressSpace& image,
 597    :      const BlockGraph::Block* block,
 598    :      BlockGraph::Offset call_ref_offset,
 599  E :      const char* reason) {
 600    :    typedef Decomposer::IntermediateReferenceMap::const_iterator RefIter;
 601    :  
 602    :    // Try and track down the block being pointed at by the call. If this is a
 603    :    // computed address there will be no reference.
 604  E :    RefIter ref_it = refs.find(block->addr() + call_ref_offset);
 605  E :    if (ref_it == refs.end()) {
 606  i :      LOG(WARNING) << "Suspected non-returning function call from offset "
 607    :                   << call_ref_offset << " (followed by " << reason
 608    :                   << ") of block \"" << block->name()
 609    :                   << "\", but target can not be tracked down.";
 610  i :      return;
 611    :    }
 612    :  
 613  E :    BlockGraph::Block* target = image.GetBlockByAddress(ref_it->second.base);
 614  E :    DCHECK(target != NULL);
 615    :  
 616    :    // If this was marked as non-returning, then it's not suspicious.
 617  E :    if ((target->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
 618  E :      return;
 619    :  
 620    :    // If the target is a code block then this is a direct call.
 621  E :    if (target->type() == BlockGraph::CODE_BLOCK) {
 622  i :      LOG(WARNING) << "Suspected non-returning call from offset "
 623    :                   << call_ref_offset << " (followed by " << reason
 624    :                   << ") of block \"" << block->name() << "\" to code block \""
 625    :                   << target->name() << "\".";
 626  i :      return;
 627    :    }
 628    :    // Otherwise the target is a data block and this is a memory indirect call
 629    :    // to a thunk.
 630  E :    DCHECK_EQ(BlockGraph::DATA_BLOCK, target->type());
 631    :  
 632    :    // Track down the import thunk. NOTE(review): thunk is never NULL-checked.
 633  E :    RefIter thunk_ref_it = refs.find(ref_it->second.base);
 634  E :    DCHECK(thunk_ref_it != refs.end());
 635  E :    BlockGraph::Block* thunk = image.GetBlockByAddress(thunk_ref_it->second.base);
 636    :  
 637    :    // If this was marked as non-returning, then it's not suspicious.
 638  E :    if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
 639  E :      return;
 640    :  
 641    :    // Otherwise, report the suspicious call to the import thunk by its name.
 642  i :    LOG(WARNING) << "Suspected non-returning call from offset "
 643    :                 << call_ref_offset << " (followed by " << reason
 644    :                 << ") of block \"" << block->name() << "\" to import thunk \""
 645    :                 << thunk->name() << "\".";
 646  E :  }
 647    :  // Reports calls in the block that are followed by int3, data or the block's end.
 648    :  void LookForNonReturningFunctions(
 649    :      const Decomposer::IntermediateReferenceMap& refs,
 650    :      const BlockGraph::AddressSpace& image,
 651    :      const BlockGraph::Block* block,
 652  E :      const Disassembler& disasm) {
 653  E :    bool saw_call = false;
 654  E :    bool saw_call_then_nop = false;
 655  E :    BlockGraph::Offset call_ref_offset = 0;
 656    :  
 657  E :    AbsoluteAddress end_of_last_inst;
 658    :    Disassembler::VisitedSpace::const_iterator inst_it =
 659  E :        disasm.visited().begin();
 660  E :    for (; inst_it != disasm.visited().end(); ++inst_it) {
 661    :      // Not contiguous with the last instruction? Then we're spanning a gap. If
 662    :      // it's an instruction then we didn't parse it; thus, we already know that
 663    :      // if the last instruction is a call it's to a non-returning function. So,
 664    :      // we only need to check for data.
 665  E :      if (inst_it->first.start() != end_of_last_inst) {
 666  E :        if (saw_call || saw_call_then_nop) {
 667  E :          BlockGraph::Offset offset = end_of_last_inst - disasm.code_addr();
 668  E :          BlockGraph::Size size = inst_it->first.start() - end_of_last_inst;
 669  E :          if (HasDataLabelInRange(block, offset, size))
 670    :            // We do not expect this to ever occur in cl.exe generated code.
 671    :            // However, it is entirely possible in hand-written assembly.
 672    :            ReportPotentialNonReturningFunction(
 673    :                refs, image, block, call_ref_offset,
 674  i :                saw_call ? "data" : "nop(s) and data");
 675    :        }
 676    :  
 677  E :        saw_call = false;
 678  E :        saw_call_then_nop = false;
 679    :      }
 680    :  
 681  E :      _DInst inst = { 0 };
 682  E :      BlockGraph::Offset offset = inst_it->first.start() - disasm.code_addr();
 683  E :      const uint8* code = disasm.code() + offset;
 684  E :      CHECK(core::DecodeOneInstruction(code, inst_it->first.size(), &inst));
 685    :  
 686    :      // Previous instruction was a call?
 687  E :      if (saw_call) {
 688  E :        if (core::IsNop(inst)) {
 689  i :          saw_call_then_nop = true;
 690  E :        } else if (core::IsDebugInterrupt(inst)) {
 691    :          ReportPotentialNonReturningFunction(
 692  E :              refs, image, block, call_ref_offset, "int3");
 693    :        }
 694  E :        saw_call = false;
 695  E :      } else if (saw_call_then_nop) {
 696    :        // The previous instructions we've seen have been a call followed by
 697    :        // arbitrarily many nops. Look for another nop to continue the pattern.
 698  i :        saw_call_then_nop = core::IsNop(inst);
 699  i :      } else {
 700    :        // The previous instruction was not a call, so we're looking for one.
 701    :        // If this instruction is a call, remember that fact and also remember
 702    :        // the offset of its operand (the call target).
 703  E :        if (core::IsCall(inst)) {
 704  E :          saw_call = true;
 705    :          call_ref_offset = offset + inst_it->first.size() -
 706  E :              BlockGraph::Reference::kMaximumSize;  // Operand assumed to end the instruction.
 707    :        }
 708    :      }
 709    :  
 710    :      // Remember the end of the last instruction we processed.
 711  E :      end_of_last_inst = inst_it->first.end();
 712  E :    }
 713    :  
 714    :    // If the last instruction was a call and we've marked that we've disassembled
 715    :    // past the end, then this is also a suspected non-returning function.
 716    :    if ((saw_call || saw_call_then_nop) &&
 717  E :        (block->attributes() & BlockGraph::DISASSEMBLED_PAST_END) != 0) {
 718  i :      const char* reason = saw_call ? "end of block" : "nop(s) and end of block";
 719    :      ReportPotentialNonReturningFunction(
 720  i :          refs, image, block, call_ref_offset, reason);
 721    :    }
 722  E :  }
 723    :  
 724  E :  bool CodeBlockHasAlignedJumpTables(const BlockGraph::Block* block) {
             // Returns true only when |block| carries at least one label with the
             // JUMP_TABLE_LABEL attribute and every such label has a map key that is
             // a multiple of kPointerSize. A block with no jump table labels yields
             // false, as does a block with any misaligned jump table label.
             // |block| must be non-NULL and of type CODE_BLOCK (DCHECKed).
 725  E :    DCHECK(block != NULL);
 726  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
 727    :  
 728    :    // Iterate over the labels of this block looking for jump tables.
 729  E :    bool has_jump_tables = false;
 730    :    BlockGraph::Block::LabelMap::const_iterator label_it =
 731  E :        block->labels().begin();
 732  E :    for (; label_it != block->labels().end(); ++label_it) {
 733  E :      if (!label_it->second.has_attributes(BlockGraph::JUMP_TABLE_LABEL))
 734  E :        continue;
 735    :  
 736  E :      has_jump_tables = true;
 737    :  
 738    :      // If the jump table is misaligned we can return false immediately.
               // (label_it->first is the label map key; presumably the label's
               // offset within the block - confirm against BlockGraph::Block.)
 739  E :      if (label_it->first % kPointerSize != 0)
 740  i :        return false;
 741  E :    }
 742    :  
 743  E :    return has_jump_tables;
 744  E :  }
 745    :  
 746  E :  bool AlignCodeBlocksWithJumpTables(ImageLayout* image_layout) {
             // Walks every block in |image_layout|->blocks and raises the alignment
             // of qualifying code blocks to kPointerSize. A block qualifies when its
             // alignment is below kPointerSize, its start address is a multiple of
             // kPointerSize, and CodeBlockHasAlignedJumpTables() returns true for it.
             // Always returns true.
 747  E :    DCHECK(image_layout != NULL);
 748    :  
 749    :    BlockGraph::AddressSpace::RangeMapConstIter block_it =
 750  E :        image_layout->blocks.begin();
 751  E :    for (; block_it != image_layout->blocks.end(); ++block_it) {
 752  E :      BlockGraph::Block* block = block_it->second;
 753    :  
 754    :      // We only care about code blocks that are already aligned 0 mod 4 but
 755    :      // whose explicit alignment is currently less than that.
 756  E :      if (block->type() != BlockGraph::CODE_BLOCK)
 757  E :        continue;
 758  E :      if (block->alignment() >= kPointerSize)
 759  i :        continue;
 760  E :      if (block_it->first.start().value() % kPointerSize != 0)
 761  E :        continue;
 762    :  
 763    :      // Inspect them to see if they have aligned jump tables. If they do,
 764    :      // set the alignment of the block itself.
 765  E :      if (CodeBlockHasAlignedJumpTables(block_it->second))
 766  E :        block->set_alignment(kPointerSize);
 767  E :    }
 768    :  
 769  E :    return true;
 770  E :  }
 771    :  
 772    :  }  // namespace
 773    :  
 774    :  Decomposer::Decomposer(const PEFile& image_file)
 775    :      : image_(NULL),
 776    :        image_file_(image_file),
 777    :        current_block_(NULL),
 778  E :        be_strict_with_current_block_(true) {
             // Besides initializing members, the constructor registers the built-in
             // static initializer bracketing patterns, the functions that are forced
             // to be non-returning, and the known non-returning imports. Each
             // registration is wrapped in CHECK, so a failed registration is fatal.
 779    :    // Register static initializer patterns that we know are always present.
 780    :    // CRT C/C++/etc initializers.
 781  E :    CHECK(RegisterStaticInitializerPatterns("(__x.*)_a", "(__x.*)_z"));
 782    :    // RTC (run-time checks) initializers (part of CRT).
 783  E :    CHECK(RegisterStaticInitializerPatterns("(__rtc_[it])aa", "(__rtc_[it])zz"));
 784    :    // ATL object map initializers.
 785    :    CHECK(RegisterStaticInitializerPatterns("(__pobjMapEntry)First",
 786  E :                                            "(__pobjMapEntry)Last"));
 787    :    // Thread-local storage template.
 788  E :    CHECK(RegisterStaticInitializerPatterns("(_tls_)start", "(_tls_)end"));
 789    :  
 790    :    // Register non-returning functions that for some reason the symbols lie to
 791    :    // us about.
 792  E :    CHECK(RegisterNonReturningFunction("_CxxThrowException"));
 793  E :    CHECK(RegisterNonReturningFunction("_longjmp"));
 794    :  
 795    :    // Register non-returning imports that we know about.
 796  E :    CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitProcess"));
 797  E :    CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitThread"));
 798  E :  }
 799    :  
 800  E :  bool Decomposer::Decompose(ImageLayout* image_layout) {
             // Drives the whole decomposition of image_file_ into |image_layout|.
             //
             // The function first locates and validates the PDB, then tries to load
             // an already-serialized block graph from it. If that load succeeds the
             // function returns true immediately; if the stream exists but could not
             // be loaded it returns false. Otherwise a DIA session is created and the
             // decomposition steps below run strictly in order. Once a step fails,
             // every remaining step is skipped via the |success| flag.
             //
             // image_ points at |image_layout|->blocks while the steps run and is
             // reset to NULL before returning. The early returns above that point
             // happen before image_ is assigned.
             //
             // Returns true on success, false otherwise.
 801    :    // We start by finding the PDB path.
 802  E :    if (!FindAndValidatePdbPath())
 803  E :      return false;
 804  E :    DCHECK(!pdb_path_.empty());
 805    :  
 806    :    // Check if the block-graph has already been serialized into the PDB and load
 807    :    // it from here in this case. This allows round-trip decomposition.
 808  E :    bool stream_exists = false;
 809    :    if (LoadBlockGraphFromPdb(pdb_path_, image_file_, image_layout,
 810  E :                              &stream_exists)) {
 811  E :      return true;
 812  i :    } else {
 813    :      // If the stream exists but hasn't been loaded we return an error. At this
 814    :      // point an error message has already been logged if there was one.
 815  E :      if (stream_exists)
 816  i :        return false;
 817    :    }
 818    :  
 819    :    // Move on to instantiating and initializing our Debug Interface Access
 820    :    // session.
 821  E :    ScopedComPtr<IDiaDataSource> dia_source;
 822  E :    if (!CreateDiaSource(dia_source.Receive()))
 823  i :      return false;
 824    :  
 825    :    // We create the session using the PDB file directly, as we've already
 826    :    // validated that it matches the module.
 827  E :    ScopedComPtr<IDiaSession> dia_session;
 828    :    if (!CreateDiaSession(pdb_path_,
 829    :                          dia_source.get(),
 830  E :                          dia_session.Receive())) {
 831  i :      return false;
 832    :    }
 833    :  
             // Tell DIA the image's preferred base address, taken from the PE
             // optional header.
 834    :    HRESULT hr = dia_session->put_loadAddress(
 835  E :        image_file_.nt_headers()->OptionalHeader.ImageBase);
 836  E :    if (hr != S_OK) {
 837  i :      LOG(ERROR) << "Failed to set the DIA load address: "
 838    :                 << com::LogHr(hr) << ".";
 839  i :      return false;
 840    :    }
 841    :  
             // The global scope symbol is the root passed to the symbol-processing
             // steps below.
 842  E :    ScopedComPtr<IDiaSymbol> global;
 843  E :    hr = dia_session->get_globalScope(global.Receive());
 844  E :    if (hr != S_OK) {
 845  i :      LOG(ERROR) << "Failed to get the DIA global scope: "
 846    :                 << com::LogHr(hr) << ".";
 847  i :      return false;
 848    :    }
 849    :  
 850  E :    image_ = &image_layout->blocks;
 851    :  
 852    :    // Create the sections for the image.
 853  E :    bool success = CreateSections();
 854    :  
 855    :    // Load FIXUP information from the PDB file. We do this early on so that we
 856    :    // can do accounting with references that are created later on.
 857  E :    if (success)
 858  E :      success = LoadDebugStreams(dia_session);
 859    :  
 860    :    // Create intermediate references for each fixup entry.
 861  E :    if (success)
 862  E :      success = CreateReferencesFromFixups();
 863    :  
 864    :    // Chunk out important PE image structures, like the headers and such.
 865  E :    PEFileParser::PEHeader header;
 866  E :    if (success)
 867  E :      success = CreatePEImageBlocksAndReferences(&header);
 868    :  
 869    :    // Parse and validate the relocation entries.
 870  E :    if (success)
 871  E :      success = ParseRelocs();
 872    :  
 873    :    // Our first round of parsing is using section contributions. This creates
 874    :    // both code and data blocks.
 875  E :    if (success)
 876  E :      success = CreateBlocksFromSectionContribs(dia_session);
 877    :  
 878    :    // Process the function and thunk symbols in the image. This does not create
 879    :    // any blocks, as all functions are covered by section contributions.
 880  E :    if (success)
 881  E :      success = ProcessCodeSymbols(global);
 882    :  
 883    :    // Process data symbols. This can cause the creation of some blocks as the
 884    :    // data sections are not fully covered by section contributions.
 885  E :    if (success)
 886  E :      success = ProcessDataSymbols(global);
 887    :  
 888    :    // Create labels in code blocks.
 889  E :    if (success)
 890  E :      success = CreateGlobalLabels(global);
 891    :  
 892    :    // Create gap blocks. This ensures that we have complete coverage of the
 893    :    // entire image.
 894  E :    if (success)
 895  E :      success = CreateGapBlocks();
 896    :  
 897    :    // Parse public symbols, augmenting code and data labels where possible.
 898    :    // Some public symbols land on gap blocks, so they need to have been parsed
 899    :    // already.
 900  E :    if (success)
 901  E :      success = ProcessPublicSymbols(global);
 902    :  
 903    :    // Parse initialization bracketing symbols. This needs to happen after
 904    :    // PublicSymbols have been parsed.
 905  E :    if (success)
 906  E :      success = ProcessStaticInitializers();
 907    :  
 908    :    // We know that some data blocks need to have alignment precisely preserved.
 909    :    // For now, we very conservatively (guaranteed to be correct, but causes many
 910    :    // blocks to be aligned that don't strictly need alignment) guess alignment
 911    :    // for each block. This must be run after static initializers have been
 912    :    // parsed.
 913  E :    if (success)
 914  E :      success = GuessDataBlockAlignments();
 915    :  
 916    :    // Disassemble code blocks and create PC-relative references
 917  E :    if (success)
 918  E :      success = CreateCodeReferences();
 919    :  
 920    :    // Turn the address->address format references we've created into
 921    :    // block->block references on the blocks in the image.
 922  E :    if (success)
 923  E :      success = FinalizeIntermediateReferences();
 924    :  
 925    :    // Everything called after this point requires the references to have been
 926    :    // finalized.
 927    :  
 928    :    // One way of ensuring full coverage is to check that all of the fixups
 929    :    // were visited during decomposition.
 930  E :    if (success)
 931  E :      success = ConfirmFixupsVisited();
 932    :  
 933    :    // Now, find and label any padding blocks.
 934  E :    if (success)
 935  E :      success = FindPaddingBlocks();
 936    :  
 937    :    // Copy the image headers over to the layout.
 938  E :    if (success)
 939  E :      success = CopyHeaderToImageLayout(header.nt_headers, image_layout);
 940    :  
 941    :    // Set the alignment on code blocks with jump tables. This ensures that the
 942    :    // jump tables remain aligned post-transform.
 943  E :    if (success)
 944  E :      success = AlignCodeBlocksWithJumpTables(image_layout);
 945    :  
             // Drop the pointer into the caller's layout, whether or not the steps
             // succeeded.
 946  E :    image_ = NULL;
 947    :  
 948  E :    return success;
 949  E :  }
 950    :  
 951  E :  bool Decomposer::FindAndValidatePdbPath() {
             // Ensures pdb_path_ names an existing PDB file that matches image_file_.
             // If pdb_path_ is empty it is filled in via FindPdbForModule(). The PDB
             // info header read from the PDB file is then checked for consistency
             // with the PDB info read from the PE file. Each failure is logged.
             // Returns true if the PDB was found and matches, false otherwise.
 952    :    // Manually find the PDB path if it is not specified.
 953  E :    if (pdb_path_.empty()) {
 954    :      if (!FindPdbForModule(image_file_.path(), &pdb_path_) ||
 955  E :          pdb_path_.empty()) {
 956  i :        LOG(ERROR) << "Unable to find PDB file for module: "
 957    :                   << image_file_.path().value();
 958  i :        return false;
 959    :      }
 960    :    }
 961  E :    DCHECK(!pdb_path_.empty());
 962    :  
 963  E :    if (!file_util::PathExists(pdb_path_)) {
 964  E :      LOG(ERROR) << "Path not found: " << pdb_path_.value();
 965  E :      return false;
 966    :    }
 967    :  
 968    :    // Get the PDB info from the PDB file.
 969    :    pdb::PdbInfoHeader70 pdb_info_header;
 970  E :    if (!pdb::ReadPdbHeader(pdb_path_, &pdb_info_header)) {
 971  i :      LOG(ERROR) << "Unable to read PDB info header from PDB file: "
 972    :                 << pdb_path_.value();
 973  i :      return false;
 974    :    }
 975    :  
 976    :    // Get the PDB info from the module.
 977  E :    PdbInfo pdb_info;
 978  E :    if (!pdb_info.Init(image_file_)) {
 979  i :      LOG(ERROR) << "Unable to read PDB info from PE file: "
 980    :                 << image_file_.path().value();
 981  i :      return false;
 982    :    }
 983    :  
 984    :    // Ensure that they are consistent.
 985  E :    if (!pdb_info.IsConsistent(pdb_info_header)) {
 986  i :      LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
 987    :                 << "module \"" << image_file_.path().value() << "\".";
 988  i :      return false;
 989    :    }
 990    :  
 991  E :    return true;
 992  E :  }
 993    :  
 994  E :  bool Decomposer::ProcessCodeSymbols(IDiaSymbol* global) {
             // Processes the function symbols and then the thunk symbols found under
             // |global|. Returns false as soon as either pass fails; thunks are not
             // processed if function processing fails.
 995  E :    if (!ProcessFunctionSymbols(global))
 996  i :      return false;
 997  E :    if (!ProcessThunkSymbols(global))
 998  i :      return false;
 999    :  
1000  E :    return true;
1001  E :  }
1002    :  
1003  E :  bool Decomposer::ProcessFunctionSymbols(IDiaSymbol* global) {
             // Enumerates the SymTagFunction children of |global| (which must be the
             // SymTagExe symbol) and passes each one to
             // ProcessFunctionOrThunkSymbol(). Returns false on any DIA failure or if
             // processing a function fails.
1004  E :    DCHECK(IsSymTag(global, SymTagExe));
1005    :  
1006    :    // Enumerate the function symbols that are children of the global scope.
1007  E :    ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
1008    :    HRESULT hr = global->findChildren(SymTagFunction,
1009    :                                      NULL,
1010    :                                      nsNone,
1011  E :                                      dia_enum_symbols.Receive());
1012  E :    if (hr != S_OK) {
1013  i :      LOG(ERROR) << "Failed to get the DIA function enumerator: "
1014    :                 << com::LogHr(hr) << ".";
1015  i :      return false;
1016    :    }
1017    :  
1018  E :    LONG count = 0;
1019  E :    if (dia_enum_symbols->get_Count(&count) != S_OK) {
1020  i :      LOG(ERROR) << "Failed to get function enumeration length.";
1021  i :      return false;
1022    :    }
1023    :  
             // Fetch the symbols one at a time, at most |count| of them.
1024  E :    for (LONG visited = 0; visited < count; ++visited) {
1025  E :      ScopedComPtr<IDiaSymbol> function;
1026  E :      ULONG fetched = 0;
1027  E :      hr = dia_enum_symbols->Next(1, function.Receive(), &fetched);
1028  E :      if (hr != S_OK) {
1029  i :        LOG(ERROR) << "Failed to enumerate functions: " << com::LogHr(hr) << ".";
1030  i :        return false;
1031    :      }
               // Stop early if the enumerator produced nothing despite reporting
               // success.
1032  E :      if (fetched == 0)
1033  i :        break;
1034    :  
1035    :      // Create the block representing the function.
1036  E :      DCHECK(IsSymTag(function, SymTagFunction));
1037  E :      if (!ProcessFunctionOrThunkSymbol(function))
1038  i :        return false;
1039  E :    }
1040    :  
1041  E :    return true;
1042  E :  }
1043    :  
1044  E :  bool Decomposer::ProcessFunctionOrThunkSymbol(IDiaSymbol* function) {
             // Annotates the block containing |function| (a SymTagFunction or
             // SymTagThunk symbol) with a code label at the symbol's address, sets
             // block attributes derived from the symbol's properties, and creates
             // labels for the symbol's children via CreateLabelsForFunction().
             //
             // Symbols whose location type is not LocIsStatic are skipped and
             // reported as success. Returns false if symbol properties cannot be
             // read, if no block exists at the symbol's address, if the block does
             // not fully cover the symbol, or if label creation fails.
1045  E :    DCHECK(IsSymTag(function, SymTagFunction) || IsSymTag(function, SymTagThunk));
1046    :  
1047  E :    DWORD location_type = LocIsNull;
1048  E :    HRESULT hr = E_FAIL;
1049  E :    if (FAILED(hr = function->get_locationType(&location_type))) {
1050  i :      LOG(ERROR) << "Failed to retrieve function address type: "
1051    :                 << com::LogHr(hr) << ".";
1052  i :      return false;
1053    :    }
             // Only statically located symbols are processed; anything else is
             // expected to be LocIsNull and is skipped.
1054  E :    if (location_type != LocIsStatic) {
1055  i :      DCHECK_EQ(static_cast<DWORD>(LocIsNull), location_type);
1056  i :      return true;
1057    :    }
1058    :  
1059  E :    DWORD rva = 0;
1060  E :    ULONGLONG length = 0;
1061  E :    ScopedBstr name;
1062    :    if ((hr = function->get_relativeVirtualAddress(&rva)) != S_OK ||
1063    :        (hr = function->get_length(&length)) != S_OK ||
1064  E :        (hr = function->get_name(name.Receive())) != S_OK) {
1065  i :      LOG(ERROR) << "Failed to retrieve function information: "
1066    :                 << com::LogHr(hr) << ".";
1067  i :      return false;
1068    :    }
1069    :  
1070    :    // Certain properties are not defined on all blocks, so the following calls
1071    :    // may return S_FALSE.
             // Any result other than S_OK is treated as the property being FALSE.
1072  E :    BOOL no_return = FALSE;
1073  E :    if (function->get_noReturn(&no_return) != S_OK)
1074  E :      no_return = FALSE;
1075    :  
1076  E :    BOOL has_inl_asm = FALSE;
1077  E :    if (function->get_hasInlAsm(&has_inl_asm) != S_OK)
1078  E :      has_inl_asm = FALSE;
1079    :  
1080  E :    BOOL has_eh = FALSE;
1081  E :    if (function->get_hasEH(&has_eh) != S_OK)
1082  E :      has_eh = FALSE;
1083    :  
1084  E :    BOOL has_seh = FALSE;
1085  E :    if (function->get_hasSEH(&has_seh) != S_OK)
1086  E :      has_seh = FALSE;
1087    :  
1088  E :    std::string block_name;
1089  E :    if (!WideToUTF8(name, name.Length(), &block_name)) {
1090  i :      LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1091  i :      return false;
1092    :    }
1093    :  
1094    :    // Find the block to which this symbol maps, and ensure it fully covers the
1095    :    // symbol.
1096  E :    RelativeAddress block_addr(rva);
1097  E :    BlockGraph::Block* block = image_->GetBlockByAddress(block_addr);
1098  E :    if (block == NULL) {
1099  i :      LOG(ERROR) << "No block found for function/thunk symbol \""
1100    :                 << block_name << "\".";
1101  i :      return false;
1102    :    }
1103  E :    if (block->addr() + block->size() < block_addr + length) {
1104  i :      LOG(ERROR) << "Section contribution \"" << block->name() << "\" does not "
1105    :                 << "fully cover function/thunk symbol \"" << block_name << "\".";
1106  i :      return false;
1107    :    }
1108    :  
1109    :    // Annotate the block with a label, as this is an entry point to it. This is
1110    :    // the routine that adds labels, so there should never be any collisions.
1111  E :    CHECK(AddLabelToBlock(block_addr, block_name, BlockGraph::CODE_LABEL, block));
1112    :  
1113    :    // If we didn't get an explicit no-return flag from the symbols check our
1114    :    // list of exceptions.
             // NOTE(review): the lookup keys on block->name() rather than on the
             // symbol's own name (block_name). Presumably the AddLabelToBlock() call
             // above leaves the block named after the symbol - confirm.
1115  E :    if (no_return == FALSE && non_returning_functions_.count(block->name()) > 0) {
1116  E :      VLOG(1) << "Forcing non-returning attribute on function \""
1117    :              << block->name() << "\".";
1118  E :      no_return = TRUE;
1119    :    }
1120    :  
1121    :    // Set the block attributes.
1122  E :    if (no_return == TRUE)
1123  E :      block->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
1124  E :    if (has_inl_asm == TRUE)
1125  E :      block->set_attribute(BlockGraph::HAS_INLINE_ASSEMBLY);
1126  E :    if (has_eh || has_seh)
1127  E :      block->set_attribute(BlockGraph::HAS_EXCEPTION_HANDLING);
1128  E :    if (IsSymTag(function, SymTagThunk))
1129  E :      block->set_attribute(BlockGraph::THUNK);
1130    :  
1131  E :    if (!CreateLabelsForFunction(function, block)) {
1132  i :      LOG(ERROR) << "Failed to create labels for '" << block->name() << "'.";
1133  i :      return false;
1134    :    }
1135    :  
1136  E :    return true;
1137  E :  }
1138    :  
1139    :  bool Decomposer::CreateLabelsForFunction(IDiaSymbol* function,
1140  E :                                           BlockGraph::Block* block) {
             // Enumerates every child symbol of |function| and adds a label to
             // |block| for each child that has an RVA falling inside the block.
             //
             // Children without an RVA, or with an RVA outside the block, are
             // skipped. Children without a name receive a synthesized name that
             // depends on their symbol tag. A SymTagBlock child additionally gets a
             // "<scope-end>" label at its RVA plus its length.
             //
             // Returns false on a DIA failure, a name conversion failure, or if a
             // label cannot be added to the block.
1141  E :    DCHECK(function != NULL);
1142  E :    DCHECK(block != NULL);
1143    :  
1144    :    // Lookup the block address.
1145  E :    RelativeAddress block_addr;
1146  E :    if (!image_->GetAddressOf(block, &block_addr)) {
1147  i :      NOTREACHED() << "Block " << block->name() << " has no address.";
1148  i :      return false;
1149    :    }
1150    :  
1151    :    // Enumerate all symbols which are children of function.
1152  E :    ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
1153    :    HRESULT hr = function->findChildren(SymTagNull,
1154    :                                        NULL,
1155    :                                        nsNone,
1156  E :                                        dia_enum_symbols.Receive());
1157  E :    if (FAILED(hr)) {
1158  i :      LOG(ERROR) << "Failed to get the DIA label enumerator: "
1159    :                 << com::LogHr(hr) << ".";
1160  i :      return false;
1161    :    }
1162    :  
1163  E :    while (true) {
1164  E :      ScopedComPtr<IDiaSymbol> symbol;
1165  E :      ULONG fetched = 0;
1166  E :      hr = dia_enum_symbols->Next(1, symbol.Receive(), &fetched);
1167  E :      if (FAILED(hr)) {
1168  i :        LOG(ERROR) << "Failed to enumerate the DIA symbol: "
1169    :                   << com::LogHr(hr) << ".";
1170  i :        return false;
1171    :      }
               // A non-failing result other than S_OK, or an empty fetch, ends the
               // enumeration.
1172  E :      if (hr != S_OK || fetched == 0)
1173  E :        break;
1174    :  
1175    :      // If it doesn't have an RVA then it's not interesting to us.
1176  E :      DWORD temp_rva = 0;
1177  E :      if (symbol->get_relativeVirtualAddress(&temp_rva) != S_OK)
1178  E :        continue;
1179    :  
1180    :      // Get the type of symbol we're looking at.
1181  E :      DWORD temp_sym_tag = 0;
1182  E :      if (symbol->get_symTag(&temp_sym_tag) != S_OK) {
1183  i :        LOG(ERROR) << "Failed to retrieve label information.";
1184  i :        return false;
1185    :      }
1186    :  
1187  E :      enum SymTagEnum sym_tag = static_cast<enum SymTagEnum>(temp_sym_tag);
1188  E :      BlockGraph::LabelAttributes label_attr = SymTagToLabelAttributes(sym_tag);
1189    :  
1190    :      // TODO(rogerm): Add a flag to include/exclude the symbol types that are
1191    :      //     interesting for debugging purposes, but not actually needed for
1192    :      //     decomposition: FuncDebugStart/End, Block, etc.
1193    :  
1194    :      // We ignore labels that fall outside of the code block. We sometimes
1195    :      // get labels at the end of a code block, and if the binary has any OMAP
1196    :      // information these follow the original successor block, and they can
1197    :      // end up most anywhere in the binary.
1198  E :      RelativeAddress label_rva(temp_rva);
1199  E :      if (label_rva < block_addr || label_rva >= block_addr + block->size())
1200  E :        continue;
1201    :  
1202    :      // Extract the symbol's name.
               // A symbol for which get_name() does not return S_OK is not an error;
               // label_name simply stays empty and a name is synthesized below.
1203  E :      std::string label_name;
1204    :      {
1205  E :        ScopedBstr temp_name;
1206    :        if (symbol->get_name(temp_name.Receive()) == S_OK &&
1207  E :            !WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1208  i :          LOG(ERROR) << "Failed to convert label name to UTF8.";
1209  i :          return false;
1210    :        }
1211  E :      }
1212    :  
1213    :      // Not all symbols have a name, if we've found one without a name, make
1214    :      // one up.
1215  E :      BlockGraph::Offset offset = label_rva - block_addr;
1216  E :      if (label_name.empty()) {
1217  E :        switch (sym_tag) {
1218    :          case SymTagFuncDebugStart: {
1219  E :            label_name = "<debug-start>";
1220  E :            break;
1221    :          }
1222    :  
1223    :          case SymTagFuncDebugEnd: {
1224  E :            label_name = "<debug-end>";
1225  E :            break;
1226    :          }
1227    :  
                   // Nameless data at an address present in reloc_set_ is labeled as
                   // a jump table; any other nameless data is labeled as a case
                   // table.
1228    :          case SymTagData: {
1229  E :            if (reloc_set_.count(label_rva)) {
1230  E :              label_name = base::StringPrintf("<jump-table-%d>", offset);
1231  E :              label_attr |= BlockGraph::JUMP_TABLE_LABEL;
1232  E :            } else {
1233  E :              label_name = base::StringPrintf("<case-table-%d>", offset);
1234  E :              label_attr |= BlockGraph::CASE_TABLE_LABEL;
1235    :            }
1236  E :            break;
1237    :          }
1238    :  
1239    :          case SymTagBlock: {
1240  E :            label_name = "<scope-start>";
1241  E :            break;
1242    :          }
1243    :  
1244    :          // The DIA SDK shipping with MSVS 2010 includes additional symbol types.
1245    :          case SymTagCallSite: {
1246  E :            label_name = "<call-site>";
1247  E :            break;
1248    :          }
1249    :  
                   // Any other nameless symbol is logged and given a name derived
                   // from its symbol tag value.
1250    :          default: {
1251  i :            LOG(WARNING) << "Unexpected symbol type " << sym_tag << " in "
1252    :                         << block->name() << " at "
1253    :                         << base::StringPrintf("0x%08X.", label_rva.value());
1254  i :            label_name = base::StringPrintf("<anonymous-%d>", sym_tag);
1255    :          }
1256    :        }
1257    :      }
1258    :  
1259    :      // We expect that we'll never see a code label that refers to a reloc.
1260    :      // This happens sometimes, however, as we generally get a code label for
1261    :      // the first byte after a switch statement. This can sometimes land on the
1262    :      // following jump table.
1263  E :      if ((label_attr & BlockGraph::CODE_LABEL) && reloc_set_.count(label_rva)) {
1264  E :        VLOG(1) << "Collision between reloc and code label in "
1265    :                << block->name() << " at " << label_name
1266    :                << base::StringPrintf(" (0x%08X).", label_rva.value())
1267    :                << " Falling back to data label.";
1268  E :        label_attr = BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL;
1269  E :        DCHECK_EQ(block_addr, block->addr());
                 // If a non-data label already exists at this offset, remove it
                 // before the data label is added below.
1270  E :        BlockGraph::Label label;
1271    :        if (block->GetLabel(offset, &label) &&
1272  E :            !label.has_attributes(BlockGraph::DATA_LABEL)) {
1273  i :          VLOG(1) << block->name() << ": Replacing label " << label.name()
1274    :                  << " ("
1275    :                  << BlockGraph::LabelAttributesToString(label.attributes())
1276    :                  << ") at offset " << offset << ".";
1277  i :          block->RemoveLabel(offset);
1278    :        }
1279  E :      }
1280    :  
1281    :      // Add the label to the block.
1282  E :      if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1283  i :        LOG(ERROR) << "Failed to add label to code block.";
1284  i :        return false;
1285    :      }
1286    :  
1287    :      // Is this a scope? Then it also has a length. Use it to create the matching
1288    :      // scope end.
               // NOTE(review): the scope-end address is not re-checked against the
               // block bounds here; presumably AddLabelToBlock() copes with an
               // address at or past the end of the block - confirm.
1289  E :      if (sym_tag == SymTagBlock) {
1290  E :        ULONGLONG length = 0;
1291  E :        if (symbol->get_length(&length) != S_OK) {
1292  i :          LOG(ERROR) << "Failed to extract code scope length for "
1293    :                     << block->name();
1294  i :          return false;
1295    :        }
1296  E :        label_rva += length;
1297  E :        label_name = "<scope-end>";
1298  E :        label_attr = BlockGraph::SCOPE_END_LABEL;
1299  E :        if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1300  i :          LOG(ERROR) << "Failed to add label to code block.";
1301  i :          return false;
1302    :        }
1303    :      }
1304  E :    }
1305    :  
1306  E :    return true;
1307  E :  }
1308    :  
1309  E :  bool Decomposer::ProcessThunkSymbols(IDiaSymbol* globals) {
             // Enumerates the SymTagCompiland children of |globals| and, for each
             // compiland, enumerates its SymTagThunk children. Every thunk is passed
             // to ProcessFunctionOrThunkSymbol(). Returns false on any DIA failure
             // or if processing a thunk fails.
1310  E :    ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1311    :    HRESULT hr = globals->findChildren(SymTagCompiland,
1312    :                                       NULL,
1313    :                                       nsNone,
1314  E :                                       enum_compilands.Receive());
1315  E :    if (FAILED(hr)) {
1316  i :      LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1317    :                 << com::LogHr(hr) << ".";
1318  i :      return false;
1319    :    }
1320    :  
             // Outer loop: one iteration per compiland.
1321  E :    while (true) {
1322  E :      ScopedComPtr<IDiaSymbol> compiland;
1323  E :      ULONG fetched = 0;
1324  E :      hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1325  E :      if (FAILED(hr)) {
1326  i :        LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1327    :                   << com::LogHr(hr) << ".";
1328  i :        return false;
1329    :      }
1330  E :      if (hr != S_OK || fetched == 0)
1331  E :        break;
1332    :  
1333  E :      ScopedComPtr<IDiaEnumSymbols> enum_thunks;
1334    :      hr = compiland->findChildren(SymTagThunk,
1335    :                                   NULL,
1336    :                                   nsNone,
1337  E :                                   enum_thunks.Receive());
1338  E :      if (FAILED(hr)) {
1339  i :        LOG(ERROR) << "Failed to retrieve thunk enumerator: "
1340    :                   << com::LogHr(hr) << ".";
1341  i :        return false;
1342    :      }
1343    :  
               // Inner loop: one iteration per thunk in this compiland. |fetched| from
               // the outer loop is passed again as the out-parameter.
1344  E :      while (true) {
1345  E :        ScopedComPtr<IDiaSymbol> thunk;
1346  E :        hr = enum_thunks->Next(1, thunk.Receive(), &fetched);
1347  E :        if (FAILED(hr)) {
1348  i :          LOG(ERROR) << "Failed to enumerate thunk enumerator: "
1349    :                     << com::LogHr(hr) << ".";
1350  i :          return false;
1351    :        }
1352  E :        if (hr != S_OK || fetched == 0)
1353  E :          break;
1354    :  
1355  E :        DCHECK(IsSymTag(thunk, SymTagThunk));
1356    :  
1357  E :        if (!ProcessFunctionOrThunkSymbol(thunk))
1358  i :          return false;
1359  E :      }
1360  E :    }
1361    :  
1362  E :    return true;
1363  E :  }
1364    :  
1365  E :  bool Decomposer::CreateGlobalLabels(IDiaSymbol* globals) {
             // Enumerates the SymTagCompiland children of |globals| and, for each
             // compiland, its SymTagLabel children. Each label is added as a
             // CODE_LABEL to the block that contains the label's address. Returns
             // false on a DIA failure, a name conversion failure, if no block
             // contains the label's address, or if the label cannot be added.
1366  E :    ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1367    :    HRESULT hr = globals->findChildren(SymTagCompiland,
1368    :                                       NULL,
1369    :                                       nsNone,
1370  E :                                       enum_compilands.Receive());
1371  E :    if (FAILED(hr)) {
1372  i :      LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1373    :                 << com::LogHr(hr) << ".";
1374  i :      return false;
1375    :    }
1376    :  
             // Outer loop: one iteration per compiland.
1377  E :    while (true) {
1378  E :      ScopedComPtr<IDiaSymbol> compiland;
1379  E :      ULONG fetched = 0;
1380  E :      hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1381  E :      if (FAILED(hr)) {
1382  i :        LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1383    :                   << com::LogHr(hr) << ".";
1384  i :        return false;
1385    :      }
1386  E :      if (hr != S_OK || fetched == 0)
1387  E :        break;
1388    :  
1389  E :      ScopedComPtr<IDiaEnumSymbols> enum_labels;
1390    :      hr = compiland->findChildren(SymTagLabel,
1391    :                                   NULL,
1392    :                                   nsNone,
1393  E :                                   enum_labels.Receive());
1394  E :      if (FAILED(hr)) {
1395  i :        LOG(ERROR) << "Failed to retrieve label enumerator: "
1396    :                   << com::LogHr(hr) << ".";
1397  i :        return false;
1398    :      }
1399    :  
               // Inner loop: one iteration per label symbol in this compiland.
1400  E :      while (true) {
1401  E :        ScopedComPtr<IDiaSymbol> label;
1402  E :        hr = enum_labels->Next(1, label.Receive(), &fetched);
1403  E :        if (FAILED(hr)) {
1404  i :          LOG(ERROR) << "Failed to enumerate label enumerator: "
1405    :                     << com::LogHr(hr) << ".";
1406  i :          return false;
1407    :        }
1408  E :        if (hr != S_OK || fetched == 0)
1409  E :          break;
1410    :  
1411  E :        DCHECK(IsSymTag(label, SymTagLabel));
1412    :  
                 // Unlike the nameless child symbols of a function, a label here must
                 // have both an address and a name.
1413  E :        DWORD addr = 0;
1414  E :        ScopedBstr temp_name;
1415    :        if (label->get_relativeVirtualAddress(&addr) != S_OK ||
1416  E :            label->get_name(temp_name.Receive()) != S_OK) {
1417  i :          LOG(ERROR) << "Failed to retrieve label address or name.";
1418  i :          return false;
1419    :        }
1420    :  
1421  E :        std::string label_name;
1422  E :        if (!WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1423  i :          LOG(ERROR) << "Failed to convert label name to UTF8.";
1424  i :          return false;
1425    :        }
1426    :  
                 // Find the block that contains the label's address.
1427  E :        RelativeAddress label_addr(addr);
1428  E :        BlockGraph::Block* block = image_->GetBlockByAddress(label_addr);
1429  E :        if (block == NULL) {
1430  i :          LOG(ERROR) << "No block for label " << label_name << " at " << addr;
1431  i :          return false;
1432    :        }
1433    :  
1434    :        if (!AddLabelToBlock(label_addr,
1435    :                             label_name,
1436    :                             BlockGraph::CODE_LABEL,
1437  E :                             block)) {
1438  i :          LOG(ERROR) << "Failed to add label to code block.";
1439  i :          return false;
1440    :        }
1441  E :      }
1442  E :    }
1443    :  
1444  E :    return true;
1445  E :  }
1446    :  // Creates a block of |block_type| covering [address, address + size), named after its address, and tags it GAP_BLOCK; returns false if no block could be created.
1447    :  bool Decomposer::CreateGapBlock(BlockGraph::BlockType block_type,
1448    :                                  RelativeAddress address,
1449  E :                                  BlockGraph::Size size) {
1450    :    BlockGraph::Block* block = FindOrCreateBlock(block_type, address, size,
1451    :        base::StringPrintf("Gap Block 0x%08X", address.value()).c_str(),
1452  E :        kExpectNoBlock);
1453  E :    if (block == NULL) {
1454  i :      LOG(ERROR) << "Unable to create gap block.";
1455  i :      return false;
1456    :    }
1457  E :    block->set_attribute(BlockGraph::GAP_BLOCK);
1458    :  
1459  E :    return true;
1460  E :  }
1461    :  // Creates gap blocks of |block_type| for the head, interstitial and tail ranges of the section described by |header| that no existing block covers.
1462    :  bool Decomposer::CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
1463  E :                                          BlockGraph::BlockType block_type) {
1464  E :    RelativeAddress section_begin(header->VirtualAddress);
1465  E :    RelativeAddress section_end(section_begin + header->Misc.VirtualSize);
1466    :    RelativeAddress image_end(
1467  E :        image_file_.nt_headers()->OptionalHeader.SizeOfImage);
1468    :  
1469    :    // Search for the first blocks intersecting the ranges that run from the
1470    :    // start and from the end of the section to the end of the image.
1471    :    BlockGraph::AddressSpace::RangeMap::const_iterator it(
1472    :        image_->address_space_impl().FindFirstIntersection(
1473    :            BlockGraph::AddressSpace::Range(section_begin,
1474  E :                                            image_end - section_begin)));
1475    :    // |end| is the first block intersecting [section_end, image_end), else end().
1476    :    BlockGraph::AddressSpace::RangeMap::const_iterator end =
1477  E :        image_->address_space_impl().end();
1478  E :    if (section_end < image_end) {
1479    :      end = image_->address_space_impl().FindFirstIntersection(
1480    :          BlockGraph::AddressSpace::Range(section_end,
1481  E :                                          image_end - section_end));
1482    :    }
1483    :  
1484    :    // The whole section is missing. Cover it with one gap block.
1485  E :    if (it == end)
1486    :      return CreateGapBlock(
1487  i :          block_type, section_begin, section_end - section_begin);
1488    :  
1489    :    // Create the head gap block if need be.
1490  E :    if (section_begin < it->first.start())
1491    :      if (!CreateGapBlock(
1492  i :          block_type, section_begin, it->first.start() - section_begin))
1493  i :        return false;
1494    :  
1495    :    // Now iterate the blocks and fill in gaps.
1496  E :    for (; it != end; ++it) {
1497  E :      const BlockGraph::Block* block = it->second;
1498  E :      DCHECK(block != NULL);
1499  E :      RelativeAddress block_end = it->first.start() + block->size();
1500  E :      if (block_end >= section_end)  // Block reaches the section end: no tail gap.
1501  E :        break;
1502    :  
1503    :      // Walk to the next address in turn.
1504  E :      BlockGraph::AddressSpace::RangeMap::const_iterator next = it;
1505  E :      ++next;
1506  E :      if (next == end) {
1507    :        // We're at the end of the list. Create the tail gap block.
1508  E :        DCHECK_GT(section_end, block_end);
1509  E :        if (!CreateGapBlock(block_type, block_end, section_end - block_end))
1510  i :          return false;
1511  E :        break;
1512    :      }
1513    :  
1514    :      // Create the interstitial gap block.
1515  E :      if (block_end < next->first.start())
1516    :        if (!CreateGapBlock(
1517  E :            block_type, block_end, next->first.start() - block_end))
1518  i :          return false;
1519  E :    }
1520    :  
1521  E :    return true;
1522  E :  }
1523    :  // Calls CreateSectionGapBlocks for every code and data section of the image; sections of any other type are skipped. Returns false on the first failure.
1524  E :  bool Decomposer::CreateGapBlocks() {
1525  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1526    :  
1527    :    // Iterate through all the image sections.
1528  E :    for (size_t i = 0; i < num_sections; ++i) {
1529  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1530  E :      DCHECK(header != NULL);
1531    :      // |section_type| is only used to describe the section in the error log.
1532  E :      BlockGraph::BlockType type = BlockGraph::CODE_BLOCK;
1533  E :      const char* section_type = NULL;
1534  E :      switch (GetSectionType(header)) {
1535    :        case kSectionCode:
1536  E :          type = BlockGraph::CODE_BLOCK;
1537  E :          section_type = "code";
1538  E :          break;
1539    :  
1540    :        case kSectionData:
1541  E :          type = BlockGraph::DATA_BLOCK;
1542  E :          section_type = "data";
1543  E :          break;
1544    :  
1545    :        default:
1546  i :          continue;
1547    :      }
1548    :  
1549  E :      if (!CreateSectionGapBlocks(header, type)) {
1550  i :        LOG(ERROR) << "Unable to create gap blocks for " << section_type
1551    :                   << " section \"" << header->Name << "\".";
1552  i :        return false;
1553    :      }
1554  E :    }
1555    :  
1556  E :    return true;
1557  E :  }
1558    :  // Forwards a reference to ValidateOrAddReference with FIXUP_MAY_EXIST and a zero offset, recording it against fixup_map_ and references_.
1559    :  bool Decomposer::AddReferenceCallback(RelativeAddress src_addr,
1560    :                                        BlockGraph::ReferenceType type,
1561    :                                        BlockGraph::Size size,
1562  E :                                        RelativeAddress dst_addr) {
1563    :    // This is only called by the PEFileParser, and it creates some references
1564    :    // for which there are no corresponding fixup entries.
1565    :    return ValidateOrAddReference(FIXUP_MAY_EXIST, src_addr, type, size, dst_addr,
1566  E :                                  0, &fixup_map_, &references_);
1567  E :  }
1568    :  // Decodes the image relocs into reloc_set_, reads them into a local reloc map and validates that map with ValidateRelocs(); returns false if any step fails.
1569  E :  bool Decomposer::ParseRelocs() {
1570  E :    if (!image_file_.DecodeRelocs(&reloc_set_)) {
1571  i :      LOG(ERROR) << "Unable to decode image relocs.";
1572  i :      return false;
1573    :    }
1574    :  
1575  E :    PEFile::RelocMap reloc_map;
1576  E :    if (!image_file_.ReadRelocs(reloc_set_, &reloc_map)) {
1577  i :      LOG(ERROR) << "Unable to read image relocs.";
1578  i :      return false;
1579    :    }
1580    :  
1581    :    // Validate each relocation entry against the corresponding fixup entry.
1582  E :    if (!ValidateRelocs(reloc_map))
1583  i :      return false;
1584    :  
1585  E :    return true;
1586  E :  }
1587    :  // For every entry of fixup_map_, reads the 32-bit value stored at the fixup location, derives the destination offset from the fixup's base and adds a reference to references_.
1588  E :  bool Decomposer::CreateReferencesFromFixups() {
1589  E :    FixupMap::const_iterator it(fixup_map_.begin());
1590  E :    for (; it != fixup_map_.end(); ++it) {
1591  E :      RelativeAddress src_addr(it->second.location);
1592  E :      uint32 data = 0;
1593  E :      if (!image_file_.ReadImage(src_addr, &data, sizeof(data))) {
1594  i :        LOG(ERROR) << "Unable to read image data for fixup with source at "
1595    :                   << src_addr;
1596  i :        return false;
1597    :      }
1598    :      // How |data| maps to an offset from |dst_base| depends on the fixup type.
1599  E :      RelativeAddress dst_base(it->second.base);
1600  E :      BlockGraph::Offset dst_offset = 0;
1601  E :      switch (it->second.type) {
1602    :        case BlockGraph::PC_RELATIVE_REF: {
1603  E :          dst_offset = src_addr + kPointerSize + data - dst_base;
1604  E :          break;
1605    :        }
1606    :  
1607    :        case BlockGraph::ABSOLUTE_REF: {
1608  E :          dst_offset = image_file_.AbsToRelDisplacement(data) - dst_base.value();
1609  E :          break;
1610    :        }
1611    :  
1612    :        case BlockGraph::RELATIVE_REF: {
1613  E :          dst_offset = data - dst_base.value();
1614  E :          break;
1615    :        }
1616    :  
1617    :        default: {
1618  i :          NOTREACHED() << "Invalid reference type.";
1619  i :          return false;
1620    :        }
1621    :      }
1622    :  
1623    :      if (!AddReference(src_addr, it->second.type, kPointerSize, dst_base,
1624  E :                        dst_offset, &references_)) {
1625  i :        return false;
1626    :      }
1627  E :    }
1628    :  
1629  E :    return true;
1630  E :  }
1631    :  // Passes the source address of every entry in |reloc_map| to ValidateOrAddReference with FIXUP_MUST_EXIST; returns false on the first failure.
1632  E :  bool Decomposer::ValidateRelocs(const PEFile::RelocMap& reloc_map) {
1633  E :    PEFile::RelocMap::const_iterator it(reloc_map.begin());
1634  E :    PEFile::RelocMap::const_iterator end(reloc_map.end());
1635  E :    for (; it != end; ++it) {
1636  E :      RelativeAddress src(it->first);
1637  E :      RelativeAddress dummy;
1638    :      // NOTE(review): |dummy| is a default-constructed destination; presumably the fixup supplies the real one - confirm.
1639    :      if (!ValidateOrAddReference(
1640    :              FIXUP_MUST_EXIST, src, BlockGraph::ABSOLUTE_REF,
1641  E :              sizeof(dummy), dummy, 0, &fixup_map_, &references_)) {
1642  i :        return false;
1643    :      }
1644  E :    }
1645    :  
1646  E :    return true;
1647  E :  }
1648    :  // Creates one code or data block per DIA section contribution found through |session|, skipping contributions in the resource section; returns false on any failure.
1649  E :  bool Decomposer::CreateBlocksFromSectionContribs(IDiaSession* session) {
1650  E :    ScopedComPtr<IDiaEnumSectionContribs> section_contribs;
1651    :    SearchResult search_result = FindDiaTable(session,
1652  E :                                              section_contribs.Receive());
1653  E :    if (search_result != kSearchSucceeded) {
1654  i :      if (search_result == kSearchFailed)  // Only this outcome is logged here.
1655  i :        LOG(ERROR) << "No section contribution table found.";
1656  i :      return false;
1657    :    }
1658    :  
1659  E :    size_t rsrc_id = image_file_.GetSectionIndex(kResourceSectionName);
1660    :  
1661  E :    LONG count = 0;
1662  E :    if (section_contribs->get_Count(&count) != S_OK) {
1663  i :      LOG(ERROR) << "Failed to get section contributions enumeration length.";
1664  i :      return false;
1665    :    }
1666    :  
1667  E :    for (LONG visited = 0; visited < count; ++visited) {
1668  E :      ScopedComPtr<IDiaSectionContrib> section_contrib;
1669  E :      ULONG fetched = 0;
1670  E :      HRESULT hr = section_contribs->Next(1, section_contrib.Receive(), &fetched);
1671  E :      if (hr != S_OK) {
1672  i :        LOG(ERROR) << "Failed to get DIA section contribution: "
1673    :                   << com::LogHr(hr) << ".";
1674  i :        return false;
1675    :      }
1676  E :      if (fetched == 0)
1677  i :        break;
1678    :      // Fetch the contribution's properties; |hr| keeps the first failing result for the log.
1679  E :      hr = E_FAIL;
1680  E :      DWORD rva = 0;
1681  E :      DWORD length = 0;
1682  E :      DWORD section_id = 0;
1683  E :      BOOL code = FALSE;
1684  E :      ScopedComPtr<IDiaSymbol> compiland;
1685  E :      ScopedBstr bstr_name;
1686    :      if ((hr = section_contrib->get_relativeVirtualAddress(&rva)) != S_OK ||
1687    :          (hr = section_contrib->get_length(&length)) != S_OK ||
1688    :          (hr = section_contrib->get_addressSection(&section_id)) != S_OK ||
1689    :          (hr = section_contrib->get_code(&code)) != S_OK ||
1690    :          (hr = section_contrib->get_compiland(compiland.Receive())) != S_OK ||
1691  E :          (hr = compiland->get_name(bstr_name.Receive())) != S_OK) {
1692  i :        LOG(ERROR) << "Failed to get section contribution properties: "
1693    :                   << com::LogHr(hr) << ".";
1694  i :        return false;
1695    :      }
1696    :  
1697    :      // Determine if this function was built by a supported compiler.
1698    :      bool is_built_by_supported_compiler =
1699  E :          IsBuiltBySupportedCompiler(compiland.get());
1700    :  
1701    :      // DIA numbers sections from 1 to n, while we do 0 to n - 1.
1702  E :      DCHECK_LT(0u, section_id);
1703  E :      --section_id;
1704    :  
1705    :      // We don't parse the resource section, as it is parsed by the PEFileParser.
1706  E :      if (section_id == rsrc_id)
1707  E :        continue;
1708    :      // The block is named after the contributing compiland.
1709  E :      std::string name;
1710  E :      if (!WideToUTF8(bstr_name, bstr_name.Length(), &name)) {
1711  i :        LOG(ERROR) << "Failed to convert compiland name to UTF8.";
1712  i :        return false;
1713    :      }
1714    :  
1715    :      // Create the block.
1716    :      BlockGraph::BlockType block_type =
1717  E :          code ? BlockGraph::CODE_BLOCK : BlockGraph::DATA_BLOCK;
1718    :      BlockGraph::Block* block = FindOrCreateBlock(block_type,
1719    :                                                   RelativeAddress(rva),
1720    :                                                   length,
1721    :                                                   name.c_str(),
1722  E :                                                   kExpectNoBlock);
1723  E :      if (block == NULL) {
1724  i :        LOG(ERROR) << "Unable to create block.";
1725  i :        return false;
1726    :      }
1727    :  
1728    :      // Set the block attributes.
1729  E :      block->set_attribute(BlockGraph::SECTION_CONTRIB);
1730  E :      if (!is_built_by_supported_compiler)
1731  E :        block->set_attribute(BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER);
1732  E :    }
1733    :  
1734  E :    return true;
1735  E :  }
1736    :  // DiaBrowser callback for SymTagData matches: finds or creates the block for each static data symbol with a non-zero RVA and length and labels it; returns kBrowserAbort on failure.
1737    :  DiaBrowser::BrowserDirective Decomposer::OnDataSymbol(
1738    :      const DiaBrowser& dia_browser,
1739    :      const DiaBrowser::SymTagVector& sym_tags,
1740  E :      const DiaBrowser::SymbolPtrVector& symbols) {
1741  E :    DCHECK_LT(0u, sym_tags.size());
1742  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1743  E :    DCHECK_EQ(SymTagData, sym_tags.back());
1744    :    // The matched data symbol is the last element of |symbols|.
1745  E :    const DiaBrowser::SymbolPtr& data(symbols.back());
1746    :  
1747  E :    HRESULT hr = E_FAIL;
1748  E :    DWORD location_type = LocIsNull;
1749  E :    DWORD rva = 0;
1750  E :    ScopedBstr name_bstr;
1751    :    if (FAILED(hr = data->get_locationType(&location_type)) ||
1752    :        FAILED(hr = data->get_relativeVirtualAddress(&rva)) ||
1753  E :        FAILED(hr = data->get_name(name_bstr.Receive()))) {
1754  i :      LOG(ERROR) << "Failed to get data properties: " << com::LogHr(hr) << ".";
1755  i :      return DiaBrowser::kBrowserAbort;
1756    :    }
1757    :  
1758    :    // We only parse data symbols with static storage.
1759  E :    if (location_type != LocIsStatic)
1760  E :      return DiaBrowser::kBrowserContinue;
1761    :  
1762    :    // Symbols with an address of zero are essentially invalid. They appear to
1763    :    // have been optimized away by the compiler, but they are still reported.
1764  E :    if (rva == 0)
1765  E :      return DiaBrowser::kBrowserContinue;
1766    :  
1767    :    // TODO(chrisha): We eventually want to get alignment info from the type
1768    :    //     information. This is strictly a lower bound, however, as certain
1769    :    //     data may be used in instructions that impose stricter alignment
1770    :    //     requirements.
1771  E :    size_t length = 0;
1772  E :    if (!GetTypeInfo(data, &length)) {
1773  i :      return DiaBrowser::kBrowserAbort;
1774    :    }
1775    :    // Zero-length data symbols act as 'forward declares' in some sense. They
1776    :    // are always followed by a non-zero length data symbol with the same name
1777    :    // and location.
1778  E :    if (length == 0)
1779  E :      return DiaBrowser::kBrowserContinue;
1780    :  
1781  E :    RelativeAddress addr(rva);
1782  E :    std::string name;
1783  E :    if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1784  i :      LOG(ERROR) << "Failed to convert data symbol name to UTF8.";
1785  i :      return DiaBrowser::kBrowserAbort;
1786    :    }
1787    :  
1788    :    // In general we expect data symbols to be completely contained by a block.
1789    :    // The data symbol can exceed the size of the block in the case of data
1790    :    // imports. For some reason the toolchain emits a global data symbol with
1791    :    // type information equal to the type of the data *pointed* to by the import
1792    :    // entry rather than the type of the entry itself. Thus, if the data type
1793    :    // is bigger than the entire IAT this symbol will exceed it. To complicate
1794    :    // matters even more, a poorly written module can import its own export in
1795    :    // which case a linker generated pseudo-import-entry block will be
1796    :    // generated. This won't be part of the IAT, so we can't even filter based
1797    :    // on that. Instead, we simply ignore global data symbols that exceed the
1798    :    // block size.
1799  E :    FindOrCreateBlockDirective directive = kAllowCoveringBlock;
1800  E :    base::StringPiece spname(name);
1801  E :    if (sym_tags.size() == 1 && spname.starts_with("_imp_")) {
1802    :      // For global data symbols (no parent symbols) to imported data ("_imp_"
1803    :      // prefix) we allow partially covering blocks.
1804  E :      directive = kAllowPartialCoveringBlock;
1805    :    }
1806    :  
1807    :    BlockGraph::Block* block = FindOrCreateBlock(BlockGraph::DATA_BLOCK,
1808    :                                                 addr, length, spname,
1809  E :                                                 directive);
1810    :  
1811    :    // We've seen null blocks for some symbols in modules compiled using a custom
1812    :    // non-Microsoft toolchain.
1813  E :    if (block == NULL) {
1814  i :      LOG(ERROR) << "Failed to get a block for symbol named " << name << ".";
1815  i :      return DiaBrowser::kBrowserAbort;
1816    :    }
1817    :  
1818  E :    if (block->type() == BlockGraph::CODE_BLOCK) {
1819    :      // The NativeClient bits of chrome.dll consists of hand-written assembly
1820    :      // that is compiled using a custom non-Microsoft toolchain. Unfortunately
1821    :      // for us this toolchain emits 1-byte data symbols instead of code labels.
1822    :      static const char kNaClPrefix[] = "NaCl";
1823    :      if (length == 1 &&
1824  E :          name.compare(0, arraysize(kNaClPrefix) - 1, kNaClPrefix) == 0) {
1825  i :        if (!AddLabelToBlock(addr, name, BlockGraph::CODE_LABEL, block)) {
1826  i :          LOG(ERROR) << "Failed to add label to code block.";
1827  i :          return DiaBrowser::kBrowserAbort;
1828    :        }
1829    :  
1830  i :        return DiaBrowser::kBrowserContinue;
1831    :      }
1832    :    }
1833    :    // Every other symbol becomes a data label, even when the block is a code block.
1834  E :    if (!AddLabelToBlock(addr, name, BlockGraph::DATA_LABEL, block)) {
1835  i :      LOG(ERROR) << "Failed to add data label to block.";
1836  i :      return DiaBrowser::kBrowserAbort;
1837    :    }
1838    :  
1839  E :    return DiaBrowser::kBrowserContinue;
1840  E :  }
1841    :  // DiaBrowser callback for SymTagPublicSymbol matches: adds a PUBLIC_SYMBOL_LABEL, with one leading '_' stripped from the name, to the block containing the symbol's RVA.
1842    :  DiaBrowser::BrowserDirective Decomposer::OnPublicSymbol(
1843    :      const DiaBrowser& dia_browser,
1844    :      const DiaBrowser::SymTagVector& sym_tags,
1845  E :      const DiaBrowser::SymbolPtrVector& symbols) {
1846  E :    DCHECK_LT(0u, sym_tags.size());
1847  E :    DCHECK_EQ(sym_tags.size(), symbols.size());
1848  E :    DCHECK_EQ(SymTagPublicSymbol, sym_tags.back());
1849  E :    const DiaBrowser::SymbolPtr& symbol(symbols.back());
1850    :  
1851    :    // We don't care about symbols that don't have addresses.
1852  E :    DWORD rva = 0;
1853  E :    if (S_OK != symbol->get_relativeVirtualAddress(&rva))
1854  E :      return DiaBrowser::kBrowserContinue;
1855    :  
1856  E :    ScopedBstr name_bstr;
1857  E :    if (S_OK != symbol->get_name(name_bstr.Receive())) {
1858  i :      LOG(ERROR) << "Failed to get public symbol name.";
1859  i :      return DiaBrowser::kBrowserAbort;
1860    :    }
1861    :  
1862  E :    std::string name;
1863  E :    if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1864  i :      LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1865  i :      return DiaBrowser::kBrowserAbort;
1866    :    }
1867    :    // A public symbol whose address is not covered by any block aborts browsing.
1868  E :    RelativeAddress addr(rva);
1869  E :    BlockGraph::Block* block = image_->GetBlockByAddress(addr);
1870  E :    if (block == NULL) {
1871  i :      LOG(ERROR) << "No block found for public symbol \"" << name << "\".";
1872  i :      return DiaBrowser::kBrowserAbort;
1873    :    }
1874    :  
1875    :    // Public symbol names are mangled. Remove leading '_' as per
1876    :    // http://msdn.microsoft.com/en-us/library/00kh39zz(v=vs.80).aspx
1877  E :    if (name[0] == '_')
1878  E :      name = name.substr(1);
1879    :  
1880  E :    if (!AddLabelToBlock(addr, name, BlockGraph::PUBLIC_SYMBOL_LABEL, block))
1881  i :      return DiaBrowser::kBrowserAbort;
1882    :  
1883  E :    return DiaBrowser::kBrowserContinue;
1884  E :  }
1885    :  // Finds data blocks whose names match the begin/end patterns in static_initializer_patterns_ and merges each bracketed address range into one COFF_GROUP block.
1886  E :  bool Decomposer::ProcessStaticInitializers() {
1887    :    typedef std::pair<RelativeAddress, RelativeAddress> AddressPair;
1888    :    typedef std::map<std::string, AddressPair> AddressPairMap;
1889    :    // Address zero doubles as the "endpoint not seen yet" marker below.
1890  E :    const RelativeAddress kNull(0);
1891    :  
1892    :    // This stores pairs of addresses, representing the beginning and the end
1893    :    // of each static initializer block. It is keyed with a string, which is
1894    :    // returned by the match group of the corresponding initializer pattern.
1895    :    // The key is necessary to correlate matching labels (as multiple pairs
1896    :    // of labels may match through a single pattern).
1897  E :    AddressPairMap addr_pair_map;
1898    :  
1899    :    // Used for keeping track of which label, if any, we matched.
1900    :    enum MatchType {
1901    :      kMatchNone,
1902    :      kMatchBeginLabel,
1903    :      kMatchEndLabel
1904    :    };
1905    :  
1906    :    // Iterate through all data blocks, looking for known initializer labels.
1907  E :    BlockGraph::AddressSpace::RangeMapConstIter block_it = image_->begin();
1908  E :    for (; block_it != image_->end(); ++block_it) {
1909  E :      const BlockGraph::Block* block = block_it->second;
1910    :      // Skip non-data blocks.
1911  E :      if (block->type() != BlockGraph::DATA_BLOCK)
1912  E :        continue;
1913    :  
1914    :      // Check the block name against each of the initializer patterns.
1915  E :      MatchType match = kMatchNone;
1916  E :      std::string block_name = block->name();
1917  E :      std::string name;
1918  E :      for (size_t i = 0; i < static_initializer_patterns_.size(); ++i) {
1919  E :        REPair& re_pair(static_initializer_patterns_[i]);
1920  E :        if (re_pair.first.FullMatch(block_name, &name))
1921  E :          match = kMatchBeginLabel;
1922  E :        else if (re_pair.second.FullMatch(block_name, &name))
1923  E :          match = kMatchEndLabel;
1924    :  
1925  E :        if (match != kMatchNone)
1926  E :          break;
1927  E :      }
1928    :  
1929    :      // No pattern matched this symbol? Continue to the next one.
1930  E :      if (match == kMatchNone)
1931  E :        continue;
1932    :  
1933    :      // Ensure this symbol exists in the map. Thankfully, addresses default
1934    :      // construct to NULL.
1935  E :      AddressPair& addr_pair = addr_pair_map[name];
1936    :  
1937    :      // Update the bracketing symbol endpoint. Make sure each symbol endpoint
1938    :      // is only seen once.
1939  E :      RelativeAddress* addr = NULL;
1940  E :      RelativeAddress new_addr;
1941  E :      if (match == kMatchBeginLabel) {
1942  E :        addr = &addr_pair.first;
1943  E :        new_addr = block->addr();
1944  E :      } else {
1945  E :        addr = &addr_pair.second;
1946  E :        new_addr = block->addr() + block->size();  // End label: one past the block.
1947    :      }
1948  E :      if (*addr != kNull) {
1949  i :        LOG(ERROR) << "Bracketing symbol appears multiple times: "
1950    :                   << block_name;
1951  i :        return false;
1952    :      }
1953  E :      *addr = new_addr;
1954  E :    }
1955    :  
1956    :    // Use the bracketing symbols to make the initializers contiguous.
1957  E :    AddressPairMap::const_iterator init_it = addr_pair_map.begin();
1958  E :    for (; init_it != addr_pair_map.end(); ++init_it) {
1959  E :      RelativeAddress begin_addr = init_it->second.first;
1960  E :      if (begin_addr == kNull) {
1961  i :        LOG(ERROR) << "Bracketing start symbol missing: " << init_it->first;
1962  i :        return false;
1963    :      }
1964    :  
1965  E :      RelativeAddress end_addr = init_it->second.second;
1966  E :      if (end_addr == kNull) {
1967  i :        LOG(ERROR) << "Bracketing end symbol missing: " << init_it->first;
1968  i :        return false;
1969    :      }
1970    :  
1971  E :      if (begin_addr > end_addr) {
1972  i :        LOG(ERROR) << "Bracketing symbols out of order: " << init_it->first;
1973  i :        return false;
1974    :      }
1975    :  
1976    :      // Merge the initializers.
1977  E :      DataSpace::Range range(begin_addr, end_addr - begin_addr);
1978  E :      BlockGraph::Block* merged = image_->MergeIntersectingBlocks(range);
1979    :      std::string name = base::StringPrintf("Bracketed Initializers: %s",
1980  E :                                            init_it->first.c_str());
1981  E :      DCHECK(merged != NULL);
1982  E :      merged->set_name(name);
1983  E :      merged->set_attribute(BlockGraph::COFF_GROUP);
1984  E :    }
1985    :  
1986  E :    return true;
1987  E :  }
1988    :  // Browses the symbols under |root| with a DiaBrowser that invokes OnDataSymbol for the two SymTagData patterns registered below; returns the result of Browse().
1989  E :  bool Decomposer::ProcessDataSymbols(IDiaSymbol* root) {
1990    :    DiaBrowser::MatchCallback on_data_symbol(
1991  E :        base::Bind(&Decomposer::OnDataSymbol, base::Unretained(this)));
1992    :  
1993  E :    DiaBrowser dia_browser;
1994    :    dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagData),
1995  E :                           on_data_symbol);
1996    :    dia_browser.AddPattern(Seq(SymTagCompiland, SymTagFunction,
1997    :                               Star(SymTagBlock), SymTagData),
1998  E :                           on_data_symbol);
1999    :  
2000  E :    return dia_browser.Browse(root);
2001  E :  }
2002    :  // Browses the symbols under |root| with a DiaBrowser that invokes OnPublicSymbol for each SymTagPublicSymbol match; returns the result of Browse().
2003  E :  bool Decomposer::ProcessPublicSymbols(IDiaSymbol* root) {
2004    :    DiaBrowser::MatchCallback on_public_symbol(
2005  E :        base::Bind(&Decomposer::OnPublicSymbol, base::Unretained(this)));
2006    :  
2007  E :    DiaBrowser dia_browser;
2008  E :    dia_browser.AddPattern(SymTagPublicSymbol, on_public_symbol);
2009    :  
2010  E :    return dia_browser.Browse(root);
2011  E :  }
2012    :  // Calls GuessDataBlockAlignment, passing the image's SectionAlignment, on every block intersecting each data section; always returns true.
2013  E :  bool Decomposer::GuessDataBlockAlignments() {
2014  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2015    :    // Iterate through all the image sections.
2016  E :    for (size_t i = 0; i < num_sections; ++i) {
2017  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2018  E :      DCHECK(header != NULL);
2019    :  
2020    :      // Only iterate through data sections.
2021  E :      if (GetSectionType(header) != kSectionData)
2022  E :        continue;
2023    :  
2024  E :      RelativeAddress section_begin(header->VirtualAddress);
2025  E :      size_t section_length = header->Misc.VirtualSize;
2026    :  
2027    :      // Get the range of blocks in this section.
2028    :      BlockGraph::AddressSpace::RangeMapIterPair it_pair =
2029  E :          image_->GetIntersectingBlocks(section_begin, section_length);
2030    :  
2031    :      // Iterate through the blocks in the section, setting their alignment.
2032  E :      BlockGraph::AddressSpace::RangeMapIter it = it_pair.first;
2033  E :      for (; it != it_pair.second; ++it) {
2034  E :        BlockGraph::Block* block = it->second;
2035    :        GuessDataBlockAlignment(block,
2036  E :            image_file_.nt_headers()->OptionalHeader.SectionAlignment);
2037  E :      }
2038  E :    }
2039    :  
2040  E :    return true;
2041  E :  }
2042    :  // Runs CreateCodeReferencesForBlock on every CODE_BLOCK in the block graph; returns false as soon as one of them fails.
2043  E :  bool Decomposer::CreateCodeReferences() {
2044  E :    BlockGraph::BlockMap::iterator it(image_->graph()->blocks_mutable().begin());
2045  E :    BlockGraph::BlockMap::iterator end(image_->graph()->blocks_mutable().end());
2046  E :    for (; it != end; ++it) {
2047  E :      BlockGraph::Block* block = &it->second;
2048  E :      if (block->type() != BlockGraph::CODE_BLOCK)  // Only code blocks are processed.
2049  E :        continue;
2050    :  
2051  E :      if (!CreateCodeReferencesForBlock(block))
2052  i :        return false;
2053  E :    }
2054    :  
2055  E :    return true;
2056  E :  }
2057    :  
2058  E :  bool Decomposer::CreateCodeReferencesForBlock(BlockGraph::Block* block) {
2059  E :    DCHECK(current_block_ == NULL);
2060  E :    current_block_ = block;
2061    :  
2062  E :    RelativeAddress block_addr;
2063  E :    if (!image_->GetAddressOf(block, &block_addr)) {
2064  i :      LOG(ERROR) << "Block \"" << block->name() << "\" has no address.";
2065  i :      return false;
2066    :    }
2067    :  
2068  E :    AbsoluteAddress abs_block_addr;
2069  E :    if (!image_file_.Translate(block_addr, &abs_block_addr)) {
2070  i :      LOG(ERROR) << "Unable to get absolute address for " << block_addr;
2071  i :      return false;
2072    :    }
2073    :  
2074    :    Disassembler::InstructionCallback on_instruction(
2075  E :        base::Bind(&Decomposer::OnInstruction, base::Unretained(this)));
2076    :  
2077    :    // Use block labels and code references as starting points for disassembly.
2078  E :    Disassembler::AddressSet starting_points;
2079    :    GetDisassemblyStartingPoints(block, abs_block_addr, reloc_set_,
2080  E :                                 &starting_points);
2081    :  
2082    :    // If the block has no starting points, then it has no private symbols and
2083    :    // is not BB safe. We mark the block as not safe for basic-block disassembly.
2084    :    if (starting_points.empty() &&
2085  E :        (block->attributes() & BlockGraph::GAP_BLOCK) == 0) {
2086  E :      VLOG(1) << "Block \"" << block->name() << "\" has no private symbols.";
2087  E :      block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
2088    :    }
2089    :  
2090    :    // Determine whether or not we are being strict during disassembly.
2091  E :    bool strict = block_graph::CodeBlockAttributesAreBasicBlockSafe(block);
2092  E :    be_strict_with_current_block_ = strict;
2093    :  
2094    :    // Determine the length of the code portion of the block by trimming off any
2095    :    // known trailing data. Also, if we're in strict mode, ensure that our
2096    :    // assumption regarding code/data layout is met.
2097  E :    size_t code_size = 0;
2098    :    if (!BlockHasExpectedCodeDataLayout(block, &code_size) &&
2099  E :        be_strict_with_current_block_) {
2100  i :      LOG(ERROR) << "Block \"" << block->name() << "\" has unexpected code/data "
2101    :                 << "layout.";
2102  i :      return false;
2103    :    }
2104    :  
2105    :    // Disassemble the block.
2106    :    Disassembler disasm(block->data(),
2107    :                        code_size,
2108    :                        abs_block_addr,
2109    :                        starting_points,
2110  E :                        on_instruction);
2111  E :    Disassembler::WalkResult result = disasm.Walk();
2112    :  
2113    :    // If we're strict (that is, we're confident that the block was produced by
2114    :    // cl.exe), then we can use that knowledge to look for calls that appear to be
2115    :    // to non-returning functions that we may not have symbol info for.
2116  E :    if (be_strict_with_current_block_)
2117  E :      LookForNonReturningFunctions(references_, *image_, current_block_, disasm);
2118    :  
2119  E :    DCHECK_EQ(block, current_block_);
2120  E :    current_block_ = NULL;
2121  E :    be_strict_with_current_block_ = true;
2122    :  
2123  E :    switch (result) {
2124    :      case Disassembler::kWalkIncomplete:
2125    :        // There were computed branches that couldn't be chased down.
2126  E :        block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2127  E :        return true;
2128    :  
2129    :      case Disassembler::kWalkTerminated:
2130    :        // This exit condition should only ever occur for non-strict disassembly.
2131    :        // If strict, we should always get kWalkError.
2132  E :        DCHECK(!strict);
2133    :        // This means that the code was malformed, or broke some expected
2134    :        // conventions. This code is not safe for basic block disassembly.
2135  E :        block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
2136  E :        return true;
2137    :  
2138    :      case Disassembler::kWalkSuccess:
2139    :        // Were any bytes in the block not accounted for? This generally means
2140    :        // unreachable code, which we see quite often, especially in debug builds.
2141  E :        if (disasm.code_size() != disasm.disassembled_bytes())
2142  E :          block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2143  E :        return true;
2144    :  
2145    :      case Disassembler::kWalkError:
2146  i :        return false;
2147    :  
2148    :      default:
2149  i :        NOTREACHED() << "Unhandled Disassembler WalkResult.";
2150  i :        return false;
2151    :    }
2152  E :  }
2153    :  
2154    :  BlockGraph::Block* Decomposer::CreateBlock(BlockGraph::BlockType type,
2155    :                                             RelativeAddress address,
2156    :                                             BlockGraph::Size size,
2157  E :                                             const base::StringPiece& name) {
             // Adds a new block with the given type, address, size and name to
             // image_, then records its source range, its section and (when
             // available) its image data. Returns the block, or NULL after logging
             // an error if the block cannot be added or lies outside of all
             // sections.
2158  E :    BlockGraph::Block* block = image_->AddBlock(type, address, size, name);
2159  E :    if (block == NULL) {
2160  i :      LOG(ERROR) << "Unable to add block at " << address << " with size "
2161    :                 << size << ".";
2162  i :      return NULL;
2163    :    }
2164    :  
2165    :    // Mark the source range from whence this block originates.
             // The whole block, [0, size), maps onto [address, address + size) of
             // the source image. |pushed| is only consumed by the DCHECK below.
2166    :    bool pushed = block->source_ranges().Push(
2167    :        BlockGraph::Block::DataRange(0, size),
2168  E :        BlockGraph::Block::SourceRange(address, size));
2169  E :    DCHECK(pushed);
2170    :  
             // The entire [address, address + size) range must resolve to a section.
             // NOTE(review): on this failure path the block has already been added
             // to image_ above and nothing here removes it again.
2171  E :    BlockGraph::SectionId section = image_file_.GetSectionIndex(address, size);
2172  E :    if (section == BlockGraph::kInvalidSectionId) {
2173  i :      LOG(ERROR) << "Block at " << address << " with size " << size
2174    :                 << " lies outside of all sections.";
2175  i :      return NULL;
2176    :    }
2177  E :    block->set_section(section);
2178    :  
             // A NULL result is not an error: the block is simply left without data
             // (presumably the range has no initialized bytes in the image file --
             // TODO confirm against PEFile::GetImageData).
2179  E :    const uint8* data = image_file_.GetImageData(address, size);
2180  E :    if (data != NULL)
2181  E :      block->SetData(data, size);
2182    :  
2183  E :    return block;
2184  E :  }
2185    :  
2186    :  BlockGraph::Block* Decomposer::FindOrCreateBlock(
2187    :      BlockGraph::BlockType type,
2188    :      RelativeAddress addr,
2189    :      BlockGraph::Size size,
2190    :      const base::StringPiece& name,
2191  E :      FindOrCreateBlockDirective directive) {
             // Returns the block image_ already has at |addr| when |directive|
             // tolerates it, creates a new block through CreateBlock when there is
             // none, and returns NULL (after logging) when the existing block
             // collides with the requested [addr, addr + size) range under
             // |directive|. |type| and |name| are only used for creation and for
             // the collision log message.
2192  E :    BlockGraph::Block* block = image_->GetBlockByAddress(addr);
2193  E :    if (block != NULL) {
2194    :      // If we got a block we're guaranteed that it at least partially covers
2195    :      // the query range, so we can immediately return it in that case.
2196  E :      if (directive == kAllowPartialCoveringBlock)
2197  E :        return block;
2198    :  
2199    :      // Always allow collisions where the new block is a proper subset of
2200    :      // an existing PE parsed block. The PE parser often knows more than we do
2201    :      // about blocks that need to stick together.
               // NOTE(review): this overrides whatever directive the caller passed,
               // including kExpectNoBlock and kAllowIdenticalBlock.
2202  E :      if (block->attributes() & BlockGraph::PE_PARSED)
2203  E :        directive = kAllowCoveringBlock;
2204    :  
2205  E :      bool collision = false;
2206  E :      switch (directive) {
2207    :        case kExpectNoBlock: {
                   // Any pre-existing block is a collision.
2208  i :          collision = true;
2209  i :          break;
2210    :        }
2211    :        case kAllowIdenticalBlock: {
                   // Only an exact match in both address and size is tolerated.
2212  i :          collision = (block->addr() != addr || block->size() != size);
2213  i :          break;
2214    :        }
2215    :        default: {
                   // The existing block must contain the whole requested range;
                   // an identical range also passes this test.
2216  E :          DCHECK(directive == kAllowCoveringBlock);
2217    :          collision = block->addr() > addr ||
2218  E :              (block->addr() + block->size()) < addr + size;
2219    :          break;
2220    :        }
2221    :      }
2222    :  
2223  E :      if (collision) {
2224  i :        LOG(ERROR) << "Block collision for \"" << name.as_string() << "\" at "
2225    :                   << addr << "(" << size << ") with existing block \""
2226    :                   << block->name() << "\" at " << block->addr() << " ("
2227    :                   << block->size() << ").";
2228  i :        return NULL;
2229    :      }
2230    :  
               // The existing block is acceptable; no new block is created.
2231  E :      return block;
2232    :    }
2233  E :    DCHECK(block == NULL);
2234    :  
             // Nothing lives at |addr| yet.
2235  E :    return CreateBlock(type, addr, size, name);
2236  E :  }
2237    :  
2238    :  CallbackDirective Decomposer::LookPastInstructionForData(
2239  E :      RelativeAddress instr_end) {
             // Decides how disassembly proceeds given |instr_end|, the address
             // immediately following the instruction just visited:
             //  - kDirectiveContinue if no reloc starts at |instr_end|;
             //  - kDirectiveTerminatePath if a reloc starts there and a jump-table
             //    data label exists (or was added, in non-strict mode);
             //  - kDirectiveAbort, or the result of AbortOrTerminateDisassembly,
             //    when expectations about the block are violated.
2240    :    // If this instruction terminates at a data boundary (ie: the *next*
2241    :    // instruction will be data or a reloc), we can be certain that a new
2242    :    // lookup table is starting at this address.
2243  E :    if (reloc_set_.find(instr_end) == reloc_set_.end())
2244  E :      return Disassembler::kDirectiveContinue;
2245    :  
2246    :    // Find the block housing the reloc. We expect the reloc to be contained
2247    :    // completely within this block.
             // The literal 4 is presumably the byte size of a reloc (kPointerSize
             // elsewhere in this file) -- TODO confirm.
2248  i :    BlockGraph::Block* block = image_->GetContainingBlock(instr_end, 4);
2249  i :    if (block != current_block_) {
               // A reloc that is contained in no block at all is fatal.
2250  i :      CHECK(block != NULL);
2251  i :      LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2252    :          << "Found an instruction/data boundary between blocks: "
2253    :          << current_block_->name() << " and " << block->name();
2254  i :      return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2255    :    }
2256    :  
             // Offset of the reloc relative to the start of the current block.
2257  i :    BlockGraph::Offset offset = instr_end - block->addr();
2258    :  
2259    :    // We expect there to be a jump-table data label already.
2260  i :    BlockGraph::Label label;
2261  i :    bool have_label = block->GetLabel(offset, &label);
2262    :    if (!have_label || !label.has_attributes(
2263  i :            BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)) {
2264  i :      LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2265    :          << "Expected there to be a data label marking the jump "
2266    :          << "table at " << block->name() << " + " << offset << ".";
2267    :  
2268    :      // If we're in strict mode, we're a block that obeys standard conventions.
2269    :      // Which means we should already be aware of any jump tables in this block.
2270  i :      if (be_strict_with_current_block_)
2271  i :        return Disassembler::kDirectiveAbort;
2272    :  
2273    :      // If we're not in strict mode, add the jump-table label.
               // A label that exists but lacks the expected attributes is removed
               // first and then replaced by the SetLabel call below.
2274  i :      if (have_label) {
2275  i :        CHECK(block->RemoveLabel(offset));
2276    :      }
2277    :  
2278    :      CHECK(block->SetLabel(offset, BlockGraph::Label(
2279    :          base::StringPrintf("<JUMP-TABLE-%d>", offset),
2280  i :          BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)));
2281    :    }
2282    :  
             // Data follows this instruction, so this disassembly path ends here.
2283  i :    return Disassembler::kDirectiveTerminatePath;
2284  E :  }
2285    :  
2286  E :  void Decomposer::MarkDisassembledPastEnd() {
             // Flags current_block_ with DISASSEMBLED_PAST_END and emits a verbose
             // log message naming the block and its address. Requires that
             // current_block_ is set.
             // NOTE(review): |count| is never read or written in this function.
2287    :    static size_t count = 0;
2288  E :    DCHECK(current_block_ != NULL);
2289  E :    current_block_->set_attribute(BlockGraph::DISASSEMBLED_PAST_END);
2290    :    // TODO(chrisha): The entire "disassembled past end" and non-returning
2291    :    //     function infrastructure can be ripped out once we rework the BB
2292    :    //     disassembler to be straight path, and remove the disassembly phase
2293    :    //     from the decomposer (where it's no longer needed). In the meantime
2294    :    //     we simply crank down this log verbosity due to all of the false
2295    :    //     positives.
2296  E :    VLOG(1) << "Disassembled past end of block or into known data for block \""
2297    :            << current_block_->name() << "\" at " << current_block_->addr()
2298    :            << ".";
2299  E :  }
2300    :  
2301    :  CallbackDirective Decomposer::VisitNonFlowControlInstruction(
2302  E :      RelativeAddress instr_start, RelativeAddress instr_end) {
             // Checks every intermediate reference whose source address lies inside
             // the instruction spanning |instr_start| to |instr_end|. Returns
             // kDirectiveContinue when all of them look sane; otherwise logs (at
             // ERROR in strict mode, VLOG(1) otherwise) and returns whatever
             // AbortOrTerminateDisassembly yields for the current strictness.
2303    :    // TODO(chrisha): We could walk the operands and follow references
2304    :    //     explicitly. If any of them are of reference type and there's no
2305    :    //     matching reference, this would be cause to blow up and die (we
2306    :    //     should get all of these as relocs and/or fixups).
2307    :  
             // With ordered-map semantics, upper_bound skips a reference keyed
             // exactly at |instr_start| and lower_bound stops at the first key that
             // is not less than |instr_end|, so the keys visited lie strictly
             // between the two addresses.
2308    :    IntermediateReferenceMap::const_iterator ref_it =
2309  E :        references_.upper_bound(instr_start);
2310    :    IntermediateReferenceMap::const_iterator ref_end =
2311  E :        references_.lower_bound(instr_end);
2312    :  
2313  E :    for (; ref_it != ref_end; ++ref_it) {
               // Block containing the (base) address this reference points at.
               // NOTE(review): the NULL check is a DCHECK only, yet |ref_block| is
               // dereferenced unconditionally below.
2314    :      BlockGraph::Block* ref_block = image_->GetContainingBlock(
2315  E :          ref_it->second.base, 1);
2316  E :      DCHECK(ref_block != NULL);
2317    :  
2318    :      // This is an inter-block reference.
2319  E :      if (ref_block != current_block_) {
2320    :        // There should be no cross-block references to the middle of other
2321    :        // code blocks (to the top is fine, as we could be passing around a
2322    :        // function pointer). The exception is if the remote block is not
2323    :        // generated by cl.exe. In this case, there could be arbitrary labels
2324    :        // that act like functions within the body of that block, and referring
2325    :        // to them is perfectly fine.
2326    :        if (ref_block->type() == BlockGraph::CODE_BLOCK &&
2327    :            ref_it->second.base != ref_block->addr() &&
2328  E :            block_graph::CodeBlockAttributesAreBasicBlockSafe(ref_block)) {
2329  i :          LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2330    :              << "Found a non-control-flow code-block to middle-of-code-block "
2331    :              << "reference from block \"" << current_block_->name()
2332    :              << "\" to block \"" << ref_block->name() << "\".";
2333  i :          return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2334    :        }
2335  E :      } else {
2336    :        // This is an intra-block reference.
2337    :        BlockGraph::Offset ref_offset =
2338  E :            ref_it->second.base - current_block_->addr();
2339    :  
2340    :        // If this is to offset zero, we assume we are taking a pointer to
2341    :        // ourself, which is safe.
2342  E :        if (ref_offset != 0) {
2343    :          // If this is 'clean' code it should be to data, and there should be a
2344    :          // label.
2345  E :          BlockGraph::Label label;
2346  E :          if (!current_block_->GetLabel(ref_offset, &label)) {
2347  i :            LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2348    :                << "Found an intra-block data-reference with no label.";
2349  i :            return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2350  i :          } else {
                     // The label must carry DATA_LABEL and must not also carry
                     // CODE_LABEL.
2351    :            if (!label.has_attributes(BlockGraph::DATA_LABEL) ||
2352  E :                label.has_attributes(BlockGraph::CODE_LABEL)) {
2353  E :              LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2354    :                  << "Found an intra-block data-like reference to a non-data "
2355    :                  << "or code label in block \"" << current_block_->name()
2356    :                  << "\".";
2357  E :              return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2358    :            }
2359    :          }
2360  E :        }
2361    :      }
2362  E :    }
2363    :  
2364  E :    return Disassembler::kDirectiveContinue;
2365  E :  }
2366    :  
2367    :  CallbackDirective Decomposer::VisitPcRelativeFlowControlInstruction(
2368    :      AbsoluteAddress instr_abs,
2369    :      RelativeAddress instr_rel,
2370    :      const _DInst& instruction,
2371  E :      bool end_of_code) {
             // Handles a branch, call or conditional branch whose single operand is
             // PC-relative: derives the source and destination of the reference,
             // validates it against (or adds it to) the fixup and reference maps as
             // a PC_RELATIVE_REF, and decides how disassembly proceeds.
             //
             // Returns kDirectiveAbort on any failure, kDirectiveTerminatePath for a
             // call to the start of a block flagged NON_RETURN_FUNCTION, and
             // kDirectiveContinue otherwise.
             //
             // NOTE(review): |instr_rel| is not used anywhere in this body.
2372  E :    int fc = META_GET_FC(instruction.meta);
2373  E :    DCHECK(fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH);
2374  E :    DCHECK_EQ(O_PC, instruction.ops[0].type);
2375  E :    DCHECK_EQ(O_NONE, instruction.ops[1].type);
2376  E :    DCHECK_EQ(O_NONE, instruction.ops[2].type);
2377  E :    DCHECK_EQ(O_NONE, instruction.ops[3].type);
2378    :    DCHECK(instruction.ops[0].size == 8 ||
2379    :        instruction.ops[0].size == 16 ||
2380  E :        instruction.ops[0].size == 32);
2381    :    // Distorm gives us size in bits, we want bytes.
2382  E :    BlockGraph::Size size = instruction.ops[0].size / 8;
2383    :  
2384    :    // Get the reference's address. Note we assume it's in the instruction's
2385    :    // tail end - I don't know of a case where a PC-relative offset in a branch
2386    :    // or call is not the very last thing in an x86 instruction.
             // The destination is the address just past the instruction plus the
             // value in instruction.imm.addr (presumably the decoded displacement
             // -- TODO confirm against the distorm documentation).
2387  E :    AbsoluteAddress abs_src = instr_abs + instruction.size - size;
2388    :    AbsoluteAddress abs_dst = instr_abs + instruction.size +
2389  E :        static_cast<size_t>(instruction.imm.addr);
2390    :  
2391  E :    RelativeAddress src, dst;
2392    :    if (!image_file_.Translate(abs_src, &src) ||
2393  E :        !image_file_.Translate(abs_dst, &dst)) {
2394  i :      LOG(ERROR) << "Unable to translate absolute to relative addresses.";
2395  i :      return Disassembler::kDirectiveAbort;
2396    :    }
2397    :  
2398    :    // Get the block associated with the destination address. It must exist
2399    :    // and be a code block.
             // NOTE(review): both expectations are DCHECKs only, yet |block| is
             // dereferenced unconditionally below.
2400  E :    BlockGraph::Block* block = image_->GetContainingBlock(dst, 1);
2401  E :    DCHECK(block != NULL);
2402  E :    DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
2403    :  
2404    :    // For short references, we should not see a fixup.
2405  E :    ValidateOrAddReferenceMode mode = FIXUP_MUST_NOT_EXIST;
2406  E :    if (size == kPointerSize) {
2407    :      // Long PC_RELATIVE reference within a single block? FIXUPs aren't
2408    :      // strictly necessary.
2409  E :      if (block->Contains(src, kPointerSize))
2410  E :        mode = FIXUP_MAY_EXIST;
2411  E :      else
2412    :        // But if they're between blocks (section contributions), we expect to
2413    :        // find them.
2414  E :        mode = FIXUP_MUST_EXIST;
2415  E :    } else {
2416    :      // Since we slice by section contributions we no longer see short
2417    :      // references across blocks. If we do, bail!
2418  E :      if (block != current_block_) {
2419  i :        LOG(ERROR) << "Found a short PC-relative reference out of block \""
2420    :                   << current_block_->name() << "\".";
2421  i :        return Disassembler::kDirectiveAbort;
2422    :      }
2423    :    }
2424    :  
2425    :    // Validate or create the reference, as necessary.
2426    :    if (!ValidateOrAddReference(mode, src, BlockGraph::PC_RELATIVE_REF, size,
2427  E :                                dst, 0, &fixup_map_, &references_)) {
2428  i :      LOG(ERROR) << "Failed to validate/create reference originating from "
2429    :                 << "block \"" << current_block_->name() << "\".";
2430  i :      return Disassembler::kDirectiveAbort;
2431    :    }
2432    :  
2433    :    // If this is a call and the destination is a non-returning function,
2434    :    // then indicate that we should terminate this disassembly path.
2435    :    if (fc == FC_CALL &&
2436  E :        (block->attributes() & BlockGraph::NON_RETURN_FUNCTION)) {
2437    :      // TODO(chrisha): For now, we enforce that the call be to the beginning
2438    :      //    of the function. This may not be necessary, but better safe than
2439    :      //    sorry for now.
2440  E :      if (block->addr() != dst) {
2441  i :        LOG(ERROR) << "Calling inside the body of a non-returning function: "
2442    :                   << block->name();
2443  i :        return Disassembler::kDirectiveAbort;
2444    :      }
2445    :  
2446  E :      return Disassembler::kDirectiveTerminatePath;
2447    :    }
2448    :  
2449    :    // If we get here, then we don't think it's a non-returning call. If it's
2450    :    // not an unconditional jump and we're at the end of the code for this block
2451    :    // then we consider this as disassembling past the end.
2452  E :    if (fc != FC_UNC_BRANCH && end_of_code)
2453  i :      MarkDisassembledPastEnd();
2454    :  
2455  E :    return Disassembler::kDirectiveContinue;
2456  E :  }
2457    :  
2458    :  CallbackDirective Decomposer::VisitIndirectMemoryCallInstruction(
2459  E :        const _DInst& instruction, bool end_of_code) {
             // Handles a call whose first operand is a memory displacement (O_DISP).
             // Looks up the intermediate reference stored at the displacement
             // address and the block that reference points to.
             //
             // Returns kDirectiveAbort if the displacement cannot be translated or
             // the referenced block cannot be found, kDirectiveTerminatePath if the
             // referenced block is flagged NON_RETURN_FUNCTION, and
             // kDirectiveContinue otherwise (including when no reference is known
             // at the displacement address).
2460  E :    DCHECK_EQ(FC_CALL, META_GET_FC(instruction.meta));
2461  E :    DCHECK_EQ(O_DISP, instruction.ops[0].type);
2462    :  
2463    :    // TODO(rogerm): Consider changing to image_file_.AbsToRelDisplacement()
2464    :    //     instead of translate. In theory, the indexing into a function-table
2465    :    //     could be statically offset such that the displacement falls outside
2466    :    //     of the image's address space. But, we have never seen the compiler
2467    :    //     generate code like that. This is left to use Translate, which will
2468    :    //     trigger an error in such a case.
             // The displacement is narrowed to 32 bits before being treated as an
             // absolute address.
2469  E :    AbsoluteAddress disp_addr_abs(static_cast<uint32>(instruction.disp));
2470  E :    RelativeAddress disp_addr_rel;
2471  E :    if (!image_file_.Translate(disp_addr_abs, &disp_addr_rel)) {
2472  i :      LOG(ERROR) << "Unable to translate call address.";
2473  i :      return Disassembler::kDirectiveAbort;
2474    :    }
2475    :  
2476    :    // Try to dereference the address of the call instruction. This can fail
2477    :    // for blocks that are only initialized at runtime, so we don't fail if
2478    :    // we don't find a reference.
             // NOTE(review): this early return also skips the |end_of_code| check
             // performed at the bottom of this function.
2479    :    IntermediateReferenceMap::const_iterator ref_it =
2480  E :        references_.find(disp_addr_rel);
2481  E :    if (ref_it == references_.end())
2482  E :      return Disassembler::kDirectiveContinue;
2483    :  
2484    :    // NOTE: This process derails for bound import tables. In this case the
2485    :    //     attempted dereference above will fail, but we could still actually
2486    :    //     find the import name thunk by inspecting the offset of the memory
2487    :    //     location.
2488    :  
2489    :    // The reference must be direct and 32-bit.
2490  E :    const IntermediateReference& ref = ref_it->second;
2491  E :    DCHECK_EQ(BlockGraph::Reference::kMaximumSize, ref.size);
2492  E :    DCHECK_EQ(0, ref.offset);
2493    :  
2494    :    // Look up the thunk this refers to.
2495  E :    BlockGraph::Block* thunk = image_->GetBlockByAddress(ref.base);
2496  E :    if (thunk == NULL) {
2497  i :      LOG(ERROR) << "Unable to dereference intermediate reference at "
2498    :                 << disp_addr_rel << " to " << ref.base << ".";
2499  i :      return Disassembler::kDirectiveAbort;
2500    :    }
2501    :  
2502  E :    if (ref.type == BlockGraph::RELATIVE_REF) {
2503    :      // If this is a relative reference it must be part of an import address
2504    :      // table (during runtime this address would be patched up with an absolute
2505    :      // reference). Thus we expect the referenced block to be data, an import
2506    :      // name thunk.
2507  E :      DCHECK_EQ(BlockGraph::DATA_BLOCK, thunk->type());
2508  E :    } else {
2509    :      // If this is an absolute address it should actually point directly to
2510    :      // code.
2511  E :      DCHECK_EQ(BlockGraph::ABSOLUTE_REF, ref.type);
2512  E :      DCHECK_EQ(BlockGraph::CODE_BLOCK, thunk->type());
2513    :    }
2514    :  
2515    :    // Either way, if the block is non-returning we terminate this path of
2516    :    // disassembly.
2517  E :    if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
2518  E :      return Disassembler::kDirectiveTerminatePath;
2519    :  
             // A returning call as the last instruction of the code means execution
             // would run past the end of the block.
2520  E :    if (end_of_code)
2521  i :      MarkDisassembledPastEnd();
2522    :  
2523  E :    return Disassembler::kDirectiveContinue;
2524  E :  }
2525    :  
2526    :  CallbackDirective Decomposer::OnInstruction(const Disassembler& walker,
2527  E :                                              const _DInst& instruction) {
             // Per-instruction callback. Translates the instruction's address,
             // checks whether data follows the instruction, and then dispatches on
             // the instruction's flow-control class. The directive produced by the
             // data check is combined with the one produced by the per-class
             // visitor.
             //
             // NOTE(review): |walker| is not used anywhere in this body.
2528    :    // Get the relative address of this instruction.
2529  E :    AbsoluteAddress instr_abs(static_cast<uint32>(instruction.addr));
2530  E :    RelativeAddress instr_rel;
2531  E :    if (!image_file_.Translate(instr_abs, &instr_rel)) {
2532  i :      LOG(ERROR) << "Unable to translate instruction address.";
2533  i :      return Disassembler::kDirectiveAbort;
2534    :    }
2535  E :    RelativeAddress after_instr_rel = instr_rel + instruction.size;
2536    :  
2537    :  #ifndef NDEBUG
2538    :    // If we're in debug mode, it's helpful to have a pointer directly to the
2539    :    // beginning of this instruction in memory.
             // Neither variable is read again in this function; they exist purely
             // for inspection in a debugger.
2540  E :    BlockGraph::Offset instr_offset = instr_rel - current_block_->addr();
2541  E :    const uint8* instr_data = current_block_->data() + instr_offset;
2542    :  #endif
2543    :  
2544    :    // TODO(chrisha): Certain instructions require aligned data (ie: MMX/SSE
2545    :    //     instructions). We need to follow the data that these instructions
2546    :    //     refer to, and set their alignment appropriately. For now, alignment
2547    :    //     is simply preserved from the original image.
2548    :  
2549  E :    CallbackDirective directive = LookPastInstructionForData(after_instr_rel);
2550  E :    if (IsFatalCallbackDirective(directive))
2551  i :      return directive;
2552    :  
2553    :    // We're at the end of code in this block if we encountered data, or this is
2554    :    // the last instruction to be processed.
2555  E :    RelativeAddress block_end(current_block_->addr() + current_block_->size());
2556    :    bool end_of_code = (directive == Disassembler::kDirectiveTerminatePath) ||
2557  E :        (after_instr_rel >= block_end);
2558    :  
2559  E :    int fc = META_GET_FC(instruction.meta);
2560    :  
2561  E :    if (fc == FC_NONE) {
2562    :      // There's no control flow and we're at the end of the block. Mark the
2563    :      // block as dirty.
2564  E :      if (end_of_code)
2565  i :        MarkDisassembledPastEnd();
2566    :  
2567    :      return CombineCallbackDirectives(directive,
2568  E :          VisitNonFlowControlInstruction(instr_rel, after_instr_rel));
2569    :    }
2570    :  
2571    :    if ((fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH) &&
2572  E :        instruction.ops[0].type == O_PC) {
2573    :      // For all branches, calls and conditional branches to PC-relative
2574    :      // addresses, record a PC-relative reference.
2575    :      return CombineCallbackDirectives(directive,
2576    :          VisitPcRelativeFlowControlInstruction(instr_abs,
2577    :                                                instr_rel,
2578    :                                                instruction,
2579  E :                                                end_of_code));
2580    :    }
2581    :  
2582    :    // We explicitly handle indirect memory call instructions. These can often
2583    :    // be tracked down as pointing to a block in this image, or to an import
2584    :    // name thunk from another module.
2585  E :    if (fc == FC_CALL && instruction.ops[0].type == O_DISP) {
2586    :      return CombineCallbackDirectives(directive,
2587  E :          VisitIndirectMemoryCallInstruction(instruction, end_of_code));
2588    :    }
2589    :  
2590    :    // Look out for blocks where disassembly seems to run off the end of the
2591    :    // block. We do not treat interrupts as flow control as execution can
2592    :    // continue past the interrupt.
             // Every flow-control instruction not dispatched above ends up here.
             // Only returns and unconditional branches are accepted as the final
             // instruction of the code.
2593  E :    if (fc != FC_RET && fc != FC_UNC_BRANCH && end_of_code)
2594  E :      MarkDisassembledPastEnd();
2595    :  
2596  E :    return directive;
2597  E :  }
2598    :  
2599    :  bool Decomposer::CreatePEImageBlocksAndReferences(
2600  E :      PEFileParser::PEHeader* header) {
             // Runs a PEFileParser over image_file_, writing into image_. The parser
             // reports references through AddReferenceCallback and import thunks
             // through OnImportThunkCallback, both bound to this decomposer.
             // |header| is handed to ParseImage (presumably as an output -- TODO
             // confirm against PEFileParser). Returns false, after logging, if
             // parsing fails.
             //
             // base::Unretained(this) binds a raw pointer to this object; the
             // callbacks and the parser holding them are locals of this function.
2601    :    PEFileParser::AddReferenceCallback add_reference(
2602  E :        base::Bind(&Decomposer::AddReferenceCallback, base::Unretained(this)));
2603  E :    PEFileParser parser(image_file_, image_, add_reference);
2604    :    parser.set_on_import_thunk(
2605  E :        base::Bind(&Decomposer::OnImportThunkCallback, base::Unretained(this)));
2606    :  
2607  E :    if (!parser.ParseImage(header)) {
2608  i :      LOG(ERROR) << "Unable to parse PE image.";
2609  i :      return false;
2610    :    }
2611    :  
2612  E :    return true;
2613  E :  }
2614    :  
2615  E :  bool Decomposer::FinalizeIntermediateReferences() {
             // Converts every entry of references_ (keyed by source address) into a
             // BlockGraph::Reference set on the block that contains the source
             // address, then empties references_. Returns false, after logging, as
             // soon as a source or base destination address maps to no block.
             //
             // NOTE(review): on that early return references_ is left uncleared and
             // the references processed before the failing entry stay set.
2616  E :    IntermediateReferenceMap::const_iterator it(references_.begin());
2617  E :    IntermediateReferenceMap::const_iterator end(references_.end());
2618    :  
2619  E :    for (; it != end; ++it) {
               // The destination block is looked up by the base address; the
               // offset-adjusted |dst_addr| is only used to compute |dst_offset|.
2620  E :      RelativeAddress src_addr(it->first);
2621  E :      BlockGraph::Block* src = image_->GetBlockByAddress(src_addr);
2622  E :      RelativeAddress dst_base_addr(it->second.base);
2623  E :      RelativeAddress dst_addr(dst_base_addr + it->second.offset);
2624  E :      BlockGraph::Block* dst = image_->GetBlockByAddress(dst_base_addr);
2625    :  
               // NOTE(review): |src| and |dst| streamed below are the block
               // pointers (at least one of them NULL), not the addresses.
2626  E :      if (src == NULL || dst == NULL) {
2627  i :        LOG(ERROR) << "Reference source or base destination address is out of "
2628    :                   << "range, src: " << src << ", dst: " << dst;
2629  i :        return false;
2630    :      }
2631    :  
2632  E :      RelativeAddress src_start = src->addr();
2633  E :      RelativeAddress dst_start = dst->addr();
2634    :  
2635    :      // Get the offset of the ultimate destination relative to the start of the
2636    :      // destination block.
2637  E :      BlockGraph::Offset dst_offset = dst_addr - dst_start;
2638    :  
2639    :      // Get the offset of the actual referenced object relative to the start of
2640    :      // the destination block.
2641  E :      BlockGraph::Offset dst_base = dst_base_addr - dst_start;
2642    :  
2643    :      BlockGraph::Reference ref(it->second.type,
2644    :                                it->second.size,
2645    :                                dst,
2646    :                                dst_offset,
2647  E :                                dst_base);
               // The reference is stored at the source's offset within its block.
2648  E :      src->SetReference(src_addr - src_start, ref);
2649  E :    }
2650    :  
2651  E :    references_.clear();
2652    :  
2653  E :    return true;
2654  E :  }
2655    :  
2656  E :  bool Decomposer::ConfirmFixupsVisited() const {
             // Scans fixup_map_ for fixups that were never marked visited. Unvisited
             // PC_RELATIVE_REF fixups located in code blocks are tolerated; any
             // other unvisited fixup is logged and makes the function return false.
             // The scan does not stop at the first offender, so all of them get
             // logged.
2657  E :    bool success = true;
2658    :  
2659    :    // Ideally, all fixups should have been visited during decomposition.
2660    :    // TODO(chrisha): Address the root problems underlying the following
2661    :    //     temporary fix.
2662  E :    FixupMap::const_iterator fixup_it = fixup_map_.begin();
2663  E :    for (; fixup_it != fixup_map_.end(); ++fixup_it) {
2664  E :      if (fixup_it->second.visited)
2665  E :        continue;
2666    :  
               // NOTE(review): the NULL check is a DCHECK only, yet |block| is
               // dereferenced unconditionally below.
2667    :      const BlockGraph::Block* block =
2668  E :          image_->GetContainingBlock(fixup_it->first, kPointerSize);
2669  E :      DCHECK(block != NULL);
2670    :  
2671    :      // We know that we currently do not have full disassembly coverage as there
2672    :      // are several orphaned pieces of apparently unreachable code in the CRT
2673    :      // that we do not disassemble, but which may contain jmp or call commands.
2674    :      // Thus, we expect that missed fixups are all PC-relative and lie within
2675    :      // code blocks.
2676    :      if (block->type() == BlockGraph::CODE_BLOCK &&
2677  E :          fixup_it->second.type == BlockGraph::PC_RELATIVE_REF)
2678  E :        continue;
2679    :  
2680  i :      success = false;
2681  i :      LOG(ERROR) << "Unexpected unseen fixup at " << fixup_it->second.location;
2682  i :    }
2683    :  
2684  E :    return success;
2685  E :  }
2686    :  
2687  E :  bool Decomposer::FindPaddingBlocks() {
             // Walks every block in the graph behind image_ and sets PADDING_BLOCK
             // on those that qualify: gap blocks without labels, references or
             // referrers whose contents are all int3 (code blocks) or are either
             // absent or all zero (data blocks). Always returns true.
2688  E :    DCHECK(image_ != NULL);
2689  E :    DCHECK(image_->graph() != NULL);
2690    :  
2691    :    BlockGraph::BlockMap::iterator block_it =
2692  E :        image_->graph()->blocks_mutable().begin();
2693  E :    for (; block_it != image_->graph()->blocks_mutable().end(); ++block_it) {
2694  E :      BlockGraph::Block& block = block_it->second;
2695    :  
2696    :      // Padding blocks must not have any symbol information: no labels,
2697    :      // no references, no referrers, and they must be a gap block.
2698    :      if (block.labels().size() != 0 ||
2699    :          block.references().size() != 0 ||
2700    :          block.referrers().size() != 0 ||
2701  E :          (block.attributes() & BlockGraph::GAP_BLOCK) == 0)
2702  E :        continue;
2703    :  
               // Inside this switch, `continue` advances the enclosing for loop and
               // so skips the set_attribute call below; `break` (or running off the
               // end of the default case) proceeds to it.
2704  E :      switch (block.type()) {
2705    :        // Code blocks should be fully defined and consist of only int3s.
2706    :        case BlockGraph::CODE_BLOCK: {
2707    :          if (block.data_size() != block.size() ||
2708  E :              RepeatedValue(block.data(), block.data_size()) != kInt3)
2709  i :            continue;
2710  E :          break;
2711    :        }
2712    :  
2713    :        // Data blocks should be uninitialized or have fully defined data
2714    :        // consisting only of zeros.
2715    :        default: {
2716  E :          DCHECK_EQ(BlockGraph::DATA_BLOCK, block.type());
2717  E :          if (block.data_size() == 0)  // Uninitialized data blocks are padding.
2718  E :            break;
2719    :          if (block.data_size() != block.size() ||
2720  E :              RepeatedValue(block.data(), block.data_size()) != 0)
2721  i :            continue;
2722    :        }
2723    :      }
2724    :  
2725    :      // If we fall through to this point, then the block is a padding block.
2726  E :      block.set_attribute(BlockGraph::PADDING_BLOCK);
2727  E :    }
2728    :  
2729  E :    return true;
2730  E :  }
2731    :  
2732  E :  bool Decomposer::CreateSections() {
             // Adds one section to the block graph behind image_ for each section
             // header of image_file_, in header order, using the header's name and
             // characteristics. Returns false, after logging, if a newly added
             // section does not get an id equal to its index in the image.
2733    :    // Iterate through the image sections, and create sections in the BlockGraph.
2734  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2735  E :    for (size_t i = 0; i < num_sections; ++i) {
2736  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2737  E :      std::string name = pe::PEFile::GetSectionName(*header);
               // NOTE(review): the NULL check is a DCHECK only, yet |section| is
               // dereferenced unconditionally below.
2738    :      BlockGraph::Section* section = image_->graph()->AddSection(
2739  E :          name, header->Characteristics);
2740  E :      DCHECK(section != NULL);
2741    :  
2742    :      // For now, we expect them to have been created with the same IDs as those
2743    :      // in the original image.
2744  E :      if (section->id() != i) {
2745  i :        LOG(ERROR) << "Unexpected section ID.";
2746  i :        return false;
2747    :      }
2748  E :    }
2749    :  
2750  E :    return true;
2751  E :  }
2752    :  
2753  E :  bool Decomposer::LoadDebugStreams(IDiaSession* dia_session) {  // Loads FIXUP and OMAP-from data.
2754  E :    DCHECK(dia_session != NULL);
2755    :  
2756    :    // Load the fixups. These must exist: any result other than kSearchSucceeded fails the call.
2757  E :    PdbFixups pdb_fixups;
2758    :    SearchResult search_result = FindAndLoadDiaDebugStreamByName(
2759  E :        kFixupDiaDebugStreamName, dia_session, &pdb_fixups);
2760  E :    if (search_result != kSearchSucceeded) {
2761  i :      if (search_result == kSearchFailed) {
2762  i :        LOG(ERROR) << "PDB file does not contain a FIXUP stream. Module must be "
2763    :                      "linked with '/PROFILE' or '/DEBUGINFO:FIXUP' flag.";
2764    :      }
2765  i :      return false;
2766    :    }
2767    :  
2768    :    // Load the omap_from table. It is not necessary that one exist: only kSearchErrored aborts here.
2769  E :    std::vector<OMAP> omap_from;
2770    :    search_result = FindAndLoadDiaDebugStreamByName(
2771  E :        kOmapFromDiaDebugStreamName, dia_session, &omap_from);
2772  E :    if (search_result == kSearchErrored)
2773  i :      return false;
2774    :  
2775    :    // Translate the fixups through omap_from, validate them and populate fixup_map_.
2776  E :    if (!OmapAndValidateFixups(omap_from, pdb_fixups))
2777  i :      return false;
2778    :  
2779  E :    return true;
2780  E :  }
2781    :  
2782    :  bool Decomposer::OmapAndValidateFixups(const std::vector<OMAP>& omap_from,
2783  E :                                         const PdbFixups& pdb_fixups) {  // Fills fixup_map_; false on any invalid fixup.
2784  E :    bool have_omap = omap_from.size() != 0;  // An empty table means addresses are used untranslated.
2785    :  
2786    :    // The resource section in Chrome is modified post-link by a tool that adds a
2787    :    // manifest to it. This causes all of the fixups in the resource section (and
2788    :    // anything beyond it) to be invalid. As long as the resource section is the
2789    :    // last section in the image, this is not a problem (we can safely ignore the
2790    :    // .rsrc fixups, which we know how to parse without them). However, if there
2791    :    // is a section after the resource section, things will have been shifted
2792    :    // and potentially crucial fixups will be invalid.
2793  E :    RelativeAddress rsrc_start(0xffffffff), max_start;
2794  E :    size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2795  E :    for (size_t i = 0; i < num_sections; ++i) {
2796  E :      const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2797  E :      RelativeAddress start(header->VirtualAddress);
2798  E :      if (start > max_start)
2799  E :        max_start = start;
2800    :      if (strncmp(kResourceSectionName,
2801    :                  reinterpret_cast<const char*>(header->Name),
2802  E :                  IMAGE_SIZEOF_SHORT_NAME) == 0) {
2803  E :        rsrc_start = start;
2804  E :        break;  // NOTE(review): headers after the resource section are never visited, so max_start cannot include them - confirm intent.
2805    :      }
2806  E :    }
2807    :  
2808    :    // Ensure no visited section starts after the resource section. If no resource section was found, rsrc_start keeps its 0xffffffff initial value.
2809  E :    if (max_start > rsrc_start) {
2810  i :      LOG(ERROR) << kResourceSectionName << " section is not the last section.";
2811  i :      return false;
2812    :    }
2813    :  
2814    :    // Ensure the fixups are all valid, and populate the fixup map.
2815  E :    size_t skipped = 0;  // NOTE(review): never read or updated in this function.
2816  E :    for (size_t i = 0; i < pdb_fixups.size(); ++i) {
2817  E :      if (!pdb_fixups[i].ValidHeader()) {
2818  i :        LOG(ERROR) << "Unknown fixup header: "
2819    :                   << base::StringPrintf("0x%08X.", pdb_fixups[i].header);
2820  i :        return false;
2821    :      }
2822    :  
2823    :      // For now, we skip any offset fixups. We've only seen this in the context
2824    :      // of TLS data access, and we don't mess with TLS structures.
2825  E :      if (pdb_fixups[i].is_offset())
2826  E :        continue;
2827    :  
2828    :      // All fixups we handle should be full size pointers.
2829  E :      DCHECK_EQ(kPointerSize, pdb_fixups[i].size());
2830    :  
2831    :      // Get the original addresses, and map them through OMAP information.
2832    :      // Normally DIA takes care of this for us, but there is no API for
2833    :      // getting DIA to give us FIXUP information, so we have to do it manually.
2834  E :      RelativeAddress rva_location(pdb_fixups[i].rva_location);
2835  E :      RelativeAddress rva_base(pdb_fixups[i].rva_base);
2836  E :      if (have_omap) {
2837  i :        rva_location = pdb::TranslateAddressViaOmap(omap_from, rva_location);
2838  i :        rva_base = pdb::TranslateAddressViaOmap(omap_from, rva_base);
2839    :      }
2840    :  
2841    :      // Ignore fixups located at or beyond the start of the .rsrc section.
2842  E :      if (rva_location >= rsrc_start)
2843  E :        continue;
2844    :  
2845    :      // Ensure the fixup location (a full pointer) and at least the first byte
2846    :      // of the address it refers to both lie within the image.
2847    :      if (!image_file_.Contains(rva_location, kPointerSize) ||
2848  E :          !image_file_.Contains(rva_base, 1)) {
2849  i :        LOG(ERROR) << "Fixup refers to addresses outside of image.";
2850  i :        return false;
2851    :      }
2852    :  
2853    :      // Add the fixup, keyed by its location, and ensure that location is unique.
2854  E :      Fixup fixup = { PdbFixupTypeToReferenceType(pdb_fixups[i].type),
2855  E :                      pdb_fixups[i].refers_to_code(),
2856  E :                      pdb_fixups[i].is_data(),
2857  E :                      false,
2858  E :                      rva_location,
2859  E :                      rva_base };
2860  E :      bool added = fixup_map_.insert(std::make_pair(rva_location, fixup)).second;
2861  E :      if (!added) {
2862  i :        LOG(ERROR) << "Colliding fixups at " << rva_location;
2863  i :        return false;
2864    :      }
2865  E :    }
2866    :  
2867  E :    return true;
2868  E :  }
2869    :  
2870    :  bool Decomposer::RegisterStaticInitializerPatterns(
2871  E :      const base::StringPiece& begin, const base::StringPiece& end) {
2872    :    // Build an RE from each pattern and require exactly one capturing group in each; otherwise nothing is registered.
2873    :    REPair re_pair = std::make_pair(RE(begin.as_string()),
2874  E :                                    RE(end.as_string()));
2875    :    if (re_pair.first.NumberOfCapturingGroups() != 1 ||
2876  E :        re_pair.second.NumberOfCapturingGroups() != 1)
2877  i :      return false;
2878    :  
2879  E :    static_initializer_patterns_.push_back(re_pair);  // Stored as a (begin, end) pair.
2880    :  
2881  E :    return true;
2882  E :  }
2883    :  
2884    :  bool Decomposer::RegisterNonReturningFunction(
2885  E :      const base::StringPiece& function_name) {
2886  E :    return non_returning_functions_.insert(function_name.as_string()).second;  // Returns the insert's .second flag (presumably false for an already-registered name).
2887  E :  }
2888    :  
2889    :  bool Decomposer::RegisterNonReturningImport(
2890    :      const base::StringPiece& module_name,
2891  E :      const base::StringPiece& function_name) {
2892  E :    StringSet& module_set = non_returning_imports_[module_name.as_string()];  // Per-module set of function names, looked up via operator[].
2893  E :    return module_set.insert(function_name.as_string()).second;  // Returns the insert's .second flag for this module's set.
2894  E :  }
2895    :  
2896    :  bool Decomposer::LoadBlockGraphFromPdbStream(const PEFile& image_file,
2897    :                                               pdb::PdbStream* block_graph_stream,
2898  E :                                               ImageLayout* image_layout) {  // Returns false on any read, version or deserialization failure.
2899  E :    DCHECK(block_graph_stream != NULL);
2900  E :    DCHECK(image_layout != NULL);
2901  E :    LOG(INFO) << "Reading block-graph and image layout from the PDB.";
2902    :  
2903    :    // Initialize a byte stream from the PDB stream and wrap its [data(), data() + length()) range in an input stream.
2904  E :    scoped_refptr<pdb::PdbByteStream> byte_stream = new pdb::PdbByteStream();
2905  E :    if (!byte_stream->Init(block_graph_stream))
2906  i :      return false;
2907  E :    DCHECK(byte_stream.get() != NULL);  // NOTE(review): byte_stream was already dereferenced by the Init() call above.
2908    :  
2909  E :    core::ScopedInStreamPtr pdb_in_stream;
2910    :    pdb_in_stream.reset(core::CreateByteInStream(
2911  E :        byte_stream->data(), byte_stream->data() + byte_stream->length()));
2912    :  
2913    :    // Read the header: a uint32 stream version followed by a one-byte compression flag.
2914  E :    uint32 stream_version = 0;
2915  E :    unsigned char compressed = 0;
2916    :    if (!pdb_in_stream->Read(sizeof(stream_version),
2917    :                             reinterpret_cast<core::Byte*>(&stream_version)) ||
2918    :        !pdb_in_stream->Read(sizeof(compressed),
2919  E :                             reinterpret_cast<core::Byte*>(&compressed))) {
2920  i :      LOG(ERROR) << "Failed to read existing Syzygy block-graph stream header.";
2921  i :      return false;
2922    :    }
2923    :  
2924    :    // Check the stream version; only pdb::kSyzygyBlockGraphStreamVersion is accepted.
2925  E :    if (stream_version != pdb::kSyzygyBlockGraphStreamVersion) {
2926  E :      LOG(ERROR) << "PDB contains an unsupported Syzygy block-graph stream"
2927    :                 << " version (got " << stream_version << ", expected "
2928    :                 << pdb::kSyzygyBlockGraphStreamVersion << ").";
2929  E :      return false;
2930    :    }
2931    :  
2932    :    // If the compression flag is non-zero, insert the decompression filter; in_stream then points at it.
2933  E :    core::InStream* in_stream = pdb_in_stream.get();
2934  E :    scoped_ptr<core::ZInStream> zip_in_stream;
2935  E :    if (compressed != 0) {
2936  E :      zip_in_stream.reset(new core::ZInStream(in_stream));
2937  E :      if (!zip_in_stream->Init()) {
2938  i :        LOG(ERROR) << "Unable to initialize ZInStream.";
2939  i :        return false;
2940    :      }
2941  E :      in_stream = zip_in_stream.get();
2942    :    }
2943    :  
2944    :    // Deserialize the block-graph and image layout from whichever stream was selected above.
2945  E :    core::NativeBinaryInArchive in_archive(in_stream);
2946  E :    block_graph::BlockGraphSerializer::Attributes attributes = 0;  // Passed by address to the loader; not inspected afterwards.
2947    :    if (!LoadBlockGraphAndImageLayout(
2948  E :        image_file, &attributes, image_layout, &in_archive)) {
2949  i :      LOG(ERROR) << "Failed to deserialize block-graph and image layout.";
2950  i :      return false;
2951    :    }
2952    :  
2953  E :    return true;
2954  E :  }
2955    :  
2956    :  bool Decomposer::LoadBlockGraphFromPdb(const base::FilePath& pdb_path,
2957    :                                         const PEFile& image_file,
2958    :                                         ImageLayout* image_layout,
2959  E :                                         bool* stream_exists) {  // Returns true only if a block-graph stream was found and loaded.
2960  E :    DCHECK(image_layout != NULL);
2961  E :    DCHECK(stream_exists != NULL);
2962    :    // NOTE: *stream_exists is only written once the PDB has been read; it is left untouched if the read below fails.
2963  E :    pdb::PdbFile pdb_file;
2964  E :    pdb::PdbReader pdb_reader;
2965  E :    if (!pdb_reader.Read(pdb_path, &pdb_file)) {
2966  i :      LOG(ERROR) << "Unable to read the PDB named \"" << pdb_path.value()
2967    :                 << "\".";
2968  i :      return false;
2969    :    }
2970    :  
2971    :    // Try to get the block-graph stream from the PDB. A NULL result is reported as "no stream".
2972    :    scoped_refptr<pdb::PdbStream> block_graph_stream =
2973  E :        GetBlockGraphStreamFromPdb(&pdb_file);
2974  E :    if (block_graph_stream.get() == NULL) {
2975  E :      *stream_exists = false;
2976  E :      return false;
2977    :    }
2978    :  
2979    :    // The PDB contains a block-graph stream, the block-graph and the image layout
2980    :    // will be read from this stream.
2981  E :    *stream_exists = true;
2982    :    if (!LoadBlockGraphFromPdbStream(image_file, block_graph_stream.get(),
2983  E :                                     image_layout)) {
2984  i :      return false;
2985    :    }
2986    :  
2987  E :    return true;
2988  E :  }
2989    :  
2990    :  scoped_refptr<pdb::PdbStream> Decomposer::GetBlockGraphStreamFromPdb(
2991  E :      pdb::PdbFile* pdb_file) {
2992  E :    scoped_refptr<pdb::PdbStream> block_graph_stream;  // Default-constructed; returned as-is on the early-exit paths below.
2993    :    // Read the PDB header info stream to obtain the name -> stream ID map.
2994  E :    pdb::PdbInfoHeader70 pdb_header = {0};
2995  E :    pdb::NameStreamMap name_stream_map;
2996    :    if (!ReadHeaderInfoStream(pdb_file->GetStream(pdb::kPdbHeaderInfoStream),
2997    :                             &pdb_header,
2998  E :                             &name_stream_map)) {
2999  i :      LOG(ERROR) << "Failed to read header info stream.";
3000  i :      return block_graph_stream;
3001    :    }
3002    :    pdb::NameStreamMap::const_iterator name_it = name_stream_map.find(
3003  E :        pdb::kSyzygyBlockGraphStreamName);
3004  E :    if (name_it == name_stream_map.end()) {  // No block-graph stream is named in this PDB; nothing is logged.
3005  E :      return block_graph_stream;
3006    :    }
3007    :  
3008    :    // Get the block-graph stream and check that it exists and is not empty.
3009  E :    block_graph_stream = pdb_file->GetStream(name_it->second);
3010  E :    if (block_graph_stream.get() == NULL) {
3011  i :      LOG(ERROR) << "Failed to read the block-graph stream from the PDB.";
3012  i :      return block_graph_stream;
3013    :    }
3014  E :    if (block_graph_stream->length() == 0) {
3015  i :      LOG(ERROR) << "The block-graph stream is empty.";
3016  i :      return block_graph_stream;  // NOTE(review): the empty stream is still returned (non-NULL) after logging - confirm intent.
3017    :    }
3018    :  
3019  E :    return block_graph_stream;
3020  E :  }
3021    :  
3022    :  bool Decomposer::OnImportThunkCallback(const char* module_name,
3023    :                                         const char* symbol_name,
3024  E :                                         BlockGraph::Block* thunk) {  // Always returns true.
3025  E :    DCHECK(module_name != NULL);
3026  E :    DCHECK(symbol_name != NULL);
3027  E :    DCHECK(thunk != NULL);
3028    :  
3029    :    // Look for the module first; modules with no registered non-returning imports are left alone.
3030    :    StringSetMap::const_iterator module_it =
3031  E :        non_returning_imports_.find(std::string(module_name));
3032  E :    if (module_it == non_returning_imports_.end())
3033  E :      return true;
3034    :  
3035    :    // Look for the symbol within the module's set of registered names.
3036  E :    if (module_it->second.count(std::string(symbol_name)) == 0)
3037  E :      return true;
3038    :  
3039    :    // If we get here then the imported symbol is registered as non-returning. Decorate the thunk.
3040  E :    thunk->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
3041  E :    VLOG(1) << "Forcing non-returning attribute on imported symbol \""
3042    :            << symbol_name << "\" from module \"" << module_name << "\".";
3043    :  
3044  E :    return true;
3045  E :  }
3046    :  
3047    :  }  // namespace pe

Coverage information generated Thu Jul 04 09:34:53 2013.