Coverage for /Syzygy/block_graph/basic_block_decomposer.cc

Coverage   Lines executed / instrumented / missing   Language   Group
91.9%      422 / 459 / 0                             C++        source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // Implementation of basic block decomposer.
  16    :  //
  17    :  // TODO(rogerm): Refactor this to just do a straight disassembly of all bytes
  18    :  //     (up to the start of embedded data: jump and case tables) to a list of
  19    :  //     instructions, then chop up the instruction list into basic blocks in
  20    :  //     a second pass, splicing the instructions into the basic-block instruction
  21    :  //     lists and generating successors.
  22    :  
  23    :  #include "syzygy/block_graph/basic_block_decomposer.h"
  24    :  
  25    :  #include <algorithm>
  26    :  #include <vector>
  27    :  
  28    :  #include "base/logging.h"
  29    :  #include "base/stringprintf.h"
  30    :  #include "syzygy/block_graph/basic_block.h"
  31    :  #include "syzygy/block_graph/basic_block_subgraph.h"
  32    :  #include "syzygy/block_graph/block_graph.h"
  33    :  #include "syzygy/block_graph/block_util.h"
  34    :  
  35    :  #include "mnemonics.h"  // NOLINT
  36    :  
  37    :  namespace block_graph {
  38    :  
  39    :  namespace {
  40    :  
  41    :  using block_graph::BasicBlock;
  42    :  using block_graph::BasicBlockReference;
  43    :  using block_graph::BasicBlockReferrer;
  44    :  using block_graph::BasicBlockSubGraph;
  45    :  using block_graph::BlockGraph;
  46    :  using block_graph::Instruction;
  47    :  using block_graph::Successor;
  48    :  using core::Disassembler;
  49    :  
  50    :  typedef BlockGraph::Block Block;
  51    :  typedef BlockGraph::Offset Offset;
  52    :  typedef BlockGraph::Size Size;
  53    :  typedef BasicBlockSubGraph::BBAddressSpace BBAddressSpace;
  54    :  typedef BBAddressSpace::Range Range;
  55    :  typedef BBAddressSpace::RangeMap RangeMap;
  56    :  typedef BBAddressSpace::RangeMapConstIter RangeMapConstIter;
  57    :  typedef BBAddressSpace::RangeMapIter RangeMapIter;
  58    :  
  59    :  // We use a (somewhat) arbitrary value as the disassembly address for a block
  60    :  // so we can tell the difference between a reference to the beginning of the
  61    :  // block (offset=0) and a null address.
  62    :  const size_t kDisassemblyAddress = 65536;
  63    :  
  64    :  // Look up the reference made from an instruction's byte range within the
  65    :  // given block. The reference should start AFTER the instruction starts
  66    :  // and there should be exactly 1 reference in the byte range.
  67    :  // Returns true if the reference was found, false otherwise.
  68    :  bool GetReferenceOfInstructionAt(const Block* block,
  69    :                                   Offset instr_offset,
  70    :                                   Size instr_size,
  71  E :                                   BlockGraph::Reference* ref) {
  72  E :    DCHECK(block != NULL);
  73  E :    DCHECK_LE(0, instr_offset);
  74  E :    DCHECK_LT(0U, instr_size);
  75  E :    DCHECK(ref != NULL);
  76    :  
  77    :    // Find the first reference following the instruction offset.
  78    :    Block::ReferenceMap::const_iterator ref_iter =
  79  E :        block->references().upper_bound(instr_offset);
  80    :  
  81    :    // If no reference is found then we're done.
  82  E :    if (ref_iter == block->references().end())
  83  i :      return false;
  84    :  
  85    :    // If the reference occurs outside the instruction then we're done.
  86  E :    Offset next_instr_offset = instr_offset + instr_size;
  87  E :    if (ref_iter->first >= next_instr_offset)
  88  E :      return false;
  89    :  
  90    :    // Otherwise, the reference should fit into the instruction.
  91    :    CHECK_LE(static_cast<size_t>(next_instr_offset),
  92  E :             ref_iter->first + ref_iter->second.size());
  93    :  
  94    :    // And it should be the only reference in the instruction.
  95  E :    if (ref_iter != block->references().begin()) {
  96  E :      Block::ReferenceMap::const_iterator prev_iter = ref_iter;
  97  E :      --prev_iter;
  98    :      CHECK_GE(static_cast<size_t>(instr_offset),
  99  E :               prev_iter->first + prev_iter->second.size());
 100  E :    }
 101  E :    Block::ReferenceMap::const_iterator next_iter = ref_iter;
 102  E :    ++next_iter;
 103    :    CHECK(next_iter == block->references().end() ||
 104  E :          next_iter->first >= next_instr_offset);
 105    :  
 106  E :    *ref = ref_iter->second;
 107  E :    return true;
 108  E :  }
 109    :  
 110    :  }  // namespace
 111    :  
 112    :  BasicBlockDecomposer::BasicBlockDecomposer(const BlockGraph::Block* block,
 113    :                                             BasicBlockSubGraph* subgraph)
 114    :      : Disassembler(block->data(),
 115    :                     block->size(),
 116    :                     AbsoluteAddress(kDisassemblyAddress),
 117    :                     Disassembler::InstructionCallback()),
 118    :        block_(block),
 119    :        subgraph_(subgraph),
 120    :        current_block_start_(0),
 121  E :        check_decomposition_results_(true) {
 122    :    // TODO(rogerm): Once we're certain this is stable for all input binaries
 123    :    //     turn on check_decomposition_results_ by default only ifndef NDEBUG.
 124  E :    DCHECK(block != NULL);
 125  E :    DCHECK(block->type() == BlockGraph::CODE_BLOCK);
 126  E :    DCHECK(CodeBlockAttributesAreBasicBlockSafe(block));
 127  E :    DCHECK(subgraph != NULL);
 128  E :  }
 129    :  
 130  E :  bool BasicBlockDecomposer::Decompose() {
 131  E :    DCHECK(subgraph_->basic_blocks().empty());
 132  E :    DCHECK(subgraph_->block_descriptions().empty());
 133  E :    DCHECK(subgraph_->original_address_space().empty());
 134  E :    subgraph_->set_original_block(block_);
 135    :  
 136  E :    InitUnvisitedAndJumpTargets();
 137    :  
 138  E :    WalkResult result = Walk();
 139    :    if (result != Disassembler::kWalkSuccess &&
 140  E :        result != Disassembler::kWalkIncomplete) {
 141  i :      return false;
 142    :    }
 143    :  
 144    :    typedef BasicBlockSubGraph::BlockDescription BlockDescription;
 145  E :    subgraph_->block_descriptions().push_back(BlockDescription());
 146  E :    BlockDescription& desc = subgraph_->block_descriptions().back();
 147  E :    desc.name = block_->name();
 148  E :    desc.type = block_->type();
 149  E :    desc.alignment = block_->alignment();
 150  E :    desc.attributes = block_->attributes();
 151  E :    desc.section = block_->section();
 152    :  
 153  E :    Offset offset = 0;
 154  E :    RangeMapConstIter it = subgraph_->original_address_space().begin();
 155  E :    for (; it != subgraph_->original_address_space().end(); ++it) {
 156  E :      DCHECK_EQ(it->first.start(), offset);
 157  E :      desc.basic_block_order.push_back(it->second);
 158  E :      offset += it->first.size();
 159  E :    }
 160    :  
 161  E :    return true;
 162  E :  }
 163    :  
 164  E :  void BasicBlockDecomposer::InitUnvisitedAndJumpTargets() {
 165  E :    jump_targets_.clear();
 166    :    // We initialize our jump_targets_ and unvisited sets to the set of
 167    :    // referenced code locations. This covers all locations which are
 168    :    // externally referenced, as well as those that are internally referenced
 169    :    // via a branching instruction or jump table.
 170    :    BlockGraph::Block::ReferrerSet::const_iterator ref_iter =
 171  E :        block_->referrers().begin();
 172  E :    for (; ref_iter != block_->referrers().end(); ++ref_iter) {
 173  E :      BlockGraph::Reference ref;
 174  E :      bool found = ref_iter->first->GetReference(ref_iter->second, &ref);
 175  E :      DCHECK(found);
 176  E :      DCHECK_EQ(block_, ref.referenced());
 177  E :      DCHECK_LE(0, ref.base());
 178  E :      DCHECK_LT(static_cast<size_t>(ref.base()), block_->size());
 179  E :      DCHECK_EQ(ref.base(), ref.offset());
 180    :  
 181    :      // Look for the first label past the reference. Back up if we can to the
 182    :      // previous label.
 183    :      BlockGraph::Block::LabelMap::const_iterator label_iter =
 184  E :          block_->labels().upper_bound(ref.base());
 185  E :      if (label_iter != block_->labels().begin())
 186  E :        --label_iter;
 187    :  
 188    :      // If there is no previous label, or it is not a data label, then this is
 189    :      // a safe jump target.
 190    :      if (label_iter == block_->labels().end() ||
 191    :          label_iter->first > ref.offset() ||
 192  E :          !label_iter->second.has_attributes(BlockGraph::DATA_LABEL)) {
 193  E :        AbsoluteAddress addr(code_addr_ + ref.base());
 194  E :        Unvisited(addr);
 195  E :        jump_targets_.insert(addr);
 196    :      }
 197  E :    }
 198  E :  }
 199    :  
 200    :  Disassembler::CallbackDirective BasicBlockDecomposer::OnInstruction(
 201  E :      AbsoluteAddress addr, const _DInst& inst) {
 202  E :    Offset offset = addr - code_addr_;
 203    :  
 204    :    // If this instruction has run into known data, then we have a problem in
 205    :    // the decomposer.
 206  E :    BlockGraph::Label label;
 207    :    CHECK(!block_->GetLabel(offset, &label) ||
 208  E :          !label.has_attributes(BlockGraph::DATA_LABEL))
 209    :        << "Disassembling into data at offset " << offset << " of "
 210    :        << block_->name() << ".";
 211    :  
 212  E :    VLOG(3) << "Disassembled " << GET_MNEMONIC_NAME(inst.opcode)
 213    :            << " instruction (" << static_cast<int>(inst.size)
 214    :            << " bytes) at offset " << offset << ".";
 215    :  
 216    :    current_instructions_.push_back(
 217  E :        Instruction(inst, offset, inst.size, code_ + offset));
 218  E :    if (label.IsValid())
 219  E :      current_instructions_.back().set_label(label);
 220    :  
 221    :    // If continuing this basic-block would disassemble into known data then
 222    :    // end the current basic-block.
 223    :    if (block_->GetLabel(offset + inst.size, &label) &&
 224  E :        label.has_attributes(BlockGraph::DATA_LABEL)) {
 225  E :      return kDirectiveTerminatePath;
 226    :    }
 227    :  
 228    :    // If this instruction is a call to a non-returning function, then this is
 229    :    // essentially a control flow operation, and we need to end this basic block.
 230    :    // We'll schedule the disassembly of any instructions which follow it as
 231    :    // a separate basic block, and mark that basic block as unreachable in a
 232    :    // post pass.
 233    :    if (META_GET_FC(inst.meta) == FC_CALL &&
 234  E :        (inst.ops[0].type == O_PC || inst.ops[0].type == O_DISP)) {
 235  E :      BlockGraph::Reference ref;
 236  E :      bool found = GetReferenceOfInstructionAt(block_, offset, inst.size, &ref);
 237  E :      CHECK(found);
 238    :      if (Instruction::CallsNonReturningFunction(inst, ref.referenced(),
 239  E :                                                 ref.offset())) {
 240  E :        Unvisited(addr + inst.size);
 241  E :        return kDirectiveTerminatePath;
 242    :      }
 243    :    }
 244    :  
 245  E :    return kDirectiveContinue;
 246  E :  }
 247    :  
 248    :  Disassembler::CallbackDirective BasicBlockDecomposer::OnBranchInstruction(
 249  E :      AbsoluteAddress addr, const _DInst& inst, AbsoluteAddress dest) {
 250    :    // Note: Both addr and dest are fabricated addresses. The code_addr_ has
 251    :    //     been selected such that addr will never be 0; similarly, dest should
 252    :    //     only be 0 for control flow instructions having no explicit destination.
 253    :    //     Do not use dest to resolve the destination, instead find the
 254    :    //     corresponding reference in the byte range of the original instruction.
 255    :  
 256    :    // The branch instruction should have already been appended to the
 257    :    // instruction list.
 258    :    DCHECK_EQ(0, ::memcmp(&current_instructions_.back().representation(),
 259    :                          &inst,
 260  E :                          sizeof(inst)));
 261    :  
 262    :    // Make sure we understand the branching condition. If we don't, then there's
 263    :    // an instruction we have failed to consider.
 264  E :    Successor::Condition condition = Successor::OpCodeToCondition(inst.opcode);
 265  E :    CHECK_NE(Successor::kInvalidCondition, condition)
 266    :        << "Received unknown condition for branch instruction: "
 267    :        << GET_MNEMONIC_NAME(inst.opcode) << ".";
 268    :  
 269    :    // If this is a conditional branch add the inverse conditional successor to
 270    :    // represent the fall-through. If we don't understand the inverse, then
 271    :    // there's an instruction we have failed to consider.
 272  E :    if (META_GET_FC(inst.meta) == FC_CND_BRANCH) {
 273    :      Successor::Condition inverse_condition =
 274  E :          Successor::InvertCondition(condition);
 275  E :      CHECK_NE(Successor::kInvalidCondition, inverse_condition)
 276    :          << "Non-invertible condition seen for branch instruction: "
 277    :          << GET_MNEMONIC_NAME(inst.opcode) << ".";
 278    :  
 279    :      // Create an (unresolved) successor pointing to the next instruction.
 280    :      current_successors_.push_front(
 281    :          Successor(inverse_condition,
 282    :                    (addr + inst.size) - code_addr_,
 283    :                    BasicBlock::kNoOffset,
 284  E :                    0));
 285  E :      jump_targets_.insert(addr + inst.size);
 286    :    }
 287    :  
 288    :    // Note that some control flow instructions have no explicit target (for
 289    :    // example, RET, SYS* and computed branches); for these dest will be 0.
 290    :    // We do not explicitly model these with successor relationships. Instead,
 291    :    // we leave the instruction (and its corresponding references, in the case
 292    :    // of computed jumps) intact and move on.
 293  E :    if (dest.value() != 0) {
 294    :      // Take the last instruction out of the instruction list, we'll represent
 295    :      // it as a successor instead.
 296  E :      Successor::Offset instr_offset = current_instructions_.back().offset();
 297  E :      Successor::Size instr_size = current_instructions_.back().size();
 298  E :      BlockGraph::Label instr_label = current_instructions_.back().label();
 299  E :      current_instructions_.pop_back();
 300  E :      DCHECK_EQ(addr - code_addr_, instr_offset);
 301  E :      DCHECK_EQ(inst.size, instr_size);
 302    :  
 303    :      // Figure out where the branch is going by finding the reference that's
 304    :      // inside the instruction's byte range.
 305  E :      BlockGraph::Reference ref;
 306    :      bool found = GetReferenceOfInstructionAt(
 307  E :          block_, instr_offset, instr_size, &ref);
 308    :  
 309    :      // Create the appropriate successor depending on whether or not the target
 310    :      // is intra- or inter-block.
 311  E :      if (!found || ref.referenced() == block_) {
 312    :        // This is an intra-block reference. The target basic block may not
 313    :        // exist yet, so we'll defer patching up this reference until later.
 314  E :        Offset target_offset = dest - code_addr_;
 315  E :        CHECK_LE(0, target_offset);
 316  E :        CHECK_LT(static_cast<size_t>(target_offset), code_size_);
 317    :        current_successors_.push_front(
 318  E :            Successor(condition, target_offset, instr_offset, instr_size));
 319  E :        jump_targets_.insert(dest);
 320  E :      } else {
 321    :        // This is an inter-block jump. We can create a fully resolved reference.
 322    :        BasicBlockReference bb_ref(
 323  E :            ref.type(), ref.size(), ref.referenced(), ref.offset(), ref.base());
 324    :        current_successors_.push_front(
 325  E :            Successor(condition, bb_ref, instr_offset, instr_size));
 326    :      }
 327    :  
 328  E :      if (instr_label.IsValid())
 329  E :        current_successors_.front().set_label(instr_label);
 330  E :    }
 331    :  
 332    :    // This marks the end of a basic block. Note that the disassembler will
 333    :    // handle ending the instruction run and beginning a new one for the next
 334    :    // basic block (including the branch-not-taken arc).
 335  E :    return kDirectiveContinue;
 336  E :  }
 337    :  
 338    :  // Called every time disassembly is started from a new address. Will be
 339    :  // called for at least every address in unvisited_.
 340    :  Disassembler::CallbackDirective BasicBlockDecomposer::OnStartInstructionRun(
 341  E :      AbsoluteAddress start_address) {
 342    :    // The address of the beginning of the current basic block.
 343  E :    current_block_start_ = start_address;
 344  E :    DCHECK(current_instructions_.empty());
 345  E :    DCHECK(current_successors_.empty());
 346  E :    return kDirectiveContinue;
 347  E :  }
 348    :  
 349    :  // Called when a walk from a given entry point has terminated.
 350    :  Disassembler::CallbackDirective BasicBlockDecomposer::OnEndInstructionRun(
 351  E :      AbsoluteAddress addr, const _DInst& inst, ControlFlowFlag control_flow) {
 352    :    // If an otherwise straight run of instructions is split because it crosses
 353    :    // a basic block boundary we need to set up the implicit control flow arc
 354    :    // here.
 355  E :    if (control_flow == kControlFlowContinues) {
 356  E :      DCHECK(current_successors_.empty());
 357  E :      DCHECK(!current_instructions_.empty());
 358  E :      DCHECK(!current_instructions_.back().IsImplicitControlFlow());
 359    :  
 360    :      current_successors_.push_front(
 361    :          Successor(Successor::kConditionTrue,
 362    :                    (addr + inst.size) - code_addr_,  // To be resolved later.
 363    :                    BasicBlock::kNoOffset,
 364  E :                    0));
 365    :    }
 366    :  
 367    :    // We have reached the end of the current walk or we handled a conditional
 368    :    // branch. Let's mark this as the end of a basic block.
 369  E :    size_t basic_block_size = addr - current_block_start_ + inst.size;
 370  E :    DCHECK_LT(0U, basic_block_size);
 371    :    if (!InsertBasicBlockRange(current_block_start_,
 372    :                               basic_block_size,
 373  E :                               BasicBlock::BASIC_CODE_BLOCK)) {
 374  i :      return kDirectiveAbort;
 375    :    }
 376    :  
 377  E :    return kDirectiveContinue;
 378  E :  }
 379    :  
 380  E :  Disassembler::CallbackDirective BasicBlockDecomposer::OnDisassemblyComplete() {
 381    :    // Split code blocks at branch targets.
 382  E :    if (!SplitCodeBlocksAtBranchTargets()) {
 383  i :      LOG(ERROR) << "Failed to split code blocks at branch targets.";
 384  i :      return kDirectiveAbort;
 385    :    }
 386    :  
 387    :    // By this point, we should have basic blocks for all visited code.
 388  E :    CheckAllJumpTargetsStartABasicCodeBlock();
 389    :  
 390    :    // Demarcate the data basic blocks. There should be no overlap with code.
 391  E :    if (!FillInDataBlocks()) {
 392  i :      LOG(ERROR) << "Failed to fill in data basic-block ranges.";
 393  i :      return kDirectiveAbort;
 394    :    }
 395    :  
 396    :    // We may not have covered some ranges of the macro block. For all such
 397    :    // ranges, build basic blocks and mark them as padding. This might
 398    :    // include unreachable code in unoptimized input binaries.
 399  E :    if (!FillInPaddingBlocks()) {
 400  i :      LOG(ERROR) << "Failed to fill in padding basic-block ranges.";
 401  i :      return kDirectiveAbort;
 402    :    }
 403    :  
 404    :    // We should now have contiguous block ranges that cover every byte in the
 405    :    // macro block. Verify that this is so.
 406  E :    CheckHasCompleteBasicBlockCoverage();
 407    :  
 408    :    // We should have propagated all of the labels in the original block into
 409    :    // the basic-block subgraph.
 410  E :    CheckAllLabelsArePreserved();
 411    :  
 412    :    // Populate the referrers in the basic block data structures by copying
 413    :    // them from the original source block.
 414  E :    if (!CopyExternalReferrers()) {
 415  i :      LOG(ERROR) << "Failed to populate basic-block referrers.";
 416  i :      return kDirectiveAbort;
 417    :    }
 418    :  
 419    :    // Populate the references in the basic block data structures by copying
 420    :    // them from the original source block. This does not handle the successor
 421    :    // references.
 422  E :    if (!CopyReferences()) {
 423  i :      LOG(ERROR) << "Failed to populate basic-block references.";
 424  i :      return kDirectiveAbort;
 425    :    }
 426    :  
  427    :    // Wire up the basic-block successors. These are not handled by
 428    :    // CopyReferences(), above.
 429  E :    if (!ResolveSuccessors()) {
 430  i :      LOG(ERROR) << "Failed to resolve basic-block successors.";
 431  i :      return kDirectiveAbort;
 432    :    }
 433    :  
 434    :    // All the control flow we have derived should be valid.
 435  E :    CheckAllControlFlowIsValid();
 436    :  
 437    :    // ... and we're done.
 438  E :    return kDirectiveContinue;
 439  E :  }
 440    :  
 441  E :  void BasicBlockDecomposer::CheckAllJumpTargetsStartABasicCodeBlock() const {
 442  E :    if (!check_decomposition_results_)
 443  i :      return;
 444    :  
 445  E :    AddressSet::const_iterator addr_iter(jump_targets_.begin());
 446  E :    for (; addr_iter != jump_targets_.end(); ++addr_iter) {
 447    :      // The target basic-block should be a code basic-block.
 448  E :      BasicBlock* target_bb = subgraph_->GetBasicBlockAt(*addr_iter - code_addr_);
 449  E :      CHECK(target_bb != NULL);
 450  E :      CHECK_EQ(BasicBlock::BASIC_CODE_BLOCK, target_bb->type());
 451  E :    }
 452  E :  }
 453    :  
 454  E :  void BasicBlockDecomposer::CheckHasCompleteBasicBlockCoverage() const {
 455  E :    if (!check_decomposition_results_)
 456  i :      return;
 457    :  
 458    :    const BBAddressSpace& basic_block_address_space =
 459  E :        subgraph_->original_address_space();
 460    :  
 461    :    // Walk through the basic-block address space.
 462  E :    Offset next_start = 0;
 463  E :    RangeMapConstIter it(basic_block_address_space.begin());
 464  E :    for (; it != basic_block_address_space.end(); ++it) {
 465  E :      CHECK_EQ(it->first.start(), next_start);
 466  E :      CHECK_EQ(it->first.size(), it->second->size());
 467  E :      next_start += it->first.size();
 468  E :    }
 469    :  
 470    :    // At this point, if there were no gaps, next start will be the same as the
 471    :    // full size of the block we're decomposing.
 472  E :    CHECK_EQ(code_size_, static_cast<size_t>(next_start));
 473  E :  }
 474    :  
 475  E :  void BasicBlockDecomposer::CheckAllControlFlowIsValid() const {
 476  E :    if (!check_decomposition_results_)
 477  i :      return;
 478    :  
 479    :    // Check that the subgraph is valid. This will make sure that the
 480    :    // instructions and successors generally make sense.
 481  E :    CHECK(subgraph_->IsValid());
 482    :  
 483    :    // The only thing left to check is that synthesized flow-through
 484    :    // successors refer to the adjacent basic-blocks.
 485  E :    RangeMapConstIter it(subgraph_->original_address_space().begin());
 486  E :    for (; it != subgraph_->original_address_space().end(); ++it) {
 487  E :      const BasicBlock* bb = it->second;
 488  E :      if (bb->type() != BasicBlock::BASIC_CODE_BLOCK)
 489  E :        continue;
 490    :  
 491  E :      const BasicBlock::Instructions& instructions = bb->instructions();
 492  E :      const BasicBlock::Successors& successors = bb->successors();
 493    :  
 494    :      // There may be at most 2 successors.
 495  E :      switch (successors.size()) {
 496    :        case 0:
 497  E :          break;
 498    :  
 499    :        case 1:
 500    :          // If the successor is synthesized, then flow is from this basic-block
 501    :          // to the next adjacent one.
 502  E :          if (successors.back().instruction_offset() == -1) {
 503  E :            RangeMapConstIter next(it);
 504  E :            ++next;
 505  E :            CHECK(next != subgraph_->original_address_space().end());
 506  E :            CHECK_EQ(successors.back().reference().basic_block(), next->second);
 507  E :          }
 508  E :          break;
 509    :  
 510    :        case 2: {
 511    :          // Exactly one of the successors should have been synthesized.
 512  E :          bool front_synthesized = successors.front().instruction_offset() == -1;
 513  E :          bool back_synthesized = successors.back().instruction_offset() == -1;
 514  E :          CHECK_NE(front_synthesized, back_synthesized);
 515    :  
 516    :          // The synthesized successor flows from this basic-block to the next
 517    :          // adjacent one.
 518    :          const Successor& synthesized =
 519  E :              front_synthesized ? successors.front() : successors.back();
 520  E :          RangeMapConstIter next(it);
 521  E :          ++next;
 522  E :          CHECK(next != subgraph_->original_address_space().end());
 523  E :          CHECK_EQ(synthesized.reference().basic_block(), next->second);
 524  E :          break;
 525    :        }
 526    :  
 527    :        default:
 528  i :          NOTREACHED();
 529    :      }
 530  E :    }
 531  E :  }
 532    :  
 533  E :  void BasicBlockDecomposer::CheckAllLabelsArePreserved() const {
 534  E :    if (!check_decomposition_results_)
 535  i :      return;
 536    :  
 537  E :    const Block* original_block = subgraph_->original_block();
 538  E :    if (original_block == NULL)
 539  i :      return;
 540    :  
 541  E :    const Block::LabelMap original_labels = original_block->labels();
 542  E :    if (original_labels.empty())
 543  i :      return;
 544    :  
 545    :    // A map to track which labels (by offset) have been found in the subgraph.
 546  E :    std::map<Offset, bool> labels_found;
 547    :  
 548    :    // Initialize the map of labels found in the subgraph.
 549  E :    Block::LabelMap::const_iterator label_iter = original_labels.begin();
 550  E :    for (; label_iter != original_labels.end(); ++label_iter)
 551  E :      labels_found.insert(std::make_pair(label_iter->first, false));
 552    :  
 553    :    // Walk through the subgraph and mark all of the labels found.
 554    :    BasicBlockSubGraph::BBCollection::const_iterator bb_iter =
 555  E :        subgraph_->basic_blocks().begin();
 556  E :    for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
 557    :      // Account for labels attached to basic-blocks.
 558  E :      const BasicBlock& bb = bb_iter->second;
 559  E :      if (bb.has_label()) {
 560  E :        BlockGraph::Label label;
 561  E :        CHECK(original_block->GetLabel(bb.offset(), &label));
 562  E :        CHECK(bb.label() == label);
 563  E :        labels_found[bb.offset()] = true;
 564  E :      }
 565    :  
 566    :      // Account for labels attached to instructions.
 567    :      BasicBlock::Instructions::const_iterator inst_iter =
 568  E :          bb.instructions().begin();
 569  E :      for (; inst_iter != bb.instructions().end(); ++inst_iter) {
 570  E :        const Instruction& inst = *inst_iter;
 571  E :        if (inst.has_label()) {
 572  E :          BlockGraph::Label label;
 573  E :          CHECK(original_block->GetLabel(inst.offset(), &label));
 574  E :          CHECK(inst.label() == label);
 575  E :          labels_found[inst.offset()] = true;
 576  E :        }
 577  E :      }
 578    :  
 579    :      // Account for labels attached to successors.
 580    :      BasicBlock::Successors::const_iterator succ_iter =
 581  E :          bb.successors().begin();
 582  E :      for (; succ_iter != bb.successors().end(); ++succ_iter) {
 583  E :        const Successor& succ = *succ_iter;
 584  E :        if (succ.has_label()) {
 585  E :          BlockGraph::Label label;
 586  E :          CHECK_NE(BasicBlock::kNoOffset, succ.instruction_offset());
 587  E :          CHECK(original_block->GetLabel(succ.instruction_offset(), &label));
 588  E :          CHECK(succ.label() == label);
 589  E :          labels_found[succ.instruction_offset()] = true;
 590  E :        }
 591  E :      }
 592  E :    }
 593    :  
 594    :    // We should have the right number of labels_found (check if we added
 595    :    // something to the wrong place).
 596  E :    CHECK_EQ(original_labels.size(), labels_found.size());
 597    :  
 598    :    // Make sure all of the items in labels_found have been set to true.
 599  E :    std::map<Offset, bool>::const_iterator found_iter = labels_found.begin();
 600  E :    for (; found_iter != labels_found.end(); ++found_iter) {
 601  E :      CHECK(found_iter->second);
 602  E :    }
 603  E :  }
 604    :  
 605    :  bool BasicBlockDecomposer::InsertBasicBlockRange(AbsoluteAddress addr,
 606    :                                                   size_t size,
 607  E :                                                   BasicBlockType type) {
 608  E :    DCHECK(type == BasicBlock::BASIC_CODE_BLOCK || current_instructions_.empty());
 609  E :    DCHECK(type == BasicBlock::BASIC_CODE_BLOCK || current_successors_.empty());
 610    :  
 611  E :    BasicBlock::Offset offset = addr - code_addr_;
 612  E :    DCHECK_LE(0, offset);
 613    :  
 614    :    // Find or create a name for this basic block. Reserve the label, if any,
 615    :    // to propagate to the basic block if there are no instructions in the
 616    :    // block to carry the label(s).
 617  E :    BlockGraph::Label label;
 618  E :    std::string basic_block_name;
 619  E :    if (block_->GetLabel(offset, &label)) {
 620  E :      basic_block_name = label.ToString();
 621  E :    } else {
 622    :      basic_block_name =
 623    :          base::StringPrintf("<anonymous-%04X-%s>",
 624    :                             addr.value(),
 625  E :                             BasicBlock::BasicBlockTypeToString(type));
 626    :    }
 627    :  
 628    :    // Create the basic block.
 629    :    BasicBlock* new_basic_block = subgraph_->AddBasicBlock(
 630  E :        basic_block_name, type, offset, size, code_ + offset);
 631  E :    if (new_basic_block == NULL)
 632  i :      return false;
 633    :  
 634    :    // Code basic-blocks carry their labels in their instructions and successors.
 635    :    // Data basic-blocks carry their labels at the head of the basic blocks.
 636    :    // A padding basic-block might also be labeled if the block contains
 637    :    // unreachable code (for example, INT3 or NOP instructions following a call
 638    :    // to a non-returning function).
 639  E :    if (type != BasicBlock::BASIC_CODE_BLOCK && label.IsValid()) {
 640  E :      new_basic_block->set_label(label);
 641    :    }
 642    :  
 643    :    // Populate code basic-block with instructions and successors.
 644  E :    if (type == BasicBlock::BASIC_CODE_BLOCK) {
 645  E :      new_basic_block->instructions().swap(current_instructions_);
 646  E :      new_basic_block->successors().swap(current_successors_);
 647    :    }
 648    :  
 649  E :    return true;
 650  E :  }
 651    :  
 652  E :  bool BasicBlockDecomposer::SplitCodeBlocksAtBranchTargets() {
 653    :    // TODO(rogerm): Refactor the basic-block splitting inner-function to the
 654    :    //     BasicBlockSubGraph. Note that the subgraph currently maintains a
 655    :    //     picture of the original address space of the source block. This should
 656    :    //     also be factored out; the original address space is only relevant to
 657    :    //     the BasicBlockDecomposer.
 658  E :    AddressSet::const_iterator jump_target_iter(jump_targets_.begin());
 659  E :    for (; jump_target_iter != jump_targets_.end(); ++jump_target_iter) {
 660    :      // Resolve the target basic-block.
 661  E :      Offset target_offset = *jump_target_iter - code_addr_;
 662  E :      BasicBlock* target_bb = NULL;
 663  E :      BBAddressSpace::Range target_bb_range;
 664    :      CHECK(subgraph_->FindBasicBlock(target_offset, &target_bb,
 665  E :                                      &target_bb_range));
 666    :  
 667    :      // If we're jumping to the start of a basic block, there isn't any work
 668    :      // to do.
 669  E :      if (target_offset == target_bb_range.start())
 670  E :        continue;
 671    :  
 672    :      // Otherwise, we have found a basic-block that we need to split. Let's
 673    :      // create a backup copy of the target basic-block and remove the original
 674    :      // from the basic-block address space. We'll replace it with two new
 675    :      // blocks split at the target offset.
 676  E :      size_t left_split_size = target_offset - target_bb_range.start();
 677  E :      BasicBlock target_bb_copy(*target_bb);
 678  E :      subgraph_->original_address_space().Remove(target_bb_range);
 679  E :      subgraph_->basic_blocks().erase(target_bb->id());
 680  E :      target_bb = &target_bb_copy;
 681    :  
 682    :      // Now we split up containing_range into two new ranges and replace
 683    :      // containing_range with the two new entries.
 684    :  
 685    :      // Setup the first "half" of the basic block. Note that we are reusing
 686    :      // current_instructions_ and current_successors_ so that we can use
 687    :      // InsertBlockRange to create the new basic-blocks.
 688  E :      DCHECK(current_instructions_.empty());
 689  E :      DCHECK(current_successors_.empty());
 690    :      while (!target_bb->instructions().empty() &&
 691  E :             target_bb->instructions().front().offset() < target_offset) {
 692    :        current_instructions_.splice(current_instructions_.end(),
 693    :                                     target_bb->instructions(),
 694  E :                                     target_bb->instructions().begin());
 695  E :      }
 696    :  
 697    :      // The next offset (to an instruction or successor) should correspond to
 698    :      // the target offset.
 699  E :      if (!target_bb->instructions().empty()) {
 700  E :        DCHECK_EQ(target_offset, target_bb->instructions().front().offset());
 701  E :      } else {
 702  i :        DCHECK(!target_bb->successors().empty());
 703    :        DCHECK_EQ(target_offset,
 704  i :                  target_bb->successors().front().instruction_offset());
 705    :      }
 706    :  
 707    :      // Set-up the flow-through successor for the first "half".
 708    :      current_successors_.push_back(Successor(
 709  E :          Successor::kConditionTrue, target_offset, BasicBlock::kNoOffset, 0));
 710    :  
 711    :      // Create the basic-block representing the first "half".
 712    :      if (!InsertBasicBlockRange(code_addr_ + target_bb_range.start(),
 713    :                                 left_split_size,
 714  E :                                 target_bb->type())) {
 715  i :        LOG(ERROR) << "Failed to insert first half of split block.";
 716  i :        return false;
 717    :      }
 718    :  
 719    :      // Create the basic-block representing the second "half".
 720  E :      DCHECK(current_instructions_.empty());
 721  E :      DCHECK(current_successors_.empty());
 722  E :      current_instructions_.swap(target_bb->instructions());
 723  E :      current_successors_.swap(target_bb->successors());
 724    :      if (!InsertBasicBlockRange(code_addr_ + target_offset,
 725    :                                 target_bb_range.size() - left_split_size,
 726  E :                                 target_bb->type())) {
 727  i :        LOG(ERROR) << "Failed to insert second half of split block.";
 728  i :        return false;
 729    :      }
 730  E :    }
 731    :  
 732  E :    return true;
 733  E :  }
 734    :  
 735  E :  bool BasicBlockDecomposer::FillInDataBlocks() {
 736  E :    BlockGraph::Block::LabelMap::const_iterator iter = block_->labels().begin();
 737  E :    BlockGraph::Block::LabelMap::const_iterator end = block_->labels().end();
 738  E :    for (; iter != end; ++iter) {
 739  E :      if (!iter->second.has_attributes(BlockGraph::DATA_LABEL))
 740  E :        continue;
 741    :  
 742  E :      BlockGraph::Block::LabelMap::const_iterator next = iter;
 743  E :      ++next;
 744    :  
 745  E :      BlockGraph::Offset bb_start = iter->first;
 746  E :      BlockGraph::Offset bb_end = (next == end) ? block_->size() : next->first;
 747  E :      size_t bb_size = bb_end - bb_start;
 748  E :      AbsoluteAddress bb_addr(code_addr_ + bb_start);
 749  E :      if (!InsertBasicBlockRange(bb_addr, bb_size, BasicBlock::BASIC_DATA_BLOCK))
 750  i :        return false;
 751  E :    }
 752  E :    return true;
 753  E :  }
 754    :  
 755  E :  bool BasicBlockDecomposer::FillInPaddingBlocks() {
 756    :    BBAddressSpace& basic_block_address_space =
 757  E :        subgraph_->original_address_space();
 758    :  
 759    :    // Add an initial interstitial if needed.
 760    :    size_t interstitial_size = basic_block_address_space.empty() ?
 761  E :        code_size_ : basic_block_address_space.begin()->first.start();
 762  E :    DCHECK_LE(0U, interstitial_size);
 763  E :    if (interstitial_size > 0) {
 764    :      if (!InsertBasicBlockRange(code_addr_,
 765    :                                 interstitial_size,
 766  i :                                 BasicBlock::BASIC_PADDING_BLOCK)) {
 767  i :        LOG(ERROR) << "Failed to insert initial padding block at 0";
 768  i :        return false;
 769    :      }
 770    :    }
 771    :  
 772    :    // Handle all remaining gaps, including the end.
 773  E :    RangeMapConstIter curr_range = basic_block_address_space.begin();
 774  E :    for (; curr_range != basic_block_address_space.end(); ++curr_range) {
 775  E :      RangeMapConstIter next_range = curr_range;
 776  E :      ++next_range;
 777    :      AbsoluteAddress curr_range_end =
 778  E :          code_addr_ + curr_range->first.start() + curr_range->first.size();
 779    :  
 780  E :      interstitial_size = 0;
 781  E :      if (next_range == basic_block_address_space.end()) {
 782  E :        DCHECK_LE(curr_range_end, code_addr_ + code_size_);
 783  E :        interstitial_size = code_addr_ + code_size_ - curr_range_end;
 784  E :      } else {
 785  E :        DCHECK_LE(curr_range_end, code_addr_ + next_range->first.start());
 786    :        interstitial_size =
 787  E :            code_addr_ + next_range->first.start() - curr_range_end;
 788    :      }
 789    :  
 790  E :      if (interstitial_size > 0) {
 791    :        if (!InsertBasicBlockRange(curr_range_end,
 792    :                                   interstitial_size,
 793  E :                                   BasicBlock::BASIC_PADDING_BLOCK)) {
 794  i :          LOG(ERROR) << "Failed to insert padding block at "
 795    :                     << curr_range_end.value();
 796  i :          return false;
 797    :        }
 798    :      }
 799  E :    }
 800    :  
 801  E :    return true;
 802  E :  }
 803    :  
 804  E :  bool BasicBlockDecomposer::CopyExternalReferrers() {
 805  E :    const BlockGraph::Block::ReferrerSet& referrers = block_->referrers();
 806  E :    BlockGraph::Block::ReferrerSet::const_iterator iter = referrers.begin();
 807  E :    for (; iter != referrers.end(); ++iter) {
 808    :      // Find the reference this referrer record describes.
 809  E :      const BlockGraph::Block* referrer = iter->first;
 810  E :      DCHECK(referrer != NULL);
 811    :  
 812    :      // We only care about external referrers. All intra-block referrers and
 813    :      // references will be handled when we populate block_'s references for
 814    :      // basic code/padding blocks, instructions and successors.
 815  E :      if (referrer == block_)
 816  E :        continue;
 817    :  
 818    :      // This is an external referrer. Find the reference in the referring block.
 819  E :      Offset source_offset = iter->second;
 820  E :      BlockGraph::Reference reference;
 821  E :      bool found = referrer->GetReference(source_offset, &reference);
 822  E :      DCHECK(found);
 823    :  
 824    :      // Find the basic block the reference refers to. It can only have an
 825    :      // offset that's different from the base if it's not a code block.
 826  E :      BasicBlock* target_bb = subgraph_->GetBasicBlockAt(reference.base());
 827  E :      DCHECK(target_bb != NULL);
 828    :      DCHECK(reference.base() == reference.offset() ||
 829  E :             target_bb->type() != BasicBlock::BASIC_CODE_BLOCK);
 830    :  
 831    :      // Insert the referrer into the target bb's referrer set. Note that there
 832    :      // is no corresponding reference update to the referring block. The
 833    :      // target bb will track these so a BlockBuilder can properly update
 834    :      // the referrers when merging a subgraph back into the block-graph.
 835    :      bool inserted = target_bb->referrers().insert(
 836  E :          BasicBlockReferrer(referrer, source_offset)).second;
 837  E :      DCHECK(inserted);
 838  E :    }
 839    :  
 840  E :    return true;
 841  E :  }
 842    :  
 843    :  template<typename ItemType>
 844  E :  bool BasicBlockDecomposer::CopyReferences(ItemType* item) {
 845  E :    DCHECK(item != NULL);
 846    :  
 847    :    // Figure out the bounds of item.
 848  E :    BlockGraph::Offset start_offset = item->offset();
 849  E :    BlockGraph::Offset end_offset = start_offset + item->size();
 850    :  
 851    :    // Get iterators encompassing all references within the bounds of item.
 852    :    BlockGraph::Block::ReferenceMap::const_iterator ref_iter =
 853  E :       block_->references().lower_bound(start_offset);
 854    :    BlockGraph::Block::ReferenceMap::const_iterator end_iter =
 855  E :       block_->references().lower_bound(end_offset);
 856    :  
 857  E :    for (; ref_iter != end_iter; ++ref_iter) {
 858    :      // Calculate the local offset of this reference within item.
 859  E :      BlockGraph::Offset local_offset = ref_iter->first - start_offset;
 860  E :      const BlockGraph::Reference& reference = ref_iter->second;
 861    :  
 862    :      // We expect long references for everything except flow control.
 863  E :      CHECK_EQ(4U, reference.size());
 864  E :      DCHECK_LE(local_offset + reference.size(), item->GetMaxSize());
 865    :  
 866  E :      if (reference.referenced() != block_) {
 867    :        // For external references, we can directly reference the other block.
 868    :        bool inserted = item->SetReference(
 869    :            local_offset,
 870    :            BasicBlockReference(reference.type(), reference.size(),
 871    :                                reference.referenced(), reference.offset(),
 872  E :                                reference.base()));
 873  E :        DCHECK(inserted);
 874  E :      } else {
 875    :        // For intra block_ references, find the corresponding basic block in
 876    :        // the basic block address space.
 877  E :        BasicBlock* target_bb = subgraph_->GetBasicBlockAt(reference.base());
 878  E :        DCHECK(target_bb != NULL);
 879    :  
 880    :        // Create target basic-block relative values for the base and offset.
 881  E :        CHECK_EQ(reference.offset(), reference.base());
 882    :  
 883    :        // Insert a reference to the target basic block.
 884    :        bool inserted = item->SetReference(
 885    :            local_offset,
 886  E :            BasicBlockReference(reference.type(), reference.size(), target_bb));
 887  E :        DCHECK(inserted);
 888    :      }
 889  E :    }
 890  E :    return true;
 891  E :  }
 892    :  
 893  E :  bool BasicBlockDecomposer::CopyReferences() {
 894    :    // Copy the references for the source range of each basic-block (by
 895    :    // instruction for code basic-blocks). External referrers and successors are
 896    :    // handled in separate passes.
 897    :    BasicBlockSubGraph::BBCollection::iterator bb_iter =
 898  E :        subgraph_->basic_blocks().begin();
 899  E :    for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
 900  E :      BasicBlock* bb = &bb_iter->second;
 901  E :      if (bb->type() == BasicBlock::BASIC_CODE_BLOCK) {
 902  E :        BasicBlock::Instructions::iterator inst_iter = bb->instructions().begin();
 903  E :        for (; inst_iter != bb->instructions().end(); ++inst_iter) {
 904  E :          if (!CopyReferences(&(*inst_iter)))
 905  i :            return false;
 906  E :        }
 907  E :      } else {
 908  E :        if (!CopyReferences(bb))
 909  i :          return false;
 910    :      }
 911  E :    }
 912  E :    return true;
 913  E :  }
 914    :  
 915  E :  bool BasicBlockDecomposer::ResolveSuccessors() {
 916    :    BasicBlockSubGraph::BBCollection::iterator bb_iter =
 917  E :        subgraph_->basic_blocks().begin();
 918  E :    for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
 919    :      // Only code basic-blocks have successors and instructions.
 920  E :      BasicBlock* bb = &bb_iter->second;
 921  E :      if (bb->type() != BasicBlock::BASIC_CODE_BLOCK) {
 922  E :        DCHECK(bb->successors().empty());
 923  E :        DCHECK(bb->instructions().empty());
 924  E :        continue;
 925    :      }
 926    :  
 927  E :      BasicBlock::Successors::iterator succ_iter = bb->successors().begin();
 928  E :      BasicBlock::Successors::iterator succ_iter_end = bb->successors().end();
 929  E :      for (; succ_iter != succ_iter_end; ++succ_iter) {
 930  E :        if (succ_iter->reference().IsValid()) {
 931    :          // The branch target is already resolved; it must have been inter-block.
 932  E :          DCHECK(succ_iter->reference().block() != block_);
 933  E :          DCHECK(succ_iter->reference().block() != NULL);
 934  E :          continue;
 935    :        }
 936    :  
 937    :        // Find the basic block the successor references.
 938    :        BasicBlock* target_bb =
 939  E :            subgraph_->GetBasicBlockAt(succ_iter->bb_target_offset());
 940  E :        DCHECK(target_bb != NULL);
 941    :  
 942    :        // We transform all successor branches into 4-byte pc-relative targets.
 943    :        bool inserted = succ_iter->SetReference(
 944  E :            BasicBlockReference(BlockGraph::PC_RELATIVE_REF, 4, target_bb));
 945  E :        DCHECK(inserted);
 946  E :        DCHECK(succ_iter->reference().IsValid());
 947  E :      }
 948  E :    }
 949    :  
 950  E :    return true;
 951  E :  }
 952    :  
 953    :  }  // namespace block_graph

Coverage information generated Thu Sep 06 11:30:46 2012.