Coverage for /Syzygy/block_graph/basic_block_decomposer.h

CoverageLines executed / instrumented / missingexe / inst / missLanguageGroup
0.0%0057.C++source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // A class that attempts to disassemble a function into basic blocks.
  16    :  //
  17    :  // Given a function block (dubbed macro block), this disassembler attempts to
  18    :  // cut it up into sequences of contiguous instruction runs and data blocks. A
  19    :  // contiguous instruction run is defined as a set of instructions that under
  20    :  // normal operation will always run from start to end. This class requires that
  21    :  // all external references to addresses within a function block have an
  22    :  // associated label.
  23    :  
  24    :  #ifndef SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
  25    :  #define SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
  26    :  
  27    :  #include <set>
  28    :  #include <string>
  29    :  
  30    :  #include "base/basictypes.h"
  31    :  #include "base/callback.h"
  32    :  #include "base/string_piece.h"
  33    :  #include "syzygy/block_graph/basic_block.h"
  34    :  #include "syzygy/block_graph/basic_block_subgraph.h"
  35    :  #include "syzygy/block_graph/block_graph.h"
  36    :  #include "syzygy/core/address.h"
  37    :  #include "syzygy/core/disassembler.h"
  38    :  #include "distorm.h"  // NOLINT
  39    :  
  40  m :  namespace block_graph {
  41    :  
  42    :  // This class re-disassembles an already-processed code block (referred to
  43    :  // herein as a macro block) and breaks it up into basic blocks.
  44    :  //
  45    :  // A basic block is defined here as one of:
  46    :  //
  47    :  // 1) A series of code instructions that will be executed contiguously.
  48    :  // 2) A chunk of data (as determined by it being labeled as such).
  49    :  // 3) Padding (or unreachable code)
  50    :  //
  51    :  // The break-down into basic blocks happens in six passes:
  52    :  //
  53    :  // 1) Code disassembly starting from the block's inbound code references. This
  54    :  //    carves all of the basic code blocks and creates control flow (successor)
  55    :  //    relationships. While the basic blocks are being created, intra-block
  56    :  //    successors cannot be resolved and are instead referenced by block offset;
  57    :  //    inter-block successors are immediately resolved.
  58    :  // 2) Data block construction to carve out statically embedded data.
  59    :  // 3) Padding block construction to fill any gaps.
  60    :  // 4) Copying all inbound references (referrers) to their corresponding
  61    :  //    basic block.
  62    :  // 5) Copying all references originating in the block to their corresponding
  63    :  //    basic block.
  64    :  // 6) Wiring up all unresolved intra-block successors.
  65    :  //
  66    :  // The block to be decomposed must have been produced by a successful
  67    :  // PE decomposition (or some facsimile thereof).
  68  m :  class BasicBlockDecomposer {
  69  m :   public:
  70  m :    typedef core::AbsoluteAddress AbsoluteAddress;
  71  m :    typedef block_graph::BasicBlock BasicBlock;
  72  m :    typedef BasicBlock::BasicBlockType BasicBlockType;
  73  m :    typedef block_graph::BlockGraph BlockGraph;
  74  m :    typedef BlockGraph::Offset Offset;
  75  m :    typedef block_graph::BasicBlockSubGraph BasicBlockSubGraph;
  76    :  
  77    :    // Initialize the BasicBlockDecomposer instance.
  78    :    // @param block The block to be decomposed
  79    :    // @param subgraph The basic-block sub-graph data structure to populate.
  80    :    // @pre block is safe for basic-block decomposition. That is,
  81    :    //     CodeBlockAttributesAreBasicBlockSafe(block) returns true.
  82  m :    BasicBlockDecomposer(const BlockGraph::Block* block,
  83  m :                         BasicBlockSubGraph* subgraph);
  84    :  
  85    :    // Decomposes a function macro block into its constituent basic blocks.
  86    :    //
  87    :    // Immediately following a successful decomposition of a block to
  88    :    // basic-blocks, subgraph will contain all the basic-blocks found in the
  89    :    // source block and exactly one block description: that of the source block.
  90    :    //
  91    :    // Following decomposition, additional block descriptions can be created,
  92    :    // new basic blocks added, and basic blocks shuffled between the descriptions.
  93    :    // The subgraph can then be coalesced back into the BlockGraph from
  94    :    // which the original block came.
  95  m :    bool Decompose();
  96    :  
  97  m :   protected:
  98  m :    typedef std::map<Offset, BasicBlockReference> BasicBlockReferenceMap;
  99  m :    typedef core::AddressSpace<Offset, size_t, BasicBlock*> BBAddressSpace;
 100  m :    typedef BlockGraph::Block::SourceRange SourceRange;
 101  m :    typedef BlockGraph::Size Size;
 102  m :    typedef std::set<Offset> JumpTargets;
 103    :  
 104    :    // Returns the source range that coincides with the data range
 105    :    // [@p offset, [@p offset + @p size) in the original block.
 106  m :    SourceRange GetSourceRange(Offset offset, Size size) const;
 107    :  
 108    :    // Find the basic block, and corresponding byte-range, that contains the
 109    :    // given offset.
 110    :    // @param offset the starting offset you with the returned basic-block/range
 111    :    //     to contain.
 112    :    // @param basic_block the basic-block containing @p offset.
 113    :    // @param range the byte-range in which @p basic_offset resides, which
 114    :    //     contains @p offset.
 115  m :    bool FindBasicBlock(Offset offset,
 116  m :                        BasicBlock** basic_block,
 117  m :                        BBAddressSpace::Range* range) const;
 118    :  
 119    :    // Find the basic block that begins at the given offset.
 120    :    // @param offset The starting offset of the basic block you want to find.
 121    :    // @pre The basic block subgraph is derived from an original block (and
 122    :    //     thus has an address space) and has been broken down into all of its
 123    :    //     constituent basic blocks (i.e., post disassembly and basic-block
 124    :    //     splitting).
 125  m :    BasicBlock* GetBasicBlockAt(Offset offset) const;
 126    :  
 127    :    // Helper function to end the current basic block and begin a new one
 128    :    // at @p offset. This is only to be used during disassembly.
 129  m :    bool EndCurrentBasicBlock(Offset end_offset);
 130    :  
 131    :    // Walk the function's code in a linear fashion, decomposing the block into
 132    :    // code and data basic blocks forming an original address space.
 133  m :    bool Disassemble();
 134    :  
 135    :    // Performs an initial linear pass at disassembling the code bytes of the
 136    :    // block into rudimentary basic blocks. The initial set of basic blocks are
 137    :    // each terminated at branch points. A subsequent pass will further split
 138    :    // basic blocks at branch destinations, see SplitCodeBlocksAtBranchTargets().
 139  m :    bool ParseInstructions();
 140    :  
 141    :    // @name Helpers for ParseInstructions().
 142    :    // @{
 143    :  
 144    :    // Initializes jump_targets_ to the set of referenced code locations.
 145    :    // This covers all locations which are externally referenced, as well as
 146    :    // those that are internally referenced via jump tables. These jump targets
 147    :    // may be otherwise un-discoverable through disassembly.
 148    :    // @param code_begin_offset the offset at which code bytes begin.
 149    :    // @param code_end_offset the first offset above @p code_begin_offset at
 150    :    //     which the bytes are no longer code.
 151  m :    void InitJumpTargets(Offset code_begin_offset, Offset code_end_offset);
 152    :  
 153    :    // Decode the bytes at @p offset into @p instruction. This function takes
 154    :    // into consideration the range of offsets which denote code.
 155    :    // @param offset The offset of into block_ at which to start decoding.
 156    :    // @param code_end_offset The offset at which the bytes cease to be code.
 157    :    // @param instruction this value will be populated on success.
 158    :    // @returns true on success; false otherwise.
 159    :    // @note Used by ParseInstructions().
 160  m :    bool DecodeInstruction(Offset offset,
 161  m :                           Offset code_end_offset,
 162  m :                           Instruction* instruction) const;
 163    :  
 164    :    // Called for each instruction, this creates the Instruction object
 165    :    // corresponding to @p instruction, or terminates the current basic block
 166    :    // if @p instruction is a branch point.
 167    :    // @param instruction the decoded instruction.
 168    :    // @param offset the offset at which @p instruction occurs in the block.
 169    :    // @returns true on success; false otherwise.
 170    :    // @note Used by ParseInstructions().
 171  m :    bool HandleInstruction(const Instruction& instruction, Offset offset);
 172    :  
 173    :    // @}
 174    :  
 175    :    // @name Validation functions.
 176    :    // @{
 177    :    // Verifies that every identified jump target in the original code block
 178    :    // resolves to the start of a basic code block in the original code blocks
 179    :    // basic-block address space. This is protected for unit-testing purposes.
 180  m :    void CheckAllJumpTargetsStartABasicCodeBlock() const;
 181    :  
 182    :    // Verifies that the address space derived from the original code block
 183    :    // fully covers the original code block. This is protected for unit-testing
 184    :    // purposes.
 185  m :    void CheckHasCompleteBasicBlockCoverage() const;
 186    :  
 187    :    // Verifies that all basic blocks in the address space derived from the
 188    :    // original code block have valid successors or end in an instruction that
 189    :    // does not yield successors.  This is protected for unit-testing purposes.
 190  m :    void CheckAllControlFlowIsValid() const;
 191    :  
 192    :    // Verifies that all labels in the original block are present in the
 193    :    // decomposed basic-block subgraph.
 194  m :    void CheckAllLabelsArePreserved() const;
 195    :    // @}
 196    :  
 197    :    // Split code basic-blocks at branch targets such that no basic-block
 198    :    // has a reference that it not to its head.
 199  m :    bool SplitCodeBlocksAtBranchTargets();
 200    :  
 201    :    // Creates basic blocks for all known data symbols in the block.
 202    :    // @returns true on success.
 203  m :    bool FillInDataBlocks();
 204    :  
 205    :    // Propagate the referrers from the original block into the basic blocks
 206    :    // so that referrers can be tracked as the basic blocks are manipulated.
 207  m :    bool CopyExternalReferrers();
 208    :  
 209    :    // Helper function to populate @p refs with the set of references originating
 210    :    // from its source range in the original block.
 211  m :    bool CopyReferences(Offset item_offset,
 212  m :                        Size item_size,
 213  m :                        BasicBlockReferenceMap* refs);
 214    :  
 215    :    // Propagate the references from the original block into the basic blocks
 216    :    // so that they can be tracked as the basic blocks are manipulated.
 217  m :    bool CopyReferences();
 218    :  
 219    :    // Resolve intra-block control flow references and referrers.
 220  m :    bool ResolveSuccessors();
 221    :  
 222    :    // Convert any unreachable code basic block into padding basic blocks.
 223  m :    void MarkUnreachableCodeAsPadding();
 224    :  
 225    :    // Inserts a basic block range into the decomposition.
 226  m :    bool InsertBasicBlockRange(Offset offset,
 227  m :                               size_t size,
 228  m :                               BasicBlockType type);
 229    :  
 230    :    // The block being disassembled.
 231  m :    const BlockGraph::Block* const block_;
 232    :  
 233    :    // The basic-block sub-graph to which the block will be decomposed.
 234  m :    BasicBlockSubGraph* subgraph_;
 235    :  
 236    :    // The layout of the original block into basic blocks in subgraph_.
 237  m :    BBAddressSpace original_address_space_;
 238    :  
 239    :    // Tracks locations our conditional branches jump to. Used to fix up basic
 240    :    // blocks by breaking up those that have a jump target in the middle.
 241  m :    JumpTargets jump_targets_;
 242    :  
 243    :    // The start offset of the current basic block during a walk.
 244  m :    Offset current_block_start_;
 245    :  
 246    :    // The list of instructions in the current basic block.
 247  m :    BasicBlock::Instructions current_instructions_;
 248    :  
 249    :    // The set of successors for the current basic block.
 250  m :    BasicBlock::Successors current_successors_;
 251    :  
 252    :    // A debugging flag indicating whether the decomposition results should be
 253    :    // CHECKed.
 254  m :    bool check_decomposition_results_;
 255  m :  };
 256    :  
 257  m :  }  // namespace block_graph
 258    :  
 259    :  #endif  // SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_

Coverage information generated Thu Jul 04 09:34:53 2013.