Coverage for /Syzygy/block_graph/basic_block_decomposer.h

CoverageLines executed / instrumented / missingexe / inst / missLanguageGroup
100.0%330.C++source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // A class that attempts to disassemble a function into basic blocks.
  16    :  //
  17    :  // Given a function block (dubbed macro block), this disassembler attempts to
  18    :  // cut it up into sequences of contiguous instruction runs and data blocks. A
  19    :  // contiguous instruction run is defined as a set of instructions that under
  20    :  // normal operation will always run from start to end. This class requires that
  21    :  // all external references to addresses within a function block have an
  22    :  // associated label.
  23    :  
  24    :  #ifndef SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
  25    :  #define SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_
  26    :  
  27    :  #include <set>
  28    :  #include <string>
  29    :  
  30    :  #include "base/basictypes.h"
  31    :  #include "base/callback.h"
  32    :  #include "base/strings/string_piece.h"
  33    :  #include "syzygy/block_graph/basic_block.h"
  34    :  #include "syzygy/block_graph/basic_block_subgraph.h"
  35    :  #include "syzygy/block_graph/block_graph.h"
  36    :  #include "syzygy/core/address.h"
  37    :  #include "syzygy/core/disassembler.h"
  38    :  #include "distorm.h"  // NOLINT
  39    :  
  40    :  namespace block_graph {
  41    :  
  42    :  // This class re-disassembles an already-processed code block (referred to
  43    :  // herein as a macro block) and breaks it up into basic blocks.
  44    :  //
  45    :  // A basic block is defined here as one of:
  46    :  //
  47    :  // 1) A series of code instructions that will be executed contiguously.
  48    :  // 2) A chunk of data (as determined by it being labeled as such).
  49    :  // 3) Padding (or unreachable code)
  50    :  //
  51    :  // The break-down into basic blocks happens in six passes:
  52    :  //
  53    :  // 1) Code disassembly starting from the block's inbound code references. This
  54    :  //    carves all of the basic code blocks and creates control flow (successor)
  55    :  //    relationships. While the basic blocks are being created, intra-block
  56    :  //    successors cannot be resolved and are instead referenced by block offset;
  57    :  //    inter-block successors are immediately resolved.
  58    :  // 2) Data block construction to carve out statically embedded data.
  59    :  // 3) Padding block construction to fill any gaps.
  60    :  // 4) Copying all inbound references (referrers) to their corresponding
  61    :  //    basic block.
  62    :  // 5) Copying all references originating in the block to their corresponding
  63    :  //    basic block.
  64    :  // 6) Wiring up all unresolved intra-block successors.
  65    :  //
  66    :  // The block to be decomposed must have been produced by a successful
  67    :  // PE decomposition (or some facsimile thereof).
  68    :  class BasicBlockDecomposer {
  69    :   public:
  70    :    typedef core::AbsoluteAddress AbsoluteAddress;
  71    :    typedef block_graph::BasicBlock BasicBlock;
  72    :    typedef BasicBlock::BasicBlockType BasicBlockType;
  73    :    typedef block_graph::BlockGraph BlockGraph;
  74    :    typedef BlockGraph::Offset Offset;
  75    :    typedef block_graph::BasicBlockSubGraph BasicBlockSubGraph;
  76    :  
  77    :    // Initialize the BasicBlockDecomposer instance.
  78    :    // @param block The block to be decomposed
  79    :    // @param subgraph The basic-block sub-graph data structure to populate.
  80    :    //     This can be NULL if the results of the decomposition aren't
  81    :    //     necessary.
  82    :    BasicBlockDecomposer(const BlockGraph::Block* block,
  83    :                         BasicBlockSubGraph* subgraph);
  84    :  
  85    :    // Decomposes a function macro block into its constituent basic blocks.
  86    :    //
  87    :    // Immediately following a successful decomposition of a block to
  88    :    // basic-blocks, subgraph will contain all the basic-blocks found in the
  89    :    // source block and exactly one block description: that of the source block.
  90    :    //
  91    :    // Following decomposition, additional block descriptions can be created,
  92    :    // new basic blocks added, and basic blocks shuffled between the descriptions.
  93    :    // The subgraph can then be coalesced back into the BlockGraph from
  94    :    // which the original block came.
  95    :    //
  96    :    // This can set various status bits indicating the reason for the failure.
  97    :    bool Decompose();
  98    :  
  99    :    // @returns true if the decomposition failed because of unsupported
 100    :    //     instructions.
 101  E :    bool contains_unsupported_instructions() const {
 102  E :      return contains_unsupported_instructions_;
 103  E :    }
 104    :  
 105    :   protected:
 106    :    typedef std::map<Offset, BasicBlockReference> BasicBlockReferenceMap;
 107    :    typedef core::AddressSpace<Offset, size_t, BasicBlock*> BBAddressSpace;
 108    :    typedef BlockGraph::Block::SourceRange SourceRange;
 109    :    typedef BlockGraph::Size Size;
 110    :    typedef std::set<Offset> JumpTargets;
 111    :  
 112    :    // Returns the source range that coincides with the data range
 113    :    // [@p offset, [@p offset + @p size) in the original block.
 114    :    SourceRange GetSourceRange(Offset offset, Size size) const;
 115    :  
 116    :    // Find the basic block, and corresponding byte-range, that contains the
 117    :    // given offset.
 118    :    // @param offset the starting offset you with the returned basic-block/range
 119    :    //     to contain.
 120    :    // @param basic_block the basic-block containing @p offset.
 121    :    // @param range the byte-range in which @p basic_offset resides, which
 122    :    //     contains @p offset.
 123    :    bool FindBasicBlock(Offset offset,
 124    :                        BasicBlock** basic_block,
 125    :                        BBAddressSpace::Range* range) const;
 126    :  
 127    :    // Find the basic block that begins at the given offset.
 128    :    // @param offset The starting offset of the basic block you want to find.
 129    :    // @pre The basic block subgraph is derived from an original block (and
 130    :    //     thus has an address space) and has been broken down into all of its
 131    :    //     constituent basic blocks (i.e., post disassembly and basic-block
 132    :    //     splitting).
 133    :    BasicBlock* GetBasicBlockAt(Offset offset) const;
 134    :  
 135    :    // Helper function to end the current basic block and begin a new one
 136    :    // at @p offset. This is only to be used during disassembly.
 137    :    bool EndCurrentBasicBlock(Offset end_offset);
 138    :  
 139    :    // Walk the function's code in a linear fashion, decomposing the block into
 140    :    // code and data basic blocks forming an original address space.
 141    :    bool Disassemble();
 142    :  
 143    :    // Determines the code range of the block, and creates any data blocks. This
 144    :    // will return false if an invalid block layout is encountered.
 145    :    // @param end Will be filled in with the end of the code range.
 146    :    bool GetCodeRangeAndCreateDataBasicBlocks(Offset* end);
 147    :  
 148    :    // Performs an initial linear pass at disassembling the code bytes of the
 149    :    // block into rudimentary basic blocks. The initial set of basic blocks are
 150    :    // each terminated at branch points. A subsequent pass will further split
 151    :    // basic blocks at branch destinations, see SplitCodeBlocksAtBranchTargets().
 152    :    bool ParseInstructions();
 153    :  
 154    :    // @name Helpers for ParseInstructions().
 155    :    // @{
 156    :  
 157    :    // Initializes jump_targets_ to the set of referenced code locations.
 158    :    // This covers all locations which are externally referenced, as well as
 159    :    // those that are internally referenced via jump tables. These jump targets
 160    :    // may be otherwise un-discoverable through disassembly.
 161    :    // @param code_end_offset the first offset above @p code_begin_offset at
 162    :    //     which the bytes are no longer code.
 163    :    void InitJumpTargets(Offset code_end_offset);
 164    :  
 165    :    // Decode the bytes at @p offset into @p instruction. This function takes
 166    :    // into consideration the range of offsets which denote code.
 167    :    // @param offset The offset of into block_ at which to start decoding.
 168    :    // @param code_end_offset The offset at which the bytes cease to be code.
 169    :    // @param instruction this value will be populated on success.
 170    :    // @returns true on success; false otherwise.
 171    :    // @note Used by ParseInstructions().
 172    :    bool DecodeInstruction(Offset offset,
 173    :                           Offset code_end_offset,
 174    :                           Instruction* instruction) const;
 175    :  
 176    :    // Called for each instruction, this creates the Instruction object
 177    :    // corresponding to @p instruction, or terminates the current basic block
 178    :    // if @p instruction is a branch point.
 179    :    // @param instruction the decoded instruction.
 180    :    // @param offset the offset at which @p instruction occurs in the block.
 181    :    // @returns true on success; false otherwise.
 182    :    // @note Used by ParseInstructions().
 183    :    bool HandleInstruction(const Instruction& instruction, Offset offset);
 184    :  
 185    :    // @}
 186    :  
 187    :    // @name Validation functions.
 188    :    // @{
 189    :    // Verifies that every identified jump target in the original code block
 190    :    // resolves to the start of a basic code block in the original code blocks
 191    :    // basic-block address space. This is protected for unit-testing purposes.
 192    :    void CheckAllJumpTargetsStartABasicCodeBlock() const;
 193    :  
 194    :    // Verifies that the address space derived from the original code block
 195    :    // fully covers the original code block. This is protected for unit-testing
 196    :    // purposes.
 197    :    void CheckHasCompleteBasicBlockCoverage() const;
 198    :  
 199    :    // Verifies that all basic blocks in the address space derived from the
 200    :    // original code block have valid successors or end in an instruction that
 201    :    // does not yield successors.  This is protected for unit-testing purposes.
 202    :    void CheckAllControlFlowIsValid() const;
 203    :  
 204    :    // Verifies that all labels in the original block are present in the
 205    :    // decomposed basic-block subgraph.
 206    :    void CheckAllLabelsArePreserved() const;
 207    :    // @}
 208    :  
 209    :    // Split code basic-blocks at branch targets such that no basic-block
 210    :    // has a reference that it not to its head.
 211    :    void SplitCodeBlocksAtBranchTargets();
 212    :  
 213    :    // Propagate the referrers from the original block into the basic blocks
 214    :    // so that referrers can be tracked as the basic blocks are manipulated.
 215    :    void CopyExternalReferrers();
 216    :  
 217    :    // Helper function to populate @p refs with the set of references originating
 218    :    // from its source range in the original block.
 219    :    void CopyReferences(Offset item_offset,
 220    :                        Size item_size,
 221    :                        BasicBlockReferenceMap* refs);
 222    :  
 223    :    // Propagate the references from the original block into the basic blocks
 224    :    // so that they can be tracked as the basic blocks are manipulated.
 225    :    void CopyReferences();
 226    :  
 227    :    // Resolve intra-block control flow references and referrers.
 228    :    void ResolveSuccessors();
 229    :  
 230    :    // Convert any unreachable code basic block into padding basic blocks.
 231    :    void MarkUnreachableCodeAsPadding();
 232    :  
 233    :    // Inserts a basic block range into the decomposition.
 234    :    bool InsertBasicBlockRange(Offset offset,
 235    :                               size_t size,
 236    :                               BasicBlockType type);
 237    :  
 238    :    // The block being disassembled.
 239    :    const BlockGraph::Block* const block_;
 240    :  
 241    :    // The basic-block sub-graph to which the block will be decomposed.
 242    :    BasicBlockSubGraph* subgraph_;
 243    :  
 244    :    // The layout of the original block into basic blocks in subgraph_.
 245    :    BBAddressSpace original_address_space_;
 246    :  
 247    :    // Tracks locations our conditional branches jump to. Used to fix up basic
 248    :    // blocks by breaking up those that have a jump target in the middle.
 249    :    JumpTargets jump_targets_;
 250    :  
 251    :    // The start offset of the current basic block during a walk.
 252    :    Offset current_block_start_;
 253    :  
 254    :    // The list of instructions in the current basic block.
 255    :    BasicBlock::Instructions current_instructions_;
 256    :  
 257    :    // The set of successors for the current basic block.
 258    :    BasicBlock::Successors current_successors_;
 259    :  
 260    :    // A debugging flag indicating whether the decomposition results should be
 261    :    // CHECKed.
 262    :    bool check_decomposition_results_;
 263    :  
 264    :    // If no explicit subgraph was provided then we need to use one as scratch
 265    :    // space in order to do some work.
 266    :    scoped_ptr<BasicBlockSubGraph> scratch_subgraph_;
 267    :  
 268    :    // Decomposition failure flags.
 269    :    bool contains_unsupported_instructions_;
 270    :  };
 271    :  
 272    :  }  // namespace block_graph
 273    :  
 274    :  #endif  // SYZYGY_BLOCK_GRAPH_BASIC_BLOCK_DECOMPOSER_H_

Coverage information generated Thu Jan 14 17:40:38 2016.