Coverage for /Syzygy/pe/decomposer.h

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
100.0% | 2 / 2 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // The decomposer decomposes a given image file into a series of blocks
  16    :  // and references by reference to the image's symbols and disassembled
  17    :  // executable code.
  18    :  #ifndef SYZYGY_PE_DECOMPOSER_H_
  19    :  #define SYZYGY_PE_DECOMPOSER_H_
  20    :  
  21    :  #include <windows.h>  // NOLINT
  22    :  #include <dia2.h>
  23    :  #include <map>
  24    :  #include <set>
  25    :  #include <string>
  26    :  #include <vector>
  27    :  
  28    :  #include "base/files/file_path.h"
  29    :  #include "pcrecpp.h"  // NOLINT
  30    :  #include "syzygy/block_graph/block_graph.h"
  31    :  #include "syzygy/core/disassembler.h"
  32    :  #include "syzygy/core/serialization.h"
  33    :  #include "syzygy/pdb/pdb_data.h"
  34    :  #include "syzygy/pe/dia_browser.h"
  35    :  #include "syzygy/pe/image_layout.h"
  36    :  #include "syzygy/pe/pe_file.h"
  37    :  #include "syzygy/pe/pe_file_parser.h"
  38    :  
  39    :  // Fwd.
  40    :  namespace pdb {
  41    :  class PdbStream;
  42    :  class PdbFile;
  43    :  }  // namespace pdb
  44    :  
  45    :  namespace pe {
  46    :  
  47    :  class Decomposer {
  48    :   public:
  49    :    // A struct for storing fixups.
  50    :    struct Fixup;
  51    :    // Used for storing references before the block graph is complete.
  52    :    struct IntermediateReference;
  53    :  
  54    :    typedef block_graph::BlockGraph BlockGraph;
  55    :    typedef core::AbsoluteAddress AbsoluteAddress;
  56    :    typedef core::RelativeAddress RelativeAddress;
  57    :    typedef core::AddressSpace<RelativeAddress, size_t, std::string> DataSpace;
  58    :    typedef core::Disassembler Disassembler;
  59    :    typedef Disassembler::CallbackDirective CallbackDirective;
  60    :    typedef std::map<RelativeAddress, Fixup> FixupMap;
  61    :    typedef std::map<RelativeAddress, IntermediateReference>
  62    :        IntermediateReferenceMap;
  63    :  
  64    :    // Initializes the decomposer for a given image file.
  65    :    // @param image_file the image file to decompose.
  66    :    explicit Decomposer(const PEFile& image_file);
  67    :  
  68    :    // Decomposes the image file into a BlockGraph and an ImageLayout, which
  69    :    // have the breakdown of code and data blocks with typed references and
  70    :    // information on where the blocks resided in the original image,
  71    :    // respectively.
  72    :    // @param image_layout the image layout to populate.
  73    :    // @returns true on success, false on failure.
  74    :    bool Decompose(ImageLayout* image_layout);
  75    :  
  76    :    // @{
  77    :    // TODO(chrisha): Expose a mechanism for bulk-importing these via some JSON
  78    :    //     representation. We will likely want to expose this on the command-line
  79    :    //     of any utility using Decomposer.
  80    :  
  81    :    // Registers a pair of static initializer search patterns. Each of these
  82    :    // patterns will be converted to a regular expression, and they are required
  83    :    // to produce exactly one match group. The match group must be the same for
  84    :    // each of the patterns in order for the symbols to be correlated to each
  85    :    // other.
  86    :    // @param begin the regular-expression used to find the open bracketing
  87    :    //     symbol.
  88    :    // @param end the regular-expression used to find the end bracketing symbol.
  89    :    // @returns true on success, false otherwise.
  90    :    bool RegisterStaticInitializerPatterns(const base::StringPiece& begin,
  91    :                                           const base::StringPiece& end);
  92    :  
  93    :    // Registers a function as non-returning. This can be used to set
  94    :    // no-return semantics for functions whose debug information is missing or
  95    :    // incomplete.
  96    :    // @param function_name the undecorated function name.
  97    :    // @returns true if the function was added, false if it already existed in
  98    :    //     the set.
  99    :    bool RegisterNonReturningFunction(const base::StringPiece& function_name);
 100    :  
 101    :    // Registers an imported symbol as a non-returning function. This can be used
 102    :    // to set no-return semantics for imported functions (we don't get symbol
 103    :    // information for these).
 104    :    // @param module_name the name of the imported module.
 105    :    // @param function_name the undecorated function name.
 106    :    // @returns true if the function was added, false if it already existed in
 107    :    //     the set.
 108    :    bool RegisterNonReturningImport(const base::StringPiece& module_name,
 109    :                                    const base::StringPiece& function_name);
 110    :    // @}
 111    :  
 112    :    // Sets the PDB path to be used. If this is not called it will be inferred
 113    :    // using the information in the module, and searched for using the OS
 114    :    // search functionality.
 115    :    // @param pdb_path the path to the PDB file to be used in decomposing the
 116    :    //     image.
 117  E :    void set_pdb_path(const base::FilePath& pdb_path) { pdb_path_ = pdb_path; }
 118    :  
 119    :    // Accessor to the PDB path. If Decompose has been called successfully this
 120    :    // will reflect the path of the PDB file that was used to perform the
 121    :    // decomposition.
 122    :    // @returns the PDB path.
 123  E :    const base::FilePath& pdb_path() const { return pdb_path_; }
 124    :  
 125    :   protected:
 126    :    typedef std::map<RelativeAddress, std::string> DataLabels;
 127    :    typedef std::vector<pdb::PdbFixup> PdbFixups;
 128    :  
 129    :    // Searches for (if necessary) the PDB file to be used in the decomposition,
 130    :    // and validates that the file exists and matches the module.
 131    :    bool FindAndValidatePdbPath();
 132    :  
 133    :    // Parse functions and thunks, using their data to annotate blocks.
 134    :    bool ProcessCodeSymbols(IDiaSymbol* globals);
 135    :    // Parses all function symbols.
 136    :    bool ProcessFunctionSymbols(IDiaSymbol* globals);
 137    :    // Create a function or thunk symbol.
 138    :    // @pre @p function is a function or a thunk.
 139    :    bool ProcessFunctionOrThunkSymbol(IDiaSymbol* function);
 140    :    // Create labels for @p function, which corresponds to @p block.
 141    :    bool CreateLabelsForFunction(IDiaSymbol* function, BlockGraph::Block* block);
 142    :    // Create blocks for all thunks in @p globals.
 143    :    // @note thunks are offspring of Compilands.
 144    :    bool ProcessThunkSymbols(IDiaSymbol* globals);
 145    :  
 146    :    // Enumerates labels in @p globals and adds them to the corresponding (code)
 147    :    // blocks.
 148    :    bool CreateGlobalLabels(IDiaSymbol* globals);
 149    :  
 150    :    // Creates a gap block of type @p block_type for the given range. For use by
 151    :    // CreateSectionGapBlocks.
 152    :    bool CreateGapBlock(BlockGraph::BlockType block_type,
 153    :                        RelativeAddress address,
 154    :                        BlockGraph::Size size);
 155    :    // Create blocks of type @p block_type for any gaps in the image
 156    :    // section represented by @p header.
 157    :    bool CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
 158    :                                BlockGraph::BlockType block_type);
 159    :    // Creates gap blocks.
 160    :    bool CreateGapBlocks();
 161    :  
 162    :    // Processes the SectionContribution table, creating code/data blocks from it.
 163    :    bool CreateBlocksFromSectionContribs(IDiaSession* session);
 164    :  
 165    :    // Guesses data block alignments and padding.
 166    :    bool GuessDataBlockAlignments();
 167    :    // Process static initializer data labels, ensuring they remain contiguous.
 168    :    bool ProcessStaticInitializers();
 169    :  
 170    :    // These process symbols in the DIA tree via DiaBrowser and the following
 171    :    // callbacks.
 172    :    bool ProcessDataSymbols(IDiaSymbol* root);
 173    :    bool ProcessPublicSymbols(IDiaSymbol* root);
 174    :  
 175    :    // DiaBrowser callbacks.
 176    :    DiaBrowser::BrowserDirective OnDataSymbol(
 177    :        const DiaBrowser& dia_browser,
 178    :        const DiaBrowser::SymTagVector& sym_tags,
 179    :        const DiaBrowser::SymbolPtrVector& symbols);
 180    :    DiaBrowser::BrowserDirective OnPublicSymbol(
 181    :        const DiaBrowser& dia_browser,
 182    :        const DiaBrowser::SymTagVector& sym_tags,
 183    :        const DiaBrowser::SymbolPtrVector& symbols);
 184    :  
 185    :    // Translates intermediate references to block->block references.
 186    :    bool FinalizeIntermediateReferences();
 187    :  
 188    :    // Checks that the fixups were all visited.
 189    :    bool ConfirmFixupsVisited() const;
 190    :  
 191    :    // Searches through the final block graph, and labels blocks that are
 192    :    // simply padding blocks. This must be called after all references are
 193    :    // finalized.
 194    :    bool FindPaddingBlocks();
 195    :  
 196    :    // Parses the section headers and creates BlockGraph sections.
 197    :    bool CreateSections();
 198    :  
 199    :    // Parses the various debug streams. This populates fixup_map_ as well.
 200    :    bool LoadDebugStreams(IDiaSession* dia_session);
 201    :  
 202    :    // Validates a reference against a matching fixup, or creates a new
 203    :    // intermediate reference from @p src_addr to @p dst_addr of
 204    :    // type @p type and size @p size. This assumes an offset of
 205    :    // zero.
 206    :    // @returns true if the reference was successfully added, false otherwise.
 207    :    bool AddReferenceCallback(RelativeAddress src_addr,
 208    :                              BlockGraph::ReferenceType type,
 209    :                              BlockGraph::Size size,
 210    :                              RelativeAddress dst_addr);
 211    :    // Parse the relocation entries.
 212    :    bool ParseRelocs();
 213    :    // Uses the fixup map to create cross-block references. These contain
 214    :    // relative references, lookup tables, absolute references, PC-relative from
 215    :    // code references, etc.
 216    :    bool CreateReferencesFromFixups();
 217    :    // Walk relocations and validate them against the fixups.
 218    :    bool ValidateRelocs(const PEFile::RelocMap& reloc_map);
 219    :    // Disassemble all code blocks and create code->code references.
 220    :    bool CreateCodeReferences();
 221    :    // Disassemble @p block and invoke the OnInstruction callback for each
 222    :    // instruction encountered.
 223    :    bool CreateCodeReferencesForBlock(BlockGraph::Block* block);
 224    :  
 225    :    // Parses the PE BlockGraph header and other important PE structures,
 226    :    // adds them as blocks to the image, and creates the references
 227    :    // they contain.
 228    :    bool CreatePEImageBlocksAndReferences(PEFileParser::PEHeader* header);
 229    :  
 230    :    // Creates a new block with the given properties, and attaches the
 231    :    // data to it. This assumes that no conflicting block exists.
 232    :    BlockGraph::Block* CreateBlock(BlockGraph::BlockType type,
 233    :                                   RelativeAddress address,
 234    :                                   BlockGraph::Size size,
 235    :                                   const base::StringPiece& name);
 236    :  
 237    :    enum FindOrCreateBlockDirective {
 238    :      // Expect that no block exists in the given range and that a block will be
 239    :      // created.
 240    :      kExpectNoBlock,
 241    :      // Allow the existence of a block with identical range to that provided.
 242    :      kAllowIdenticalBlock,
 243    :      // Allow the existence of a block that completely covers the provided range.
 244    :      kAllowCoveringBlock,
 245    :      // Allow the existence of a block that contains the start of the provided
 246    :      // range, but which may not fully contain the provided range.
 247    :      kAllowPartialCoveringBlock,
 248    :    };
 249    :    // Create block for the given @p address and @p size of the given @p type,
 250    :    // or return an existing block that has the same @p type, @p address and
 251    :    // @p size. Care must be taken in using the returned block. Regardless of the
 252    :    // provided directive, the block that is returned may be a strict superset
 253    :    // of the requested range, and offsets into it may need to be calculated.
 254    :    // @returns the block created or found, or NULL if there's a conflicting block
 255    :    //    for the address range.
 256    :    BlockGraph::Block* FindOrCreateBlock(BlockGraph::BlockType type,
 257    :                                         RelativeAddress address,
 258    :                                         BlockGraph::Size size,
 259    :                                         const base::StringPiece& name,
 260    :                                         FindOrCreateBlockDirective directive);
 261    :  
 262    :    // @name OnInstruction helper functions.
 263    :    // @{
 264    :    void MarkDisassembledPastEnd();
 265    :    CallbackDirective LookPastInstructionForData(RelativeAddress instr_end);
 266    :    CallbackDirective VisitNonFlowControlInstruction(RelativeAddress instr_start,
 267    :                                                     RelativeAddress instr_end);
 268    :    CallbackDirective VisitPcRelativeFlowControlInstruction(
 269    :        AbsoluteAddress instr_abs,
 270    :        RelativeAddress instr_rel,
 271    :        const _DInst& instruction,
 272    :        bool end_of_code);
 273    :    CallbackDirective VisitIndirectMemoryCallInstruction(
 274    :        const _DInst& instruction, bool end_of_code);
 275    :    CallbackDirective OnInstruction(const Disassembler& disassembler,
 276    :                                    const _DInst& instruction);
 277    :    // @}
 278    :  
 279    :    // Repairs the DIA "FIXUPS" with any loaded OMAP information, validates them,
 280    :    // and stores them in the given FixupMap.
 281    :    bool OmapAndValidateFixups(const std::vector<OMAP>& omap_from,
 282    :                               const PdbFixups& pdb_fixups);
 283    :  
 284    :    // Check if there's a block-graph stream in the PDB and load it in this case.
 285    :    // @param pdb_path The path of the PDB file.
 286    :    // @param image_file The image file we're decomposing. This is used to set
 287    :    //     block data pointers.
 288    :    // @param image The image-layout we're trying to populate.
 289    :    // @param stream_exists A pointer to a boolean indicating if the
 290    :    //     block-graph stream exists in the PDB.
 291    :    // @return true if the block-graph has been successfully loaded, false
 292    :    //     otherwise.
 293    :    bool LoadBlockGraphFromPdb(const base::FilePath& pdb_path,
 294    :                               const PEFile& image_file,
 295    :                               ImageLayout* image,
 296    :                               bool* stream_exists);
 297    :  
 298    :    // Load a block-graph from a PDB stream.
 299    :    // @param image_file The image file we're decomposing. This is used to set
 300    :    //     block data pointers.
 301    :    // @param block_graph_stream The stream containing the block-graph.
 302    :    // @param image_layout The image-layout we're trying to populate.
 303    :    // @return true if the block-graph has been successfully loaded, false
 304    :    //     otherwise.
 305    :    bool LoadBlockGraphFromPdbStream(const PEFile& image_file,
 306    :                                     pdb::PdbStream* block_graph_stream,
 307    :                                     ImageLayout* image_layout);
 308    :  
 309    :    // Try to get the block-graph stream from a PDB.
 310    :    // @param pdb_file The PDB file from which the stream will be read.
 311    :    // @returns a scoped pointer to the stream in case of success, otherwise
 312    :    //     the pointer will contain a NULL reference.
 313    :    scoped_refptr<pdb::PdbStream> GetBlockGraphStreamFromPdb(
 314    :        pdb::PdbFile* pdb_file);
 315    :  
 316    :    // Callback for use with PEFileParser. Will set the NON_RETURN_FUNCTION
 317    :    // attribute for imports that are found in the non_returning_imports_ set.
 318    :    bool OnImportThunkCallback(const char* module_name,
 319    :                               const char* symbol_name,
 320    :                               BlockGraph::Block* thunk);
 321    :  
 322    :    // The image address space we're decomposing to.
 323    :    BlockGraph::AddressSpace* image_;
 324    :  
 325    :    // The image file we're decomposing.
 326    :    // Note that the resultant BlockGraph will contain pointers to the
 327    :    // data in the image file, so the user must ensure the image file
 328    :    // outlives the BlockGraph.
 329    :    const PEFile& image_file_;
 330    :  
 331    :    // The path to the PDB file to be used in decomposing the image.
 332    :    base::FilePath pdb_path_;
 333    :  
 334    :    // Stores intermediate references before the block graph is complete.
 335    :    IntermediateReferenceMap references_;
 336    :  
 337    :    typedef std::set<BlockGraph::Block*> BlockSet;
 338    :    typedef std::set<BlockGraph::AddressSpace::Range> RangeSet;
 339    :    typedef std::map<RelativeAddress, std::string> LabelMap;
 340    :    typedef std::set<RelativeAddress> RelativeAddressSet;
 341    :    typedef pcrecpp::RE RE;
 342    :    typedef std::pair<RE, RE> REPair;
 343    :    typedef std::vector<REPair> REPairs;
 344    :    typedef std::set<std::string> StringSet;
 345    :    typedef std::map<std::string, StringSet> StringSetMap;
 346    :  
 347    :    // @name State tracking for the disassembler.
 348    :    // @{
 349    :    // The block we're currently disassembling. We need this for use in the
 350    :    // OnInstruction callback.
 351    :    BlockGraph::Block* current_block_;
 352    :    // Used to indicate the decomposer's handling of the current block. Needed
 353    :    // for OnInstruction callback.
 354    :    bool be_strict_with_current_block_;
 355    :    // @}
 356    :  
 357    :    // Keeps track of reloc entry information, which is used by various
 358    :    // pieces of the decomposer.
 359    :    PEFile::RelocSet reloc_set_;
 360    :  
 361    :    // Keeps track of fixups, which are necessary if we want to move around
 362    :    // code and data. These are keyed by the location in the image of the
 363    :    // reference. We keep them around so that the disassembly phase can be
 364    :    // validated against them.
 365    :    FixupMap fixup_map_;
 366    :    // A set of static initializer search pattern pairs. These are used to
 367    :    // ensure we don't break up blocks of static initializer function pointers.
 368    :    REPairs static_initializer_patterns_;
 369    :    // A set of functions known to be non-returning but not tagged as such in the
 370    :    // debug symbols.
 371    :    StringSet non_returning_functions_;
 372    :    // A map of module names, each containing a set of known non-returning
 373    :    // functions.
 374    :    StringSetMap non_returning_imports_;
 375    :  };
 376    :  
 377    :  // This stores fixups, but in a format more convenient for us than the
 378    :  // basic PdbFixup struct.
 379    :  struct Decomposer::Fixup {
 380    :    BlockGraph::ReferenceType type;  // The reference type of this fixup.
 381    :    bool refers_to_code;  // NOTE(review): presumably true when the target is code; confirm.
 382    :    bool is_data;  // NOTE(review): presumably true when the fixup lies in data; confirm.
 383    :    // Whether this fixup has been visited by our decomposition.
 384    :    bool visited;
 385    :    RelativeAddress location;  // NOTE(review): presumably the FixupMap key (address of the reference); confirm.
 386    :    RelativeAddress base;  // NOTE(review): presumably the address the fixup refers to; confirm.
 387    :  };
 388    :  
 389    :  // During decomposition we collect references in this format, e.g.
 390    :  // address->address. After chunking up the entire image into blocks,
 391    :  // we convert them to block->block references.
 392    :  // TODO(siggi): Is there reason to keep these in an address space to guard
 393    :  //     against overlapping references?
 394    :  struct Decomposer::IntermediateReference {
 395    :    BlockGraph::ReferenceType type;
 396    :    BlockGraph::Size size;
 397    :    // A reference actually takes the form of a pointer that is offset
 398    :    // from a base address (its intended target). Direct references will
 399    :    // have offset = 0, but this allows us to represent offset references
 400    :    // into data as seen in loop induction variables, etc.
 401    :    RelativeAddress base;  // The intended target of the reference.
 402    :    BlockGraph::Offset offset;  // Displacement from base; 0 for direct references.
 403    :  };
 404    :  
 405    :  }  // namespace pe
 406    :  
 407    :  #endif  // SYZYGY_PE_DECOMPOSER_H_

Coverage information generated Thu Jul 04 09:34:53 2013.