Coverage for /Syzygy/block_graph/block_graph.h

CoverageLines executed / instrumented / missingexe / inst / missLanguageGroup
100.0%1161160.C++source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // A block graph is a an abstract graph of blocks, each of which has an ID,
  16    :  // a type, a size and a few other properties. Each block represents either code
  17    :  // or data, and blocks can reference one another through references of various
  18    :  // types.
  19    :  //
  20    :  // The BlockGraph also stores minimum knowledge of sections (names and
  21    :  // characteristics), and each block belongs to at most one section. In this
  22    :  // sense, a BlockGraph acts as top-level division of blocks.
  23    :  
  24    :  #ifndef SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
  25    :  #define SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
  26    :  
  27    :  #include <hash_map>
  28    :  #include <map>
  29    :  #include <set>
  30    :  #include <string>
  31    :  #include <vector>
  32    :  
  33    :  #include "base/basictypes.h"
  34    :  #include "base/file_util.h"
  35    :  #include "base/string_piece.h"
  36    :  #include "syzygy/common/align.h"
  37    :  #include "syzygy/core/address.h"
  38    :  #include "syzygy/core/address_space.h"
  39    :  
  40    :  namespace block_graph {
  41    :  
  42    :  // Forward declaration.
  43    :  class BlockGraphSerializer;
  44    :  
  45    :  // The invalid address can never occur in an graph, it's used as default
  46    :  // value for block addresses.
  47    :  extern const core::RelativeAddress kInvalidAddress;
  48    :  
  49    :  #define BLOCK_ATTRIBUTE_ENUM(F) \
  50    :      /* Set for functions declared non-returning. */ \
  51    :      F(NON_RETURN_FUNCTION) \
  52    :      /* Set for blocks that are inferred by the decomposer. */ \
  53    :      F(GAP_BLOCK) \
  54    :      /* Set for blocks that are parsed by the PEFileParser. These */ \
  55    :      /* blocks are unmovable, indivisible, etc, and have to be treated */ \
  56    :      /* specially. */ \
  57    :      F(PE_PARSED) \
  58    :      /* Set for blocks that are created from section contribution */ \
  59    :      /* information. */ \
  60    :      F(SECTION_CONTRIB) \
  61    :      /* This is used to indicate that a block consists purely of padding */ \
  62    :      /* data. */ \
  63    :      F(PADDING_BLOCK) \
  64    :      /* Indicates blocks that contain inline assembly. */ \
  65    :      F(HAS_INLINE_ASSEMBLY) \
  66    :      /* Indicates that the block was built by a compiler whose precise */ \
  67    :      /* behaviour and semantics we are unfamiliar with. */ \
  68    :      F(BUILT_BY_UNSUPPORTED_COMPILER) \
  69    :      /* Indicates that the block has been built by the Syzygy toolchain, and */ \
  70    :      /* thus is inherently safe for basic-block decomposition without having */ \
  71    :      /* to perform the myriad of safety checks we do otherwise. */ \
  72    :      F(BUILT_BY_SYZYGY) \
  73    :      /* This is set for blocks whose initial disassembly was incomplete. */ \
  74    :      /* This is not necessarily an error, as we see have seen blocks with */ \
  75    :      /* unreachable code, even in release mode. */ \
  76    :      F(INCOMPLETE_DISASSEMBLY) \
  77    :      /* This is set for blocks whose disassembly was unable to finish due to */ \
  78    :      /* an error. This block has violated assumptions that we make or */ \
  79    :      /* conventions that we have observed the compiler to use. It is not safe */\
  80    :      /* for basic block disassembly. */ \
  81    :      F(ERRORED_DISASSEMBLY) \
  82    :      /* This is set for functions that have exception handling enabled. */ \
  83    :      /* Without delving far deeper into the specifics, it is unsafe to basic */ \
  84    :      /* block decompose these blocks. */ \
  85    :      F(HAS_EXCEPTION_HANDLING) \
  86    :      /* This is set for blocks whose disassembly went off the end of the */ \
  87    :      /* block, or into data. These blocks have control flow that we are not */ \
  88    :      /* aware of, or are otherwise malformed. */ \
  89    :      F(DISASSEMBLED_PAST_END) \
  90    :      /* This is set for blocks that have a thunk symbol pointing to them. */ \
  91    :      /* Typically thunk blocks are compiler or linker-generated, such as */ \
  92    :      /* e.g. import thunks, delay load import thunks, etc. */ \
  93    :      F(THUNK) \
  94    :      /* This is set for blocks that have been parsed as COFF groups. The */ \
  95    :      /* contents of these blocks are semantically indivisible. */ \
  96    :      F(COFF_GROUP) \
  97    :      /* This always needs to be set to the next available attribute bit. */ \
  98    :      F(BLOCK_ATTRIBUTES_MAX)
  99    :  
 100    :  // The BlockGraph is a top-level container for Blocks.
 101    :  class BlockGraph {
 102    :   public:
 103    :    typedef core::RelativeAddress RelativeAddress;
 104    :  
 105    :    typedef size_t SectionId;
 106    :    typedef size_t BlockId;
 107    :    typedef size_t Size;
 108    :    typedef ptrdiff_t Offset;
 109    :    typedef uint32 BlockAttributes;
 110    :    typedef uint32 LabelAttributes;
 111    :  
 112    :    // The BlockGraph maintains a list of sections, and each block belongs
 113    :    // to one of them. This is the set of information we keep regarding them.
 114    :    struct Section;
 115    :    // The section map contains all sections, indexed by id.
 116    :    typedef std::map<SectionId, Section> SectionMap;
 117    :  
 118    :    static const SectionId kInvalidSectionId;
 119    :  
 120    :    // Assign distinct bit IDs to each attribute constant.
 121    :    enum BlockAttributeIdEnum {
 122    :  #define DECLARE_ENUM_BIT(name) name##_BIT,
 123    :      BLOCK_ATTRIBUTE_ENUM(DECLARE_ENUM_BIT)
 124    :  #undef DECLARE_ENUM_BIT
 125    :    };
 126    :  
 127    :    enum BlockAttributeEnum {
 128    :  #define DECLARE_ENUM(name) name = (1 << name##_BIT),
 129    :      BLOCK_ATTRIBUTE_ENUM(DECLARE_ENUM)
 130    :  #undef DECLARE_ENUM
 131    :    };
 132    :  
 133    :    // Returns a string containing the names of the supplied attributes.
 134    :    static std::string BlockAttributesToString(BlockAttributes attrs);
 135    :  
 136    :    enum BlockType {
 137    :      CODE_BLOCK,
 138    :      DATA_BLOCK,
 139    :  
 140    :      // NOTE: This must always be last, and kBlockType must be kept in sync
 141    :      // with this enum.
 142    :      BLOCK_TYPE_MAX
 143    :    };
 144    :  
 145    :    static const char* BlockTypeToString(BlockType type);
 146    :  
 147    :    // Label attributes. Attributes of the form _END_LABEL type actually
 148    :    // point to the first byte past the range they delineate. To make the
 149    :    // semantics of moving labels easier, we shift these labels left by one and
 150    :    // make them follow the last byte of the delineated range.
 151    :    enum LabelAttributesEnum {
 152    :      // The label points to an entry-point in a code block.
 153    :      CODE_LABEL = (1 << 0),
 154    :  
 155    :      // Mark the start and end of the debuggable portion of a code block.
 156    :      DEBUG_START_LABEL = (1 << 1),
 157    :      DEBUG_END_LABEL = (1 << 2),
 158    :  
 159    :      // Mark the start and end of an embedded scope in a code block.
 160    :      SCOPE_START_LABEL = (1 << 3),
 161    :      SCOPE_END_LABEL = (1 << 4),
 162    :  
 163    :      // Marks the location of a (virtual table?) call.
 164    :      CALL_SITE_LABEL = (1 << 5),
 165    :  
 166    :      // The label points to the start of a jump table. The length is inferred
 167    :      // by the location of the next label, or the end of the block. This will
 168    :      // also have DATA_LABEL set.
 169    :      JUMP_TABLE_LABEL = (1 << 6),
 170    :      // The label points to the start of a case table. The length is inferred
 171    :      // by the location of the next label, or the end of the block. This will
 172    :      // also have DATA_LABEL set.
 173    :      CASE_TABLE_LABEL = (1 << 7),
 174    :      // The label originated from a data symbol. The length is inferred by the
 175    :      // location of the next label, or the end of the block. The type of data
 176    :      // is unknown.
 177    :      DATA_LABEL = (1 << 8),
 178    :  
 179    :      // Used to mark a label that was derived from a public symbol. These are
 180    :      // usually actually pointing to code and global data symbols, but we can't
 181    :      // always tell (there may be public symbols pointing to data in a code
 182    :      // block).
 183    :      PUBLIC_SYMBOL_LABEL = (1 << 9),
 184    :  
 185    :      // This always needs to be the most significant bit.
 186    :      LABEL_ATTRIBUTES_MAX = (1 << 10),
 187    :    };
 188    :  
 189    :    static std::string LabelAttributesToString(LabelAttributes label_attributes);
 190    :  
 191    :    enum ReferenceType {
 192    :      PC_RELATIVE_REF,
 193    :      ABSOLUTE_REF,
 194    :      RELATIVE_REF,
 195    :      FILE_OFFSET_REF,
 196    :      // Must be last!
 197    :      REFERENCE_TYPE_MAX,
 198    :    };
 199    :  
 200    :    // Forward declarations.
 201    :    class AddressSpace;
 202    :    class Block;
 203    :    class Label;
 204    :    class Reference;
 205    :  
 206    :    // The block map contains all blocks, indexed by id.
 207    :    typedef std::map<BlockId, Block> BlockMap;
 208    :  
 209    :    BlockGraph();
 210    :    ~BlockGraph();
 211    :  
 212    :    // Adds a section with the given name.
 213    :    //
 214    :    // @param name The section name.
 215    :    // @param characteristics The section characteristics.
 216    :    // @returns the newly created section.
 217    :    Section* AddSection(const base::StringPiece& name, uint32 characteristics);
 218    :  
 219    :    // Finds a section with the given name, returning NULL if no such section
 220    :    // exists.
 221    :    //
 222    :    // @param name The section name.
 223    :    // @returns the section if one is found, NULL otherwise.
 224    :    Section* FindSection(const base::StringPiece& name);
 225    :  
 226    :    // Find or adds section with the given name.
 227    :    //
 228    :    // If a section with the given name already exists, updates its
 229    :    // characteristics and returns it. Otherwise, creates a new section and
 230    :    // returns it. If multiple sections exist with the given name, the first
 231    :    // one encountered is returned.
 232    :    //
 233    :    // TODO(chrisha): The semantics of this function are a little odd. It would
 234    :    //     make more sense for it to return only if a section with matching name
 235    :    //     AND characteristics is found, otherwise to create a new one.
 236    :    //
 237    :    // @param name The section name.
 238    :    // @param characteristics The section characteristics.
 239    :    // @returns the new or found section.
 240    :    Section* FindOrAddSection(const base::StringPiece& name,
 241    :                              uint32 characteristics);
 242    :  
 243    :    // Removes the given section from the BlockGraph.
 244    :    //
 245    :    // The section must belong to this block graph. Be aware that this can leave
 246    :    // Blocks with dangling section_ids.
 247    :    //
 248    :    // @param section The section to remove.
 249    :    // @returns true on success, false otherwise.
 250    :    bool RemoveSection(Section* section);
 251    :  
 252    :    // Removes the section with the given id from the BlockGraph.
 253    :    //
 254    :    // @param section The id of the section to remove.
 255    :    // @returns true on success, false otherwise.
 256    :    bool RemoveSectionById(SectionId id);
 257    :  
 258    :    // Add @p block of type @p type and @p size and
 259    :    // return the new block.
 260    :    // @returns the new block.
 261    :    Block* AddBlock(BlockType type, Size size, const base::StringPiece& name);
 262    :  
 263    :    // Deletes the given block from the BlockGraph. The block must belong to this
 264    :    // block graph, and have no references or referrers. Returns true on success,
 265    :    // false otherwise. On failure, the BlockGraph has not been changed.
 266    :    bool RemoveBlock(Block* block);
 267    :  
 268    :    // Deletes the block with the given @p id from the block graph. The block id
 269    :    // must be valid, and the block must have no references or referrers. Returns
 270    :    // true on success, false otherwise. On failure, the BlockGraph has not been
 271    :    // changed.
 272    :    bool RemoveBlockById(BlockId id);
 273    :  
 274    :    // Accessors.
 275  E :    const SectionMap& sections() const { return sections_; }
 276  E :    SectionMap& sections_mutable() { return sections_; }
 277  E :    const BlockMap& blocks() const { return blocks_; }
 278  E :    BlockMap& blocks_mutable() { return blocks_; }
 279    :  
 280    :    // @{
 281    :    // Retrieve the section with the given id.
 282    :    //
 283    :    // @param id The id of the section to retrieve.
 284    :    // @returns the section in question or NULL if no such section.
 285    :    Section* GetSectionById(SectionId id);
 286    :    const Section* GetSectionById(SectionId id) const;
 287    :    // @}
 288    :  
 289    :    // @{
 290    :    // Retrieve the block with id.
 291    :    // @returns the block in question or NULL if no such block.
 292    :    Block* GetBlockById(BlockId id);
 293    :    const Block* GetBlockById(BlockId id) const;
 294    :    // @}
 295    :  
 296    :   private:
 297    :    // Give BlockGraphSerializer access to our innards for serialization.
 298    :    friend BlockGraphSerializer;
 299    :  
 300    :    // Removes a block by the iterator to it. The iterator must be valid.
 301    :    bool RemoveBlockByIterator(BlockMap::iterator it);
 302    :  
 303    :    // All sections we contain.
 304    :    SectionMap sections_;
 305    :  
 306    :    // Our section ID allocator.
 307    :    SectionId next_section_id_;
 308    :  
 309    :    // All blocks we contain.
 310    :    BlockMap blocks_;
 311    :  
 312    :    // Our block ID allocator.
 313    :    BlockId next_block_id_;
 314    :  };
 315    :  
 316    :  // The BlockGraph maintains a list of sections, and each block belongs
 317    :  // to one of them. This is the set of information we keep regarding them.
 318    :  struct BlockGraph::Section {
 319    :    // Default constructor. Required for serialization.
 320  E :    Section() : id_(kInvalidSectionId), characteristics_(0) {
 321  E :    }
 322    :  
 323    :    // Full constructor.
 324    :    //
 325    :    // @param id The section id. This must not be kInvalidSectionId.
 326    :    // @param name The name of the section. Must not be empty or NULL.
 327    :    // @param characteristics The characteristics of the section.
 328  E :    Section(SectionId id, const base::StringPiece& name, uint32 characteristics)
 329    :        : id_(id), name_(), characteristics_(characteristics) {
 330  E :      DCHECK_NE(kInvalidSectionId, id);
 331  E :      DCHECK(name != NULL);
 332  E :      name.CopyToString(&name_);
 333  E :      DCHECK(!name_.empty());
 334  E :    }
 335    :  
 336    :    // Get the id of this section.
 337    :    //
 338    :    // @returns the id of the section.
 339  E :    SectionId id() const { return id_; }
 340    :  
 341    :    // Get the name of this section.
 342    :    //
 343    :    // @returns the section name.
 344  E :    const std::string& name() const { return name_; }
 345    :  
 346    :    // Sets the name for this section.
 347    :    //
 348    :    // @param name The name of the section. If NULL or empty, this will fail.
 349    :    // @returns true if the name is set, false otherwise.
 350    :    bool set_name(const base::StringPiece& name);
 351    :  
 352    :    // Get the characteristics of this section.
 353    :    //
 354    :    // @returns the section characteristics.
 355  E :    uint32 characteristics() const { return characteristics_; }
 356    :  
 357    :    // Sets the characteristics for this section.
 358    :    //
 359    :    // @param characteristics The new characteristics to set.
 360  E :    void set_characteristics(uint32 characteristics) {
 361  E :      characteristics_ = characteristics;
 362  E :    }
 363    :  
 364    :    // Sets a one or more additional characteristics for this section.
 365    :    //
 366    :    // @param characteristic The new characteristic(s) to set for this section.
 367  E :    void set_characteristic(uint32 characteristic) {
 368  E :      characteristics_ |= characteristic;
 369  E :    }
 370    :  
 371    :    // Clears one or more characteristics for this section.
 372    :    //
 373    :    // @param characteristic The characteristic(s) to clear for this section.
 374  E :    void clear_characteristic(uint32 characteristic) {
 375  E :      characteristics_ &= ~characteristic;
 376  E :    }
 377    :  
 378    :    // @name Serialization functions.
 379    :    // @{
 380    :    bool Save(core::OutArchive* out_archive) const;
 381    :    bool Load(core::InArchive* in_archive);
 382    :    // @}
 383    :  
 384    :    // A simple comparison operator for serialization tests.
 385  E :    bool operator==(const Section& other) const {
 386    :      return id_ == other.id_ && name_ == other.name_ &&
 387  E :          characteristics_ == other.characteristics_;
 388  E :    }
 389    :  
 390    :    // A not-equal comparison operator.
 391  E :    bool operator!=(const Section& other) const {
 392  E :      return !operator==(other);
 393  E :    }
 394    :  
 395    :   private:
 396    :    // The id of the section. This has no particular meaning other than as a way
 397    :    // to identify sections uniquely.
 398    :    SectionId id_;
 399    :    // The name of the section. This will be truncated to a max of 8 characters
 400    :    // on output.
 401    :    std::string name_;
 402    :    // The section characteristics, a bitmask of IMAGE_SCN_* values.
 403    :    uint32 characteristics_;
 404    :  };
 405    :  
 406    :  // A label denotes the beginning (or end) of a sub-region within a (code)
 407    :  // block. In particular, a code label represents an instruction boundary
 408    :  // at which disassembly can begin and a data label represents the beginning
 409    :  // of embedded data.
 410    :  class BlockGraph::Label {
 411    :   public:
 412    :    // Default constructor.
 413  E :    Label() : attributes_(0) {
 414  E :    }
 415    :  
 416    :    // Full constructor.
 417  E :    Label(const base::StringPiece& name, LabelAttributes attributes)
 418    :        : name_(name.begin(), name.end()), attributes_(attributes) {
 419  E :    }
 420    :  
 421    :    // Copy construction.
 422  E :    Label(const Label& other)
 423    :        : name_(other.name_), attributes_(other.attributes_) {
 424  E :    }
 425    :  
 426    :    // @name Accessors.
 427    :    // @{
 428  E :    const std::string& name() const { return name_; }
 429    :    // @}
 430    :  
 431    :    // A helper function for logging and debugging.
 432    :    std::string ToString() const;
 433    :  
 434    :    // Equality comparator for unittesting.
 435  E :    bool operator==(const Label& other) const {
 436  E :      return name_ == other.name_ && attributes_ == other.attributes_;
 437  E :    }
 438    :  
 439    :    // The label attributes are a bitmask. You can set them wholesale,
 440    :    // or set and clear them individually by bitmasking.
 441  E :    LabelAttributes attributes() const { return attributes_; }
 442  E :    void set_attributes(LabelAttributes attributes) { attributes_ = attributes; }
 443    :  
 444    :    // Set or clear one or more attributes.
 445  E :    void set_attribute(LabelAttributes attribute) { attributes_ |= attribute; }
 446  E :    void clear_attribute(LabelAttributes attribute) { attributes_ &= ~attribute; }
 447    :  
 448    :    // Determines if all or any of the given attributes are set.
 449  E :    bool has_attributes(LabelAttributes attributes) const {
 450  E :      return (attributes_ & attributes) == attributes;
 451  E :    }
 452  E :    bool has_any_attributes(LabelAttributes attributes) const {
 453  E :      return (attributes_ & attributes) != 0;
 454  E :    }
 455    :  
 456    :    // @returns true if this label is valid, false otherwise.
 457    :    bool IsValid() const;
 458    :  
 459    :    // Tests a set of label attributes for validity.
 460    :    // @param attributes the attributes to test.
 461    :    // @returns true if the provided attributes are valid, false otherwise.
 462    :    static bool AreValidAttributes(LabelAttributes attributes);
 463    :  
 464    :   private:
 465    :    // The name by which this label is known.
 466    :    std::string name_;
 467    :  
 468    :    // The disposition of the bytes found at this label.
 469    :    LabelAttributes attributes_;
 470    :  };
 471    :  
 472    :  // A block represents a block of either code or data.
 473    :  //
 474    :  // Since blocks may be split and up and glued together in arbitrary ways, each
 475    :  // block maintains an address-space over its data, associating ranges of block
 476    :  // data to ranges of bytes in the original image. This effectively encodes OMAP
 477    :  // data, allowing the PDB file to be updated.
 478    :  //
 479    :  // Each block also stores references to other blocks in the graph, their
 480    :  // relative location within the block and their type and size.
 481    :  //
 482    :  // Each block has a set of attributes, including a size, a name and a
 483    :  // "current" address. Most of those attributes are mutable, and are set in the
 484    :  // process of creating and manipulating images and graph address spaces.
 485    :  class BlockGraph::Block {
 486    :   public:
 487    :    // Set of the blocks that have a reference to this block.
 488    :    // This is keyed on block and source offset (not destination offset),
 489    :    // to allow one to easily locate and remove the backreferences on change or
 490    :    // deletion.
 491    :    typedef std::pair<Block*, Offset> Referrer;
 492    :    typedef std::set<Referrer> ReferrerSet;
 493    :  
 494    :    // Map of references that this block makes to other blocks.
 495    :    typedef std::map<Offset, Reference> ReferenceMap;
 496    :  
 497    :    // Represents a range of data in this block.
 498    :    typedef core::AddressRange<Offset, Size> DataRange;
 499    :  
 500    :    // Represents a range of data in the original image.
 501    :    typedef core::AddressRange<RelativeAddress, Size> SourceRange;
 502    :  
 503    :    // A map between bytes in this block and bytes in the original image.
 504    :    typedef core::AddressRangeMap<DataRange, SourceRange> SourceRanges;
 505    :  
 506    :    // Typed labels associated with various offsets in the block. Some of these
 507    :    // labels (of type CODE_LABEL) represent code start points for disassembly
 508    :    // while others (of type DATA_LABEL) represent the start of embedded data
 509    :    // within the block. Note that, while possible, it is NOT guaranteed that
 510    :    // all basic blocks are marked with a label. Basic block decomposition should
 511    :    // disassemble from the code labels to discover all basic blocks.
 512    :    typedef std::map<Offset, Label> LabelMap;
 513    :  
 514    :    // Blocks need to be default constructible for serialization.
 515    :    Block();
 516    :  
 517    :    Block(BlockId id,
 518    :          BlockType type,
 519    :          Size size,
 520    :          const base::StringPiece& name);
 521    :    ~Block();
 522    :  
 523    :    // Accessors.
 524  E :    BlockId id() const { return id_; }
 525  E :    BlockType type() const { return type_; }
 526  E :    void set_type(BlockType type) { type_ = type; }
 527    :  
 528  E :    Size size() const { return size_; }
 529    :  
 530    :    // Set the total size of the block. Note that allocated data_size_ must
 531    :    // always be less than or equal to the total size.
 532  E :    void set_size(Size size) {
 533  E :      DCHECK_LE(data_size_, size);
 534  E :      size_ = size;
 535  E :    }
 536    :  
 537  E :    const std::string& name() const { return name_; }
 538  E :    void set_name(const base::StringPiece& name) { name.CopyToString(&name_); }
 539    :  
 540  E :    Size alignment() const { return alignment_; }
 541  E :    void set_alignment(Size alignment) {
 542    :      // Ensure that alignment is a non-zero power of two.
 543  E :      DCHECK(common::IsPowerOfTwo(alignment));
 544  E :      alignment_ = alignment;
 545  E :    }
 546    :  
 547    :    // The address of the block is set any time the block is assigned
 548    :    // an address in an address space.
 549  E :    RelativeAddress addr() const { return addr_; }
 550  E :    void set_addr(RelativeAddress addr) { addr_ = addr; }
 551    :  
 552    :    // The section ID for the block. These IDs are wrt to the SectionMap in the
 553    :    // parent BlockGraph.
 554  E :    SectionId section() const { return section_; }
 555  E :    void set_section(SectionId section) { section_ = section; }
 556    :  
 557    :    // The block attributes are a bitmask. You can set them wholesale,
 558    :    // or set and clear them individually by bitmasking.
 559  E :    BlockAttributes attributes() const { return attributes_; }
 560  E :    void set_attributes(BlockAttributes attributes) { attributes_ = attributes; }
 561    :  
 562    :    // Set or clear one or more attributes.
 563  E :    void set_attribute(BlockAttributes attribute) { attributes_ |= attribute; }
 564  E :    void clear_attribute(BlockAttributes attribute) {
 565  E :      attributes_ &= ~attribute;
 566  E :    }
 567    :  
 568    :    // This is true iff data_ is in the ownership of the block.
 569    :    // Iff true, the block will delete [] data_ on destruction or when
 570    :    // data is overwritten.
 571  E :    bool owns_data() const { return owns_data_; }
 572    :  
 573    :    // Makes room for the given amount of data at the given offset. This is
 574    :    // special in that it will patch up any labels, source ranges and referrers
 575    :    // that land beyond the newly created data, shifting them to the right by
 576    :    // @p size. If the data for this block is actually allocated it will also
 577    :    // patch up the allocated data by zeroing the newly allocate range of data,
 578    :    // and shifting the tail by @p size. If the new data is strictly implicit
 579    :    // (offset > data_size), then the allocated data is not affected in any way
 580    :    // unless @p always_allocate_data is true.
 581    :    //
 582    :    // @param offset the offset at which to insert the new data.
 583    :    // @param size the size of the new data to be inserted.
 584    :    // @param always_allocate_data if true, then data_size will be grown if
 585    :    //     necessary to ensure that the newly created data can be written.
 586    :    // @pre 0 <= offset <= size()
 587    :    void InsertData(Offset offset, Size size, bool always_allocate_data);
 588    :  
 589    :    // Removes the data in the given range. This will refuse to remove labels,
 590    :    // references and referrers that land in the range, and will fail if any
 591    :    // exist. It will also shift any labels, references and referrers that land
 592    :    // beyond the end of the removed range. Source ranges will also be fixed. If
 593    :    // the removed range lies within the initialized data then the data will also
 594    :    // be truncated/shifted as necessary.
 595    :    //
 596    :    // @param offset the offset at which to remove data.
 597    :    // @param size the size of the data to remove, in bytes.
 598    :    // @returns true on success, false otherwise.
 599    :    // @pre 0 <= offset <= size
 600    :    bool RemoveData(Offset offset, Size size);
 601    :  
 602    :    // Performs an inline resize of data in a BlockGraph. If the data is shrinking
 603    :    // this equates to a RemoveData operation. If it is growing it equates to an
 604    :    // InsertData operation.
 605    :    //
 606    :    // @param offset the offset of the data to resize.
 607    :    // @param current_size the current size of the data to resize.
 608    :    // @param new_size the desired size of the data.
 609    :    // @param always_allocate_data if true, then data_size will be grown if
 610    :    //     necessary to ensure that the resized data can be written.
 611    :    // @returns true on success, false otherwise.
 612    :    // @pre 0 <= offset <= size
 613    :    bool InsertOrRemoveData(Offset offset, Size current_size, Size new_size,
 614    :                            bool always_allocate_data);
 615    :  
 616    :    // Set the data the block refers to.
 617    :    // @param data NULL or the data this block refers to.
 618    :    //     The underlying data must outlive this block.
 619    :    // @param data_size the size of data, or zero if data == NULL.
 620    :    // @pre data_size <= size().
 621    :    void SetData(const uint8* data, size_t data_size);
 622    :  
 623    :    // Allocates and returns a new data buffer of the given size. The returned
 624    :    // data will have been initialized to zero.
 625    :    // @pre data_size <= size().
 626    :    uint8* AllocateData(size_t data_size);
 627    :  
 628    :    // Makes a copy of data, returns a pointer to the copy.
 629    :    // @pre data_size <= size().
 630    :    uint8* CopyData(size_t data_size, const void* data);
 631    :  
 632    :    // Resizes data to new_size by truncating or zero-extending the current data.
 633    :    // @pre new_size <= size().
 634    :    const uint8* ResizeData(size_t new_size);
 635    :  
 636    :    // Returns a mutable copy of the block's data. If the block doesn't own
 637    :    // the data on entry, it'll be copied and the copy returned to the caller.
 638    :    uint8* GetMutableData();
 639    :  
 640    :    // The data bytes the block refers to.
 641  E :    const uint8* data() const { return data_; }
 642    :  
 643    :    // The data size may be smaller than the block size (see size()),
 644    :    // when the block e.g. refers to data that's all or part
 645    :    // zero-initialized by the linker/loader.
 646  E :    size_t data_size() const { return data_size_; }
 647    :  
 648  E :    const ReferenceMap& references() const { return references_; }
 649  E :    const ReferrerSet& referrers() const { return referrers_; }
 650  E :    const SourceRanges& source_ranges() const { return source_ranges_; }
 651  E :    SourceRanges& source_ranges() { return source_ranges_; }
 652  E :    const LabelMap& labels() const { return labels_; }
 653    :  
 654    :    // Returns true if there are any other blocks holding a reference to this one.
 655    :    bool HasExternalReferrers() const;
 656    :  
 657    :    // Set the reference at @p offset to @p ref.
 658    :    // If there's a pre-existing reference at @p offset, this overrides it.
 659    :    // @param offset offset of the reference into this block.
 660    :    // @param ref the reference to add.
 661    :    // @returns true iff this inserts a new reference.
 662    :    bool SetReference(Offset offset, const Reference& ref);
 663    :  
 664    :    // Retrieve the reference at @p offset if one exists.
 665    :    // @param reference on success returns the reference @p offset.
 666    :    // @returns true iff there was a reference at @p offset.
 667    :    bool GetReference(Offset offset, Reference* reference) const;
 668    :  
 669    :    // Remove the reference at @p offset.
 670    :    // @returns true iff there was a reference at @p offset.
 671    :    bool RemoveReference(Offset offset);
 672    :  
 673    :    // Remove all references from this block. This is handy when removing a block
 674    :    // from the block graph.
 675    :    bool RemoveAllReferences();
 676    :  
 677    :    // Set a label to @p offset.
 678    :    // A label in code marks the location of the start of an instruction -
 679    :    // e.g. a location where disassembly can usefully commence. Labels
 680    :    // appear to be inserted by the VS tool chain where e.g. a switch
 681    :    // statement is implemented with a jump table, to note the location
 682    :    // of the jump destinations.
 683    :    // @param offset the offset of the label to set.
 684    :    // @param name the name of the label.
 685    :    // @param attributes the attributes of the label.
 686    :    // @returns true iff a new label is inserted.
 687    :    // @note that only one label can exist at each offset, and the first
 688    :    //     label set at any offset will stay there.
 689    :    // @{
 690    :    bool SetLabel(Offset offset, const Label& label);
 691    :    bool SetLabel(Offset offset,
 692    :                  const base::StringPiece& name,
 693  E :                  LabelAttributes attributes) {
 694  E :      return SetLabel(offset, Label(name, attributes));
 695  E :    }
 696    :    // @}
 697    :  
 698    :    // Gets the label at the given @p offset.
 699    :    // @param offset the offset of the label to get.
 700    :    // @param label the string to receive the label.
 701    :    // @return true if the label exists, false otherwise.
 702    :    bool GetLabel(Offset offset, Label* label) const;
 703    :  
 704    :    // Removes the label at the given @p offset.
 705    :    // @param offset the offset of the label to remove.
 706    :    // @return true if the label existed and was removed, false it it did not
 707    :    //     exist.
 708    :    bool RemoveLabel(Offset offset);
 709    :  
 710    :    // Returns true iff the block has a label at @p offset.
 711    :    // @param offset the offset of the label to search for.
 712    :    bool HasLabel(Offset offset) const;
 713    :  
 714    :    // Change all references to this block to refer to @p new_block instead,
 715    :    // while offsetting each reference by @p offset.
 716    :    // @note this fails if any of the transferred references end up with offsets
 717    :    //     less than zero, or greater than new_block->size().
 718    :    // @returns true iff all references were transferred successfully.
 719    :    bool TransferReferrers(Offset offset, Block* new_block);
 720    :  
 721    :    // Returns true if this block contains the given range of bytes.
 722    :    bool Contains(RelativeAddress address, size_t size) const;
 723    :  
 724    :   protected:
 725    :    // Give BlockGraph access to our innards for serialization.
 726    :    friend class BlockGraph;
 727    :    // Give BlockGraphSerializer access to our innards for serialization.
 728    :    friend class BlockGraphSerializer;
 729    :  
 730    :    // Allocates and returns a new data buffer of the given size. The returned
 731    :    // data buffer will not have been initialized in any way.
 732    :    uint8* AllocateRawData(size_t size);
 733    :  
 734    :    BlockId id_;
 735    :    BlockType type_;
 736    :    Size size_;
 737    :    Size alignment_;
 738    :    std::string name_;
 739    :    RelativeAddress addr_;
 740    :  
 741    :    SectionId section_;
 742    :    BlockAttributes attributes_;
 743    :  
 744    :    ReferenceMap references_;
 745    :    ReferrerSet referrers_;
 746    :    SourceRanges source_ranges_;
 747    :    LabelMap labels_;
 748    :  
 749    :    // True iff data_ is ours to deallocate with delete [].
 750    :    // If this is false, data_ must be guaranteed to outlive the block.
 751    :    bool owns_data_;
 752    :    // A pointer to the code or data we represent.
 753    :    const uint8* data_;
 754    :    // Size of the above.
 755    :    size_t data_size_;
 756    :  };
 757    :  
 758    :  // A graph address space endows a graph with a non-overlapping ordering
 759    :  // on blocks, where each block occupies zero or one address ranges in the
 760    :  // address space. No two blocks may overlap in an address space.
 761    :  class BlockGraph::AddressSpace {
 762    :   public:
 763    :    typedef core::AddressSpace<RelativeAddress, BlockGraph::Size, Block*>
 764    :        AddressSpaceImpl;
 765    :    typedef AddressSpaceImpl::Range Range;
 766    :    typedef AddressSpaceImpl::RangeMap RangeMap;
 767    :    typedef AddressSpaceImpl::RangeMapIter RangeMapIter;
 768    :    typedef AddressSpaceImpl::RangeMapConstIter RangeMapConstIter;
 769    :    typedef AddressSpaceImpl::RangeMapIterPair RangeMapIterPair;
 770    :    typedef AddressSpaceImpl::RangeMapConstIterPair RangeMapConstIterPair;
 771    :  
 772    :    // Constructs a new empty address space.
 773    :    // @p start to @p start + @p size on @p graph.
 774    :    explicit AddressSpace(BlockGraph* graph);
 775    :  
 776    :    // Add a block of type @p type and @p size at @p address to our associated
 777    :    // graph, and return the new block.
 778    :    // @returns the new block, or NULL if the new block would overlap
 779    :    //     an existing block.
 780    :    Block* AddBlock(BlockType type,
 781    :                    RelativeAddress addr,
 782    :                    Size size,
 783    :                    const base::StringPiece& name);
 784    :  
 785    :    // Merges all blocks that intersect @p range into a single block.
 786    :    // Moves labels and references from the intersecting blocks to the
 787    :    // merged block, and changes referring blocks to refer to the new,
 788    :    // merged block. Removes the original blocks from the BlockGraph.
 789    :    // @returns the new, merged block if there was at least one intersecting
 790    :    //     block in @p range, or NULL otherwise.
 791    :    Block* MergeIntersectingBlocks(const Range& range);
 792    :  
 793    :    // Insert existing block @p block at @p address.
 794    :    // @returns true on success, or false if the @p block would overlap
 795    :    //     an existing block.
 796    :    bool InsertBlock(RelativeAddress addr, Block* block);
 797    :  
 798    :    // Returns a pointer to the block containing address, or NULL
 799    :    // if no block contains address.
 800    :    Block* GetBlockByAddress(RelativeAddress addr) const;
 801    :  
 802    :    // Returns a pointer to the block containing the address range
 803    :    // [address, address + size), or NULL if no block contains that
 804    :    // range.
 805    :    Block* GetContainingBlock(RelativeAddress addr, Size size) const;
 806    :  
 807    :    // Finds the first block, if any that intersects
 808    :    // [@p address, @p address + @p size).
 809    :    Block* GetFirstIntersectingBlock(RelativeAddress address, Size size);
 810    :  
 811    :    // Check whether the address space contains @p block.
 812    :    // @param block the block in question.
 813    :    // @returns true if the block is in the address space, false otherwise.
 814    :    bool ContainsBlock(const Block* block);
 815    :  
 816    :    // Locates all blocks that intersect [@p address, @p address + @p size).
 817    :    // @returns a pair of iterators that iterate over the found blocks.
 818    :    RangeMapConstIterPair GetIntersectingBlocks(RelativeAddress address,
 819    :                                                Size size) const;
 820    :    RangeMapIterPair GetIntersectingBlocks(RelativeAddress address, Size size);
 821    :  
 822    :    // Retrieve the address off @p block.
 823    :    // @param block the block in question.
 824    :    // @param addr on success, returns the address of @p block in this
 825    :    //     address space.
 826    :    // @returns true on success, false if @p block is not in this
 827    :    //     address space.
 828    :    bool GetAddressOf(const Block* block, RelativeAddress* addr) const;
 829    :  
 830    :    // Accessor.
 831  E :    BlockGraph* graph() { return graph_; }
 832  E :    const BlockGraph* graph() const { return graph_; }
 833    :  
 834  E :    RangeMapConstIter begin() const {
 835  E :      return address_space_.ranges().begin();
 836  E :    }
 837    :  
 838  E :    RangeMapConstIter end() const {
 839  E :      return address_space_.ranges().end();
 840  E :    }
 841    :  
 842  E :    size_t size() const {
 843  E :      return address_space_.ranges().size();
 844  E :    }
 845    :  
 846  E :    const AddressSpaceImpl& address_space_impl() const {
 847  E :      return address_space_;
 848  E :    }
 849    :  
 850    :   private:
 851    :    bool InsertImpl(RelativeAddress addr, Block* block);
 852    :  
 853    :    typedef stdext::hash_map<const Block*, RelativeAddress> BlockAddressMap;
 854    :  
 855    :    AddressSpaceImpl address_space_;
 856    :    BlockAddressMap block_addresses_;
 857    :    BlockGraph* graph_;
 858    :  };
 859    :  
 860    :  // Represents a reference from one block to another. References may be offset.
 861    :  // That is, they may refer to an object at a given location, but actually point
 862    :  // to a location that is some fixed distance away from that object. This allows,
 863    :  // for example, non-zero based indexing into a table. The object that is
 864    :  // intended to be dereferenced is called the 'base' of the offset.
 865    :  //
 866    :  // BlockGraph references are from a location (offset) in one block, to some
 867    :  // location in another block. The referenced block itself plays the role of the
 868    :  // 'base' of the reference, with the offset of the reference being stored as
 869    :  // an integer from the beginning of the block. However, basic block
 870    :  // decomposition requires breaking the block into smaller pieces and thus we
 871    :  // need to carry around an explicit base value, indicating which byte in the
 872    :  // block is intended to be referenced.
 873    :  //
 874    :  // A direct reference to a location will have the same value for 'base' and
 875    :  // 'offset'.
 876    :  //
 877    :  // Here is an example:
 878    :  //
 879    :  //        /----------\
 880    :  //        +---------------------------+
 881    :  //  O     |          B                | <--- Referenced block
 882    :  //        +---------------------------+      B = base
 883    :  //  \-----/                                  O = offset
 884    :  //
 885    :  class BlockGraph::Reference {
 886    :   public:
 887    :    Reference() :
 888  E :        type_(RELATIVE_REF), size_(0), referenced_(NULL), offset_(0), base_(0) {
 889  E :    }
 890    :  
 891    :    // @param type type of reference.
 892    :    // @param size size of reference.
 893    :    // @param referenced the referenced block.
 894    :    // @param offset offset from the beginning of the block of the location to be
 895    :    //     explicitly referred to.
 896    :    // @param base offset into the block of the location actually being
 897    :    //     referenced. This must be strictly within @p referenced.
 898    :    Reference(ReferenceType type,
 899    :              Size size,
 900    :              Block* referenced,
 901    :              Offset offset,
 902    :              Offset base)
 903    :        : type_(type),
 904    :          size_(size),
 905    :          referenced_(referenced),
 906    :          offset_(offset),
 907  E :          base_(base) {
 908  E :      DCHECK(IsValid());
 909  E :    }
 910    :  
 911    :    // Copy constructor.
 912    :    Reference(const Reference& other)
 913    :        : type_(other.type_),
 914    :          size_(other.size_),
 915    :          referenced_(other.referenced_),
 916    :          offset_(other.offset_),
 917  E :          base_(other.base_) {
 918  E :    }
 919    :  
 920    :    // Accessors.
 921  E :    ReferenceType type() const { return type_; }
 922  E :    Size size() const { return size_; }
 923  E :    Block* referenced() const { return referenced_; }
 924  E :    Offset offset() const { return offset_; }
 925  E :    Offset base() const { return base_; }
 926    :  
 927    :    // Determines if this is a direct reference. That is, if the actual location
 928    :    // being referenced (offset) and the intended location being referenced (base)
 929    :    // are the same.
 930    :    //
 931    :    // @returns true if the reference is direct, false otherwise.
 932  E :    bool IsDirect() const { return base_ == offset_; }
 933    :  
 934    :    // Determines if this is a valid reference, by imposing size constraints on
 935    :    // reference types, and determining if the base address of the reference is
 936    :    // strictly contained within the referenced block.
 937    :    //
 938    :    // @returns true if valid, false otherwise.
 939    :    bool IsValid() const;
 940    :  
 941  E :    bool operator==(const Reference& other) const {
 942    :      return type_ == other.type_ &&
 943    :          size_ == other.size_ &&
 944    :          referenced_ == other.referenced_ &&
 945    :          offset_ == other.offset_ &&
 946  E :          base_ == other.base_;
 947  E :    }
 948    :  
 949    :    // The maximum size that a reference may have. This needs to be kept in sync
 950    :    // with the expectations of IsValid().
 951    :    static const size_t kMaximumSize = 4;
 952    :  
 953    :    // Returns true if the given reference type and size combination is valid.
 954    :    static bool IsValidTypeSize(ReferenceType type, Size size);
 955    :  
 956    :   private:
 957    :    // Type of this reference.
 958    :    ReferenceType type_;
 959    :  
 960    :    // Size of this reference.
 961    :    // Absolute references are always pointer wide, but PC-relative
 962    :    // references can be 1, 2 or 4 bytes wide, which affects their range.
 963    :    Size size_;
 964    :  
 965    :    // The block referenced.
 966    :    Block* referenced_;
 967    :  
 968    :    // Offset into the referenced block.
 969    :    Offset offset_;
 970    :  
 971    :    // The base of the reference, as in offset in the block. This must be a
 972    :    // location strictly within the block.
 973    :    Offset base_;
 974    :  };
 975    :  
 976    :  // Commonly used container types.
 977    :  typedef std::vector<BlockGraph::Block*> BlockVector;
 978    :  typedef std::vector<const BlockGraph::Block*> ConstBlockVector;
 979    :  
 980    :  }  // namespace block_graph
 981    :  
 982    :  #endif  // SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_

Coverage information generated Tue Jun 25 13:56:24 2013.