Coverage for /Syzygy/block_graph/block_graph.h

CoverageLines executed / instrumented / missingexe / inst / missLanguageGroup
100.0%1141140.C++source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // A block graph is a an abstract graph of blocks, each of which has an ID,
  16    :  // a type, a size and a few other properties. Each block represents either code
  17    :  // or data, and blocks can reference one another through references of various
  18    :  // types.
  19    :  //
  20    :  // The BlockGraph also stores minimum knowledge of sections (names and
  21    :  // characteristics), and each block belongs to at most one section. In this
  22    :  // sense, a BlockGraph acts as top-level division of blocks.
  23    :  
  24    :  #ifndef SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
  25    :  #define SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
  26    :  
  27    :  #include <hash_map>
  28    :  #include <map>
  29    :  #include <set>
  30    :  #include <string>
  31    :  #include <vector>
  32    :  
  33    :  #include "base/basictypes.h"
  34    :  #include "base/string_piece.h"
  35    :  #include "syzygy/common/align.h"
  36    :  #include "syzygy/core/address.h"
  37    :  #include "syzygy/core/address_space.h"
  38    :  
  39    :  namespace block_graph {
  40    :  
  41    :  // Forward declaration.
  42    :  class BlockGraphSerializer;
  43    :  
  44    :  // The invalid address can never occur in an graph, it's used as default
  45    :  // value for block addresses.
  46    :  extern const core::RelativeAddress kInvalidAddress;
  47    :  
  48    :  #define BLOCK_ATTRIBUTE_ENUM(F) \
  49    :      /* Set for functions declared non-returning. */ \
  50    :      F(NON_RETURN_FUNCTION, 0) \
  51    :      /* Set for blocks that are inferred by the decomposer. */ \
  52    :      F(GAP_BLOCK, 1) \
  53    :      /* Set for blocks that are parsed by the PEFileParser. These */ \
  54    :      /* blocks are unmovable, indivisible, etc, and have to be treated */ \
  55    :      /* specially. */ \
  56    :      F(PE_PARSED, 2) \
  57    :      /* Set for blocks that are created from section contribution */ \
  58    :      /* information. */ \
  59    :      F(SECTION_CONTRIB, 3) \
  60    :      /* This is used to indicate that a block consists purely of padding */ \
  61    :      /* data. */ \
  62    :      F(PADDING_BLOCK, 4) \
  63    :      /* Indicates blocks that contain inline assembly. */ \
  64    :      F(HAS_INLINE_ASSEMBLY, 5) \
  65    :      /* Indicates that the block was built by a compiler whose precise */ \
  66    :      /* behaviour and semantics we are unfamiliar with. */ \
  67    :      F(BUILT_BY_UNSUPPORTED_COMPILER, 6) \
  68    :      /* Indicates that the block has been built by the Syzygy toolchain, and */ \
  69    :      /* thus is inherently safe for basic-block decomposition without having */ \
  70    :      /* to perform the myriad of safety checks we do otherwise. */ \
  71    :      F(BUILT_BY_SYZYGY, 7) \
  72    :      /* This is set for blocks whose initial disassembly was incomplete. */ \
  73    :      /* This is not necessarily an error, as we see have seen blocks with */ \
  74    :      /* unreachable code, even in release mode. */ \
  75    :      F(INCOMPLETE_DISASSEMBLY, 8) \
  76    :      /* This is set for blocks whose disassembly was unable to finish due to */ \
  77    :      /* an error. This block has violated assumptions that we make or */ \
  78    :      /* conventions that we have observed the compiler to use. It is not safe */\
  79    :      /* for basic block disassembly. */ \
  80    :      F(ERRORED_DISASSEMBLY, 9) \
  81    :      /* This is set for functions that have exception handling enabled. */ \
  82    :      /* Without delving far deeper into the specifics, it is unsafe to basic */ \
  83    :      /* block decompose these blocks. */ \
  84    :      F(HAS_EXCEPTION_HANDLING, 10) \
  85    :      /* This is set for blocks whose disassembly went off the end of the */ \
  86    :      /* block, or into data. These blocks have control flow that we are not */ \
  87    :      /* aware of, or are otherwise malformed. */ \
  88    :      F(DISASSEMBLED_PAST_END, 11) \
  89    :      /* This always needs to be set to the next available attribute bit. */ \
  90    :      F(BLOCK_ATTRIBUTES_MAX, 12)
  91    :  
  92    :  
  93    :  // The BlockGraph is a top-level container for Blocks.
  94    :  class BlockGraph {
  95    :   public:
  96    :    typedef core::RelativeAddress RelativeAddress;
  97    :  
  98    :    typedef size_t SectionId;
  99    :    typedef size_t BlockId;
 100    :    typedef size_t Size;
 101    :    typedef ptrdiff_t Offset;
 102    :    typedef uint32 BlockAttributes;
 103    :    typedef uint32 LabelAttributes;
 104    :  
 105    :    // The BlockGraph maintains a list of sections, and each block belongs
 106    :    // to one of them. This is the set of information we keep regarding them.
 107    :    struct Section;
 108    :    // The section map contains all sections, indexed by id.
 109    :    typedef std::map<SectionId, Section> SectionMap;
 110    :  
 111    :    static const SectionId kInvalidSectionId;
 112    :  
 113    :    enum BlockAttributeEnum {
 114    :  #define DECLARE_ENUM(name, bit) name = (1 << bit),
 115    :      BLOCK_ATTRIBUTE_ENUM(DECLARE_ENUM)
 116    :  #undef DECLARE_ENUM
 117    :    };
 118    :  
 119    :    // Returns a string containing the names of the supplied attributes.
 120    :    static std::string BlockAttributesToString(BlockAttributes attrs);
 121    :  
 122    :    enum BlockType {
 123    :      CODE_BLOCK,
 124    :      DATA_BLOCK,
 125    :  
 126    :      // NOTE: This must always be last, and kBlockType must be kept in sync
 127    :      // with this enum.
 128    :      BLOCK_TYPE_MAX
 129    :    };
 130    :  
 131    :    static const char* BlockTypeToString(BlockType type);
 132    :  
 133    :    // Label attributes. Attributes of the form _END_LABEL type actually
 134    :    // point to the first byte past the range they delineate. To make the
 135    :    // semantics of moving labels easier, we shift these labels left by one and
 136    :    // make them follow the last byte of the delineated range.
 137    :    enum LabelAttributesEnum {
 138    :      // The label points to an entry-point in a code block.
 139    :      CODE_LABEL = (1 << 0),
 140    :  
 141    :      // Mark the start and end of the debuggable portion of a code block.
 142    :      DEBUG_START_LABEL = (1 << 1),
 143    :      DEBUG_END_LABEL = (1 << 2),
 144    :  
 145    :      // Mark the start and end of an embedded scope in a code block.
 146    :      SCOPE_START_LABEL = (1 << 3),
 147    :      SCOPE_END_LABEL = (1 << 4),
 148    :  
 149    :      // Marks the location of a (virtual table?) call.
 150    :      CALL_SITE_LABEL = (1 << 5),
 151    :  
 152    :      // The label points to the start of a jump table. The length is inferred
 153    :      // by the location of the next label, or the end of the block. This will
 154    :      // also have DATA_LABEL set.
 155    :      JUMP_TABLE_LABEL = (1 << 6),
 156    :      // The label points to the start of a case table. The length is inferred
 157    :      // by the location of the next label, or the end of the block. This will
 158    :      // also have DATA_LABEL set.
 159    :      CASE_TABLE_LABEL = (1 << 7),
 160    :      // The label originated from a data symbol. The length is inferred by the
 161    :      // location of the next label, or the end of the block. The type of data
 162    :      // is unknown.
 163    :      DATA_LABEL = (1 << 8),
 164    :  
 165    :      // This always needs to be the most significant bit.
 166    :      LABEL_ATTRIBUTES_MAX = (1 << 9),
 167    :    };
 168    :  
 169    :    static std::string LabelAttributesToString(LabelAttributes label_attributes);
 170    :  
 171    :    enum ReferenceType {
 172    :      PC_RELATIVE_REF,
 173    :      ABSOLUTE_REF,
 174    :      RELATIVE_REF,
 175    :      FILE_OFFSET_REF,
 176    :      // Must be last!
 177    :      REFERENCE_TYPE_MAX,
 178    :    };
 179    :  
 180    :    // Forward declarations.
 181    :    class AddressSpace;
 182    :    class Block;
 183    :    class Label;
 184    :    class Reference;
 185    :  
 186    :    // The block map contains all blocks, indexed by id.
 187    :    typedef std::map<BlockId, Block> BlockMap;
 188    :  
 189    :    BlockGraph();
 190    :    ~BlockGraph();
 191    :  
 192    :    // Adds a section with the given name.
 193    :    //
 194    :    // @param name The section name.
 195    :    // @param characteristics The section characteristics.
 196    :    // @returns the newly created section.
 197    :    Section* AddSection(const base::StringPiece& name, uint32 characteristics);
 198    :  
 199    :    // Finds a section with the given name, returning NULL if no such section
 200    :    // exists.
 201    :    //
 202    :    // @param name The section name.
 203    :    // @returns the section if one is found, NULL otherwise.
 204    :    Section* FindSection(const base::StringPiece& name);
 205    :  
 206    :    // Find or adds section with the given name.
 207    :    //
 208    :    // If a section with the given name already exists, updates its
 209    :    // characteristics and returns it. Otherwise, creates a new section and
 210    :    // returns it. If multiple sections exist with the given name, the first
 211    :    // one encountered is returned.
 212    :    //
 213    :    // TODO(chrisha): The semantics of this function are a little odd. It would
 214    :    //     make more sense for it to return only if a section with matching name
 215    :    //     AND characteristics is found, otherwise to create a new one.
 216    :    //
 217    :    // @param name The section name.
 218    :    // @param characteristics The section characteristics.
 219    :    // @returns the new or found section.
 220    :    Section* FindOrAddSection(const base::StringPiece& name,
 221    :                              uint32 characteristics);
 222    :  
 223    :    // Removes the given section from the BlockGraph.
 224    :    //
 225    :    // The section must belong to this block graph. Be aware that this can leave
 226    :    // Blocks with dangling section_ids.
 227    :    //
 228    :    // @param section The section to remove.
 229    :    // @returns true on success, false otherwise.
 230    :    bool RemoveSection(Section* section);
 231    :  
 232    :    // Removes the section with the given id from the BlockGraph.
 233    :    //
 234    :    // @param section The id of the section to remove.
 235    :    // @returns true on success, false otherwise.
 236    :    bool RemoveSectionById(SectionId id);
 237    :  
 238    :    // Add @p block of type @p type and @p size and
 239    :    // return the new block.
 240    :    // @returns the new block.
 241    :    Block* AddBlock(BlockType type, Size size, const base::StringPiece& name);
 242    :  
 243    :    // Deletes the given block from the BlockGraph. The block must belong to this
 244    :    // block graph, and have no references or referrers. Returns true on success,
 245    :    // false otherwise. On failure, the BlockGraph has not been changed.
 246    :    bool RemoveBlock(Block* block);
 247    :  
 248    :    // Deletes the block with the given @p id from the block graph. The block id
 249    :    // must be valid, and the block must have no references or referrers. Returns
 250    :    // true on success, false otherwise. On failure, the BlockGraph has not been
 251    :    // changed.
 252    :    bool RemoveBlockById(BlockId id);
 253    :  
 254    :    // Accessors.
 255  E :    const SectionMap& sections() const { return sections_; }
 256  E :    SectionMap& sections_mutable() { return sections_; }
 257  E :    const BlockMap& blocks() const { return blocks_; }
 258  E :    BlockMap& blocks_mutable() { return blocks_; }
 259    :  
 260    :    // @{
 261    :    // Retrieve the section with the given id.
 262    :    //
 263    :    // @param id The id of the section to retrieve.
 264    :    // @returns the section in question or NULL if no such section.
 265    :    Section* GetSectionById(SectionId id);
 266    :    const Section* GetSectionById(SectionId id) const;
 267    :    // @}
 268    :  
 269    :    // @{
 270    :    // Retrieve the block with id.
 271    :    // @returns the block in question or NULL if no such block.
 272    :    Block* GetBlockById(BlockId id);
 273    :    const Block* GetBlockById(BlockId id) const;
 274    :    // @}
 275    :  
 276    :   private:
 277    :    // Give BlockGraphSerializer access to our innards for serialization.
 278    :    friend BlockGraphSerializer;
 279    :  
 280    :    // Removes a block by the iterator to it. The iterator must be valid.
 281    :    bool RemoveBlockByIterator(BlockMap::iterator it);
 282    :  
 283    :    // All sections we contain.
 284    :    SectionMap sections_;
 285    :  
 286    :    // Our section ID allocator.
 287    :    SectionId next_section_id_;
 288    :  
 289    :    // All blocks we contain.
 290    :    BlockMap blocks_;
 291    :  
 292    :    // Our block ID allocator.
 293    :    BlockId next_block_id_;
 294    :  };
 295    :  
 296    :  // The BlockGraph maintains a list of sections, and each block belongs
 297    :  // to one of them. This is the set of information we keep regarding them.
 298    :  struct BlockGraph::Section {
 299    :    // Default constructor. Required for serialization.
 300  E :    Section() : id_(kInvalidSectionId), characteristics_(0) {
 301  E :    }
 302    :  
 303    :    // Full constructor.
 304    :    //
 305    :    // @param id The section id. This must not be kInvalidSectionId.
 306    :    // @param name The name of the section. Must not be empty or NULL.
 307    :    // @param characteristics The characteristics of the section.
 308  E :    Section(SectionId id, const base::StringPiece& name, uint32 characteristics)
 309    :        : id_(id), name_(), characteristics_(characteristics) {
 310  E :      DCHECK_NE(kInvalidSectionId, id);
 311  E :      DCHECK(name != NULL);
 312  E :      name.CopyToString(&name_);
 313  E :      DCHECK(!name_.empty());
 314  E :    }
 315    :  
 316    :    // Get the id of this section.
 317    :    //
 318    :    // @returns the id of the section.
 319  E :    SectionId id() const { return id_; }
 320    :  
 321    :    // Get the name of this section.
 322    :    //
 323    :    // @returns the section name.
 324  E :    const std::string& name() const { return name_; }
 325    :  
 326    :    // Sets the name for this section.
 327    :    //
 328    :    // @param name The name of the section. If NULL or empty, this will fail.
 329    :    // @returns true if the name is set, false otherwise.
 330    :    bool set_name(const base::StringPiece& name);
 331    :  
 332    :    // Get the characteristics of this section.
 333    :    //
 334    :    // @returns the section characteristics.
 335  E :    uint32 characteristics() const { return characteristics_; }
 336    :  
 337    :    // Sets the characteristics for this section.
 338    :    //
 339    :    // @param characteristics The new characteristics to set.
 340  E :    void set_characteristics(uint32 characteristics) {
 341  E :      characteristics_ = characteristics;
 342  E :    }
 343    :  
 344    :    // Sets a one or more additional characteristics for this section.
 345    :    //
 346    :    // @param characteristic The new characteristic(s) to set for this section.
 347  E :    void set_characteristic(uint32 characteristic) {
 348  E :      characteristics_ |= characteristic;
 349  E :    }
 350    :  
 351    :    // Clears one or more characteristics for this section.
 352    :    //
 353    :    // @param characteristic The characteristic(s) to clear for this section.
 354  E :    void clear_characteristic(uint32 characteristic) {
 355  E :      characteristics_ &= ~characteristic;
 356  E :    }
 357    :  
 358    :    // @name Serialization functions.
 359    :    // @{
 360    :    bool Save(core::OutArchive* out_archive) const;
 361    :    bool Load(core::InArchive* in_archive);
 362    :    // @}
 363    :  
 364    :    // A simple comparison operator for serialization tests.
 365  E :    bool operator==(const Section& other) const {
 366    :      return id_ == other.id_ && name_ == other.name_ &&
 367  E :          characteristics_ == other.characteristics_;
 368  E :    }
 369    :  
 370    :    // A not-equal comparison operator.
 371  E :    bool operator!=(const Section& other) const {
 372  E :      return !operator==(other);
 373  E :    }
 374    :  
 375    :   private:
 376    :    // The id of the section. This has no particular meaning other than as a way
 377    :    // to identify sections uniquely.
 378    :    SectionId id_;
 379    :    // The name of the section. This will be truncated to a max of 8 characters
 380    :    // on output.
 381    :    std::string name_;
 382    :    // The section characteristics, a bitmask of IMAGE_SCN_* values.
 383    :    uint32 characteristics_;
 384    :  };
 385    :  
 386    :  // A label denotes the beginning (or end) of a sub-region within a (code)
 387    :  // block. In particular, a code label represents an instruction boundary
 388    :  // at which disassembly can begin and a data label represents the beginning
 389    :  // of embedded data.
 390    :  class BlockGraph::Label {
 391    :   public:
 392    :    // Default constructor.
 393  E :    Label() : attributes_(0) {
 394  E :    }
 395    :  
 396    :    // Full constructor.
 397  E :    Label(const base::StringPiece& name, LabelAttributes attributes)
 398    :        : name_(name.begin(), name.end()), attributes_(attributes) {
 399  E :    }
 400    :  
 401    :    // @name Accessors.
 402    :    // @{
 403  E :    const std::string& name() const { return name_; }
 404    :    // @}
 405    :  
 406    :    // A helper function for logging and debugging.
 407    :    std::string ToString() const;
 408    :  
 409    :    // Equality comparator for unittesting.
 410  E :    bool operator==(const Label& other) const {
 411  E :      return name_ == other.name_ && attributes_ == other.attributes_;
 412  E :    }
 413    :  
 414    :    // The label attributes are a bitmask. You can set them wholesale,
 415    :    // or set and clear them individually by bitmasking.
 416  E :    LabelAttributes attributes() const { return attributes_; }
 417  E :    void set_attributes(LabelAttributes attributes) { attributes_ = attributes; }
 418    :  
 419    :    // Set or clear one or more attributes.
 420  E :    void set_attribute(LabelAttributes attribute) { attributes_ |= attribute; }
 421  E :    void clear_attribute(LabelAttributes attribute) { attributes_ &= ~attribute; }
 422    :  
 423    :    // Determines if all or any of the given attributes are set.
 424  E :    bool has_attributes(LabelAttributes attributes) const {
 425  E :      return (attributes_ & attributes) == attributes;
 426  E :    }
 427  E :    bool has_any_attributes(LabelAttributes attributes) const {
 428  E :      return (attributes_ & attributes) != 0;
 429  E :    }
 430    :  
 431    :    // @returns true if this label is valid, false otherwise.
 432    :    bool IsValid() const;
 433    :  
 434    :    // Tests a set of label attributes for validity.
 435    :    // @param attributes the attributes to test.
 436    :    // @returns true if the provided attributes are valid, false otherwise.
 437    :    static bool AreValidAttributes(LabelAttributes attributes);
 438    :  
 439    :   private:
 440    :    // The name by which this label is known.
 441    :    std::string name_;
 442    :  
 443    :    // The disposition of the bytes found at this label.
 444    :    LabelAttributes attributes_;
 445    :  };
 446    :  
 447    :  
 448    :  // A block represents a block of either code or data.
 449    :  //
 450    :  // Since blocks may be split and up and glued together in arbitrary ways, each
 451    :  // block maintains an address-space over its data, associating ranges of block
 452    :  // data to ranges of bytes in the original image. This effectively encodes OMAP
 453    :  // data, allowing the PDB file to be updated.
 454    :  //
 455    :  // Each block also stores references to other blocks in the graph, their
 456    :  // relative location within the block and their type and size.
 457    :  //
 458    :  // Each block has a set of attributes, including a size, a name and a
 459    :  // "current" address. Most of those attributes are mutable, and are set in the
 460    :  // process of creating and manipulating images and graph address spaces.
 461    :  class BlockGraph::Block {
 462    :   public:
 463    :    // Set of the blocks that have a reference to this block.
 464    :    // This is keyed on block and source offset (not destination offset),
 465    :    // to allow one to easily locate and remove the backreferences on change or
 466    :    // deletion.
 467    :    typedef std::pair<Block*, Offset> Referrer;
 468    :    typedef std::set<Referrer> ReferrerSet;
 469    :  
 470    :    // Map of references that this block makes to other blocks.
 471    :    typedef std::map<Offset, Reference> ReferenceMap;
 472    :  
 473    :    // Represents a range of data in this block.
 474    :    typedef core::AddressRange<Offset, Size> DataRange;
 475    :  
 476    :    // Represents a range of data in the original image.
 477    :    typedef core::AddressRange<RelativeAddress, Size> SourceRange;
 478    :  
 479    :    // A map between bytes in this block and bytes in the original image.
 480    :    typedef core::AddressRangeMap<DataRange, SourceRange> SourceRanges;
 481    :  
 482    :    // Typed labels associated with various offsets in the block. Some of these
 483    :    // labels (of type CODE_LABEL) represent code start points for disassembly
 484    :    // while others (of type DATA_LABEL) represent the start of embedded data
 485    :    // within the block. Note that, while possible, it is NOT guaranteed that
 486    :    // all basic blocks are marked with a label. Basic block decomposition should
 487    :    // disassemble from the code labels to discover all basic blocks.
 488    :    typedef std::map<Offset, Label> LabelMap;
 489    :  
 490    :    // Blocks need to be default constructible for serialization.
 491    :    Block();
 492    :  
 493    :    Block(BlockId id,
 494    :          BlockType type,
 495    :          Size size,
 496    :          const base::StringPiece& name);
 497    :    ~Block();
 498    :  
 499    :    // Accessors.
 500  E :    BlockId id() const { return id_; }
 501  E :    BlockType type() const { return type_; }
 502  E :    void set_type(BlockType type) { type_ = type; }
 503    :  
 504  E :    Size size() const { return size_; }
 505    :  
 506    :    // Set the total size of the block. Note that allocated data_size_ must
 507    :    // always be less than or equal to the total size.
 508  E :    void set_size(Size size) {
 509  E :      DCHECK_LE(data_size_, size);
 510  E :      size_ = size;
 511  E :    }
 512    :  
 513  E :    const std::string& name() const { return name_; }
 514  E :    void set_name(const base::StringPiece& name) { name.CopyToString(&name_); }
 515    :  
 516  E :    Size alignment() const { return alignment_; }
 517  E :    void set_alignment(Size alignment) {
 518    :      // Ensure that alignment is a non-zero power of two.
 519  E :      DCHECK(common::IsPowerOfTwo(alignment));
 520  E :      alignment_ = alignment;
 521  E :    }
 522    :  
 523    :    // The address of the block is set any time the block is assigned
 524    :    // an address in an address space.
 525  E :    RelativeAddress addr() const { return addr_; }
 526  E :    void set_addr(RelativeAddress addr) { return addr_ = addr; }
 527    :  
 528    :    // The section ID for the block. These IDs are wrt to the SectionMap in the
 529    :    // parent BlockGraph.
 530  E :    SectionId section() const { return section_; }
 531  E :    void set_section(SectionId section) { section_ = section; }
 532    :  
 533    :    // The block attributes are a bitmask. You can set them wholesale,
 534    :    // or set and clear them individually by bitmasking.
 535  E :    BlockAttributes attributes() const { return attributes_; }
 536  E :    void set_attributes(BlockAttributes attributes) { attributes_ = attributes; }
 537    :  
 538    :    // Set or clear one or more attributes.
 539  E :    void set_attribute(BlockAttributes attribute) { attributes_ |= attribute; }
 540  E :    void clear_attribute(BlockAttributes attribute) {
 541  E :      attributes_ &= ~attribute;
 542  E :    }
 543    :  
 544    :    // This is true iff data_ is in the ownership of the block.
 545    :    // Iff true, the block will delete [] data_ on destruction or when
 546    :    // data is overwritten.
 547  E :    bool owns_data() const { return owns_data_; }
 548    :  
 549    :    // Makes room for the given amount of data at the given offset. This is
 550    :    // special in that it will patch up any labels, source ranges and referrers
 551    :    // that land beyond the newly created data, shifting them to the right by
 552    :    // @p size. If the data for this block is actually allocated it will also
 553    :    // patch up the allocated data by zeroing the newly allocate range of data,
 554    :    // and shifting the tail by @p size. If the new data is strictly implicit
 555    :    // (offset > data_size), then the allocated data is not affected in any way
 556    :    // unless @p always_allocate_data is true.
 557    :    //
 558    :    // @param offset the offset at which to insert the new data.
 559    :    // @param size the size of the new data to be inserted.
 560    :    // @param always_allocate_data if true, then data_size will be grown if
 561    :    //     necessary to ensure that the newly created data can be written.
 562    :    // @pre 0 <= offset <= size()
 563    :    void InsertData(Offset offset, Size size, bool always_allocate_data);
 564    :  
 565    :    // Removes the data in the given range. This will refuse to remove labels,
 566    :    // references and referrers that land in the range, and will fail if any
 567    :    // exist. It will also shift any labels, references and referrers that land
 568    :    // beyond the end of the removed range. Source ranges will also be fixed. If
 569    :    // the removed range lies within the initialized data then the data will also
 570    :    // be truncated/shifted as necessary.
 571    :    //
 572    :    // @param offset the offset at which to remove data.
 573    :    // @param size the size of the data to remove, in bytes.
 574    :    // @returns true on success, false otherwise.
 575    :    // @pre 0 <= offset <= size
 576    :    bool RemoveData(Offset offset, Size size);
 577    :  
 578    :    // Performs an inline resize of data in a BlockGraph. If the data is shrinking
 579    :    // this equates to a RemoveData operation. If it is growing it equates to an
 580    :    // InsertData operation.
 581    :    //
 582    :    // @param offset the offset of the data to resize.
 583    :    // @param current_size the current size of the data to resize.
 584    :    // @param new_size the desired size of the data.
 585    :    // @param always_allocate_data if true, then data_size will be grown if
 586    :    //     necessary to ensure that the resized data can be written.
 587    :    // @returns true on success, false otherwise.
 588    :    // @pre 0 <= offset <= size
 589    :    bool InsertOrRemoveData(Offset offset, Size current_size, Size new_size,
 590    :                            bool always_allocate_data);
 591    :  
 592    :    // Set the data the block refers to.
 593    :    // @param data NULL or the data this block refers to.
 594    :    //     The underlying data must outlive this block.
 595    :    // @param data_size the size of data, or zero if data == NULL.
 596    :    // @pre data_size <= size().
 597    :    void SetData(const uint8* data, size_t data_size);
 598    :  
 599    :    // Allocates and returns a new data buffer of the given size. The returned
 600    :    // data will have been initialized to zero.
 601    :    // @pre data_size <= size().
 602    :    uint8* AllocateData(size_t data_size);
 603    :  
 604    :    // Makes a copy of data, returns a pointer to the copy.
 605    :    // @pre data_size <= size().
 606    :    uint8* CopyData(size_t data_size, const void* data);
 607    :  
 608    :    // Resizes data to new_size by truncating or zero-extending the current data.
 609    :    // @pre new_size <= size().
 610    :    const uint8* ResizeData(size_t new_size);
 611    :  
 612    :    // Returns a mutable copy of the block's data. If the block doesn't own
 613    :    // the data on entry, it'll be copied and the copy returned to the caller.
 614    :    uint8* GetMutableData();
 615    :  
 616    :    // The data bytes the block refers to.
 617  E :    const uint8* data() const { return data_; }
 618    :  
 619    :    // The data size may be smaller than the block size (see size()),
 620    :    // when the block e.g. refers to data that's all or part
 621    :    // zero-initialized by the linker/loader.
 622  E :    size_t data_size() const { return data_size_; }
 623    :  
 624  E :    const ReferenceMap& references() const { return references_; }
 625  E :    const ReferrerSet& referrers() const { return referrers_; }
 626  E :    const SourceRanges& source_ranges() const { return source_ranges_; }
 627  E :    SourceRanges& source_ranges() { return source_ranges_; }
 628  E :    const LabelMap& labels() const { return labels_; }
 629    :  
 630    :    // Returns true if there are any other bocks holding a reference to this one.
 631    :    bool HasExternalReferrers() const;
 632    :  
 633    :    // Set the reference at @p offset to @p ref.
 634    :    // If there's a pre-existing reference at @p offset, this overrides it.
 635    :    // @param offset offset of the reference into this block.
 636    :    // @param ref the reference to add.
 637    :    // @returns true iff this inserts a new reference.
 638    :    bool SetReference(Offset offset, const Reference& ref);
 639    :  
 640    :    // Retrieve the reference at @p offset if one exists.
 641    :    // @param reference on success returns the reference @p offset.
 642    :    // @returns true iff there was a reference at @p offset.
 643    :    bool GetReference(Offset offset, Reference* reference) const;
 644    :  
 645    :    // Remove the reference at @p offset.
 646    :    // @returns true iff there was a reference at @p offset.
 647    :    bool RemoveReference(Offset offset);
 648    :  
 649    :    // Remove all references from this block. This is handy when removing a block
 650    :    // from the block graph.
 651    :    bool RemoveAllReferences();
 652    :  
 653    :    // Set a label to @p offset.
 654    :    // A label in code marks the location of the start of an instruction -
 655    :    // e.g. a location where disassembly can usefully commence. Labels
 656    :    // appear to be inserted by the VS tool chain where e.g. a switch
 657    :    // statement is implemented with a jump table, to note the location
 658    :    // of the jump destinations.
 659    :    // @param offset the offset of the label to set.
 660    :    // @param name the name of the label.
 661    :    // @param attributes the attributes of the label.
 662    :    // @returns true iff a new label is inserted.
 663    :    // @note that only one label can exist at each offset, and the first
 664    :    //     label set at any offset will stay there.
 665    :    // @{
 666    :    bool SetLabel(Offset offset, const Label& label);
 667    :    bool SetLabel(Offset offset,
 668    :                  const base::StringPiece& name,
 669  E :                  LabelAttributes attributes) {
 670  E :      return SetLabel(offset, Label(name, attributes));
 671  E :    }
 672    :    // @}
 673    :  
 674    :    // Gets the label at the given @p offset.
 675    :    // @param offset the offset of the label to get.
 676    :    // @param label the string to receive the label.
 677    :    // @return true if the label exists, false otherwise.
 678    :    bool GetLabel(Offset offset, Label* label) const;
 679    :  
 680    :    // Removes the label at the given @p offset.
 681    :    // @param offset the offset of the label to remove.
 682    :    // @return true if the label existed and was removed, false it it did not
 683    :    //     exist.
 684    :    bool RemoveLabel(Offset offset);
 685    :  
 686    :    // Returns true iff the block has a label at @p offset.
 687    :    // @param offset the offset of the label to search for.
 688    :    bool HasLabel(Offset offset);
 689    :  
 690    :    // Change all references to this block to refer to @p new_block instead,
 691    :    // while offsetting each reference by @p offset.
 692    :    // @note this fails if any of the transferred references end up with offsets
 693    :    //     less than zero, or greater than new_block->size().
 694    :    // @returns true iff all references were transferred successfully.
 695    :    bool TransferReferrers(Offset offset, Block* new_block);
 696    :  
 697    :    // Returns true if this block contains the given range of bytes.
 698    :    bool Contains(RelativeAddress address, size_t size) const;
 699    :  
 700    :   protected:
 701    :    // Give BlockGraph access to our innards for serialization.
 702    :    friend class BlockGraph;
 703    :    // Give BlockGraphSerializer access to our innards for serialization.
 704    :    friend class BlockGraphSerializer;
 705    :  
 706    :    // Allocates and returns a new data buffer of the given size. The returned
 707    :    // data buffer will not have been initialized in any way.
 708    :    uint8* AllocateRawData(size_t size);
 709    :  
 710    :    BlockId id_;
 711    :    BlockType type_;
 712    :    Size size_;
 713    :    Size alignment_;
 714    :    std::string name_;
 715    :    RelativeAddress addr_;
 716    :  
 717    :    SectionId section_;
 718    :    BlockAttributes attributes_;
 719    :  
 720    :    ReferenceMap references_;
 721    :    ReferrerSet referrers_;
 722    :    SourceRanges source_ranges_;
 723    :    LabelMap labels_;
 724    :  
 725    :    // True iff data_ is ours to deallocate with delete [].
 726    :    // If this is false, data_ must be guaranteed to outlive the block.
 727    :    bool owns_data_;
 728    :    // A pointer to the code or data we represent.
 729    :    const uint8* data_;
 730    :    // Size of the above.
 731    :    size_t data_size_;
 732    :  };
 733    :  
 734    :  // A graph address space endows a graph with a non-overlapping ordering
 735    :  // on blocks, where each block occupies zero or one address ranges in the
 736    :  // address space. No two blocks may overlap in an address space.
 737    :  class BlockGraph::AddressSpace {
 738    :   public:
 739    :    typedef core::AddressSpace<RelativeAddress, BlockGraph::Size, Block*>
 740    :        AddressSpaceImpl;
 741    :    typedef AddressSpaceImpl::Range Range;
 742    :    typedef AddressSpaceImpl::RangeMap RangeMap;
 743    :    typedef AddressSpaceImpl::RangeMapIter RangeMapIter;
 744    :    typedef AddressSpaceImpl::RangeMapConstIter RangeMapConstIter;
 745    :    typedef AddressSpaceImpl::RangeMapIterPair RangeMapIterPair;
 746    :    typedef AddressSpaceImpl::RangeMapConstIterPair RangeMapConstIterPair;
 747    :  
 748    :    // Constructs a new empty address space.
 749    :    // @p start to @p start + @p size on @p graph.
 750    :    explicit AddressSpace(BlockGraph* graph);
 751    :  
 752    :    // Add a block of type @p type and @p size at @p address to our associated
 753    :    // graph, and return the new block.
 754    :    // @returns the new block, or NULL if the new block would overlap
 755    :    //     an existing block.
 756    :    Block* AddBlock(BlockType type,
 757    :                    RelativeAddress addr,
 758    :                    Size size,
 759    :                    const base::StringPiece& name);
 760    :  
 761    :    // Merges all blocks that intersect @p range into a single block.
 762    :    // Moves labels and references from the intersecting blocks to the
 763    :    // merged block, and changes referring blocks to refer to the new,
 764    :    // merged block. Removes the original blocks from the BlockGraph.
 765    :    // @returns the new, merged block if there was at least one intersecting
 766    :    //     block in @p range, or NULL otherwise.
 767    :    Block* MergeIntersectingBlocks(const Range& range);
 768    :  
 769    :    // Insert existing block @p block at @p address.
 770    :    // @returns true on success, or false if the @p block would overlap
 771    :    //     an existing block.
 772    :    bool InsertBlock(RelativeAddress addr, Block* block);
 773    :  
 774    :    // Returns a pointer to the block containing address, or NULL
 775    :    // if no block contains address.
 776    :    Block* GetBlockByAddress(RelativeAddress addr) const;
 777    :  
 778    :    // Returns a pointer to the block containing the address range
 779    :    // [address, address + size), or NULL if no block contains that
 780    :    // range.
 781    :    Block* GetContainingBlock(RelativeAddress addr, Size size) const;
 782    :  
 783    :    // Finds the first block, if any that intersects
 784    :    // [@p address, @p address + @p size).
 785    :    Block* GetFirstIntersectingBlock(RelativeAddress address, Size size);
 786    :  
 787    :    // Check whether the address space contains @p block.
 788    :    // @param block the block in question.
 789    :    // @returns true if the block is in the address space, false otherwise.
 790    :    bool ContainsBlock(const Block* block);
 791    :  
 792    :    // Locates all blocks that intersect [@p address, @p address + @p size).
 793    :    // @returns a pair of iterators that iterate over the found blocks.
 794    :    RangeMapConstIterPair GetIntersectingBlocks(RelativeAddress address,
 795    :                                                Size size) const;
 796    :    RangeMapIterPair GetIntersectingBlocks(RelativeAddress address, Size size);
 797    :  
 798    :    // Retrieve the address off @p block.
 799    :    // @param block the block in question.
 800    :    // @param addr on success, returns the address of @p block in this
 801    :    //     address space.
 802    :    // @returns true on success, false if @p block is not in this
 803    :    //     address space.
 804    :    bool GetAddressOf(const Block* block, RelativeAddress* addr) const;
 805    :  
 806    :    // Accessor.
 807  E :    BlockGraph* graph() { return graph_; }
 808  E :    const BlockGraph* graph() const { return graph_; }
 809    :  
 810  E :    RangeMapConstIter begin() const {
 811  E :      return address_space_.ranges().begin();
 812  E :    }
 813    :  
 814  E :    RangeMapConstIter end() const {
 815  E :      return address_space_.ranges().end();
 816  E :    }
 817    :  
 818  E :    size_t size() const {
 819  E :      return address_space_.ranges().size();
 820  E :    }
 821    :  
 822  E :    const AddressSpaceImpl& address_space_impl() const {
 823  E :      return address_space_;
 824  E :    }
 825    :  
 826    :   private:
 827    :    bool InsertImpl(RelativeAddress addr, Block* block);
 828    :  
 829    :    typedef stdext::hash_map<const Block*, RelativeAddress> BlockAddressMap;
 830    :  
 831    :    AddressSpaceImpl address_space_;
 832    :    BlockAddressMap block_addresses_;
 833    :    BlockGraph* graph_;
 834    :  };
 835    :  
 836    :  // Represents a reference from one block to another. References may be offset.
 837    :  // That is, they may refer to an object at a given location, but actually point
 838    :  // to a location that is some fixed distance away from that object. This allows,
 839    :  // for example, non-zero based indexing into a table. The object that is
 840    :  // intended to be dereferenced is called the 'base' of the offset.
 841    :  //
 842    :  // BlockGraph references are from a location (offset) in one block, to some
 843    :  // location in another block. The referenced block itself plays the role of the
 844    :  // 'base' of the reference, with the offset of the reference being stored as
 845    :  // an integer from the beginning of the block. However, basic block
 846    :  // decomposition requires breaking the block into smaller pieces and thus we
 847    :  // need to carry around an explicit base value, indicating which byte in the
 848    :  // block is intended to be referenced.
 849    :  //
 850    :  // A direct reference to a location will have the same value for 'base' and
 851    :  // 'offset'.
 852    :  //
 853    :  // Here is an example:
 854    :  //
 855    :  //        /----------\
 856    :  //        +---------------------------+
 857    :  //  O     |          B                | <--- Referenced block
 858    :  //        +---------------------------+      B = base
 859    :  //  \-----/                                  O = offset
 860    :  //
 861    :  class BlockGraph::Reference {
 862    :   public:
 863    :    Reference() :
 864  E :        type_(RELATIVE_REF), size_(0), referenced_(NULL), offset_(0), base_(0) {
 865  E :    }
 866    :  
 867    :    // @param type type of reference.
 868    :    // @param size size of reference.
 869    :    // @param referenced the referenced block.
 870    :    // @param offset offset from the beginning of the block of the location to be
 871    :    //     explicitly referred to.
 872    :    // @param base offset into the block of the location actually being
 873    :    //     referenced. This must be strictly within @p referenced.
 874    :    Reference(ReferenceType type,
 875    :              Size size,
 876    :              Block* referenced,
 877    :              Offset offset,
 878    :              Offset base)
 879    :        : type_(type),
 880    :          size_(size),
 881    :          referenced_(referenced),
 882    :          offset_(offset),
 883  E :          base_(base) {
 884  E :      DCHECK(IsValid());
 885  E :    }
 886    :  
 887    :    // Copy constructor.
 888    :    Reference(const Reference& other)
 889    :        : type_(other.type_),
 890    :          size_(other.size_),
 891    :          referenced_(other.referenced_),
 892    :          offset_(other.offset_),
 893  E :          base_(other.base_) {
 894  E :    }
 895    :  
 896    :    // Accessors.
 897  E :    ReferenceType type() const { return type_; }
 898  E :    Size size() const { return size_; }
 899  E :    Block* referenced() const { return referenced_; }
 900  E :    Offset offset() const { return offset_; }
 901  E :    Offset base() const { return base_; }
 902    :  
 903    :    // Determines if this is a direct reference. That is, if the actual location
 904    :    // being referenced (offset) and the intended location being referenced (base)
 905    :    // are the same.
 906    :    //
 907    :    // @returns true if the reference is direct, false otherwise.
 908  E :    bool IsDirect() const { return base_ == offset_; }
 909    :  
 910    :    // Determines if this is a valid reference, by imposing size constraints on
 911    :    // reference types, and determining if the base address of the reference is
 912    :    // strictly contained within the referenced block.
 913    :    //
 914    :    // @returns true if valid, false otherwise.
 915    :    bool IsValid() const;
 916    :  
 917  E :    bool operator==(const Reference& other) const {
 918    :      return type_ == other.type_ &&
 919    :          size_ == other.size_ &&
 920    :          referenced_ == other.referenced_ &&
 921    :          offset_ == other.offset_ &&
 922  E :          base_ == other.base_;
 923  E :    }
 924    :  
 925    :    // The maximum size that a reference may have. This needs to be kept in sync
 926    :    // with the expectations of IsValid().
 927    :    static const size_t kMaximumSize = 4;
 928    :  
 929    :    // Returns true if the given reference type and size combination is valid.
 930    :    static bool IsValidTypeSize(ReferenceType type, Size size);
 931    :  
 932    :   private:
 933    :    // Type of this reference.
 934    :    ReferenceType type_;
 935    :  
 936    :    // Size of this reference.
 937    :    // Absolute references are always pointer wide, but PC-relative
 938    :    // references can be 1, 2 or 4 bytes wide, which affects their range.
 939    :    Size size_;
 940    :  
 941    :    // The block referenced.
 942    :    Block* referenced_;
 943    :  
 944    :    // Offset into the referenced block.
 945    :    Offset offset_;
 946    :  
 947    :    // The base of the reference, as in offset in the block. This must be a
 948    :    // location strictly within the block.
 949    :    Offset base_;
 950    :  };
 951    :  
 952    :  // Commonly used container types.
 953    :  typedef std::vector<BlockGraph::Block*> BlockVector;
 954    :  typedef std::vector<const BlockGraph::Block*> ConstBlockVector;
 955    :  
 956    :  }  // namespace block_graph
 957    :  
 958    :  #endif  // SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_

Coverage information generated Thu Sep 06 11:30:46 2012.