Coverage for /Syzygy/block_graph/block_graph.h

CoverageLines executed / instrumented / missingexe / inst / missLanguageGroup
100.0%1161160.C++source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // A block graph is an abstract graph of blocks, each of which has an ID, a
  16    :  // type, a size and a few other properties. Each block represents either code or
  17    :  // data, and blocks can reference one another through references of various
  18    :  // types.
  19    :  //
  20    :  // The BlockGraph also stores minimum knowledge of sections (names and
  21    :  // characteristics), and each block belongs to at most one section. In this
  22    :  // sense, a BlockGraph acts as top-level division of blocks.
  23    :  
  24    :  #ifndef SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
  25    :  #define SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
  26    :  
  27    :  #include <hash_map>
  28    :  #include <map>
  29    :  #include <set>
  30    :  #include <string>
  31    :  #include <vector>
  32    :  
  33    :  #include "base/basictypes.h"
  34    :  #include "base/file_util.h"
  35    :  #include "base/string_piece.h"
  36    :  #include "syzygy/common/align.h"
  37    :  #include "syzygy/core/address.h"
  38    :  #include "syzygy/core/address_space.h"
  39    :  
  40    :  namespace block_graph {
  41    :  
  42    :  // Forward declaration.
  43    :  class BlockGraphSerializer;
  44    :  
  45    :  // The invalid address can never occur in an graph, it's used as default
  46    :  // value for block addresses.
  47    :  extern const core::RelativeAddress kInvalidAddress;
  48    :  
  49    :  #define BLOCK_ATTRIBUTE_ENUM(F) \
  50    :      /* Set for functions declared non-returning. */ \
  51    :      F(NON_RETURN_FUNCTION) \
  52    :      /* Set for blocks that are inferred by the decomposer. */ \
  53    :      F(GAP_BLOCK) \
  54    :      /* Set for blocks that are parsed by the PEFileParser. These */ \
  55    :      /* blocks are unmovable, indivisible, etc, and have to be treated */ \
  56    :      /* specially. */ \
  57    :      F(PE_PARSED) \
  58    :      /* Set for blocks that are created from section contribution */ \
  59    :      /* information. */ \
  60    :      F(SECTION_CONTRIB) \
  61    :      /* This is used to indicate that a block consists purely of padding */ \
  62    :      /* data. */ \
  63    :      F(PADDING_BLOCK) \
  64    :      /* Indicates blocks that contain inline assembly. */ \
  65    :      F(HAS_INLINE_ASSEMBLY) \
  66    :      /* Indicates that the block was built by a compiler whose precise */ \
  67    :      /* behaviour and semantics we are unfamiliar with. */ \
  68    :      F(BUILT_BY_UNSUPPORTED_COMPILER) \
  69    :      /* Indicates that the block has been built by the Syzygy toolchain, and */ \
  70    :      /* thus is inherently safe for basic-block decomposition without having */ \
  71    :      /* to perform the myriad of safety checks we do otherwise. */ \
  72    :      F(BUILT_BY_SYZYGY) \
  73    :      /* This is set for blocks whose initial disassembly was incomplete. */ \
  74    :      /* This is not necessarily an error, as we see have seen blocks with */ \
  75    :      /* unreachable code, even in release mode. */ \
  76    :      F(INCOMPLETE_DISASSEMBLY) \
  77    :      /* This is set for blocks whose disassembly was unable to finish due to */ \
  78    :      /* an error. This block has violated assumptions that we make or */ \
  79    :      /* conventions that we have observed the compiler to use. It is not safe */\
  80    :      /* for basic block disassembly. */ \
  81    :      F(ERRORED_DISASSEMBLY) \
  82    :      /* This is set for functions that have exception handling enabled. */ \
  83    :      /* Without delving far deeper into the specifics, it is unsafe to basic */ \
  84    :      /* block decompose these blocks. */ \
  85    :      F(HAS_EXCEPTION_HANDLING) \
  86    :      /* This is set for blocks whose disassembly went off the end of the */ \
  87    :      /* block, or into data. These blocks have control flow that we are not */ \
  88    :      /* aware of, or are otherwise malformed. */ \
  89    :      F(DISASSEMBLED_PAST_END) \
  90    :      /* This is set for blocks that have a thunk symbol pointing to them. */ \
  91    :      /* Typically thunk blocks are compiler or linker-generated, such as */ \
  92    :      /* e.g. import thunks, delay load import thunks, etc. */ \
  93    :      F(THUNK) \
  94    :      /* This is set for blocks that have been parsed as COFF groups. The */ \
  95    :      /* contents of these blocks are semantically indivisible. */ \
  96    :      F(COFF_GROUP) \
  97    :      /* This always needs to be set to the next available attribute bit. */ \
  98    :      F(BLOCK_ATTRIBUTES_MAX)
  99    :  
 100    :  // The BlockGraph is a top-level container for Blocks.
 101    :  class BlockGraph {
 102    :   public:
 103    :    typedef core::RelativeAddress RelativeAddress;
 104    :  
 105    :    typedef size_t SectionId;
 106    :    typedef size_t BlockId;
 107    :    typedef size_t Size;
 108    :    typedef ptrdiff_t Offset;
 109    :    typedef uint32 BlockAttributes;
 110    :    typedef uint32 LabelAttributes;
 111    :  
 112    :    // The BlockGraph maintains a list of sections, and each block belongs
 113    :    // to one of them. This is the set of information we keep regarding them.
 114    :    struct Section;
 115    :    // The section map contains all sections, indexed by id.
 116    :    typedef std::map<SectionId, Section> SectionMap;
 117    :  
 118    :    static const SectionId kInvalidSectionId;
 119    :  
 120    :    // Assign distinct bit IDs to each attribute constant.
 121    :    enum BlockAttributeIdEnum {
 122    :  #define DECLARE_ENUM_BIT(name) name##_BIT,
 123    :      BLOCK_ATTRIBUTE_ENUM(DECLARE_ENUM_BIT)
 124    :  #undef DECLARE_ENUM_BIT
 125    :    };
 126    :  
 127    :    enum BlockAttributeEnum {
 128    :  #define DECLARE_ENUM(name) name = (1 << name##_BIT),
 129    :      BLOCK_ATTRIBUTE_ENUM(DECLARE_ENUM)
 130    :  #undef DECLARE_ENUM
 131    :    };
 132    :  
 133    :    // Returns a string containing the names of the supplied attributes.
 134    :    static std::string BlockAttributesToString(BlockAttributes attrs);
 135    :  
 136    :    enum BlockType {
 137    :      CODE_BLOCK,
 138    :      DATA_BLOCK,
 139    :  
 140    :      // NOTE: This must always be last, and kBlockType must be kept in sync
 141    :      // with this enum.
 142    :      BLOCK_TYPE_MAX
 143    :    };
 144    :  
 145    :    static const char* BlockTypeToString(BlockType type);
 146    :  
 147    :    // Label attributes. Attributes of the form _END_LABEL type actually
 148    :    // point to the first byte past the range they delineate. To make the
 149    :    // semantics of moving labels easier, we shift these labels left by one and
 150    :    // make them follow the last byte of the delineated range.
 151    :    enum LabelAttributesEnum {
 152    :      // The label points to an entry-point in a code block.
 153    :      CODE_LABEL = (1 << 0),
 154    :  
 155    :      // Mark the start and end of the debuggable portion of a code block.
 156    :      DEBUG_START_LABEL = (1 << 1),
 157    :      DEBUG_END_LABEL = (1 << 2),
 158    :  
 159    :      // Mark the start and end of an embedded scope in a code block.
 160    :      SCOPE_START_LABEL = (1 << 3),
 161    :      SCOPE_END_LABEL = (1 << 4),
 162    :  
 163    :      // Marks the location of a (virtual table?) call.
 164    :      CALL_SITE_LABEL = (1 << 5),
 165    :  
 166    :      // The label points to the start of a jump table. The length is inferred
 167    :      // by the location of the next label, or the end of the block. This will
 168    :      // also have DATA_LABEL set.
 169    :      JUMP_TABLE_LABEL = (1 << 6),
 170    :      // The label points to the start of a case table. The length is inferred
 171    :      // by the location of the next label, or the end of the block. This will
 172    :      // also have DATA_LABEL set.
 173    :      CASE_TABLE_LABEL = (1 << 7),
 174    :      // The label originated from a data symbol. The length is inferred by the
 175    :      // location of the next label, or the end of the block. The type of data
 176    :      // is unknown.
 177    :      DATA_LABEL = (1 << 8),
 178    :  
 179    :      // Used to mark a label that was derived from a public symbol. These are
 180    :      // usually actually pointing to code and global data symbols, but we can't
 181    :      // always tell (there may be public symbols pointing to data in a code
 182    :      // block).
 183    :      PUBLIC_SYMBOL_LABEL = (1 << 9),
 184    :  
 185    :      // This always needs to be the most significant bit.
 186    :      LABEL_ATTRIBUTES_MAX = (1 << 10),
 187    :    };
 188    :  
 189    :    static std::string LabelAttributesToString(LabelAttributes label_attributes);
 190    :  
 191    :    enum ReferenceType {
 192    :      PC_RELATIVE_REF,
 193    :      ABSOLUTE_REF,
 194    :      RELATIVE_REF,
 195    :      FILE_OFFSET_REF,
 196    :      // Must be last!
 197    :      REFERENCE_TYPE_MAX,
 198    :    };
 199    :  
 200    :    // Forward declarations.
 201    :    class AddressSpace;
 202    :    class Block;
 203    :    class Label;
 204    :    class Reference;
 205    :  
 206    :    // The block map contains all blocks, indexed by id.
 207    :    typedef std::map<BlockId, Block> BlockMap;
 208    :  
 209    :    BlockGraph();
 210    :    ~BlockGraph();
 211    :  
 212    :    // Adds a section with the given name.
 213    :    //
 214    :    // @param name The section name.
 215    :    // @param characteristics The section characteristics.
 216    :    // @returns the newly created section.
 217    :    Section* AddSection(const base::StringPiece& name, uint32 characteristics);
 218    :  
 219    :    // Finds a section with the given name, returning NULL if no such section
 220    :    // exists.
 221    :    //
 222    :    // @param name The section name.
 223    :    // @returns the section if one is found, NULL otherwise.
 224    :    Section* FindSection(const base::StringPiece& name);
 225    :  
 226    :    // Find or adds section with the given name.
 227    :    //
 228    :    // If a section with the given name already exists, updates its
 229    :    // characteristics and returns it. Otherwise, creates a new section and
 230    :    // returns it. If multiple sections exist with the given name, the first
 231    :    // one encountered is returned.
 232    :    //
 233    :    // TODO(chrisha): The semantics of this function are a little odd. It would
 234    :    //     make more sense for it to return only if a section with matching name
 235    :    //     AND characteristics is found, otherwise to create a new one.
 236    :    //
 237    :    // @param name The section name.
 238    :    // @param characteristics The section characteristics.
 239    :    // @returns the new or found section.
 240    :    Section* FindOrAddSection(const base::StringPiece& name,
 241    :                              uint32 characteristics);
 242    :  
 243    :    // Removes the given section from the BlockGraph.
 244    :    //
 245    :    // The section must belong to this block graph. Be aware that this can leave
 246    :    // Blocks with dangling section_ids.
 247    :    //
 248    :    // @param section The section to remove.
 249    :    // @returns true on success, false otherwise.
 250    :    bool RemoveSection(Section* section);
 251    :  
 252    :    // Removes the section with the given id from the BlockGraph.
 253    :    //
 254    :    // @param section The id of the section to remove.
 255    :    // @returns true on success, false otherwise.
 256    :    bool RemoveSectionById(SectionId id);
 257    :  
 258    :    // Add @p block of type @p type and @p size and
 259    :    // return the new block.
 260    :    // @returns the new block.
 261    :    Block* AddBlock(BlockType type, Size size, const base::StringPiece& name);
 262    :  
 263    :    // Deletes the given block from the BlockGraph. The block must belong to this
 264    :    // block graph, and have no references or referrers. Returns true on success,
 265    :    // false otherwise. On failure, the BlockGraph has not been changed.
 266    :    bool RemoveBlock(Block* block);
 267    :  
 268    :    // Deletes the block with the given @p id from the block graph. The block id
 269    :    // must be valid, and the block must have no references or referrers. Returns
 270    :    // true on success, false otherwise. On failure, the BlockGraph has not been
 271    :    // changed.
 272    :    bool RemoveBlockById(BlockId id);
 273    :  
 274    :    // Accessors.
 275  E :    const SectionMap& sections() const { return sections_; }
 276  E :    SectionMap& sections_mutable() { return sections_; }
 277  E :    const BlockMap& blocks() const { return blocks_; }
 278  E :    BlockMap& blocks_mutable() { return blocks_; }
 279    :  
 280    :    // @{
 281    :    // Retrieve the section with the given id.
 282    :    //
 283    :    // @param id The id of the section to retrieve.
 284    :    // @returns the section in question or NULL if no such section.
 285    :    Section* GetSectionById(SectionId id);
 286    :    const Section* GetSectionById(SectionId id) const;
 287    :    // @}
 288    :  
 289    :    // @{
 290    :    // Retrieve the block with id.
 291    :    // @returns the block in question or NULL if no such block.
 292    :    Block* GetBlockById(BlockId id);
 293    :    const Block* GetBlockById(BlockId id) const;
 294    :    // @}
 295    :  
 296    :    const std::string& InternString(const base::StringPiece& str);
 297    :  
 298    :   private:
 299    :    // Give BlockGraphSerializer access to our innards for serialization.
 300    :    friend BlockGraphSerializer;
 301    :  
 302    :    // Removes a block by the iterator to it. The iterator must be valid.
 303    :    bool RemoveBlockByIterator(BlockMap::iterator it);
 304    :  
 305    :    // All sections we contain.
 306    :    SectionMap sections_;
 307    :  
 308    :    // Our section ID allocator.
 309    :    SectionId next_section_id_;
 310    :  
 311    :    // All blocks we contain.
 312    :    BlockMap blocks_;
 313    :  
 314    :    // Our block ID allocator.
 315    :    BlockId next_block_id_;
 316    :  
 317    :    // A set of internalized string.
 318    :    std::set<std::string> string_table_;
 319    :  };
 320    :  
 321    :  // The BlockGraph maintains a list of sections, and each block belongs
 322    :  // to one of them. This is the set of information we keep regarding them.
 323    :  struct BlockGraph::Section {
 324    :    // Default constructor. Required for serialization.
 325  E :    Section() : id_(kInvalidSectionId), characteristics_(0) {
 326  E :    }
 327    :  
 328    :    // Full constructor.
 329    :    //
 330    :    // @param id The section id. This must not be kInvalidSectionId.
 331    :    // @param name The name of the section. Must not be empty or NULL.
 332    :    // @param characteristics The characteristics of the section.
 333  E :    Section(SectionId id, const base::StringPiece& name, uint32 characteristics)
 334    :        : id_(id), name_(), characteristics_(characteristics) {
 335  E :      DCHECK_NE(kInvalidSectionId, id);
 336  E :      DCHECK(name != NULL);
 337  E :      name.CopyToString(&name_);
 338  E :      DCHECK(!name_.empty());
 339  E :    }
 340    :  
 341    :    // Get the id of this section.
 342    :    //
 343    :    // @returns the id of the section.
 344  E :    SectionId id() const { return id_; }
 345    :  
 346    :    // Get the name of this section.
 347    :    //
 348    :    // @returns the section name.
 349  E :    const std::string& name() const { return name_; }
 350    :  
 351    :    // Sets the name for this section.
 352    :    //
 353    :    // @param name The name of the section. If NULL or empty, this will fail.
 354    :    // @returns true if the name is set, false otherwise.
 355    :    bool set_name(const base::StringPiece& name);
 356    :  
 357    :    // Get the characteristics of this section.
 358    :    //
 359    :    // @returns the section characteristics.
 360  E :    uint32 characteristics() const { return characteristics_; }
 361    :  
 362    :    // Sets the characteristics for this section.
 363    :    //
 364    :    // @param characteristics The new characteristics to set.
 365  E :    void set_characteristics(uint32 characteristics) {
 366  E :      characteristics_ = characteristics;
 367  E :    }
 368    :  
 369    :    // Sets a one or more additional characteristics for this section.
 370    :    //
 371    :    // @param characteristic The new characteristic(s) to set for this section.
 372  E :    void set_characteristic(uint32 characteristic) {
 373  E :      characteristics_ |= characteristic;
 374  E :    }
 375    :  
 376    :    // Clears one or more characteristics for this section.
 377    :    //
 378    :    // @param characteristic The characteristic(s) to clear for this section.
 379  E :    void clear_characteristic(uint32 characteristic) {
 380  E :      characteristics_ &= ~characteristic;
 381  E :    }
 382    :  
 383    :    // @name Serialization functions.
 384    :    // @{
 385    :    bool Save(core::OutArchive* out_archive) const;
 386    :    bool Load(core::InArchive* in_archive);
 387    :    // @}
 388    :  
 389    :    // A simple comparison operator for serialization tests.
 390  E :    bool operator==(const Section& other) const {
 391    :      return id_ == other.id_ && name_ == other.name_ &&
 392  E :          characteristics_ == other.characteristics_;
 393  E :    }
 394    :  
 395    :    // A not-equal comparison operator.
 396  E :    bool operator!=(const Section& other) const {
 397  E :      return !operator==(other);
 398  E :    }
 399    :  
 400    :   private:
 401    :    // The id of the section. This has no particular meaning other than as a way
 402    :    // to identify sections uniquely.
 403    :    SectionId id_;
 404    :    // The name of the section. This will be truncated to a max of 8 characters
 405    :    // on output.
 406    :    std::string name_;
 407    :    // The section characteristics, a bitmask of IMAGE_SCN_* values.
 408    :    uint32 characteristics_;
 409    :  };
 410    :  
 411    :  // A label denotes the beginning (or end) of a sub-region within a (code)
 412    :  // block. In particular, a code label represents an instruction boundary
 413    :  // at which disassembly can begin and a data label represents the beginning
 414    :  // of embedded data.
 415    :  class BlockGraph::Label {
 416    :   public:
 417    :    // Default constructor.
 418  E :    Label() : attributes_(0) {
 419  E :    }
 420    :  
 421    :    // Full constructor.
 422  E :    Label(const base::StringPiece& name, LabelAttributes attributes)
 423    :        : name_(name.begin(), name.end()), attributes_(attributes) {
 424  E :    }
 425    :  
 426    :    // Copy construction.
 427  E :    Label(const Label& other)
 428    :        : name_(other.name_), attributes_(other.attributes_) {
 429  E :    }
 430    :  
 431    :    // @name Accessors.
 432    :    // @{
 433  E :    const std::string& name() const { return name_; }
 434    :    // @}
 435    :  
 436    :    // A helper function for logging and debugging.
 437    :    std::string ToString() const;
 438    :  
 439    :    // Equality comparator for unittesting.
 440  E :    bool operator==(const Label& other) const {
 441  E :      return name_ == other.name_ && attributes_ == other.attributes_;
 442  E :    }
 443    :  
 444    :    // The label attributes are a bitmask. You can set them wholesale,
 445    :    // or set and clear them individually by bitmasking.
 446  E :    LabelAttributes attributes() const { return attributes_; }
 447  E :    void set_attributes(LabelAttributes attributes) { attributes_ = attributes; }
 448    :  
 449    :    // Set or clear one or more attributes.
 450  E :    void set_attribute(LabelAttributes attribute) { attributes_ |= attribute; }
 451  E :    void clear_attribute(LabelAttributes attribute) { attributes_ &= ~attribute; }
 452    :  
 453    :    // Determines if all or any of the given attributes are set.
 454  E :    bool has_attributes(LabelAttributes attributes) const {
 455  E :      return (attributes_ & attributes) == attributes;
 456  E :    }
 457  E :    bool has_any_attributes(LabelAttributes attributes) const {
 458  E :      return (attributes_ & attributes) != 0;
 459  E :    }
 460    :  
 461    :    // @returns true if this label is valid, false otherwise.
 462    :    bool IsValid() const;
 463    :  
 464    :    // Tests a set of label attributes for validity.
 465    :    // @param attributes the attributes to test.
 466    :    // @returns true if the provided attributes are valid, false otherwise.
 467    :    static bool AreValidAttributes(LabelAttributes attributes);
 468    :  
 469    :   private:
 470    :    // The name by which this label is known.
 471    :    std::string name_;
 472    :  
 473    :    // The disposition of the bytes found at this label.
 474    :    LabelAttributes attributes_;
 475    :  };
 476    :  
 477    :  // A block represents a block of either code or data.
 478    :  //
 479    :  // Since blocks may be split and up and glued together in arbitrary ways, each
 480    :  // block maintains an address-space over its data, associating ranges of block
 481    :  // data to ranges of bytes in the original image. This effectively encodes OMAP
 482    :  // data, allowing the PDB file to be updated.
 483    :  //
 484    :  // Each block also stores references to other blocks in the graph, their
 485    :  // relative location within the block and their type and size.
 486    :  //
 487    :  // Each block has a set of attributes, including a size, a name and a
 488    :  // "current" address. Most of those attributes are mutable, and are set in the
 489    :  // process of creating and manipulating images and graph address spaces.
 490    :  class BlockGraph::Block {
 491    :   public:
 492    :    // Set of the blocks that have a reference to this block.
 493    :    // This is keyed on block and source offset (not destination offset),
 494    :    // to allow one to easily locate and remove the backreferences on change or
 495    :    // deletion.
 496    :    typedef std::pair<Block*, Offset> Referrer;
 497    :    typedef std::set<Referrer> ReferrerSet;
 498    :  
 499    :    // Map of references that this block makes to other blocks.
 500    :    typedef std::map<Offset, Reference> ReferenceMap;
 501    :  
 502    :    // Represents a range of data in this block.
 503    :    typedef core::AddressRange<Offset, Size> DataRange;
 504    :  
 505    :    // Represents a range of data in the original image.
 506    :    typedef core::AddressRange<RelativeAddress, Size> SourceRange;
 507    :  
 508    :    // A map between bytes in this block and bytes in the original image.
 509    :    typedef core::AddressRangeMap<DataRange, SourceRange> SourceRanges;
 510    :  
 511    :    // Typed labels associated with various offsets in the block. Some of these
 512    :    // labels (of type CODE_LABEL) represent code start points for disassembly
 513    :    // while others (of type DATA_LABEL) represent the start of embedded data
 514    :    // within the block. Note that, while possible, it is NOT guaranteed that
 515    :    // all basic blocks are marked with a label. Basic block decomposition should
 516    :    // disassemble from the code labels to discover all basic blocks.
 517    :    typedef std::map<Offset, Label> LabelMap;
 518    :  
 519    :    Block(BlockId id,
 520    :          BlockType type,
 521    :          Size size,
 522    :          const base::StringPiece& name,
 523    :          BlockGraph* block_graph);
 524    :    ~Block();
 525    :  
 526    :    // Accessors.
 527  E :    BlockId id() const { return id_; }
 528  E :    BlockType type() const { return type_; }
 529  E :    void set_type(BlockType type) { type_ = type; }
 530    :  
 531  E :    Size size() const { return size_; }
 532    :  
 533    :    // Set the total size of the block. Note that allocated data_size_ must
 534    :    // always be less than or equal to the total size.
 535  E :    void set_size(Size size) {
 536  E :      DCHECK_LE(data_size_, size);
 537  E :      size_ = size;
 538  E :    }
 539    :  
 540  E :    const std::string& name() const { return name_; }
 541  E :    void set_name(const base::StringPiece& name) { name.CopyToString(&name_); }
 542    :  
 543  E :    Size alignment() const { return alignment_; }
 544  E :    void set_alignment(Size alignment) {
 545    :      // Ensure that alignment is a non-zero power of two.
 546  E :      DCHECK(common::IsPowerOfTwo(alignment));
 547  E :      alignment_ = alignment;
 548  E :    }
 549    :  
 550    :    // The address of the block is set any time the block is assigned
 551    :    // an address in an address space.
 552  E :    RelativeAddress addr() const { return addr_; }
 553  E :    void set_addr(RelativeAddress addr) { addr_ = addr; }
 554    :  
 555    :    // The section ID for the block. These IDs are wrt to the SectionMap in the
 556    :    // parent BlockGraph.
 557  E :    SectionId section() const { return section_; }
 558  E :    void set_section(SectionId section) { section_ = section; }
 559    :  
 560    :    // The block attributes are a bitmask. You can set them wholesale,
 561    :    // or set and clear them individually by bitmasking.
 562  E :    BlockAttributes attributes() const { return attributes_; }
 563  E :    void set_attributes(BlockAttributes attributes) { attributes_ = attributes; }
 564    :  
 565    :    // Set or clear one or more attributes.
 566  E :    void set_attribute(BlockAttributes attribute) { attributes_ |= attribute; }
 567  E :    void clear_attribute(BlockAttributes attribute) {
 568  E :      attributes_ &= ~attribute;
 569  E :    }
 570    :  
 571    :    // This is true iff data_ is in the ownership of the block.
 572    :    // Iff true, the block will delete [] data_ on destruction or when
 573    :    // data is overwritten.
 574  E :    bool owns_data() const { return owns_data_; }
 575    :  
 576    :    // Makes room for the given amount of data at the given offset. This is
 577    :    // special in that it will patch up any labels, source ranges and referrers
 578    :    // that land beyond the newly created data, shifting them to the right by
 579    :    // @p size. If the data for this block is actually allocated it will also
 580    :    // patch up the allocated data by zeroing the newly allocate range of data,
 581    :    // and shifting the tail by @p size. If the new data is strictly implicit
 582    :    // (offset > data_size), then the allocated data is not affected in any way
 583    :    // unless @p always_allocate_data is true.
 584    :    //
 585    :    // @param offset the offset at which to insert the new data.
 586    :    // @param size the size of the new data to be inserted.
 587    :    // @param always_allocate_data if true, then data_size will be grown if
 588    :    //     necessary to ensure that the newly created data can be written.
 589    :    // @pre 0 <= offset <= size()
 590    :    void InsertData(Offset offset, Size size, bool always_allocate_data);
 591    :  
 592    :    // Removes the data in the given range. This will refuse to remove labels,
 593    :    // references and referrers that land in the range, and will fail if any
 594    :    // exist. It will also shift any labels, references and referrers that land
 595    :    // beyond the end of the removed range. Source ranges will also be fixed. If
 596    :    // the removed range lies within the initialized data then the data will also
 597    :    // be truncated/shifted as necessary.
 598    :    //
 599    :    // @param offset the offset at which to remove data.
 600    :    // @param size the size of the data to remove, in bytes.
 601    :    // @returns true on success, false otherwise.
 602    :    // @pre 0 <= offset <= size
 603    :    bool RemoveData(Offset offset, Size size);
 604    :  
 605    :    // Performs an inline resize of data in a BlockGraph. If the data is shrinking
 606    :    // this equates to a RemoveData operation. If it is growing it equates to an
 607    :    // InsertData operation.
 608    :    //
 609    :    // @param offset the offset of the data to resize.
 610    :    // @param current_size the current size of the data to resize.
 611    :    // @param new_size the desired size of the data.
 612    :    // @param always_allocate_data if true, then data_size will be grown if
 613    :    //     necessary to ensure that the resized data can be written.
 614    :    // @returns true on success, false otherwise.
 615    :    // @pre 0 <= offset <= size
 616    :    bool InsertOrRemoveData(Offset offset, Size current_size, Size new_size,
 617    :                            bool always_allocate_data);
 618    :  
 619    :    // Set the data the block refers to.
 620    :    // @param data NULL or the data this block refers to.
 621    :    //     The underlying data must outlive this block.
 622    :    // @param data_size the size of data, or zero if data == NULL.
 623    :    // @pre data_size <= size().
 624    :    void SetData(const uint8* data, size_t data_size);
 625    :  
 626    :    // Allocates and returns a new data buffer of the given size. The returned
 627    :    // data will have been initialized to zero.
 628    :    // @pre data_size <= size().
 629    :    uint8* AllocateData(size_t data_size);
 630    :  
 631    :    // Makes a copy of data, returns a pointer to the copy.
 632    :    // @pre data_size <= size().
 633    :    uint8* CopyData(size_t data_size, const void* data);
 634    :  
 635    :    // Resizes data to new_size by truncating or zero-extending the current data.
 636    :    // @pre new_size <= size().
 637    :    const uint8* ResizeData(size_t new_size);
 638    :  
 639    :    // Returns a mutable copy of the block's data. If the block doesn't own
 640    :    // the data on entry, it'll be copied and the copy returned to the caller.
 641    :    uint8* GetMutableData();
 642    :  
 643    :    // The data bytes the block refers to.
 644  E :    const uint8* data() const { return data_; }
 645    :  
 646    :    // The data size may be smaller than the block size (see size()),
 647    :    // when the block e.g. refers to data that's all or part
 648    :    // zero-initialized by the linker/loader.
 649  E :    size_t data_size() const { return data_size_; }
 650    :  
 651  E :    const ReferenceMap& references() const { return references_; }
 652  E :    const ReferrerSet& referrers() const { return referrers_; }
 653  E :    const SourceRanges& source_ranges() const { return source_ranges_; }
 654  E :    SourceRanges& source_ranges() { return source_ranges_; }
 655  E :    const LabelMap& labels() const { return labels_; }
 656    :  
 657    :    // Returns true if there are any other blocks holding a reference to this one.
 658    :    bool HasExternalReferrers() const;
 659    :  
 660    :    // Set the reference at @p offset to @p ref.
 661    :    // If there's a pre-existing reference at @p offset, this overrides it.
 662    :    // @param offset offset of the reference into this block.
 663    :    // @param ref the reference to add.
 664    :    // @returns true iff this inserts a new reference.
 665    :    bool SetReference(Offset offset, const Reference& ref);
 666    :  
 667    :    // Retrieve the reference at @p offset if one exists.
 668    :    // @param reference on success returns the reference @p offset.
 669    :    // @returns true iff there was a reference at @p offset.
 670    :    bool GetReference(Offset offset, Reference* reference) const;
 671    :  
 672    :    // Remove the reference at @p offset.
 673    :    // @returns true iff there was a reference at @p offset.
 674    :    bool RemoveReference(Offset offset);
 675    :  
 676    :    // Remove all references from this block. This is handy when removing a block
 677    :    // from the block graph.
 678    :    bool RemoveAllReferences();
 679    :  
 680    :    // Set a label to @p offset.
 681    :    // A label in code marks the location of the start of an instruction -
 682    :    // e.g. a location where disassembly can usefully commence. Labels
 683    :    // appear to be inserted by the VS tool chain where e.g. a switch
 684    :    // statement is implemented with a jump table, to note the location
 685    :    // of the jump destinations.
 686    :    // @param offset the offset of the label to set.
 687    :    // @param name the name of the label.
 688    :    // @param attributes the attributes of the label.
 689    :    // @returns true iff a new label is inserted.
 690    :    // @note that only one label can exist at each offset, and the first
 691    :    //     label set at any offset will stay there.
 692    :    // @{
 693    :    bool SetLabel(Offset offset, const Label& label);
 694    :    bool SetLabel(Offset offset,
 695    :                  const base::StringPiece& name,
 696  E :                  LabelAttributes attributes) {
 697  E :      return SetLabel(offset, Label(name, attributes));
 698  E :    }
 699    :    // @}
 700    :  
 701    :    // Gets the label at the given @p offset.
 702    :    // @param offset the offset of the label to get.
 703    :    // @param label the string to receive the label.
 704    :    // @return true if the label exists, false otherwise.
 705    :    bool GetLabel(Offset offset, Label* label) const;
 706    :  
 707    :    // Removes the label at the given @p offset.
 708    :    // @param offset the offset of the label to remove.
 709    :    // @return true if the label existed and was removed, false it it did not
 710    :    //     exist.
 711    :    bool RemoveLabel(Offset offset);
 712    :  
 713    :    // Returns true iff the block has a label at @p offset.
 714    :    // @param offset the offset of the label to search for.
 715    :    bool HasLabel(Offset offset) const;
 716    :  
 717    :    // Change all references to this block to refer to @p new_block instead,
 718    :    // while offsetting each reference by @p offset.
 719    :    // @note this fails if any of the transferred references end up with offsets
 720    :    //     less than zero, or greater than new_block->size().
 721    :    // @returns true iff all references were transferred successfully.
 722    :    bool TransferReferrers(Offset offset, Block* new_block);
 723    :  
 724    :    // Returns true if this block contains the given range of bytes.
 725    :    bool Contains(RelativeAddress address, size_t size) const;
 726    :  
 727    :   protected:
 728    :    // Give BlockGraph access to our innards for serialization.
 729    :    friend class BlockGraph;
 730    :    // Give BlockGraphSerializer access to our innards for serialization.
 731    :    friend class BlockGraphSerializer;
 732    :  
 733    :    // This constructor is used by serialization.
 734    :    explicit Block(BlockGraph* block_graph);
 735    :  
 736    :    // Allocates and returns a new data buffer of the given size. The returned
 737    :    // data buffer will not have been initialized in any way.
 738    :    uint8* AllocateRawData(size_t size);
 739    :  
 740    :    BlockId id_;
 741    :    BlockType type_;
 742    :    Size size_;
 743    :    Size alignment_;
 744    :    std::string name_;
 745    :    RelativeAddress addr_;
 746    :  
 747    :    // BlockGraph to which belongs this Block. A block can only belongs to one
 748    :    // Block Graph.
 749    :    BlockGraph * block_graph_;
 750    :  
 751    :    SectionId section_;
 752    :    BlockAttributes attributes_;
 753    :  
 754    :    ReferenceMap references_;
 755    :    ReferrerSet referrers_;
 756    :    SourceRanges source_ranges_;
 757    :    LabelMap labels_;
 758    :  
 759    :    // True iff data_ is ours to deallocate with delete [].
 760    :    // If this is false, data_ must be guaranteed to outlive the block.
 761    :    bool owns_data_;
 762    :    // A pointer to the code or data we represent.
 763    :    const uint8* data_;
 764    :    // Size of the above.
 765    :    size_t data_size_;
 766    :  };
 767    :  
 768    :  // A graph address space endows a graph with a non-overlapping ordering
 769    :  // on blocks, where each block occupies zero or one address ranges in the
 770    :  // address space. No two blocks may overlap in an address space.
 771    :  class BlockGraph::AddressSpace {
 772    :   public:
 773    :    typedef core::AddressSpace<RelativeAddress, BlockGraph::Size, Block*>
 774    :        AddressSpaceImpl;
 775    :    typedef AddressSpaceImpl::Range Range;
 776    :    typedef AddressSpaceImpl::RangeMap RangeMap;
 777    :    typedef AddressSpaceImpl::RangeMapIter RangeMapIter;
 778    :    typedef AddressSpaceImpl::RangeMapConstIter RangeMapConstIter;
 779    :    typedef AddressSpaceImpl::RangeMapIterPair RangeMapIterPair;
 780    :    typedef AddressSpaceImpl::RangeMapConstIterPair RangeMapConstIterPair;
 781    :  
 782    :    // Constructs a new empty address space.
 783    :    // @p start to @p start + @p size on @p graph.
 784    :    explicit AddressSpace(BlockGraph* graph);
 785    :  
 786    :    // Add a block of type @p type and @p size at @p address to our associated
 787    :    // graph, and return the new block.
 788    :    // @returns the new block, or NULL if the new block would overlap
 789    :    //     an existing block.
 790    :    Block* AddBlock(BlockType type,
 791    :                    RelativeAddress addr,
 792    :                    Size size,
 793    :                    const base::StringPiece& name);
 794    :  
 795    :    // Merges all blocks that intersect @p range into a single block.
 796    :    // Moves labels and references from the intersecting blocks to the
 797    :    // merged block, and changes referring blocks to refer to the new,
 798    :    // merged block. Removes the original blocks from the BlockGraph.
 799    :    // @returns the new, merged block if there was at least one intersecting
 800    :    //     block in @p range, or NULL otherwise.
 801    :    Block* MergeIntersectingBlocks(const Range& range);
 802    :  
 803    :    // Insert existing block @p block at @p address.
 804    :    // @returns true on success, or false if the @p block would overlap
 805    :    //     an existing block.
 806    :    bool InsertBlock(RelativeAddress addr, Block* block);
 807    :  
 808    :    // Returns a pointer to the block containing address, or NULL
 809    :    // if no block contains address.
 810    :    Block* GetBlockByAddress(RelativeAddress addr) const;
 811    :  
 812    :    // Returns a pointer to the block containing the address range
 813    :    // [address, address + size), or NULL if no block contains that
 814    :    // range.
 815    :    Block* GetContainingBlock(RelativeAddress addr, Size size) const;
 816    :  
 817    :    // Finds the first block, if any that intersects
 818    :    // [@p address, @p address + @p size).
 819    :    Block* GetFirstIntersectingBlock(RelativeAddress address, Size size);
 820    :  
 821    :    // Check whether the address space contains @p block.
 822    :    // @param block the block in question.
 823    :    // @returns true if the block is in the address space, false otherwise.
 824    :    bool ContainsBlock(const Block* block);
 825    :  
 826    :    // Locates all blocks that intersect [@p address, @p address + @p size).
 827    :    // @returns a pair of iterators that iterate over the found blocks.
 828    :    RangeMapConstIterPair GetIntersectingBlocks(RelativeAddress address,
 829    :                                                Size size) const;
 830    :    RangeMapIterPair GetIntersectingBlocks(RelativeAddress address, Size size);
 831    :  
 832    :    // Retrieve the address off @p block.
 833    :    // @param block the block in question.
 834    :    // @param addr on success, returns the address of @p block in this
 835    :    //     address space.
 836    :    // @returns true on success, false if @p block is not in this
 837    :    //     address space.
 838    :    bool GetAddressOf(const Block* block, RelativeAddress* addr) const;
 839    :  
 840    :    // Accessor.
 841  E :    BlockGraph* graph() { return graph_; }
 842  E :    const BlockGraph* graph() const { return graph_; }
 843    :  
 844  E :    RangeMapConstIter begin() const {
 845  E :      return address_space_.ranges().begin();
 846  E :    }
 847    :  
 848  E :    RangeMapConstIter end() const {
 849  E :      return address_space_.ranges().end();
 850  E :    }
 851    :  
 852  E :    size_t size() const {
 853  E :      return address_space_.ranges().size();
 854  E :    }
 855    :  
 856  E :    const AddressSpaceImpl& address_space_impl() const {
 857  E :      return address_space_;
 858  E :    }
 859    :  
 860    :   private:
 861    :    bool InsertImpl(RelativeAddress addr, Block* block);
 862    :  
 863    :    typedef stdext::hash_map<const Block*, RelativeAddress> BlockAddressMap;
 864    :  
 865    :    AddressSpaceImpl address_space_;
 866    :    BlockAddressMap block_addresses_;
 867    :    BlockGraph* graph_;
 868    :  };
 869    :  
 870    :  // Represents a reference from one block to another. References may be offset.
 871    :  // That is, they may refer to an object at a given location, but actually point
 872    :  // to a location that is some fixed distance away from that object. This allows,
 873    :  // for example, non-zero based indexing into a table. The object that is
 874    :  // intended to be dereferenced is called the 'base' of the offset.
 875    :  //
 876    :  // BlockGraph references are from a location (offset) in one block, to some
 877    :  // location in another block. The referenced block itself plays the role of the
 878    :  // 'base' of the reference, with the offset of the reference being stored as
 879    :  // an integer from the beginning of the block. However, basic block
 880    :  // decomposition requires breaking the block into smaller pieces and thus we
 881    :  // need to carry around an explicit base value, indicating which byte in the
 882    :  // block is intended to be referenced.
 883    :  //
 884    :  // A direct reference to a location will have the same value for 'base' and
 885    :  // 'offset'.
 886    :  //
 887    :  // Here is an example:
 888    :  //
 889    :  //        /----------\
 890    :  //        +---------------------------+
 891    :  //  O     |          B                | <--- Referenced block
 892    :  //        +---------------------------+      B = base
 893    :  //  \-----/                                  O = offset
 894    :  //
 895    :  class BlockGraph::Reference {
 896    :   public:
 897    :    Reference() :
 898  E :        type_(RELATIVE_REF), size_(0), referenced_(NULL), offset_(0), base_(0) {
 899  E :    }
 900    :  
 901    :    // @param type type of reference.
 902    :    // @param size size of reference.
 903    :    // @param referenced the referenced block.
 904    :    // @param offset offset from the beginning of the block of the location to be
 905    :    //     explicitly referred to.
 906    :    // @param base offset into the block of the location actually being
 907    :    //     referenced. This must be strictly within @p referenced.
 908    :    Reference(ReferenceType type,
 909    :              Size size,
 910    :              Block* referenced,
 911    :              Offset offset,
 912    :              Offset base)
 913    :        : type_(type),
 914    :          size_(size),
 915    :          referenced_(referenced),
 916    :          offset_(offset),
 917  E :          base_(base) {
 918  E :      DCHECK(IsValid());
 919  E :    }
 920    :  
 921    :    // Copy constructor.
 922    :    Reference(const Reference& other)
 923    :        : type_(other.type_),
 924    :          size_(other.size_),
 925    :          referenced_(other.referenced_),
 926    :          offset_(other.offset_),
 927  E :          base_(other.base_) {
 928  E :    }
 929    :  
 930    :    // Accessors.
 931  E :    ReferenceType type() const { return type_; }
 932  E :    Size size() const { return size_; }
 933  E :    Block* referenced() const { return referenced_; }
 934  E :    Offset offset() const { return offset_; }
 935  E :    Offset base() const { return base_; }
 936    :  
 937    :    // Determines if this is a direct reference. That is, if the actual location
 938    :    // being referenced (offset) and the intended location being referenced (base)
 939    :    // are the same.
 940    :    //
 941    :    // @returns true if the reference is direct, false otherwise.
 942  E :    bool IsDirect() const { return base_ == offset_; }
 943    :  
 944    :    // Determines if this is a valid reference, by imposing size constraints on
 945    :    // reference types, and determining if the base address of the reference is
 946    :    // strictly contained within the referenced block.
 947    :    //
 948    :    // @returns true if valid, false otherwise.
 949    :    bool IsValid() const;
 950    :  
 951  E :    bool operator==(const Reference& other) const {
 952    :      return type_ == other.type_ &&
 953    :          size_ == other.size_ &&
 954    :          referenced_ == other.referenced_ &&
 955    :          offset_ == other.offset_ &&
 956  E :          base_ == other.base_;
 957  E :    }
 958    :  
 959    :    // The maximum size that a reference may have. This needs to be kept in sync
 960    :    // with the expectations of IsValid().
 961    :    static const size_t kMaximumSize = 4;
 962    :  
 963    :    // Returns true if the given reference type and size combination is valid.
 964    :    static bool IsValidTypeSize(ReferenceType type, Size size);
 965    :  
 966    :   private:
 967    :    // Type of this reference.
 968    :    ReferenceType type_;
 969    :  
 970    :    // Size of this reference.
 971    :    // Absolute references are always pointer wide, but PC-relative
 972    :    // references can be 1, 2 or 4 bytes wide, which affects their range.
 973    :    Size size_;
 974    :  
 975    :    // The block referenced.
 976    :    Block* referenced_;
 977    :  
 978    :    // Offset into the referenced block.
 979    :    Offset offset_;
 980    :  
 981    :    // The base of the reference, as in offset in the block. This must be a
 982    :    // location strictly within the block.
 983    :    Offset base_;
 984    :  };
 985    :  
 986    :  // Commonly used container types.
 987    :  typedef std::vector<BlockGraph::Block*> BlockVector;
 988    :  typedef std::vector<const BlockGraph::Block*> ConstBlockVector;
 989    :  
 990    :  }  // namespace block_graph
 991    :  
 992    :  #endif  // SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_

Coverage information generated Thu Jul 04 09:34:53 2013.