1 : // Copyright 2012 Google Inc.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // A block graph is a an abstract graph of blocks, each of which has an ID,
16 : // a type, a size and a few other properties. Each block represents either code
17 : // or data, and blocks can reference one another through references of various
18 : // types.
19 : //
20 : // The BlockGraph also stores minimum knowledge of sections (names and
21 : // characteristics), and each block belongs to at most one section. In this
22 : // sense, a BlockGraph acts as top-level division of blocks.
23 :
24 : #ifndef SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
25 : #define SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
26 :
27 : #include <hash_map>
28 : #include <map>
29 : #include <set>
30 : #include <string>
31 : #include <vector>
32 :
33 : #include "base/basictypes.h"
34 : #include "base/string_piece.h"
35 : #include "syzygy/common/align.h"
36 : #include "syzygy/core/address.h"
37 : #include "syzygy/core/address_space.h"
38 :
39 : namespace block_graph {
40 :
41 : // Forward declaration.
42 : class BlockGraphSerializer;
43 :
44 : // The invalid address can never occur in an graph, it's used as default
45 : // value for block addresses.
46 : extern const core::RelativeAddress kInvalidAddress;
47 :
48 : #define BLOCK_ATTRIBUTE_ENUM(F) \
49 : /* Set for functions declared non-returning. */ \
50 : F(NON_RETURN_FUNCTION, 0) \
51 : /* Set for blocks that are inferred by the decomposer. */ \
52 : F(GAP_BLOCK, 1) \
53 : /* Set for blocks that are parsed by the PEFileParser. These */ \
54 : /* blocks are unmovable, indivisible, etc, and have to be treated */ \
55 : /* specially. */ \
56 : F(PE_PARSED, 2) \
57 : /* Set for blocks that are created from section contribution */ \
58 : /* information. */ \
59 : F(SECTION_CONTRIB, 3) \
60 : /* This is used to indicate that a block consists purely of padding */ \
61 : /* data. */ \
62 : F(PADDING_BLOCK, 4) \
63 : /* Indicates blocks that contain inline assembly. */ \
64 : F(HAS_INLINE_ASSEMBLY, 5) \
65 : /* Indicates that the block was built by a compiler whose precise */ \
66 : /* behaviour and semantics we are unfamiliar with. */ \
67 : F(BUILT_BY_UNSUPPORTED_COMPILER, 6) \
68 : /* Indicates that the block has been built by the Syzygy toolchain, and */ \
69 : /* thus is inherently safe for basic-block decomposition without having */ \
70 : /* to perform the myriad of safety checks we do otherwise. */ \
71 : F(BUILT_BY_SYZYGY, 7) \
72 : /* This is set for blocks whose initial disassembly was incomplete. */ \
73 : /* This is not necessarily an error, as we see have seen blocks with */ \
74 : /* unreachable code, even in release mode. */ \
75 : F(INCOMPLETE_DISASSEMBLY, 8) \
76 : /* This is set for blocks whose disassembly was unable to finish due to */ \
77 : /* an error. This block has violated assumptions that we make or */ \
78 : /* conventions that we have observed the compiler to use. It is not safe */\
79 : /* for basic block disassembly. */ \
80 : F(ERRORED_DISASSEMBLY, 9) \
81 : /* This is set for functions that have exception handling enabled. */ \
82 : /* Without delving far deeper into the specifics, it is unsafe to basic */ \
83 : /* block decompose these blocks. */ \
84 : F(HAS_EXCEPTION_HANDLING, 10) \
85 : /* This is set for blocks whose disassembly went off the end of the */ \
86 : /* block, or into data. These blocks have control flow that we are not */ \
87 : /* aware of, or are otherwise malformed. */ \
88 : F(DISASSEMBLED_PAST_END, 11) \
89 : /* This always needs to be set to the next available attribute bit. */ \
90 : F(BLOCK_ATTRIBUTES_MAX, 12)
91 :
92 :
93 : // The BlockGraph is a top-level container for Blocks.
94 : class BlockGraph {
95 : public:
96 : typedef core::RelativeAddress RelativeAddress;
97 :
98 : typedef size_t SectionId;
99 : typedef size_t BlockId;
100 : typedef size_t Size;
101 : typedef ptrdiff_t Offset;
102 : typedef uint32 BlockAttributes;
103 : typedef uint32 LabelAttributes;
104 :
105 : // The BlockGraph maintains a list of sections, and each block belongs
106 : // to one of them. This is the set of information we keep regarding them.
107 : struct Section;
108 : // The section map contains all sections, indexed by id.
109 : typedef std::map<SectionId, Section> SectionMap;
110 :
111 : static const SectionId kInvalidSectionId;
112 :
113 : enum BlockAttributeEnum {
114 : #define DECLARE_ENUM(name, bit) name = (1 << bit),
115 : BLOCK_ATTRIBUTE_ENUM(DECLARE_ENUM)
116 : #undef DECLARE_ENUM
117 : };
118 :
119 : // Returns a string containing the names of the supplied attributes.
120 : static std::string BlockAttributesToString(BlockAttributes attrs);
121 :
122 : enum BlockType {
123 : CODE_BLOCK,
124 : DATA_BLOCK,
125 :
126 : // NOTE: This must always be last, and kBlockType must be kept in sync
127 : // with this enum.
128 : BLOCK_TYPE_MAX
129 : };
130 :
131 : static const char* BlockTypeToString(BlockType type);
132 :
133 : // Label attributes. Attributes of the form _END_LABEL type actually
134 : // point to the first byte past the range they delineate. To make the
135 : // semantics of moving labels easier, we shift these labels left by one and
136 : // make them follow the last byte of the delineated range.
137 : enum LabelAttributesEnum {
138 : // The label points to an entry-point in a code block.
139 : CODE_LABEL = (1 << 0),
140 :
141 : // Mark the start and end of the debuggable portion of a code block.
142 : DEBUG_START_LABEL = (1 << 1),
143 : DEBUG_END_LABEL = (1 << 2),
144 :
145 : // Mark the start and end of an embedded scope in a code block.
146 : SCOPE_START_LABEL = (1 << 3),
147 : SCOPE_END_LABEL = (1 << 4),
148 :
149 : // Marks the location of a (virtual table?) call.
150 : CALL_SITE_LABEL = (1 << 5),
151 :
152 : // The label points to the start of a jump table. The length is inferred
153 : // by the location of the next label, or the end of the block. This will
154 : // also have DATA_LABEL set.
155 : JUMP_TABLE_LABEL = (1 << 6),
156 : // The label points to the start of a case table. The length is inferred
157 : // by the location of the next label, or the end of the block. This will
158 : // also have DATA_LABEL set.
159 : CASE_TABLE_LABEL = (1 << 7),
160 : // The label originated from a data symbol. The length is inferred by the
161 : // location of the next label, or the end of the block. The type of data
162 : // is unknown.
163 : DATA_LABEL = (1 << 8),
164 :
165 : // This always needs to be the most significant bit.
166 : LABEL_ATTRIBUTES_MAX = (1 << 9),
167 : };
168 :
169 : static std::string LabelAttributesToString(LabelAttributes label_attributes);
170 :
171 : enum ReferenceType {
172 : PC_RELATIVE_REF,
173 : ABSOLUTE_REF,
174 : RELATIVE_REF,
175 : FILE_OFFSET_REF,
176 : // Must be last!
177 : REFERENCE_TYPE_MAX,
178 : };
179 :
180 : // Forward declarations.
181 : class AddressSpace;
182 : class Block;
183 : class Label;
184 : class Reference;
185 :
186 : // The block map contains all blocks, indexed by id.
187 : typedef std::map<BlockId, Block> BlockMap;
188 :
189 : BlockGraph();
190 : ~BlockGraph();
191 :
192 : // Adds a section with the given name.
193 : //
194 : // @param name The section name.
195 : // @param characteristics The section characteristics.
196 : // @returns the newly created section.
197 : Section* AddSection(const base::StringPiece& name, uint32 characteristics);
198 :
199 : // Finds a section with the given name, returning NULL if no such section
200 : // exists.
201 : //
202 : // @param name The section name.
203 : // @returns the section if one is found, NULL otherwise.
204 : Section* FindSection(const base::StringPiece& name);
205 :
206 : // Find or adds section with the given name.
207 : //
208 : // If a section with the given name already exists, updates its
209 : // characteristics and returns it. Otherwise, creates a new section and
210 : // returns it. If multiple sections exist with the given name, the first
211 : // one encountered is returned.
212 : //
213 : // TODO(chrisha): The semantics of this function are a little odd. It would
214 : // make more sense for it to return only if a section with matching name
215 : // AND characteristics is found, otherwise to create a new one.
216 : //
217 : // @param name The section name.
218 : // @param characteristics The section characteristics.
219 : // @returns the new or found section.
220 : Section* FindOrAddSection(const base::StringPiece& name,
221 : uint32 characteristics);
222 :
223 : // Removes the given section from the BlockGraph.
224 : //
225 : // The section must belong to this block graph. Be aware that this can leave
226 : // Blocks with dangling section_ids.
227 : //
228 : // @param section The section to remove.
229 : // @returns true on success, false otherwise.
230 : bool RemoveSection(Section* section);
231 :
232 : // Removes the section with the given id from the BlockGraph.
233 : //
234 : // @param section The id of the section to remove.
235 : // @returns true on success, false otherwise.
236 : bool RemoveSectionById(SectionId id);
237 :
238 : // Add @p block of type @p type and @p size and
239 : // return the new block.
240 : // @returns the new block.
241 : Block* AddBlock(BlockType type, Size size, const base::StringPiece& name);
242 :
243 : // Deletes the given block from the BlockGraph. The block must belong to this
244 : // block graph, and have no references or referrers. Returns true on success,
245 : // false otherwise. On failure, the BlockGraph has not been changed.
246 : bool RemoveBlock(Block* block);
247 :
248 : // Deletes the block with the given @p id from the block graph. The block id
249 : // must be valid, and the block must have no references or referrers. Returns
250 : // true on success, false otherwise. On failure, the BlockGraph has not been
251 : // changed.
252 : bool RemoveBlockById(BlockId id);
253 :
254 : // Accessors.
255 E : const SectionMap& sections() const { return sections_; }
256 E : SectionMap& sections_mutable() { return sections_; }
257 E : const BlockMap& blocks() const { return blocks_; }
258 E : BlockMap& blocks_mutable() { return blocks_; }
259 :
260 : // @{
261 : // Retrieve the section with the given id.
262 : //
263 : // @param id The id of the section to retrieve.
264 : // @returns the section in question or NULL if no such section.
265 : Section* GetSectionById(SectionId id);
266 : const Section* GetSectionById(SectionId id) const;
267 : // @}
268 :
269 : // @{
270 : // Retrieve the block with id.
271 : // @returns the block in question or NULL if no such block.
272 : Block* GetBlockById(BlockId id);
273 : const Block* GetBlockById(BlockId id) const;
274 : // @}
275 :
276 : private:
277 : // Give BlockGraphSerializer access to our innards for serialization.
278 : friend BlockGraphSerializer;
279 :
280 : // Removes a block by the iterator to it. The iterator must be valid.
281 : bool RemoveBlockByIterator(BlockMap::iterator it);
282 :
283 : // All sections we contain.
284 : SectionMap sections_;
285 :
286 : // Our section ID allocator.
287 : SectionId next_section_id_;
288 :
289 : // All blocks we contain.
290 : BlockMap blocks_;
291 :
292 : // Our block ID allocator.
293 : BlockId next_block_id_;
294 : };
295 :
296 : // The BlockGraph maintains a list of sections, and each block belongs
297 : // to one of them. This is the set of information we keep regarding them.
298 : struct BlockGraph::Section {
299 : // Default constructor. Required for serialization.
300 E : Section() : id_(kInvalidSectionId), characteristics_(0) {
301 E : }
302 :
303 : // Full constructor.
304 : //
305 : // @param id The section id. This must not be kInvalidSectionId.
306 : // @param name The name of the section. Must not be empty or NULL.
307 : // @param characteristics The characteristics of the section.
308 E : Section(SectionId id, const base::StringPiece& name, uint32 characteristics)
309 : : id_(id), name_(), characteristics_(characteristics) {
310 E : DCHECK_NE(kInvalidSectionId, id);
311 E : DCHECK(name != NULL);
312 E : name.CopyToString(&name_);
313 E : DCHECK(!name_.empty());
314 E : }
315 :
316 : // Get the id of this section.
317 : //
318 : // @returns the id of the section.
319 E : SectionId id() const { return id_; }
320 :
321 : // Get the name of this section.
322 : //
323 : // @returns the section name.
324 E : const std::string& name() const { return name_; }
325 :
326 : // Sets the name for this section.
327 : //
328 : // @param name The name of the section. If NULL or empty, this will fail.
329 : // @returns true if the name is set, false otherwise.
330 : bool set_name(const base::StringPiece& name);
331 :
332 : // Get the characteristics of this section.
333 : //
334 : // @returns the section characteristics.
335 E : uint32 characteristics() const { return characteristics_; }
336 :
337 : // Sets the characteristics for this section.
338 : //
339 : // @param characteristics The new characteristics to set.
340 E : void set_characteristics(uint32 characteristics) {
341 E : characteristics_ = characteristics;
342 E : }
343 :
344 : // Sets a one or more additional characteristics for this section.
345 : //
346 : // @param characteristic The new characteristic(s) to set for this section.
347 E : void set_characteristic(uint32 characteristic) {
348 E : characteristics_ |= characteristic;
349 E : }
350 :
351 : // Clears one or more characteristics for this section.
352 : //
353 : // @param characteristic The characteristic(s) to clear for this section.
354 E : void clear_characteristic(uint32 characteristic) {
355 E : characteristics_ &= ~characteristic;
356 E : }
357 :
358 : // @name Serialization functions.
359 : // @{
360 : bool Save(core::OutArchive* out_archive) const;
361 : bool Load(core::InArchive* in_archive);
362 : // @}
363 :
364 : // A simple comparison operator for serialization tests.
365 E : bool operator==(const Section& other) const {
366 : return id_ == other.id_ && name_ == other.name_ &&
367 E : characteristics_ == other.characteristics_;
368 E : }
369 :
370 : // A not-equal comparison operator.
371 E : bool operator!=(const Section& other) const {
372 E : return !operator==(other);
373 E : }
374 :
375 : private:
376 : // The id of the section. This has no particular meaning other than as a way
377 : // to identify sections uniquely.
378 : SectionId id_;
379 : // The name of the section. This will be truncated to a max of 8 characters
380 : // on output.
381 : std::string name_;
382 : // The section characteristics, a bitmask of IMAGE_SCN_* values.
383 : uint32 characteristics_;
384 : };
385 :
386 : // A label denotes the beginning (or end) of a sub-region within a (code)
387 : // block. In particular, a code label represents an instruction boundary
388 : // at which disassembly can begin and a data label represents the beginning
389 : // of embedded data.
390 : class BlockGraph::Label {
391 : public:
392 : // Default constructor.
393 E : Label() : attributes_(0) {
394 E : }
395 :
396 : // Full constructor.
397 E : Label(const base::StringPiece& name, LabelAttributes attributes)
398 : : name_(name.begin(), name.end()), attributes_(attributes) {
399 E : }
400 :
401 : // @name Accessors.
402 : // @{
403 E : const std::string& name() const { return name_; }
404 : // @}
405 :
406 : // A helper function for logging and debugging.
407 : std::string ToString() const;
408 :
409 : // Equality comparator for unittesting.
410 E : bool operator==(const Label& other) const {
411 E : return name_ == other.name_ && attributes_ == other.attributes_;
412 E : }
413 :
414 : // The label attributes are a bitmask. You can set them wholesale,
415 : // or set and clear them individually by bitmasking.
416 E : LabelAttributes attributes() const { return attributes_; }
417 E : void set_attributes(LabelAttributes attributes) { attributes_ = attributes; }
418 :
419 : // Set or clear one or more attributes.
420 E : void set_attribute(LabelAttributes attribute) { attributes_ |= attribute; }
421 E : void clear_attribute(LabelAttributes attribute) { attributes_ &= ~attribute; }
422 :
423 : // Determines if all or any of the given attributes are set.
424 E : bool has_attributes(LabelAttributes attributes) const {
425 E : return (attributes_ & attributes) == attributes;
426 E : }
427 E : bool has_any_attributes(LabelAttributes attributes) const {
428 E : return (attributes_ & attributes) != 0;
429 E : }
430 :
431 : // @returns true if this label is valid, false otherwise.
432 : bool IsValid() const;
433 :
434 : // Tests a set of label attributes for validity.
435 : // @param attributes the attributes to test.
436 : // @returns true if the provided attributes are valid, false otherwise.
437 : static bool AreValidAttributes(LabelAttributes attributes);
438 :
439 : private:
440 : // The name by which this label is known.
441 : std::string name_;
442 :
443 : // The disposition of the bytes found at this label.
444 : LabelAttributes attributes_;
445 : };
446 :
447 :
448 : // A block represents a block of either code or data.
449 : //
450 : // Since blocks may be split and up and glued together in arbitrary ways, each
451 : // block maintains an address-space over its data, associating ranges of block
452 : // data to ranges of bytes in the original image. This effectively encodes OMAP
453 : // data, allowing the PDB file to be updated.
454 : //
455 : // Each block also stores references to other blocks in the graph, their
456 : // relative location within the block and their type and size.
457 : //
458 : // Each block has a set of attributes, including a size, a name and a
459 : // "current" address. Most of those attributes are mutable, and are set in the
460 : // process of creating and manipulating images and graph address spaces.
461 : class BlockGraph::Block {
462 : public:
463 : // Set of the blocks that have a reference to this block.
464 : // This is keyed on block and source offset (not destination offset),
465 : // to allow one to easily locate and remove the backreferences on change or
466 : // deletion.
467 : typedef std::pair<Block*, Offset> Referrer;
468 : typedef std::set<Referrer> ReferrerSet;
469 :
470 : // Map of references that this block makes to other blocks.
471 : typedef std::map<Offset, Reference> ReferenceMap;
472 :
473 : // Represents a range of data in this block.
474 : typedef core::AddressRange<Offset, Size> DataRange;
475 :
476 : // Represents a range of data in the original image.
477 : typedef core::AddressRange<RelativeAddress, Size> SourceRange;
478 :
479 : // A map between bytes in this block and bytes in the original image.
480 : typedef core::AddressRangeMap<DataRange, SourceRange> SourceRanges;
481 :
482 : // Typed labels associated with various offsets in the block. Some of these
483 : // labels (of type CODE_LABEL) represent code start points for disassembly
484 : // while others (of type DATA_LABEL) represent the start of embedded data
485 : // within the block. Note that, while possible, it is NOT guaranteed that
486 : // all basic blocks are marked with a label. Basic block decomposition should
487 : // disassemble from the code labels to discover all basic blocks.
488 : typedef std::map<Offset, Label> LabelMap;
489 :
490 : // Blocks need to be default constructible for serialization.
491 : Block();
492 :
493 : Block(BlockId id,
494 : BlockType type,
495 : Size size,
496 : const base::StringPiece& name);
497 : ~Block();
498 :
499 : // Accessors.
500 E : BlockId id() const { return id_; }
501 E : BlockType type() const { return type_; }
502 E : void set_type(BlockType type) { type_ = type; }
503 :
504 E : Size size() const { return size_; }
505 :
506 : // Set the total size of the block. Note that allocated data_size_ must
507 : // always be less than or equal to the total size.
508 E : void set_size(Size size) {
509 E : DCHECK_LE(data_size_, size);
510 E : size_ = size;
511 E : }
512 :
513 E : const std::string& name() const { return name_; }
514 E : void set_name(const base::StringPiece& name) { name.CopyToString(&name_); }
515 :
516 E : Size alignment() const { return alignment_; }
517 E : void set_alignment(Size alignment) {
518 : // Ensure that alignment is a non-zero power of two.
519 E : DCHECK(common::IsPowerOfTwo(alignment));
520 E : alignment_ = alignment;
521 E : }
522 :
523 : // The address of the block is set any time the block is assigned
524 : // an address in an address space.
525 E : RelativeAddress addr() const { return addr_; }
526 E : void set_addr(RelativeAddress addr) { return addr_ = addr; }
527 :
528 : // The section ID for the block. These IDs are wrt to the SectionMap in the
529 : // parent BlockGraph.
530 E : SectionId section() const { return section_; }
531 E : void set_section(SectionId section) { section_ = section; }
532 :
533 : // The block attributes are a bitmask. You can set them wholesale,
534 : // or set and clear them individually by bitmasking.
535 E : BlockAttributes attributes() const { return attributes_; }
536 E : void set_attributes(BlockAttributes attributes) { attributes_ = attributes; }
537 :
538 : // Set or clear one or more attributes.
539 E : void set_attribute(BlockAttributes attribute) { attributes_ |= attribute; }
540 E : void clear_attribute(BlockAttributes attribute) {
541 E : attributes_ &= ~attribute;
542 E : }
543 :
544 : // This is true iff data_ is in the ownership of the block.
545 : // Iff true, the block will delete [] data_ on destruction or when
546 : // data is overwritten.
547 E : bool owns_data() const { return owns_data_; }
548 :
549 : // Makes room for the given amount of data at the given offset. This is
550 : // special in that it will patch up any labels, source ranges and referrers
551 : // that land beyond the newly created data, shifting them to the right by
552 : // @p size. If the data for this block is actually allocated it will also
553 : // patch up the allocated data by zeroing the newly allocate range of data,
554 : // and shifting the tail by @p size. If the new data is strictly implicit
555 : // (offset > data_size), then the allocated data is not affected in any way
556 : // unless @p always_allocate_data is true.
557 : //
558 : // @param offset the offset at which to insert the new data.
559 : // @param size the size of the new data to be inserted.
560 : // @param always_allocate_data if true, then data_size will be grown if
561 : // necessary to ensure that the newly created data can be written.
562 : // @pre 0 <= offset <= size()
563 : void InsertData(Offset offset, Size size, bool always_allocate_data);
564 :
565 : // Removes the data in the given range. This will refuse to remove labels,
566 : // references and referrers that land in the range, and will fail if any
567 : // exist. It will also shift any labels, references and referrers that land
568 : // beyond the end of the removed range. Source ranges will also be fixed. If
569 : // the removed range lies within the initialized data then the data will also
570 : // be truncated/shifted as necessary.
571 : //
572 : // @param offset the offset at which to remove data.
573 : // @param size the size of the data to remove, in bytes.
574 : // @returns true on success, false otherwise.
575 : // @pre 0 <= offset <= size
576 : bool RemoveData(Offset offset, Size size);
577 :
578 : // Performs an inline resize of data in a BlockGraph. If the data is shrinking
579 : // this equates to a RemoveData operation. If it is growing it equates to an
580 : // InsertData operation.
581 : //
582 : // @param offset the offset of the data to resize.
583 : // @param current_size the current size of the data to resize.
584 : // @param new_size the desired size of the data.
585 : // @param always_allocate_data if true, then data_size will be grown if
586 : // necessary to ensure that the resized data can be written.
587 : // @returns true on success, false otherwise.
588 : // @pre 0 <= offset <= size
589 : bool InsertOrRemoveData(Offset offset, Size current_size, Size new_size,
590 : bool always_allocate_data);
591 :
592 : // Set the data the block refers to.
593 : // @param data NULL or the data this block refers to.
594 : // The underlying data must outlive this block.
595 : // @param data_size the size of data, or zero if data == NULL.
596 : // @pre data_size <= size().
597 : void SetData(const uint8* data, size_t data_size);
598 :
599 : // Allocates and returns a new data buffer of the given size. The returned
600 : // data will have been initialized to zero.
601 : // @pre data_size <= size().
602 : uint8* AllocateData(size_t data_size);
603 :
604 : // Makes a copy of data, returns a pointer to the copy.
605 : // @pre data_size <= size().
606 : uint8* CopyData(size_t data_size, const void* data);
607 :
608 : // Resizes data to new_size by truncating or zero-extending the current data.
609 : // @pre new_size <= size().
610 : const uint8* ResizeData(size_t new_size);
611 :
612 : // Returns a mutable copy of the block's data. If the block doesn't own
613 : // the data on entry, it'll be copied and the copy returned to the caller.
614 : uint8* GetMutableData();
615 :
616 : // The data bytes the block refers to.
617 E : const uint8* data() const { return data_; }
618 :
619 : // The data size may be smaller than the block size (see size()),
620 : // when the block e.g. refers to data that's all or part
621 : // zero-initialized by the linker/loader.
622 E : size_t data_size() const { return data_size_; }
623 :
624 E : const ReferenceMap& references() const { return references_; }
625 E : const ReferrerSet& referrers() const { return referrers_; }
626 E : const SourceRanges& source_ranges() const { return source_ranges_; }
627 E : SourceRanges& source_ranges() { return source_ranges_; }
628 E : const LabelMap& labels() const { return labels_; }
629 :
630 : // Returns true if there are any other bocks holding a reference to this one.
631 : bool HasExternalReferrers() const;
632 :
633 : // Set the reference at @p offset to @p ref.
634 : // If there's a pre-existing reference at @p offset, this overrides it.
635 : // @param offset offset of the reference into this block.
636 : // @param ref the reference to add.
637 : // @returns true iff this inserts a new reference.
638 : bool SetReference(Offset offset, const Reference& ref);
639 :
640 : // Retrieve the reference at @p offset if one exists.
641 : // @param reference on success returns the reference @p offset.
642 : // @returns true iff there was a reference at @p offset.
643 : bool GetReference(Offset offset, Reference* reference) const;
644 :
645 : // Remove the reference at @p offset.
646 : // @returns true iff there was a reference at @p offset.
647 : bool RemoveReference(Offset offset);
648 :
649 : // Remove all references from this block. This is handy when removing a block
650 : // from the block graph.
651 : bool RemoveAllReferences();
652 :
653 : // Set a label to @p offset.
654 : // A label in code marks the location of the start of an instruction -
655 : // e.g. a location where disassembly can usefully commence. Labels
656 : // appear to be inserted by the VS tool chain where e.g. a switch
657 : // statement is implemented with a jump table, to note the location
658 : // of the jump destinations.
659 : // @param offset the offset of the label to set.
660 : // @param name the name of the label.
661 : // @param attributes the attributes of the label.
662 : // @returns true iff a new label is inserted.
663 : // @note that only one label can exist at each offset, and the first
664 : // label set at any offset will stay there.
665 : // @{
666 : bool SetLabel(Offset offset, const Label& label);
667 : bool SetLabel(Offset offset,
668 : const base::StringPiece& name,
669 E : LabelAttributes attributes) {
670 E : return SetLabel(offset, Label(name, attributes));
671 E : }
672 : // @}
673 :
674 : // Gets the label at the given @p offset.
675 : // @param offset the offset of the label to get.
676 : // @param label the string to receive the label.
677 : // @return true if the label exists, false otherwise.
678 : bool GetLabel(Offset offset, Label* label) const;
679 :
680 : // Removes the label at the given @p offset.
681 : // @param offset the offset of the label to remove.
682 : // @return true if the label existed and was removed, false it it did not
683 : // exist.
684 : bool RemoveLabel(Offset offset);
685 :
686 : // Returns true iff the block has a label at @p offset.
687 : // @param offset the offset of the label to search for.
688 : bool HasLabel(Offset offset);
689 :
690 : // Change all references to this block to refer to @p new_block instead,
691 : // while offsetting each reference by @p offset.
692 : // @note this fails if any of the transferred references end up with offsets
693 : // less than zero, or greater than new_block->size().
694 : // @returns true iff all references were transferred successfully.
695 : bool TransferReferrers(Offset offset, Block* new_block);
696 :
697 : // Returns true if this block contains the given range of bytes.
698 : bool Contains(RelativeAddress address, size_t size) const;
699 :
700 : protected:
701 : // Give BlockGraph access to our innards for serialization.
702 : friend class BlockGraph;
703 : // Give BlockGraphSerializer access to our innards for serialization.
704 : friend class BlockGraphSerializer;
705 :
706 : // Allocates and returns a new data buffer of the given size. The returned
707 : // data buffer will not have been initialized in any way.
708 : uint8* AllocateRawData(size_t size);
709 :
710 : BlockId id_;
711 : BlockType type_;
712 : Size size_;
713 : Size alignment_;
714 : std::string name_;
715 : RelativeAddress addr_;
716 :
717 : SectionId section_;
718 : BlockAttributes attributes_;
719 :
720 : ReferenceMap references_;
721 : ReferrerSet referrers_;
722 : SourceRanges source_ranges_;
723 : LabelMap labels_;
724 :
725 : // True iff data_ is ours to deallocate with delete [].
726 : // If this is false, data_ must be guaranteed to outlive the block.
727 : bool owns_data_;
728 : // A pointer to the code or data we represent.
729 : const uint8* data_;
730 : // Size of the above.
731 : size_t data_size_;
732 : };
733 :
734 : // A graph address space endows a graph with a non-overlapping ordering
735 : // on blocks, where each block occupies zero or one address ranges in the
736 : // address space. No two blocks may overlap in an address space.
737 : class BlockGraph::AddressSpace {
738 : public:
739 : typedef core::AddressSpace<RelativeAddress, BlockGraph::Size, Block*>
740 : AddressSpaceImpl;
741 : typedef AddressSpaceImpl::Range Range;
742 : typedef AddressSpaceImpl::RangeMap RangeMap;
743 : typedef AddressSpaceImpl::RangeMapIter RangeMapIter;
744 : typedef AddressSpaceImpl::RangeMapConstIter RangeMapConstIter;
745 : typedef AddressSpaceImpl::RangeMapIterPair RangeMapIterPair;
746 : typedef AddressSpaceImpl::RangeMapConstIterPair RangeMapConstIterPair;
747 :
748 : // Constructs a new empty address space.
749 : // @p start to @p start + @p size on @p graph.
750 : explicit AddressSpace(BlockGraph* graph);
751 :
752 : // Add a block of type @p type and @p size at @p address to our associated
753 : // graph, and return the new block.
754 : // @returns the new block, or NULL if the new block would overlap
755 : // an existing block.
756 : Block* AddBlock(BlockType type,
757 : RelativeAddress addr,
758 : Size size,
759 : const base::StringPiece& name);
760 :
761 : // Merges all blocks that intersect @p range into a single block.
762 : // Moves labels and references from the intersecting blocks to the
763 : // merged block, and changes referring blocks to refer to the new,
764 : // merged block. Removes the original blocks from the BlockGraph.
765 : // @returns the new, merged block if there was at least one intersecting
766 : // block in @p range, or NULL otherwise.
767 : Block* MergeIntersectingBlocks(const Range& range);
768 :
769 : // Insert existing block @p block at @p address.
770 : // @returns true on success, or false if the @p block would overlap
771 : // an existing block.
772 : bool InsertBlock(RelativeAddress addr, Block* block);
773 :
774 : // Returns a pointer to the block containing address, or NULL
775 : // if no block contains address.
776 : Block* GetBlockByAddress(RelativeAddress addr) const;
777 :
778 : // Returns a pointer to the block containing the address range
779 : // [address, address + size), or NULL if no block contains that
780 : // range.
781 : Block* GetContainingBlock(RelativeAddress addr, Size size) const;
782 :
783 : // Finds the first block, if any that intersects
784 : // [@p address, @p address + @p size).
785 : Block* GetFirstIntersectingBlock(RelativeAddress address, Size size);
786 :
787 : // Check whether the address space contains @p block.
788 : // @param block the block in question.
789 : // @returns true if the block is in the address space, false otherwise.
790 : bool ContainsBlock(const Block* block);
791 :
792 : // Locates all blocks that intersect [@p address, @p address + @p size).
793 : // @returns a pair of iterators that iterate over the found blocks.
794 : RangeMapConstIterPair GetIntersectingBlocks(RelativeAddress address,
795 : Size size) const;
796 : RangeMapIterPair GetIntersectingBlocks(RelativeAddress address, Size size);
797 :
798 : // Retrieve the address off @p block.
799 : // @param block the block in question.
800 : // @param addr on success, returns the address of @p block in this
801 : // address space.
802 : // @returns true on success, false if @p block is not in this
803 : // address space.
804 : bool GetAddressOf(const Block* block, RelativeAddress* addr) const;
805 :
806 : // Accessor.
807 E : BlockGraph* graph() { return graph_; }
808 E : const BlockGraph* graph() const { return graph_; }
809 :
810 E : RangeMapConstIter begin() const {
811 E : return address_space_.ranges().begin();
812 E : }
813 :
814 E : RangeMapConstIter end() const {
815 E : return address_space_.ranges().end();
816 E : }
817 :
818 E : size_t size() const {
819 E : return address_space_.ranges().size();
820 E : }
821 :
822 E : const AddressSpaceImpl& address_space_impl() const {
823 E : return address_space_;
824 E : }
825 :
826 : private:
827 : bool InsertImpl(RelativeAddress addr, Block* block);
828 :
829 : typedef stdext::hash_map<const Block*, RelativeAddress> BlockAddressMap;
830 :
831 : AddressSpaceImpl address_space_;
832 : BlockAddressMap block_addresses_;
833 : BlockGraph* graph_;
834 : };
835 :
836 : // Represents a reference from one block to another. References may be offset.
837 : // That is, they may refer to an object at a given location, but actually point
838 : // to a location that is some fixed distance away from that object. This allows,
839 : // for example, non-zero based indexing into a table. The object that is
840 : // intended to be dereferenced is called the 'base' of the offset.
841 : //
842 : // BlockGraph references are from a location (offset) in one block, to some
843 : // location in another block. The referenced block itself plays the role of the
844 : // 'base' of the reference, with the offset of the reference being stored as
845 : // an integer from the beginning of the block. However, basic block
846 : // decomposition requires breaking the block into smaller pieces and thus we
847 : // need to carry around an explicit base value, indicating which byte in the
848 : // block is intended to be referenced.
849 : //
850 : // A direct reference to a location will have the same value for 'base' and
851 : // 'offset'.
852 : //
853 : // Here is an example:
854 : //
855 : // /----------\
856 : // +---------------------------+
857 : // O | B | <--- Referenced block
858 : // +---------------------------+ B = base
859 : // \-----/ O = offset
860 : //
861 : class BlockGraph::Reference {
862 : public:
863 : Reference() :
864 E : type_(RELATIVE_REF), size_(0), referenced_(NULL), offset_(0), base_(0) {
865 E : }
866 :
867 : // @param type type of reference.
868 : // @param size size of reference.
869 : // @param referenced the referenced block.
870 : // @param offset offset from the beginning of the block of the location to be
871 : // explicitly referred to.
872 : // @param base offset into the block of the location actually being
873 : // referenced. This must be strictly within @p referenced.
874 : Reference(ReferenceType type,
875 : Size size,
876 : Block* referenced,
877 : Offset offset,
878 : Offset base)
879 : : type_(type),
880 : size_(size),
881 : referenced_(referenced),
882 : offset_(offset),
883 E : base_(base) {
884 E : DCHECK(IsValid());
885 E : }
886 :
887 : // Copy constructor.
888 : Reference(const Reference& other)
889 : : type_(other.type_),
890 : size_(other.size_),
891 : referenced_(other.referenced_),
892 : offset_(other.offset_),
893 E : base_(other.base_) {
894 E : }
895 :
896 : // Accessors.
897 E : ReferenceType type() const { return type_; }
898 E : Size size() const { return size_; }
899 E : Block* referenced() const { return referenced_; }
900 E : Offset offset() const { return offset_; }
901 E : Offset base() const { return base_; }
902 :
903 : // Determines if this is a direct reference. That is, if the actual location
904 : // being referenced (offset) and the intended location being referenced (base)
905 : // are the same.
906 : //
907 : // @returns true if the reference is direct, false otherwise.
908 E : bool IsDirect() const { return base_ == offset_; }
909 :
910 : // Determines if this is a valid reference, by imposing size constraints on
911 : // reference types, and determining if the base address of the reference is
912 : // strictly contained within the referenced block.
913 : //
914 : // @returns true if valid, false otherwise.
915 : bool IsValid() const;
916 :
917 E : bool operator==(const Reference& other) const {
918 : return type_ == other.type_ &&
919 : size_ == other.size_ &&
920 : referenced_ == other.referenced_ &&
921 : offset_ == other.offset_ &&
922 E : base_ == other.base_;
923 E : }
924 :
925 : // The maximum size that a reference may have. This needs to be kept in sync
926 : // with the expectations of IsValid().
927 : static const size_t kMaximumSize = 4;
928 :
929 : // Returns true if the given reference type and size combination is valid.
930 : static bool IsValidTypeSize(ReferenceType type, Size size);
931 :
932 : private:
933 : // Type of this reference.
934 : ReferenceType type_;
935 :
936 : // Size of this reference.
937 : // Absolute references are always pointer wide, but PC-relative
938 : // references can be 1, 2 or 4 bytes wide, which affects their range.
939 : Size size_;
940 :
941 : // The block referenced.
942 : Block* referenced_;
943 :
944 : // Offset into the referenced block.
945 : Offset offset_;
946 :
947 : // The base of the reference, as in offset in the block. This must be a
948 : // location strictly within the block.
949 : Offset base_;
950 : };
951 :
952 : // Commonly used container types.
953 : typedef std::vector<BlockGraph::Block*> BlockVector;
954 : typedef std::vector<const BlockGraph::Block*> ConstBlockVector;
955 :
956 : } // namespace block_graph
957 :
958 : #endif // SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
|