1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // A block graph is an abstract graph of blocks, each of which has an ID, a
16 : // type, a size and a few other properties. Each block represents either code or
17 : // data, and blocks can reference one another through references of various
18 : // types.
19 : //
20 : // The BlockGraph also stores minimum knowledge of sections (names and
21 : // characteristics), and each block belongs to at most one section. In this
22 : // sense, a BlockGraph acts as top-level division of blocks.
23 :
24 : #ifndef SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
25 : #define SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
26 :
27 : #include <hash_map>
28 : #include <map>
29 : #include <set>
30 : #include <string>
31 : #include <vector>
32 :
33 : #include "base/basictypes.h"
34 : #include "base/files/file_util.h"
35 : #include "base/strings/string_piece.h"
36 : #include "syzygy/common/align.h"
37 : #include "syzygy/core/address.h"
38 : #include "syzygy/core/address_space.h"
39 : #include "syzygy/core/string_table.h"
40 :
41 : namespace block_graph {
42 :
43 : // Forward declaration.
44 : class BlockGraphSerializer;
45 :
46 : // NOTE: When adding attributes be sure to update any uses of them in
47 : // block_graph.cc, for example in MergeIntersectingBlocks.
48 : #define BLOCK_ATTRIBUTE_ENUM(F) \
49 : /* Set for functions declared non-returning. */ \
50 : F(NON_RETURN_FUNCTION) \
51 : /* Set for blocks that are inferred by the decomposer. */ \
52 : F(GAP_BLOCK) \
53 : /* Set for blocks that are parsed by the PEFileParser. These */ \
54 : /* blocks are unmovable, indivisible, etc, and have to be treated */ \
55 : /* specially. */ \
56 : F(PE_PARSED) \
57 : /* Set for blocks that are created from section contribution */ \
58 : /* information or directly from COFF sections. */ \
59 : F(SECTION_CONTRIB) \
60 : /* This is used to indicate that a block consists purely of padding */ \
61 : /* data. */ \
62 : F(PADDING_BLOCK) \
63 : /* Indicates blocks that contain inline assembly. */ \
64 : F(HAS_INLINE_ASSEMBLY) \
65 : /* Indicates that the block was built by a compiler whose precise */ \
66 : /* behaviour and semantics we are unfamiliar with. */ \
67 : F(BUILT_BY_UNSUPPORTED_COMPILER) \
68 : /* Indicates that the block has been built by the Syzygy toolchain, and */ \
69 : /* thus is inherently safe for basic-block decomposition without having */ \
70 : /* to perform the myriad of safety checks we do otherwise. */ \
71 : F(BUILT_BY_SYZYGY) \
72 : /* Deprecated: used by old decomposer. */ \
73 : F(RESERVED_ATTRIBUTE1) \
74 : /* Deprecated: used by old decomposer. */ \
75 : F(RESERVED_ATTRIBUTE2) \
76 : /* This is set for functions that have exception handling enabled. */ \
77 : /* Without delving far deeper into the specifics, it is unsafe to basic */ \
78 : /* block decompose these blocks. */ \
79 : F(HAS_EXCEPTION_HANDLING) \
80 : /* Deprecated: used by old decomposer. */ \
81 : F(RESERVED_ATTRIBUTE3) \
82 : /* This is set for blocks that have a thunk symbol pointing to them. */ \
83 : /* Typically thunk blocks are compiler or linker-generated, such as */ \
84 : /* e.g. import thunks, delay load import thunks, etc. */ \
85 : F(THUNK) \
86 : /* This is set for blocks that have been parsed as COFF groups. The */ \
87 : /* contents of these blocks are semantically indivisible. */ \
88 : F(COFF_GROUP) \
89 : /* COFF headers block; not set for PE. */ \
90 : F(COFF_HEADERS) \
91 : /* COFF symbol table. */ \
92 : F(COFF_SYMBOL_TABLE) \
93 : /* COFF string table. */ \
94 : F(COFF_STRING_TABLE) \
95 : /* COFF relocation table; these should be ignored when dealing with */ \
96 : /* block graphs, as all the information is represented as references. */ \
97 : F(COFF_RELOC_DATA) \
98 : /* COFF BSS (unmapped) block; has size but no data. */ \
99 : F(COFF_BSS) \
100 : /* Contains unsupported instructions. */ \
101 : F(UNSUPPORTED_INSTRUCTIONS) \
102 : /* This always needs to be set to the next available attribute bit. */ \
103 : F(BLOCK_ATTRIBUTES_MAX)
104 :
105 : // The BlockGraph is a top-level container for Blocks.
106 : class BlockGraph {
107 : public:
108 : typedef core::RelativeAddress RelativeAddress;
109 :
110 : typedef size_t SectionId;
111 : typedef size_t BlockId;
112 : typedef size_t Size;
113 : typedef ptrdiff_t Offset;
114 : typedef uint32 BlockAttributes;
115 : typedef uint32 LabelAttributes;
116 :
117 : // The BlockGraph maintains a list of sections, and each block belongs
118 : // to one of them. This is the set of information we keep regarding them.
119 : struct Section;
120 : // The section map contains all sections, indexed by id.
121 : typedef std::map<SectionId, Section> SectionMap;
122 :
123 : static const SectionId kInvalidSectionId;
124 :
125 : // The types of image formats that we can currently represent in a
126 : // BlockGraph.
127 : enum ImageFormat {
128 : UNKNOWN_IMAGE_FORMAT,
129 : PE_IMAGE,
130 : COFF_IMAGE,
131 : // Representation of a loaded PE module, created by the hot patching
132 : // decomposer.
133 : PE_IN_MEMORY_IMAGE,
134 :
135 : // This must always be the last entry, and kImageFormat must be kept in
136 : // sync with this enum.
137 : IMAGE_FORMAT_MAX
138 : };
139 :
140 : static const char* ImageFormatToString(ImageFormat format);
141 :
142 : // Assign distinct bit IDs to each attribute constant.
143 : enum BlockAttributeIdEnum {
144 : #define DECLARE_ENUM_BIT(name) name##_BIT,
145 : BLOCK_ATTRIBUTE_ENUM(DECLARE_ENUM_BIT)
146 : #undef DECLARE_ENUM_BIT
147 : };
148 :
149 : enum BlockAttributeEnum {
150 : #define DECLARE_ENUM(name) name = (1 << name##_BIT),
151 : BLOCK_ATTRIBUTE_ENUM(DECLARE_ENUM)
152 : #undef DECLARE_ENUM
153 : };
154 :
155 : // Returns a string containing the names of the supplied attributes.
156 : static std::string BlockAttributesToString(BlockAttributes attrs);
157 :
158 : enum BlockType {
159 : CODE_BLOCK,
160 : DATA_BLOCK,
161 :
162 : // NOTE: This must always be last, and kBlockType must be kept in sync
163 : // with this enum.
164 : BLOCK_TYPE_MAX
165 : };
166 :
167 : static const char* BlockTypeToString(BlockType type);
168 :
169 : // Label attributes. Attributes of the form _END_LABEL type actually
170 : // point to the first byte past the range they delineate. To make the
171 : // semantics of moving labels easier, we shift these labels left by one and
172 : // make them follow the last byte of the delineated range.
173 : enum LabelAttributesEnum {
174 : // The label points to an entry-point in a code block.
175 : CODE_LABEL = (1 << 0),
176 :
177 : // Mark the start and end of the debuggable portion of a code block.
178 : DEBUG_START_LABEL = (1 << 1),
179 : DEBUG_END_LABEL = (1 << 2),
180 :
181 : // Mark the start and end of an embedded scope in a code block.
182 : SCOPE_START_LABEL = (1 << 3),
183 : SCOPE_END_LABEL = (1 << 4),
184 :
185 : // Marks the location of a (virtual table?) call.
186 : CALL_SITE_LABEL = (1 << 5),
187 :
188 : // The label points to the start of a jump table. The length is inferred
189 : // by the location of the next label, or the end of the block. This will
190 : // also have DATA_LABEL set.
191 : JUMP_TABLE_LABEL = (1 << 6),
192 : // The label points to the start of a case table. The length is inferred
193 : // by the location of the next label, or the end of the block. This will
194 : // also have DATA_LABEL set.
195 : CASE_TABLE_LABEL = (1 << 7),
196 : // The label originated from a data symbol. The length is inferred by the
197 : // location of the next label, or the end of the block. The type of data
198 : // is unknown.
199 : DATA_LABEL = (1 << 8),
200 :
201 : // Used to mark a label that was derived from a public symbol. These are
202 : // usually actually pointing to code and global data symbols, but we can't
203 : // always tell (there may be public symbols pointing to data in a code
204 : // block).
205 : PUBLIC_SYMBOL_LABEL = (1 << 9),
206 :
207 : // This always needs to be the most significant bit.
208 : LABEL_ATTRIBUTES_MAX = (1 << 10),
209 : };
210 :
211 : static std::string LabelAttributesToString(LabelAttributes label_attributes);
212 :
213 : enum ReferenceType {
214 : // Common reference types.
215 : PC_RELATIVE_REF,
216 : ABSOLUTE_REF,
217 : RELATIVE_REF,
218 : FILE_OFFSET_REF,
219 :
220 : // Object-file reference types.
221 : SECTION_REF,
222 : SECTION_OFFSET_REF,
223 :
224 : // Relocation reference types.
225 : // TODO(chrisha): Do we need a separate bit, and different ABS and REL
226 : // reference types for COFF files?
227 : RELOC_REF_BIT = 1u << 3,
228 : RELOC_PC_RELATIVE_REF = PC_RELATIVE_REF | RELOC_REF_BIT,
229 : RELOC_ABSOLUTE_REF = ABSOLUTE_REF | RELOC_REF_BIT,
230 : RELOC_RELATIVE_REF = RELATIVE_REF | RELOC_REF_BIT,
231 : RELOC_SECTION_REF = SECTION_REF | RELOC_REF_BIT,
232 : RELOC_SECTION_OFFSET_REF = SECTION_OFFSET_REF | RELOC_REF_BIT,
233 :
234 : // Must be last!
235 : REFERENCE_TYPE_MAX,
236 : };
237 :
238 : // Forward declarations.
239 : class AddressSpace;
240 : class Block;
241 : class Label;
242 : class Reference;
243 : struct BlockIdLess;
244 :
245 : // The block map contains all blocks, indexed by id.
246 : typedef std::map<BlockId, Block> BlockMap;
247 :
248 : BlockGraph();
249 : ~BlockGraph();
250 :
251 : // Adds a section with the given name.
252 : //
253 : // @param name The section name.
254 : // @param characteristics The section characteristics.
255 : // @returns the newly created section.
256 : Section* AddSection(const base::StringPiece& name, uint32 characteristics);
257 :
258 : // Finds a section with the given name, returning NULL if no such section
259 : // exists.
260 : //
261 : // @param name The section name.
262 : // @returns the section if one is found, NULL otherwise.
263 : Section* FindSection(const base::StringPiece& name);
264 : const Section* FindSection(const base::StringPiece& name) const;
265 :
266 : // Find or adds section with the given name.
267 : //
268 : // If a section with the given name already exists, updates its
269 : // characteristics and returns it. Otherwise, creates a new section and
270 : // returns it. If multiple sections exist with the given name, the first
271 : // one encountered is returned.
272 : //
273 : // TODO(chrisha): The semantics of this function are a little odd. It would
274 : // make more sense for it to return only if a section with matching name
275 : // AND characteristics is found, otherwise to create a new one.
276 : //
277 : // @param name The section name.
278 : // @param characteristics The section characteristics.
279 : // @returns the new or found section.
280 : Section* FindOrAddSection(const base::StringPiece& name,
281 : uint32 characteristics);
282 :
283 : // Removes the given section from the BlockGraph.
284 : //
285 : // The section must belong to this block graph. Be aware that this can leave
286 : // Blocks with dangling section_ids.
287 : //
288 : // @param section The section to remove.
289 : // @returns true on success, false otherwise.
290 : bool RemoveSection(Section* section);
291 :
292 : // Removes the section with the given id from the BlockGraph.
293 : //
294 : // @param id The id of the section to remove.
295 : // @returns true on success, false otherwise.
296 : bool RemoveSectionById(SectionId id);
297 :
298 : // Adds @p block of type @p type and @p size and returns the new block.
299 : // @returns the new block.
300 : Block* AddBlock(BlockType type, Size size, const base::StringPiece& name);
301 :
302 : // Copies @p block to a new block with @p new_name and returns the new block.
303 : // Copies all block properties, except as noted below.
304 : // @returns the new block.
305 : // @note If @p block owns data then allocates a new copy; else copies the
306 : // pointer.
307 : // @note Does not copy the deprecated |addr| property.
308 : // @note Does not transfer referrers. Duplicates all external references.
309 : // Copies self-references as self-references in the new block.
310 : Block* CopyBlock(Block* block, const base::StringPiece& new_name);
311 :
312 : // Deletes the given block from the BlockGraph. The block must belong to this
313 : // block graph, and have no references or referrers. Returns true on success,
314 : // false otherwise. On failure, the BlockGraph has not been changed.
315 : bool RemoveBlock(Block* block);
316 :
317 : // Deletes the block with the given @p id from the block graph. The block id
318 : // must be valid, and the block must have no references or referrers. Returns
319 : // true on success, false otherwise. On failure, the BlockGraph has not been
320 : // changed.
321 : bool RemoveBlockById(BlockId id);
322 :
323 : // Accessors.
324 E : const SectionMap& sections() const { return sections_; }
325 E : SectionMap& sections_mutable() { return sections_; }
326 E : const BlockMap& blocks() const { return blocks_; }
327 E : BlockMap& blocks_mutable() { return blocks_; }
328 :
329 : // @{
330 : // Retrieve the section with the given id.
331 : //
332 : // @param id The id of the section to retrieve.
333 : // @returns the section in question or NULL if no such section.
334 : Section* GetSectionById(SectionId id);
335 : const Section* GetSectionById(SectionId id) const;
336 : // @}
337 :
338 : // @{
339 : // Retrieve the block with id.
340 : // @returns the block in question or NULL if no such block.
341 : Block* GetBlockById(BlockId id);
342 : const Block* GetBlockById(BlockId id) const;
343 : // @}
344 :
345 : // Get the string table.
346 : // @returns the string table of this BlockGraph.
347 E : core::StringTable& string_table() { return string_table_; }
348 :
349 : // Sets the image format.
350 : // @param image_format The format of the image.
351 E : void set_image_format(ImageFormat image_format) {
352 E : image_format_ = image_format;
353 E : }
354 : // @returns the image format.
355 E : ImageFormat image_format() const { return image_format_; }
356 :
357 : private:
358 : // Give BlockGraphSerializer access to our innards for serialization.
359 : friend BlockGraphSerializer;
360 :
361 : // Removes a block by the iterator to it. The iterator must be valid.
362 : bool RemoveBlockByIterator(BlockMap::iterator it);
363 :
364 : // All sections we contain.
365 : SectionMap sections_;
366 :
367 : // Our section ID allocator.
368 : SectionId next_section_id_;
369 :
370 : // All blocks we contain.
371 : BlockMap blocks_;
372 :
373 : // Our block ID allocator.
374 : BlockId next_block_id_;
375 :
376 : // A string table used to intern strings.
377 : core::StringTable string_table_;
378 :
379 : // The format of the image represented by this block graph. Defaults to
380 : // UNKNOWN_IMAGE_FORMAT. Usually initialized by the appropriate decomposer.
381 : ImageFormat image_format_;
382 :
383 : DISALLOW_COPY_AND_ASSIGN(BlockGraph);
384 : };
385 :
386 : // The BlockGraph maintains a list of sections, and each block belongs
387 : // to one of them. This is the set of information we keep regarding them.
388 : struct BlockGraph::Section {
389 : // Default constructor. Required for serialization.
390 E : Section() : id_(kInvalidSectionId), characteristics_(0) {
391 E : }
392 :
393 : // Full constructor.
394 : //
395 : // @param id The section id. This must not be kInvalidSectionId.
396 : // @param name The name of the section. Must not be empty or NULL.
397 : // @param characteristics The characteristics of the section.
398 E : Section(SectionId id, const base::StringPiece& name, uint32 characteristics)
399 : : id_(id), name_(), characteristics_(characteristics) {
400 E : DCHECK_NE(kInvalidSectionId, id);
401 E : DCHECK(name != NULL);
402 E : name.CopyToString(&name_);
403 E : DCHECK(!name_.empty());
404 E : }
405 :
406 : // Get the id of this section.
407 : //
408 : // @returns the id of the section.
409 E : SectionId id() const { return id_; }
410 :
411 : // Get the name of this section.
412 : //
413 : // @returns the section name.
414 E : const std::string& name() const { return name_; }
415 :
416 : // Sets the name for this section.
417 : //
418 : // @param name The name of the section. If NULL or empty, this will fail.
419 : // @returns true if the name is set, false otherwise.
420 : bool set_name(const base::StringPiece& name);
421 :
422 : // Get the characteristics of this section.
423 : //
424 : // @returns the section characteristics.
425 E : uint32 characteristics() const { return characteristics_; }
426 :
427 : // Sets the characteristics for this section.
428 : //
429 : // @param characteristics The new characteristics to set.
430 E : void set_characteristics(uint32 characteristics) {
431 E : characteristics_ = characteristics;
432 E : }
433 :
434 : // Sets a one or more additional characteristics for this section.
435 : //
436 : // @param characteristic The new characteristic(s) to set for this section.
437 E : void set_characteristic(uint32 characteristic) {
438 E : characteristics_ |= characteristic;
439 E : }
440 :
441 : // Clears one or more characteristics for this section.
442 : //
443 : // @param characteristic The characteristic(s) to clear for this section.
444 E : void clear_characteristic(uint32 characteristic) {
445 E : characteristics_ &= ~characteristic;
446 E : }
447 :
448 : // @name Serialization functions.
449 : // @{
450 : bool Save(core::OutArchive* out_archive) const;
451 : bool Load(core::InArchive* in_archive);
452 : // @}
453 :
454 : // A simple comparison operator for serialization tests.
455 E : bool operator==(const Section& other) const {
456 : return id_ == other.id_ && name_ == other.name_ &&
457 E : characteristics_ == other.characteristics_;
458 E : }
459 :
460 : // A not-equal comparison operator.
461 E : bool operator!=(const Section& other) const {
462 E : return !operator==(other);
463 E : }
464 :
465 : private:
466 : // The id of the section. This has no particular meaning other than as a way
467 : // to identify sections uniquely.
468 : SectionId id_;
469 : // The name of the section. This will be truncated to a max of 8 characters
470 : // on output.
471 : std::string name_;
472 : // The section characteristics, a bitmask of IMAGE_SCN_* values.
473 : uint32 characteristics_;
474 : };
475 :
476 : // A label denotes the beginning (or end) of a sub-region within a (code)
477 : // block. In particular, a code label represents an instruction boundary
478 : // at which disassembly can begin and a data label represents the beginning
479 : // of embedded data.
480 : class BlockGraph::Label {
481 : public:
482 : // Default constructor.
483 E : Label() : attributes_(0) {
484 E : }
485 :
486 : // Full constructor.
487 E : Label(const base::StringPiece& name, LabelAttributes attributes)
488 : : name_(name.begin(), name.end()), attributes_(attributes) {
489 E : }
490 :
491 : // Copy construction.
492 E : Label(const Label& other)
493 : : name_(other.name_), attributes_(other.attributes_) {
494 E : }
495 :
496 : // @name Accessors.
497 : // @{
498 E : const std::string& name() const { return name_; }
499 : // @}
500 :
501 : // A helper function for logging and debugging.
502 : std::string ToString() const;
503 :
504 : // Equality comparator for unittesting.
505 E : bool operator==(const Label& other) const {
506 E : return name_ == other.name_ && attributes_ == other.attributes_;
507 E : }
508 :
509 : // The label attributes are a bitmask. You can set them wholesale,
510 : // or set and clear them individually by bitmasking.
511 E : LabelAttributes attributes() const { return attributes_; }
512 E : void set_attributes(LabelAttributes attributes) { attributes_ = attributes; }
513 :
514 : // Set or clear one or more attributes.
515 E : void set_attribute(LabelAttributes attribute) { attributes_ |= attribute; }
516 E : void clear_attribute(LabelAttributes attribute) { attributes_ &= ~attribute; }
517 :
518 : // Determines if all or any of the given attributes are set.
519 E : bool has_attributes(LabelAttributes attributes) const {
520 E : return (attributes_ & attributes) == attributes;
521 E : }
522 E : bool has_any_attributes(LabelAttributes attributes) const {
523 E : return (attributes_ & attributes) != 0;
524 E : }
525 :
526 : // @returns true if this label is valid, false otherwise.
527 : bool IsValid() const;
528 :
529 : // Tests a set of label attributes for validity.
530 : // @param attributes the attributes to test.
531 : // @returns true if the provided attributes are valid, false otherwise.
532 : static bool AreValidAttributes(LabelAttributes attributes);
533 :
534 : private:
535 : // The name by which this label is known.
536 : std::string name_;
537 :
538 : // The disposition of the bytes found at this label.
539 : LabelAttributes attributes_;
540 : };
541 :
542 : // A block represents a block of either code or data.
543 : //
544 : // Since blocks may be split and up and glued together in arbitrary ways, each
545 : // block maintains an address-space over its data, associating ranges of block
546 : // data to ranges of bytes in the original image. This effectively encodes OMAP
547 : // data, allowing the PDB file to be updated.
548 : //
549 : // Each block also stores references to other blocks in the graph, their
550 : // relative location within the block and their type and size.
551 : //
552 : // Each block has a set of attributes, including a size, a name and a
553 : // "current" address. Most of those attributes are mutable, and are set in the
554 : // process of creating and manipulating images and graph address spaces.
555 : class BlockGraph::Block {
556 : public:
557 : // Set of the blocks that have a reference to this block.
558 : // This is keyed on block and source offset (not destination offset),
559 : // to allow one to easily locate and remove the backreferences on change or
560 : // deletion.
561 : typedef std::pair<Block*, Offset> Referrer;
562 : typedef std::set<Referrer> ReferrerSet;
563 :
564 : // Map of references that this block makes to other blocks.
565 : typedef std::map<Offset, Reference> ReferenceMap;
566 :
567 : // Represents a range of data in this block.
568 : typedef core::AddressRange<Offset, Size> DataRange;
569 :
570 : // Represents a range of data in the original image.
571 : typedef core::AddressRange<RelativeAddress, Size> SourceRange;
572 :
573 : // A map between bytes in this block and bytes in the original image.
574 : typedef core::AddressRangeMap<DataRange, SourceRange> SourceRanges;
575 :
576 : // The flags that can be passed to the TransferReferrers function.
577 : enum TransferReferrersFlags {
578 : kSkipInternalReferences = (1 << 0),
579 : kTransferInternalReferences = (1 << 1)
580 : };
581 :
582 : // Typed labels associated with various offsets in the block. Some of these
583 : // labels (of type CODE_LABEL) represent code start points for disassembly
584 : // while others (of type DATA_LABEL) represent the start of embedded data
585 : // within the block. Note that, while possible, it is NOT guaranteed that
586 : // all basic blocks are marked with a label. Basic block decomposition should
587 : // disassemble from the code labels to discover all basic blocks.
588 : typedef std::map<Offset, Label> LabelMap;
589 :
590 : ~Block();
591 :
592 : // Accessors.
593 E : BlockId id() const { return id_; }
594 E : BlockType type() const { return type_; }
595 E : void set_type(BlockType type) { type_ = type; }
596 :
597 E : Size size() const { return size_; }
598 :
599 : // Set the total size of the block. Note that allocated data_size_ must
600 : // always be less than or equal to the total size.
601 E : void set_size(Size size) {
602 E : DCHECK_LE(data_size_, size);
603 E : size_ = size;
604 E : }
605 :
606 E : const std::string& name() const {
607 E : DCHECK(name_ != NULL);
608 E : return *name_;
609 E : }
610 : void set_name(const base::StringPiece& name);
611 :
612 : const std::string& compiland_name() const;
613 : void set_compiland_name(const base::StringPiece& name);
614 :
615 : // Gets the alignment of the block. Note that the alignment is applied to
616 : // the byte at the alignment offset.
617 : // @returns The current alignment of the block.
618 E : Size alignment() const { return alignment_; }
619 : // Sets the alignment that applies to the block when building the layout.
620 : // By default, the first byte of the block will be aligned, but that can
621 : // be overridden by setting the alignment offset.
622 : // @param alignment The new alignment. The alignment should be a power of two.
623 : // The value of zero is invalid, use 1 to signal no alignment requirements.
624 E : void set_alignment(Size alignment) {
625 : // Ensure that alignment is a non-zero power of two.
626 E : DCHECK(common::IsPowerOfTwo(alignment));
627 E : alignment_ = alignment;
628 E : }
629 :
630 : // Gets the offset the alignment is applied to.
631 : // @returns The offset that will be aligned.
632 E : Offset alignment_offset() const { return alignment_offset_; }
633 : // Sets the offset the alignment refers to. The layout builder makes sure the
634 : // byte at this position is aligned. By default this value is zero which
635 : // means that the first byte of the block should be aligned.
636 : // @param alignment_offset The new offset of the alignment.
637 E : void set_alignment_offset(Offset alignment_offset) {
638 E : alignment_offset_ = alignment_offset;
639 E : }
640 :
641 : // Returns the minimum amount of padding bytes that are inserted before the
642 : // block when building the layout.
643 : // @returns The minimum amount of padding.
644 E : Size padding_before() const { return padding_before_; }
645 : // Sets the minium amount of padding bytes before the block when building the
646 : // layout.
647 : // @param padding_before At least this amount of bytes are inserted before
648 : // the block when building the layout.
649 E : void set_padding_before(Size padding_before) {
650 E : padding_before_ = padding_before;
651 E : }
652 :
653 : // The address of the block is set any time the block is assigned
654 : // an address in an address space.
655 E : RelativeAddress addr() const { return addr_; }
656 E : void set_addr(RelativeAddress addr) { addr_ = addr; }
657 :
658 : // The section ID for the block. These IDs are wrt to the SectionMap in the
659 : // parent BlockGraph.
660 E : SectionId section() const { return section_; }
661 E : void set_section(SectionId section) { section_ = section; }
662 :
663 : // The block attributes are a bitmask. You can set them wholesale,
664 : // or set and clear them individually by bitmasking.
665 E : BlockAttributes attributes() const { return attributes_; }
666 E : void set_attributes(BlockAttributes attributes) { attributes_ = attributes; }
667 :
668 : // Set or clear one or more attributes.
669 E : void set_attribute(BlockAttributes attribute) { attributes_ |= attribute; }
670 E : void clear_attribute(BlockAttributes attribute) {
671 E : attributes_ &= ~attribute;
672 E : }
673 :
674 : // This is true iff data_ is in the ownership of the block.
675 : // Iff true, the block will delete [] data_ on destruction or when
676 : // data is overwritten.
677 E : bool owns_data() const { return owns_data_; }
678 :
679 : // Makes room for the given amount of data at the given offset. This is
680 : // special in that it will patch up any labels, source ranges and referrers
681 : // that land beyond the newly created data, shifting them to the right by
682 : // @p size. If the data for this block is actually allocated it will also
683 : // patch up the allocated data by zeroing the newly allocate range of data,
684 : // and shifting the tail by @p size. If the new data is strictly implicit
685 : // (offset > data_size), then the allocated data is not affected in any way
686 : // unless @p always_allocate_data is true.
687 : //
688 : // @param offset the offset at which to insert the new data.
689 : // @param size the size of the new data to be inserted.
690 : // @param always_allocate_data if true, then data_size will be grown if
691 : // necessary to ensure that the newly created data can be written.
692 : // @pre 0 <= offset <= size()
693 : void InsertData(Offset offset, Size size, bool always_allocate_data);
694 :
695 : // Removes the data in the given range. This will refuse to remove labels,
696 : // references and referrers that land in the range, and will fail if any
697 : // exist. It will also shift any labels, references and referrers that land
698 : // beyond the end of the removed range. Source ranges will also be fixed. If
699 : // the removed range lies within the initialized data then the data will also
700 : // be truncated/shifted as necessary.
701 : //
702 : // @param offset the offset at which to remove data.
703 : // @param size the size of the data to remove, in bytes.
704 : // @returns true on success, false otherwise.
705 : // @pre 0 <= offset <= size
706 : bool RemoveData(Offset offset, Size size);
707 :
708 : // Performs an inline resize of data in a BlockGraph. If the data is shrinking
709 : // this equates to a RemoveData operation. If it is growing it equates to an
710 : // InsertData operation.
711 : //
712 : // @param offset the offset of the data to resize.
713 : // @param current_size the current size of the data to resize.
714 : // @param new_size the desired size of the data.
715 : // @param always_allocate_data if true, then data_size will be grown if
716 : // necessary to ensure that the resized data can be written.
717 : // @returns true on success, false otherwise.
718 : // @pre 0 <= offset <= size
719 : bool InsertOrRemoveData(Offset offset, Size current_size, Size new_size,
720 : bool always_allocate_data);
721 :
722 : // Set the data the block refers to.
723 : // @param data NULL or the data this block refers to.
724 : // The underlying data must outlive this block.
725 : // @param data_size the size of data, or zero if data == NULL.
726 : // @pre data_size <= size().
727 : void SetData(const uint8* data, size_t data_size);
728 :
729 : // Allocates and returns a new data buffer of the given size. The returned
730 : // data will have been initialized to zero.
731 : // @pre data_size > 0.
732 : // @pre data_size <= size().
733 : uint8* AllocateData(size_t data_size);
734 :
735 : // Makes a copy of data, returns a pointer to the copy.
736 : // @pre data_size <= size().
737 : uint8* CopyData(size_t data_size, const void* data);
738 :
739 : // Resizes data to new_size by truncating or zero-extending the current data.
740 : // @pre new_size <= size().
741 : const uint8* ResizeData(size_t new_size);
742 :
743 : // Returns a mutable copy of the block's data. If the block doesn't own
744 : // the data on entry, it'll be copied and the copy returned to the caller.
745 : uint8* GetMutableData();
746 :
747 : // The data bytes the block refers to.
748 E : const uint8* data() const { return data_; }
749 :
750 : // The data size may be smaller than the block size (see size()),
751 : // when the block e.g. refers to data that's all or part
752 : // zero-initialized by the linker/loader.
753 E : size_t data_size() const { return data_size_; }
754 :
755 E : const ReferenceMap& references() const { return references_; }
756 E : const ReferrerSet& referrers() const { return referrers_; }
757 E : const SourceRanges& source_ranges() const { return source_ranges_; }
758 E : SourceRanges& source_ranges() { return source_ranges_; }
759 E : const LabelMap& labels() const { return labels_; }
760 :
761 : // Returns true if there are any other blocks holding a reference to this one.
762 : bool HasExternalReferrers() const;
763 :
764 : // Set the reference at @p offset to @p ref.
765 : // If there's a pre-existing reference at @p offset, this overrides it.
766 : // @param offset offset of the reference into this block.
767 : // @param ref the reference to add.
768 : // @returns true iff this inserts a new reference.
769 : bool SetReference(Offset offset, const Reference& ref);
770 :
771 : // Retrieve the reference at @p offset if one exists.
772 : // @param reference on success returns the reference @p offset.
773 : // @returns true iff there was a reference at @p offset.
774 : bool GetReference(Offset offset, Reference* reference) const;
775 :
776 : // Remove the reference at @p offset.
777 : // @returns true iff there was a reference at @p offset.
778 : bool RemoveReference(Offset offset);
779 :
780 : // Remove all references from this block. This is handy when removing a block
781 : // from the block graph.
782 : bool RemoveAllReferences();
783 :
784 : // Set a label to @p offset.
785 : // A label in code marks the location of the start of an instruction -
786 : // e.g. a location where disassembly can usefully commence. Labels
787 : // appear to be inserted by the VS tool chain where e.g. a switch
788 : // statement is implemented with a jump table, to note the location
789 : // of the jump destinations.
790 : // @param offset the offset of the label to set.
791 : // @param name the name of the label.
792 : // @param attributes the attributes of the label.
793 : // @returns true iff a new label is inserted.
794 : // @note that only one label can exist at each offset, and the first
795 : // label set at any offset will stay there.
796 : // @{
797 : bool SetLabel(Offset offset, const Label& label);
798 : bool SetLabel(Offset offset,
799 : const base::StringPiece& name,
800 E : LabelAttributes attributes) {
801 E : return SetLabel(offset, Label(name, attributes));
802 E : }
803 : // @}
804 :
805 : // Gets the label at the given @p offset.
806 : // @param offset the offset of the label to get.
807 : // @param label the string to receive the label.
808 : // @returns true if the label exists, false otherwise.
809 : bool GetLabel(Offset offset, Label* label) const;
810 :
811 : // Removes the label at the given @p offset.
812 : // @param offset the offset of the label to remove.
813 : // @returns true if the label existed and was removed, false it it did not
814 : // exist.
815 : bool RemoveLabel(Offset offset);
816 :
817 : // Returns true iff the block has a label at @p offset.
818 : // @param offset the offset of the label to search for.
819 : bool HasLabel(Offset offset) const;
820 :
821 : // Change all references to this block to refer to @p new_block instead,
822 : // while offsetting each reference by @p offset.
823 : // @param offset The offset that we should apply to each reference.
824 : // @param new_block The block the reference should point to.
825 : // @param flags The flags that control some parameters of this function.
826 : // @note this fails if any of the transferred references end up with offsets
827 : // less than zero, or greater than new_block->size().
828 : // @returns true iff all references were transferred successfully.
829 : bool TransferReferrers(Offset offset,
830 : Block* new_block,
831 : TransferReferrersFlags flags);
832 :
833 : // Returns true if this block contains the given range of bytes.
834 : bool Contains(RelativeAddress address, size_t size) const;
835 :
836 : protected:
837 : // Give BlockGraph access to our innards for serialization.
838 : friend class BlockGraph;
839 : // Give BlockGraphSerializer access to our innards for serialization.
840 : friend class BlockGraphSerializer;
841 :
842 : // Full constructor.
843 : // @note This is protected so that blocks may only be created via the
844 : // BlockGraph factory.
845 : Block(BlockId id,
846 : BlockType type,
847 : Size size,
848 : const base::StringPiece& name,
849 : BlockGraph* block_graph);
850 :
851 : // This constructor is used by serialization.
852 : explicit Block(BlockGraph* block_graph);
853 :
854 : // Allocates and returns a new data buffer of the given size. The returned
855 : // data buffer will not have been initialized in any way.
856 : uint8* AllocateRawData(size_t size);
857 :
858 : BlockId id_;
859 : BlockType type_;
860 : Size size_;
861 : Size alignment_;
862 : Offset alignment_offset_;
863 : Size padding_before_;
864 : const std::string* name_;
865 : const std::string* compiland_name_;
866 : RelativeAddress addr_;
867 :
868 : // BlockGraph to which belongs this Block. A block can only belongs to one
869 : // Block Graph.
870 : BlockGraph* block_graph_;
871 :
872 : SectionId section_;
873 : BlockAttributes attributes_;
874 :
875 : ReferenceMap references_;
876 : ReferrerSet referrers_;
877 : SourceRanges source_ranges_;
878 : LabelMap labels_;
879 :
880 : // True iff data_ is ours to deallocate with delete [].
881 : // If this is false, data_ must be guaranteed to outlive the block.
882 : bool owns_data_;
883 : // A pointer to the code or data we represent.
884 : const uint8* data_;
885 : // Size of the above.
886 : size_t data_size_;
887 : };
888 :
889 : // Less-than comparator for blocks. Useful to keep ordered set stable.
890 : struct BlockGraph::BlockIdLess {
891 : bool operator()(const Block* lhs,
892 E : const Block* rhs) const {
893 E : DCHECK_NE(static_cast<const Block*>(NULL), lhs);
894 E : DCHECK_NE(static_cast<const Block*>(NULL), rhs);
895 E : return lhs->id() < rhs->id();
896 E : }
897 : };
898 :
899 : // A graph address space endows a graph with a non-overlapping ordering
900 : // on blocks, where each block occupies zero or one address ranges in the
901 : // address space. No two blocks may overlap in an address space. Empty blocks
902 : // are not stored in the underlying address-space implementation itself, but do
903 : // have associated addresses and are stored in the block-address map.
904 : class BlockGraph::AddressSpace {
905 : public:
906 : typedef core::AddressSpace<RelativeAddress, BlockGraph::Size, Block*>
907 : AddressSpaceImpl;
908 : typedef AddressSpaceImpl::Range Range;
909 : typedef AddressSpaceImpl::RangeMap RangeMap;
910 : typedef AddressSpaceImpl::RangeMapIter RangeMapIter;
911 : typedef AddressSpaceImpl::RangeMapConstIter RangeMapConstIter;
912 : typedef AddressSpaceImpl::RangeMapIterPair RangeMapIterPair;
913 : typedef AddressSpaceImpl::RangeMapConstIterPair RangeMapConstIterPair;
914 : typedef stdext::hash_map<const Block*, RelativeAddress> BlockAddressMap;
915 :
916 : // Constructs a new empty address space.
917 : // @p start to @p start + @p size on @p graph.
918 : explicit AddressSpace(BlockGraph* graph);
919 :
920 : // Add a block of type @p type and @p size at @p address to our associated
921 : // graph, and return the new block
922 : // @returns the new block, or NULL if the new block would overlap
923 : // an existing block.
924 : Block* AddBlock(BlockType type,
925 : RelativeAddress addr,
926 : Size size,
927 : const base::StringPiece& name);
928 :
929 : // Resizes a block in the address-space by extending to the right, or
930 : // trimming. Updates the block size but does not udpate its contents. This
931 : // invalidates any RangeMap iterators to the block in question.
932 : // @param block The block whose size is to change.
933 : // @param size The new size of the block. Must be > 0.
934 : // @returns true on success, false if not possible due to a conflict.
935 : bool ResizeBlock(Block* block, size_t size);
936 :
937 : // Merges all blocks that intersect @p range into a single block.
938 : // Moves labels and references from the intersecting blocks to the
939 : // merged block, and changes referring blocks to refer to the new,
940 : // merged block. Removes the original blocks from the BlockGraph.
941 : // @returns the new, merged block if there was at least one intersecting
942 : // block in @p range, or NULL otherwise.
943 : Block* MergeIntersectingBlocks(const Range& range);
944 :
945 : // Insert existing block @p block at @p address.
946 : // @returns true on success, or false if the @p block would overlap
947 : // an existing block.
948 : bool InsertBlock(RelativeAddress addr, Block* block);
949 :
950 : // Returns a pointer to the block containing address, or NULL
951 : // if no block contains address.
952 : Block* GetBlockByAddress(RelativeAddress addr) const;
953 :
954 : // Returns a pointer to the block containing the address range
955 : // [address, address + size), or NULL if no block contains that
956 : // range.
957 : Block* GetContainingBlock(RelativeAddress addr, Size size) const;
958 :
959 : // Finds the first block, if any that intersects
960 : // [@p address, @p address + @p size).
961 : Block* GetFirstIntersectingBlock(RelativeAddress address, Size size);
962 :
963 : // Check whether the address space contains @p block.
964 : // @param block the block in question.
965 : // @returns true if the block is in the address space, false otherwise.
966 : bool ContainsBlock(const Block* block);
967 :
968 : // Locates all blocks that intersect [@p address, @p address + @p size).
969 : // @returns a pair of iterators that iterate over the found blocks.
970 : RangeMapConstIterPair GetIntersectingBlocks(RelativeAddress address,
971 : Size size) const;
972 : RangeMapIterPair GetIntersectingBlocks(RelativeAddress address, Size size);
973 :
974 : // Retrieve the address off @p block.
975 : // @param block the block in question.
976 : // @param addr on success, returns the address of @p block in this
977 : // address space.
978 : // @returns true on success, false if @p block is not in this
979 : // address space.
980 : bool GetAddressOf(const Block* block, RelativeAddress* addr) const;
981 :
982 : // Accessor.
983 E : BlockGraph* graph() { return graph_; }
984 E : const BlockGraph* graph() const { return graph_; }
985 :
986 E : RangeMapConstIter begin() const {
987 E : return address_space_.ranges().begin();
988 E : }
989 :
990 E : RangeMapConstIter end() const {
991 E : return address_space_.ranges().end();
992 E : }
993 :
994 : // @returns the number of blocks in the address-space. This includes empty
995 : // blocks, which won't actually be visited by iteration.
996 E : size_t size() const {
997 : // We use the size of the map of addresses, as zero sized blocks only live
998 : // there.
999 E : return block_addresses_.size();
1000 E : }
1001 :
1002 : // @returns a reference to the underlaying address-space implementation. Only
1003 : // non-empty blocks are inserted in the address space.
1004 E : const AddressSpaceImpl& address_space_impl() const {
1005 E : return address_space_;
1006 E : }
1007 :
1008 : // @returne a reference to the map of blocks addresses, by block pointer.
1009 E : const BlockAddressMap& block_addresses() const {
1010 E : return block_addresses_;
1011 E : }
1012 :
1013 : protected:
1014 : bool InsertImpl(RelativeAddress addr, Block* block);
1015 :
1016 : AddressSpaceImpl address_space_;
1017 : BlockAddressMap block_addresses_;
1018 : BlockGraph* graph_;
1019 : };
1020 :
1021 : // Represents a reference from one block to another. References may be offset.
1022 : // That is, they may refer to an object at a given location, but actually point
1023 : // to a location that is some fixed distance away from that object. This allows,
1024 : // for example, non-zero based indexing into a table. The object that is
1025 : // intended to be dereferenced is called the 'base' of the offset.
1026 : //
1027 : // BlockGraph references are from a location (offset) in one block, to some
1028 : // location in another block. The referenced block itself plays the role of the
1029 : // 'base' of the reference, with the offset of the reference being stored as
1030 : // an integer from the beginning of the block. However, basic block
1031 : // decomposition requires breaking the block into smaller pieces and thus we
1032 : // need to carry around an explicit base value, indicating which byte in the
1033 : // block is intended to be referenced.
1034 : //
1035 : // A direct reference to a location will have the same value for 'base' and
1036 : // 'offset'.
1037 : //
1038 : // Here is an example:
1039 : //
1040 : // /----------\
1041 : // +---------------------------+
1042 : // O | B | <--- Referenced block
1043 : // +---------------------------+ B = base
1044 : // \-----/ O = offset
1045 : //
1046 : class BlockGraph::Reference {
1047 : public:
1048 : Reference() :
1049 E : type_(RELATIVE_REF), size_(0), referenced_(NULL), offset_(0), base_(0) {
1050 E : }
1051 :
1052 : // @param type type of reference.
1053 : // @param size size of reference.
1054 : // @param referenced the referenced block.
1055 : // @param offset offset from the beginning of the block of the location to be
1056 : // explicitly referred to.
1057 : // @param base offset into the block of the location actually being
1058 : // referenced. This must be strictly within @p referenced.
1059 : Reference(ReferenceType type,
1060 : Size size,
1061 : Block* referenced,
1062 : Offset offset,
1063 : Offset base)
1064 : : type_(type),
1065 : size_(size),
1066 : referenced_(referenced),
1067 : offset_(offset),
1068 E : base_(base) {
1069 E : DCHECK(IsValid());
1070 E : }
1071 :
1072 : // Copy constructor.
1073 : Reference(const Reference& other)
1074 : : type_(other.type_),
1075 : size_(other.size_),
1076 : referenced_(other.referenced_),
1077 : offset_(other.offset_),
1078 E : base_(other.base_) {
1079 E : }
1080 :
1081 : // Accessors.
1082 E : ReferenceType type() const { return type_; }
1083 E : Size size() const { return size_; }
1084 E : Block* referenced() const { return referenced_; }
1085 E : Offset offset() const { return offset_; }
1086 E : Offset base() const { return base_; }
1087 :
1088 : // Determines if this is a direct reference. That is, if the actual location
1089 : // being referenced (offset) and the intended location being referenced (base)
1090 : // are the same.
1091 : //
1092 : // @returns true if the reference is direct, false otherwise.
1093 E : bool IsDirect() const { return base_ == offset_; }
1094 :
1095 : // Determines if this is a valid reference, by imposing size constraints on
1096 : // reference types, and determining if the base address of the reference is
1097 : // strictly contained within the referenced block.
1098 : //
1099 : // @returns true if valid, false otherwise.
1100 : bool IsValid() const;
1101 :
1102 E : bool operator==(const Reference& other) const {
1103 : return type_ == other.type_ &&
1104 : size_ == other.size_ &&
1105 : referenced_ == other.referenced_ &&
1106 : offset_ == other.offset_ &&
1107 E : base_ == other.base_;
1108 E : }
1109 :
1110 : // The maximum size that a reference may have. This needs to be kept in sync
1111 : // with the expectations of IsValid().
1112 : static const size_t kMaximumSize = 4;
1113 :
1114 : // Returns true if the given reference type and size combination is valid.
1115 : static bool IsValidTypeSize(ReferenceType type, Size size);
1116 :
1117 : private:
1118 : // Type of this reference.
1119 : ReferenceType type_;
1120 :
1121 : // Size of this reference.
1122 : // Absolute references are always pointer wide, but PC-relative
1123 : // references can be 1, 2 or 4 bytes wide, which affects their range.
1124 : Size size_;
1125 :
1126 : // The block referenced.
1127 : Block* referenced_;
1128 :
1129 : // Offset into the referenced block.
1130 : Offset offset_;
1131 :
1132 : // The base of the reference, as in offset in the block. This must be a
1133 : // location strictly within the block.
1134 : Offset base_;
1135 : };
1136 :
1137 : // Commonly used container types.
1138 : typedef std::vector<BlockGraph::Block*> BlockVector;
1139 : typedef std::vector<const BlockGraph::Block*> ConstBlockVector;
1140 :
1141 : // A small helper struct for dumping block information to log messages.
1142 : struct BlockInfo {
1143 : enum AddressType {
1144 : kNoAddress,
1145 : kAbsoluteAddress,
1146 : kFileOffsetAddress,
1147 : kRelativeAddress,
1148 : };
1149 :
1150 i : explicit BlockInfo(const BlockGraph::Block* block)
1151 : : block(block), type(kNoAddress) {
1152 i : DCHECK_NE(static_cast<BlockGraph::Block*>(nullptr), block);
1153 i : }
1154 :
1155 : BlockInfo(const BlockGraph::Block* block,
1156 : core::AbsoluteAddress address)
1157 : : block(block), type(kAbsoluteAddress), abs_addr(address) {
1158 : DCHECK_NE(static_cast<BlockGraph::Block*>(nullptr), block);
1159 : }
1160 : BlockInfo(const BlockGraph::Block* block,
1161 : core::FileOffsetAddress address)
1162 : : block(block), type(kFileOffsetAddress), file_addr(address) {
1163 : DCHECK_NE(static_cast<BlockGraph::Block*>(nullptr), block);
1164 : }
1165 i : BlockInfo(const BlockGraph::Block* block,
1166 : core::RelativeAddress address)
1167 : : block(block), type(kRelativeAddress), rel_addr(address) {
1168 i : DCHECK_NE(static_cast<BlockGraph::Block*>(nullptr), block);
1169 i : }
1170 :
1171 : const BlockGraph::Block* block;
1172 : AddressType type;
1173 :
1174 : // Ideally these would be in a union but because they have non-trivial
1175 : // constructors they are not allowed.
1176 : core::AbsoluteAddress abs_addr;
1177 : core::FileOffsetAddress file_addr;
1178 : core::RelativeAddress rel_addr;
1179 :
1180 : private:
1181 : DISALLOW_COPY_AND_ASSIGN(BlockInfo);
1182 : };
1183 :
1184 : } // namespace block_graph
1185 :
1186 : // Pretty prints a BlockInfo to an ostream. This has to be outside of any
1187 : // namespaces so that operator<< is found properly.
1188 : std::ostream& operator<<(std::ostream& os, const block_graph::BlockInfo& bi);
1189 :
1190 : #endif // SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
|