1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // A block graph is an abstract graph of blocks, each of which has an ID, a
16 : // type, a size and a few other properties. Each block represents either code or
17 : // data, and blocks can reference one another through references of various
18 : // types.
19 : //
20 : // The BlockGraph also stores minimum knowledge of sections (names and
21 : // characteristics), and each block belongs to at most one section. In this
22 : // sense, a BlockGraph acts as top-level division of blocks.
23 :
24 : #ifndef SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
25 : #define SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
26 :
27 : #include <hash_map>
28 : #include <map>
29 : #include <set>
30 : #include <string>
31 : #include <vector>
32 :
33 : #include "base/basictypes.h"
34 : #include "base/file_util.h"
35 : #include "base/string_piece.h"
36 : #include "syzygy/common/align.h"
37 : #include "syzygy/core/address.h"
38 : #include "syzygy/core/address_space.h"
39 :
40 : namespace block_graph {
41 :
42 : // Forward declaration.
43 : class BlockGraphSerializer;
44 :
45 : // The invalid address can never occur in an graph, it's used as default
46 : // value for block addresses.
47 : extern const core::RelativeAddress kInvalidAddress;
48 :
49 : #define BLOCK_ATTRIBUTE_ENUM(F) \
50 : /* Set for functions declared non-returning. */ \
51 : F(NON_RETURN_FUNCTION) \
52 : /* Set for blocks that are inferred by the decomposer. */ \
53 : F(GAP_BLOCK) \
54 : /* Set for blocks that are parsed by the PEFileParser. These */ \
55 : /* blocks are unmovable, indivisible, etc, and have to be treated */ \
56 : /* specially. */ \
57 : F(PE_PARSED) \
58 : /* Set for blocks that are created from section contribution */ \
59 : /* information. */ \
60 : F(SECTION_CONTRIB) \
61 : /* This is used to indicate that a block consists purely of padding */ \
62 : /* data. */ \
63 : F(PADDING_BLOCK) \
64 : /* Indicates blocks that contain inline assembly. */ \
65 : F(HAS_INLINE_ASSEMBLY) \
66 : /* Indicates that the block was built by a compiler whose precise */ \
67 : /* behaviour and semantics we are unfamiliar with. */ \
68 : F(BUILT_BY_UNSUPPORTED_COMPILER) \
69 : /* Indicates that the block has been built by the Syzygy toolchain, and */ \
70 : /* thus is inherently safe for basic-block decomposition without having */ \
71 : /* to perform the myriad of safety checks we do otherwise. */ \
72 : F(BUILT_BY_SYZYGY) \
73 : /* This is set for blocks whose initial disassembly was incomplete. */ \
74 : /* This is not necessarily an error, as we see have seen blocks with */ \
75 : /* unreachable code, even in release mode. */ \
76 : F(INCOMPLETE_DISASSEMBLY) \
77 : /* This is set for blocks whose disassembly was unable to finish due to */ \
78 : /* an error. This block has violated assumptions that we make or */ \
79 : /* conventions that we have observed the compiler to use. It is not safe */\
80 : /* for basic block disassembly. */ \
81 : F(ERRORED_DISASSEMBLY) \
82 : /* This is set for functions that have exception handling enabled. */ \
83 : /* Without delving far deeper into the specifics, it is unsafe to basic */ \
84 : /* block decompose these blocks. */ \
85 : F(HAS_EXCEPTION_HANDLING) \
86 : /* This is set for blocks whose disassembly went off the end of the */ \
87 : /* block, or into data. These blocks have control flow that we are not */ \
88 : /* aware of, or are otherwise malformed. */ \
89 : F(DISASSEMBLED_PAST_END) \
90 : /* This is set for blocks that have a thunk symbol pointing to them. */ \
91 : /* Typically thunk blocks are compiler or linker-generated, such as */ \
92 : /* e.g. import thunks, delay load import thunks, etc. */ \
93 : F(THUNK) \
94 : /* This is set for blocks that have been parsed as COFF groups. The */ \
95 : /* contents of these blocks are semantically indivisible. */ \
96 : F(COFF_GROUP) \
97 : /* This always needs to be set to the next available attribute bit. */ \
98 : F(BLOCK_ATTRIBUTES_MAX)
99 :
100 : // The BlockGraph is a top-level container for Blocks.
101 : class BlockGraph {
102 : public:
103 : typedef core::RelativeAddress RelativeAddress;
104 :
105 : typedef size_t SectionId;
106 : typedef size_t BlockId;
107 : typedef size_t Size;
108 : typedef ptrdiff_t Offset;
109 : typedef uint32 BlockAttributes;
110 : typedef uint32 LabelAttributes;
111 :
112 : // The BlockGraph maintains a list of sections, and each block belongs
113 : // to one of them. This is the set of information we keep regarding them.
114 : struct Section;
115 : // The section map contains all sections, indexed by id.
116 : typedef std::map<SectionId, Section> SectionMap;
117 :
118 : static const SectionId kInvalidSectionId;
119 :
120 : // Assign distinct bit IDs to each attribute constant.
121 : enum BlockAttributeIdEnum {
122 : #define DECLARE_ENUM_BIT(name) name##_BIT,
123 : BLOCK_ATTRIBUTE_ENUM(DECLARE_ENUM_BIT)
124 : #undef DECLARE_ENUM_BIT
125 : };
126 :
127 : enum BlockAttributeEnum {
128 : #define DECLARE_ENUM(name) name = (1 << name##_BIT),
129 : BLOCK_ATTRIBUTE_ENUM(DECLARE_ENUM)
130 : #undef DECLARE_ENUM
131 : };
132 :
133 : // Returns a string containing the names of the supplied attributes.
134 : static std::string BlockAttributesToString(BlockAttributes attrs);
135 :
136 : enum BlockType {
137 : CODE_BLOCK,
138 : DATA_BLOCK,
139 :
140 : // NOTE: This must always be last, and kBlockType must be kept in sync
141 : // with this enum.
142 : BLOCK_TYPE_MAX
143 : };
144 :
145 : static const char* BlockTypeToString(BlockType type);
146 :
147 : // Label attributes. Attributes of the form _END_LABEL type actually
148 : // point to the first byte past the range they delineate. To make the
149 : // semantics of moving labels easier, we shift these labels left by one and
150 : // make them follow the last byte of the delineated range.
151 : enum LabelAttributesEnum {
152 : // The label points to an entry-point in a code block.
153 : CODE_LABEL = (1 << 0),
154 :
155 : // Mark the start and end of the debuggable portion of a code block.
156 : DEBUG_START_LABEL = (1 << 1),
157 : DEBUG_END_LABEL = (1 << 2),
158 :
159 : // Mark the start and end of an embedded scope in a code block.
160 : SCOPE_START_LABEL = (1 << 3),
161 : SCOPE_END_LABEL = (1 << 4),
162 :
163 : // Marks the location of a (virtual table?) call.
164 : CALL_SITE_LABEL = (1 << 5),
165 :
166 : // The label points to the start of a jump table. The length is inferred
167 : // by the location of the next label, or the end of the block. This will
168 : // also have DATA_LABEL set.
169 : JUMP_TABLE_LABEL = (1 << 6),
170 : // The label points to the start of a case table. The length is inferred
171 : // by the location of the next label, or the end of the block. This will
172 : // also have DATA_LABEL set.
173 : CASE_TABLE_LABEL = (1 << 7),
174 : // The label originated from a data symbol. The length is inferred by the
175 : // location of the next label, or the end of the block. The type of data
176 : // is unknown.
177 : DATA_LABEL = (1 << 8),
178 :
179 : // Used to mark a label that was derived from a public symbol. These are
180 : // usually actually pointing to code and global data symbols, but we can't
181 : // always tell (there may be public symbols pointing to data in a code
182 : // block).
183 : PUBLIC_SYMBOL_LABEL = (1 << 9),
184 :
185 : // This always needs to be the most significant bit.
186 : LABEL_ATTRIBUTES_MAX = (1 << 10),
187 : };
188 :
189 : static std::string LabelAttributesToString(LabelAttributes label_attributes);
190 :
191 : enum ReferenceType {
192 : PC_RELATIVE_REF,
193 : ABSOLUTE_REF,
194 : RELATIVE_REF,
195 : FILE_OFFSET_REF,
196 : // Must be last!
197 : REFERENCE_TYPE_MAX,
198 : };
199 :
200 : // Forward declarations.
201 : class AddressSpace;
202 : class Block;
203 : class Label;
204 : class Reference;
205 :
206 : // The block map contains all blocks, indexed by id.
207 : typedef std::map<BlockId, Block> BlockMap;
208 :
209 : BlockGraph();
210 : ~BlockGraph();
211 :
212 : // Adds a section with the given name.
213 : //
214 : // @param name The section name.
215 : // @param characteristics The section characteristics.
216 : // @returns the newly created section.
217 : Section* AddSection(const base::StringPiece& name, uint32 characteristics);
218 :
219 : // Finds a section with the given name, returning NULL if no such section
220 : // exists.
221 : //
222 : // @param name The section name.
223 : // @returns the section if one is found, NULL otherwise.
224 : Section* FindSection(const base::StringPiece& name);
225 :
226 : // Find or adds section with the given name.
227 : //
228 : // If a section with the given name already exists, updates its
229 : // characteristics and returns it. Otherwise, creates a new section and
230 : // returns it. If multiple sections exist with the given name, the first
231 : // one encountered is returned.
232 : //
233 : // TODO(chrisha): The semantics of this function are a little odd. It would
234 : // make more sense for it to return only if a section with matching name
235 : // AND characteristics is found, otherwise to create a new one.
236 : //
237 : // @param name The section name.
238 : // @param characteristics The section characteristics.
239 : // @returns the new or found section.
240 : Section* FindOrAddSection(const base::StringPiece& name,
241 : uint32 characteristics);
242 :
243 : // Removes the given section from the BlockGraph.
244 : //
245 : // The section must belong to this block graph. Be aware that this can leave
246 : // Blocks with dangling section_ids.
247 : //
248 : // @param section The section to remove.
249 : // @returns true on success, false otherwise.
250 : bool RemoveSection(Section* section);
251 :
252 : // Removes the section with the given id from the BlockGraph.
253 : //
254 : // @param section The id of the section to remove.
255 : // @returns true on success, false otherwise.
256 : bool RemoveSectionById(SectionId id);
257 :
258 : // Add @p block of type @p type and @p size and
259 : // return the new block.
260 : // @returns the new block.
261 : Block* AddBlock(BlockType type, Size size, const base::StringPiece& name);
262 :
263 : // Deletes the given block from the BlockGraph. The block must belong to this
264 : // block graph, and have no references or referrers. Returns true on success,
265 : // false otherwise. On failure, the BlockGraph has not been changed.
266 : bool RemoveBlock(Block* block);
267 :
268 : // Deletes the block with the given @p id from the block graph. The block id
269 : // must be valid, and the block must have no references or referrers. Returns
270 : // true on success, false otherwise. On failure, the BlockGraph has not been
271 : // changed.
272 : bool RemoveBlockById(BlockId id);
273 :
274 : // Accessors.
275 E : const SectionMap& sections() const { return sections_; }
276 E : SectionMap& sections_mutable() { return sections_; }
277 E : const BlockMap& blocks() const { return blocks_; }
278 E : BlockMap& blocks_mutable() { return blocks_; }
279 :
280 : // @{
281 : // Retrieve the section with the given id.
282 : //
283 : // @param id The id of the section to retrieve.
284 : // @returns the section in question or NULL if no such section.
285 : Section* GetSectionById(SectionId id);
286 : const Section* GetSectionById(SectionId id) const;
287 : // @}
288 :
289 : // @{
290 : // Retrieve the block with id.
291 : // @returns the block in question or NULL if no such block.
292 : Block* GetBlockById(BlockId id);
293 : const Block* GetBlockById(BlockId id) const;
294 : // @}
295 :
296 : const std::string& InternString(const base::StringPiece& str);
297 :
298 : private:
299 : // Give BlockGraphSerializer access to our innards for serialization.
300 : friend BlockGraphSerializer;
301 :
302 : // Removes a block by the iterator to it. The iterator must be valid.
303 : bool RemoveBlockByIterator(BlockMap::iterator it);
304 :
305 : // All sections we contain.
306 : SectionMap sections_;
307 :
308 : // Our section ID allocator.
309 : SectionId next_section_id_;
310 :
311 : // All blocks we contain.
312 : BlockMap blocks_;
313 :
314 : // Our block ID allocator.
315 : BlockId next_block_id_;
316 :
317 : // A set of internalized string.
318 : std::set<std::string> string_table_;
319 : };
320 :
321 : // The BlockGraph maintains a list of sections, and each block belongs
322 : // to one of them. This is the set of information we keep regarding them.
323 : struct BlockGraph::Section {
324 : // Default constructor. Required for serialization.
325 E : Section() : id_(kInvalidSectionId), characteristics_(0) {
326 E : }
327 :
328 : // Full constructor.
329 : //
330 : // @param id The section id. This must not be kInvalidSectionId.
331 : // @param name The name of the section. Must not be empty or NULL.
332 : // @param characteristics The characteristics of the section.
333 E : Section(SectionId id, const base::StringPiece& name, uint32 characteristics)
334 : : id_(id), name_(), characteristics_(characteristics) {
335 E : DCHECK_NE(kInvalidSectionId, id);
336 E : DCHECK(name != NULL);
337 E : name.CopyToString(&name_);
338 E : DCHECK(!name_.empty());
339 E : }
340 :
341 : // Get the id of this section.
342 : //
343 : // @returns the id of the section.
344 E : SectionId id() const { return id_; }
345 :
346 : // Get the name of this section.
347 : //
348 : // @returns the section name.
349 E : const std::string& name() const { return name_; }
350 :
351 : // Sets the name for this section.
352 : //
353 : // @param name The name of the section. If NULL or empty, this will fail.
354 : // @returns true if the name is set, false otherwise.
355 : bool set_name(const base::StringPiece& name);
356 :
357 : // Get the characteristics of this section.
358 : //
359 : // @returns the section characteristics.
360 E : uint32 characteristics() const { return characteristics_; }
361 :
362 : // Sets the characteristics for this section.
363 : //
364 : // @param characteristics The new characteristics to set.
365 E : void set_characteristics(uint32 characteristics) {
366 E : characteristics_ = characteristics;
367 E : }
368 :
369 : // Sets a one or more additional characteristics for this section.
370 : //
371 : // @param characteristic The new characteristic(s) to set for this section.
372 E : void set_characteristic(uint32 characteristic) {
373 E : characteristics_ |= characteristic;
374 E : }
375 :
376 : // Clears one or more characteristics for this section.
377 : //
378 : // @param characteristic The characteristic(s) to clear for this section.
379 E : void clear_characteristic(uint32 characteristic) {
380 E : characteristics_ &= ~characteristic;
381 E : }
382 :
383 : // @name Serialization functions.
384 : // @{
385 : bool Save(core::OutArchive* out_archive) const;
386 : bool Load(core::InArchive* in_archive);
387 : // @}
388 :
389 : // A simple comparison operator for serialization tests.
390 E : bool operator==(const Section& other) const {
391 : return id_ == other.id_ && name_ == other.name_ &&
392 E : characteristics_ == other.characteristics_;
393 E : }
394 :
395 : // A not-equal comparison operator.
396 E : bool operator!=(const Section& other) const {
397 E : return !operator==(other);
398 E : }
399 :
400 : private:
401 : // The id of the section. This has no particular meaning other than as a way
402 : // to identify sections uniquely.
403 : SectionId id_;
404 : // The name of the section. This will be truncated to a max of 8 characters
405 : // on output.
406 : std::string name_;
407 : // The section characteristics, a bitmask of IMAGE_SCN_* values.
408 : uint32 characteristics_;
409 : };
410 :
411 : // A label denotes the beginning (or end) of a sub-region within a (code)
412 : // block. In particular, a code label represents an instruction boundary
413 : // at which disassembly can begin and a data label represents the beginning
414 : // of embedded data.
415 : class BlockGraph::Label {
416 : public:
417 : // Default constructor.
418 E : Label() : attributes_(0) {
419 E : }
420 :
421 : // Full constructor.
422 E : Label(const base::StringPiece& name, LabelAttributes attributes)
423 : : name_(name.begin(), name.end()), attributes_(attributes) {
424 E : }
425 :
426 : // Copy construction.
427 E : Label(const Label& other)
428 : : name_(other.name_), attributes_(other.attributes_) {
429 E : }
430 :
431 : // @name Accessors.
432 : // @{
433 E : const std::string& name() const { return name_; }
434 : // @}
435 :
436 : // A helper function for logging and debugging.
437 : std::string ToString() const;
438 :
439 : // Equality comparator for unittesting.
440 E : bool operator==(const Label& other) const {
441 E : return name_ == other.name_ && attributes_ == other.attributes_;
442 E : }
443 :
444 : // The label attributes are a bitmask. You can set them wholesale,
445 : // or set and clear them individually by bitmasking.
446 E : LabelAttributes attributes() const { return attributes_; }
447 E : void set_attributes(LabelAttributes attributes) { attributes_ = attributes; }
448 :
449 : // Set or clear one or more attributes.
450 E : void set_attribute(LabelAttributes attribute) { attributes_ |= attribute; }
451 E : void clear_attribute(LabelAttributes attribute) { attributes_ &= ~attribute; }
452 :
453 : // Determines if all or any of the given attributes are set.
454 E : bool has_attributes(LabelAttributes attributes) const {
455 E : return (attributes_ & attributes) == attributes;
456 E : }
457 E : bool has_any_attributes(LabelAttributes attributes) const {
458 E : return (attributes_ & attributes) != 0;
459 E : }
460 :
461 : // @returns true if this label is valid, false otherwise.
462 : bool IsValid() const;
463 :
464 : // Tests a set of label attributes for validity.
465 : // @param attributes the attributes to test.
466 : // @returns true if the provided attributes are valid, false otherwise.
467 : static bool AreValidAttributes(LabelAttributes attributes);
468 :
469 : private:
470 : // The name by which this label is known.
471 : std::string name_;
472 :
473 : // The disposition of the bytes found at this label.
474 : LabelAttributes attributes_;
475 : };
476 :
477 : // A block represents a block of either code or data.
478 : //
479 : // Since blocks may be split and up and glued together in arbitrary ways, each
480 : // block maintains an address-space over its data, associating ranges of block
481 : // data to ranges of bytes in the original image. This effectively encodes OMAP
482 : // data, allowing the PDB file to be updated.
483 : //
484 : // Each block also stores references to other blocks in the graph, their
485 : // relative location within the block and their type and size.
486 : //
487 : // Each block has a set of attributes, including a size, a name and a
488 : // "current" address. Most of those attributes are mutable, and are set in the
489 : // process of creating and manipulating images and graph address spaces.
490 : class BlockGraph::Block {
491 : public:
492 : // Set of the blocks that have a reference to this block.
493 : // This is keyed on block and source offset (not destination offset),
494 : // to allow one to easily locate and remove the backreferences on change or
495 : // deletion.
496 : typedef std::pair<Block*, Offset> Referrer;
497 : typedef std::set<Referrer> ReferrerSet;
498 :
499 : // Map of references that this block makes to other blocks.
500 : typedef std::map<Offset, Reference> ReferenceMap;
501 :
502 : // Represents a range of data in this block.
503 : typedef core::AddressRange<Offset, Size> DataRange;
504 :
505 : // Represents a range of data in the original image.
506 : typedef core::AddressRange<RelativeAddress, Size> SourceRange;
507 :
508 : // A map between bytes in this block and bytes in the original image.
509 : typedef core::AddressRangeMap<DataRange, SourceRange> SourceRanges;
510 :
511 : // Typed labels associated with various offsets in the block. Some of these
512 : // labels (of type CODE_LABEL) represent code start points for disassembly
513 : // while others (of type DATA_LABEL) represent the start of embedded data
514 : // within the block. Note that, while possible, it is NOT guaranteed that
515 : // all basic blocks are marked with a label. Basic block decomposition should
516 : // disassemble from the code labels to discover all basic blocks.
517 : typedef std::map<Offset, Label> LabelMap;
518 :
519 : Block(BlockId id,
520 : BlockType type,
521 : Size size,
522 : const base::StringPiece& name,
523 : BlockGraph* block_graph);
524 : ~Block();
525 :
526 : // Accessors.
527 E : BlockId id() const { return id_; }
528 E : BlockType type() const { return type_; }
529 E : void set_type(BlockType type) { type_ = type; }
530 :
531 E : Size size() const { return size_; }
532 :
533 : // Set the total size of the block. Note that allocated data_size_ must
534 : // always be less than or equal to the total size.
535 E : void set_size(Size size) {
536 E : DCHECK_LE(data_size_, size);
537 E : size_ = size;
538 E : }
539 :
540 E : const std::string& name() const { return name_; }
541 E : void set_name(const base::StringPiece& name) { name.CopyToString(&name_); }
542 :
543 E : Size alignment() const { return alignment_; }
544 E : void set_alignment(Size alignment) {
545 : // Ensure that alignment is a non-zero power of two.
546 E : DCHECK(common::IsPowerOfTwo(alignment));
547 E : alignment_ = alignment;
548 E : }
549 :
550 : // The address of the block is set any time the block is assigned
551 : // an address in an address space.
552 E : RelativeAddress addr() const { return addr_; }
553 E : void set_addr(RelativeAddress addr) { addr_ = addr; }
554 :
555 : // The section ID for the block. These IDs are wrt to the SectionMap in the
556 : // parent BlockGraph.
557 E : SectionId section() const { return section_; }
558 E : void set_section(SectionId section) { section_ = section; }
559 :
560 : // The block attributes are a bitmask. You can set them wholesale,
561 : // or set and clear them individually by bitmasking.
562 E : BlockAttributes attributes() const { return attributes_; }
563 E : void set_attributes(BlockAttributes attributes) { attributes_ = attributes; }
564 :
565 : // Set or clear one or more attributes.
566 E : void set_attribute(BlockAttributes attribute) { attributes_ |= attribute; }
567 E : void clear_attribute(BlockAttributes attribute) {
568 E : attributes_ &= ~attribute;
569 E : }
570 :
571 : // This is true iff data_ is in the ownership of the block.
572 : // Iff true, the block will delete [] data_ on destruction or when
573 : // data is overwritten.
574 E : bool owns_data() const { return owns_data_; }
575 :
576 : // Makes room for the given amount of data at the given offset. This is
577 : // special in that it will patch up any labels, source ranges and referrers
578 : // that land beyond the newly created data, shifting them to the right by
579 : // @p size. If the data for this block is actually allocated it will also
580 : // patch up the allocated data by zeroing the newly allocate range of data,
581 : // and shifting the tail by @p size. If the new data is strictly implicit
582 : // (offset > data_size), then the allocated data is not affected in any way
583 : // unless @p always_allocate_data is true.
584 : //
585 : // @param offset the offset at which to insert the new data.
586 : // @param size the size of the new data to be inserted.
587 : // @param always_allocate_data if true, then data_size will be grown if
588 : // necessary to ensure that the newly created data can be written.
589 : // @pre 0 <= offset <= size()
590 : void InsertData(Offset offset, Size size, bool always_allocate_data);
591 :
592 : // Removes the data in the given range. This will refuse to remove labels,
593 : // references and referrers that land in the range, and will fail if any
594 : // exist. It will also shift any labels, references and referrers that land
595 : // beyond the end of the removed range. Source ranges will also be fixed. If
596 : // the removed range lies within the initialized data then the data will also
597 : // be truncated/shifted as necessary.
598 : //
599 : // @param offset the offset at which to remove data.
600 : // @param size the size of the data to remove, in bytes.
601 : // @returns true on success, false otherwise.
602 : // @pre 0 <= offset <= size
603 : bool RemoveData(Offset offset, Size size);
604 :
605 : // Performs an inline resize of data in a BlockGraph. If the data is shrinking
606 : // this equates to a RemoveData operation. If it is growing it equates to an
607 : // InsertData operation.
608 : //
609 : // @param offset the offset of the data to resize.
610 : // @param current_size the current size of the data to resize.
611 : // @param new_size the desired size of the data.
612 : // @param always_allocate_data if true, then data_size will be grown if
613 : // necessary to ensure that the resized data can be written.
614 : // @returns true on success, false otherwise.
615 : // @pre 0 <= offset <= size
616 : bool InsertOrRemoveData(Offset offset, Size current_size, Size new_size,
617 : bool always_allocate_data);
618 :
619 : // Set the data the block refers to.
620 : // @param data NULL or the data this block refers to.
621 : // The underlying data must outlive this block.
622 : // @param data_size the size of data, or zero if data == NULL.
623 : // @pre data_size <= size().
624 : void SetData(const uint8* data, size_t data_size);
625 :
626 : // Allocates and returns a new data buffer of the given size. The returned
627 : // data will have been initialized to zero.
628 : // @pre data_size <= size().
629 : uint8* AllocateData(size_t data_size);
630 :
631 : // Makes a copy of data, returns a pointer to the copy.
632 : // @pre data_size <= size().
633 : uint8* CopyData(size_t data_size, const void* data);
634 :
635 : // Resizes data to new_size by truncating or zero-extending the current data.
636 : // @pre new_size <= size().
637 : const uint8* ResizeData(size_t new_size);
638 :
639 : // Returns a mutable copy of the block's data. If the block doesn't own
640 : // the data on entry, it'll be copied and the copy returned to the caller.
641 : uint8* GetMutableData();
642 :
643 : // The data bytes the block refers to.
644 E : const uint8* data() const { return data_; }
645 :
646 : // The data size may be smaller than the block size (see size()),
647 : // when the block e.g. refers to data that's all or part
648 : // zero-initialized by the linker/loader.
649 E : size_t data_size() const { return data_size_; }
650 :
651 E : const ReferenceMap& references() const { return references_; }
652 E : const ReferrerSet& referrers() const { return referrers_; }
653 E : const SourceRanges& source_ranges() const { return source_ranges_; }
654 E : SourceRanges& source_ranges() { return source_ranges_; }
655 E : const LabelMap& labels() const { return labels_; }
656 :
657 : // Returns true if there are any other blocks holding a reference to this one.
658 : bool HasExternalReferrers() const;
659 :
660 : // Set the reference at @p offset to @p ref.
661 : // If there's a pre-existing reference at @p offset, this overrides it.
662 : // @param offset offset of the reference into this block.
663 : // @param ref the reference to add.
664 : // @returns true iff this inserts a new reference.
665 : bool SetReference(Offset offset, const Reference& ref);
666 :
667 : // Retrieve the reference at @p offset if one exists.
668 : // @param reference on success returns the reference @p offset.
669 : // @returns true iff there was a reference at @p offset.
670 : bool GetReference(Offset offset, Reference* reference) const;
671 :
672 : // Remove the reference at @p offset.
673 : // @returns true iff there was a reference at @p offset.
674 : bool RemoveReference(Offset offset);
675 :
676 : // Remove all references from this block. This is handy when removing a block
677 : // from the block graph.
678 : bool RemoveAllReferences();
679 :
680 : // Set a label to @p offset.
681 : // A label in code marks the location of the start of an instruction -
682 : // e.g. a location where disassembly can usefully commence. Labels
683 : // appear to be inserted by the VS tool chain where e.g. a switch
684 : // statement is implemented with a jump table, to note the location
685 : // of the jump destinations.
686 : // @param offset the offset of the label to set.
687 : // @param name the name of the label.
688 : // @param attributes the attributes of the label.
689 : // @returns true iff a new label is inserted.
690 : // @note that only one label can exist at each offset, and the first
691 : // label set at any offset will stay there.
692 : // @{
693 : bool SetLabel(Offset offset, const Label& label);
694 : bool SetLabel(Offset offset,
695 : const base::StringPiece& name,
696 E : LabelAttributes attributes) {
697 E : return SetLabel(offset, Label(name, attributes));
698 E : }
699 : // @}
700 :
701 : // Gets the label at the given @p offset.
702 : // @param offset the offset of the label to get.
703 : // @param label the string to receive the label.
704 : // @return true if the label exists, false otherwise.
705 : bool GetLabel(Offset offset, Label* label) const;
706 :
707 : // Removes the label at the given @p offset.
708 : // @param offset the offset of the label to remove.
709 : // @return true if the label existed and was removed, false it it did not
710 : // exist.
711 : bool RemoveLabel(Offset offset);
712 :
713 : // Returns true iff the block has a label at @p offset.
714 : // @param offset the offset of the label to search for.
715 : bool HasLabel(Offset offset) const;
716 :
717 : // Change all references to this block to refer to @p new_block instead,
718 : // while offsetting each reference by @p offset.
719 : // @note this fails if any of the transferred references end up with offsets
720 : // less than zero, or greater than new_block->size().
721 : // @returns true iff all references were transferred successfully.
722 : bool TransferReferrers(Offset offset, Block* new_block);
723 :
724 : // Returns true if this block contains the given range of bytes.
725 : bool Contains(RelativeAddress address, size_t size) const;
726 :
727 : protected:
728 : // Give BlockGraph access to our innards for serialization.
729 : friend class BlockGraph;
730 : // Give BlockGraphSerializer access to our innards for serialization.
731 : friend class BlockGraphSerializer;
732 :
733 : // This constructor is used by serialization.
734 : explicit Block(BlockGraph* block_graph);
735 :
736 : // Allocates and returns a new data buffer of the given size. The returned
737 : // data buffer will not have been initialized in any way.
738 : uint8* AllocateRawData(size_t size);
739 :
740 : BlockId id_;
741 : BlockType type_;
742 : Size size_;
743 : Size alignment_;
744 : std::string name_;
745 : RelativeAddress addr_;
746 :
747 : // BlockGraph to which belongs this Block. A block can only belongs to one
748 : // Block Graph.
749 : BlockGraph * block_graph_;
750 :
751 : SectionId section_;
752 : BlockAttributes attributes_;
753 :
754 : ReferenceMap references_;
755 : ReferrerSet referrers_;
756 : SourceRanges source_ranges_;
757 : LabelMap labels_;
758 :
759 : // True iff data_ is ours to deallocate with delete [].
760 : // If this is false, data_ must be guaranteed to outlive the block.
761 : bool owns_data_;
762 : // A pointer to the code or data we represent.
763 : const uint8* data_;
764 : // Size of the above.
765 : size_t data_size_;
766 : };
767 :
768 : // A graph address space endows a graph with a non-overlapping ordering
769 : // on blocks, where each block occupies zero or one address ranges in the
770 : // address space. No two blocks may overlap in an address space.
771 : class BlockGraph::AddressSpace {
772 : public:
773 : typedef core::AddressSpace<RelativeAddress, BlockGraph::Size, Block*>
774 : AddressSpaceImpl;
775 : typedef AddressSpaceImpl::Range Range;
776 : typedef AddressSpaceImpl::RangeMap RangeMap;
777 : typedef AddressSpaceImpl::RangeMapIter RangeMapIter;
778 : typedef AddressSpaceImpl::RangeMapConstIter RangeMapConstIter;
779 : typedef AddressSpaceImpl::RangeMapIterPair RangeMapIterPair;
780 : typedef AddressSpaceImpl::RangeMapConstIterPair RangeMapConstIterPair;
781 :
782 : // Constructs a new empty address space.
783 : // @p start to @p start + @p size on @p graph.
784 : explicit AddressSpace(BlockGraph* graph);
785 :
786 : // Add a block of type @p type and @p size at @p address to our associated
787 : // graph, and return the new block.
788 : // @returns the new block, or NULL if the new block would overlap
789 : // an existing block.
790 : Block* AddBlock(BlockType type,
791 : RelativeAddress addr,
792 : Size size,
793 : const base::StringPiece& name);
794 :
795 : // Merges all blocks that intersect @p range into a single block.
796 : // Moves labels and references from the intersecting blocks to the
797 : // merged block, and changes referring blocks to refer to the new,
798 : // merged block. Removes the original blocks from the BlockGraph.
799 : // @returns the new, merged block if there was at least one intersecting
800 : // block in @p range, or NULL otherwise.
801 : Block* MergeIntersectingBlocks(const Range& range);
802 :
803 : // Insert existing block @p block at @p address.
804 : // @returns true on success, or false if the @p block would overlap
805 : // an existing block.
806 : bool InsertBlock(RelativeAddress addr, Block* block);
807 :
808 : // Returns a pointer to the block containing address, or NULL
809 : // if no block contains address.
810 : Block* GetBlockByAddress(RelativeAddress addr) const;
811 :
812 : // Returns a pointer to the block containing the address range
813 : // [address, address + size), or NULL if no block contains that
814 : // range.
815 : Block* GetContainingBlock(RelativeAddress addr, Size size) const;
816 :
817 : // Finds the first block, if any that intersects
818 : // [@p address, @p address + @p size).
819 : Block* GetFirstIntersectingBlock(RelativeAddress address, Size size);
820 :
821 : // Check whether the address space contains @p block.
822 : // @param block the block in question.
823 : // @returns true if the block is in the address space, false otherwise.
824 : bool ContainsBlock(const Block* block);
825 :
826 : // Locates all blocks that intersect [@p address, @p address + @p size).
827 : // @returns a pair of iterators that iterate over the found blocks.
828 : RangeMapConstIterPair GetIntersectingBlocks(RelativeAddress address,
829 : Size size) const;
830 : RangeMapIterPair GetIntersectingBlocks(RelativeAddress address, Size size);
831 :
832 : // Retrieve the address off @p block.
833 : // @param block the block in question.
834 : // @param addr on success, returns the address of @p block in this
835 : // address space.
836 : // @returns true on success, false if @p block is not in this
837 : // address space.
838 : bool GetAddressOf(const Block* block, RelativeAddress* addr) const;
839 :
840 : // Accessor.
841 E : BlockGraph* graph() { return graph_; }
842 E : const BlockGraph* graph() const { return graph_; }
843 :
844 E : RangeMapConstIter begin() const {
845 E : return address_space_.ranges().begin();
846 E : }
847 :
848 E : RangeMapConstIter end() const {
849 E : return address_space_.ranges().end();
850 E : }
851 :
852 E : size_t size() const {
853 E : return address_space_.ranges().size();
854 E : }
855 :
856 E : const AddressSpaceImpl& address_space_impl() const {
857 E : return address_space_;
858 E : }
859 :
860 : private:
861 : bool InsertImpl(RelativeAddress addr, Block* block);
862 :
863 : typedef stdext::hash_map<const Block*, RelativeAddress> BlockAddressMap;
864 :
865 : AddressSpaceImpl address_space_;
866 : BlockAddressMap block_addresses_;
867 : BlockGraph* graph_;
868 : };
869 :
870 : // Represents a reference from one block to another. References may be offset.
871 : // That is, they may refer to an object at a given location, but actually point
872 : // to a location that is some fixed distance away from that object. This allows,
873 : // for example, non-zero based indexing into a table. The object that is
874 : // intended to be dereferenced is called the 'base' of the offset.
875 : //
876 : // BlockGraph references are from a location (offset) in one block, to some
877 : // location in another block. The referenced block itself plays the role of the
878 : // 'base' of the reference, with the offset of the reference being stored as
879 : // an integer from the beginning of the block. However, basic block
880 : // decomposition requires breaking the block into smaller pieces and thus we
881 : // need to carry around an explicit base value, indicating which byte in the
882 : // block is intended to be referenced.
883 : //
884 : // A direct reference to a location will have the same value for 'base' and
885 : // 'offset'.
886 : //
887 : // Here is an example:
888 : //
889 : // /----------\
890 : // +---------------------------+
891 : // O | B | <--- Referenced block
892 : // +---------------------------+ B = base
893 : // \-----/ O = offset
894 : //
895 : class BlockGraph::Reference {
896 : public:
897 : Reference() :
898 E : type_(RELATIVE_REF), size_(0), referenced_(NULL), offset_(0), base_(0) {
899 E : }
900 :
901 : // @param type type of reference.
902 : // @param size size of reference.
903 : // @param referenced the referenced block.
904 : // @param offset offset from the beginning of the block of the location to be
905 : // explicitly referred to.
906 : // @param base offset into the block of the location actually being
907 : // referenced. This must be strictly within @p referenced.
908 : Reference(ReferenceType type,
909 : Size size,
910 : Block* referenced,
911 : Offset offset,
912 : Offset base)
913 : : type_(type),
914 : size_(size),
915 : referenced_(referenced),
916 : offset_(offset),
917 E : base_(base) {
918 E : DCHECK(IsValid());
919 E : }
920 :
921 : // Copy constructor.
922 : Reference(const Reference& other)
923 : : type_(other.type_),
924 : size_(other.size_),
925 : referenced_(other.referenced_),
926 : offset_(other.offset_),
927 E : base_(other.base_) {
928 E : }
929 :
930 : // Accessors.
931 E : ReferenceType type() const { return type_; }
932 E : Size size() const { return size_; }
933 E : Block* referenced() const { return referenced_; }
934 E : Offset offset() const { return offset_; }
935 E : Offset base() const { return base_; }
936 :
937 : // Determines if this is a direct reference. That is, if the actual location
938 : // being referenced (offset) and the intended location being referenced (base)
939 : // are the same.
940 : //
941 : // @returns true if the reference is direct, false otherwise.
942 E : bool IsDirect() const { return base_ == offset_; }
943 :
944 : // Determines if this is a valid reference, by imposing size constraints on
945 : // reference types, and determining if the base address of the reference is
946 : // strictly contained within the referenced block.
947 : //
948 : // @returns true if valid, false otherwise.
949 : bool IsValid() const;
950 :
951 E : bool operator==(const Reference& other) const {
952 : return type_ == other.type_ &&
953 : size_ == other.size_ &&
954 : referenced_ == other.referenced_ &&
955 : offset_ == other.offset_ &&
956 E : base_ == other.base_;
957 E : }
958 :
959 : // The maximum size that a reference may have. This needs to be kept in sync
960 : // with the expectations of IsValid().
961 : static const size_t kMaximumSize = 4;
962 :
963 : // Returns true if the given reference type and size combination is valid.
964 : static bool IsValidTypeSize(ReferenceType type, Size size);
965 :
966 : private:
967 : // Type of this reference.
968 : ReferenceType type_;
969 :
970 : // Size of this reference.
971 : // Absolute references are always pointer wide, but PC-relative
972 : // references can be 1, 2 or 4 bytes wide, which affects their range.
973 : Size size_;
974 :
975 : // The block referenced.
976 : Block* referenced_;
977 :
978 : // Offset into the referenced block.
979 : Offset offset_;
980 :
981 : // The base of the reference, as in offset in the block. This must be a
982 : // location strictly within the block.
983 : Offset base_;
984 : };
985 :
986 : // Commonly used container types.
987 : typedef std::vector<BlockGraph::Block*> BlockVector;
988 : typedef std::vector<const BlockGraph::Block*> ConstBlockVector;
989 :
990 : } // namespace block_graph
991 :
992 : #endif // SYZYGY_BLOCK_GRAPH_BLOCK_GRAPH_H_
|