// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// The COFF decomposer parses a COFF file and constructs a corresponding
// block graph and image layout.
//
// COFF files are expected to be compiled with function-level linking (/Gy
// in MSVC), and are made of the following parts:
// - a file header;
// - a section table containing section headers;
// - a symbol table followed by a string table;
// - a chunk of raw data for each initialized section;
// - and a relocation table for each section that needs one.
//
// The COFF decomposer creates blocks that mirror that organization:
// - one block for the file and section headers;
// - one block for the symbol table;
// - and a separate block for the string table;
// - one block for the raw data of each section;
// - one unmapped block for each BSS section;
// - and one block for each relocation table.
//
// The different blocks have been split in this way in anticipation of
// modifications that may grow or shrink them independently; for instance,
// symbols may be added without the need to shift the contents of the string
// table.
//
// When working with COFF files, relative addresses in the image layout are
// to be interpreted as file offsets. The two can be freely converted to and
// from each other, although, for the sake of consistency, file offsets
// should be used in COFF-aware code (e.g., the decomposer and reader),
// while relative addresses can continue to be used elsewhere in generic
// transforms working on blocks.
//
// In addition, the decomposer attaches references to blocks to represent
// connections between blocks that need to be preserved through the
// transforms. There are three kinds of references created, which require
// different handling when assembling and writing back a modified COFF
// file. These differences should not affect other (non-COFF-aware)
// transforms, however.
//
// - Pointer references, from headers and tables to other parts of the COFF
//   file, indicate actual addresses encoded at the source location of the
//   reference; the contents at the source address will require update
//   before writing back.
//
// - Relocation references, from raw section data to other sections,
//   represent COFF relocations. They are NOT encoded at the source address
//   of the reference. Instead, they should be translated to relocations
//   (replacing the associated relocation table) when recomposing a modified
//   COFF file.
//
// - Symbol references, from raw section data to entries within the symbol
//   table, are placeholders. They refer to things that are not physically
//   encoded in the COFF file, such as external functions or uninitialized
//   data. All such occurrences in a COFF file use the symbol table to
//   specify the target to look up. The references in the block graph need
//   to be translated to relocations along with relocation references.
69 :
70 : #ifndef SYZYGY_PE_COFF_DECOMPOSER_H_
71 : #define SYZYGY_PE_COFF_DECOMPOSER_H_
72 :
73 : #include <windows.h> // NOLINT
74 : #include <map>
75 :
76 : #include "syzygy/pe/coff_file.h"
77 : #include "syzygy/pe/image_layout.h"
78 :
79 m : namespace pe {
80 :
81 : // A CoffDecomposer extracts code and data from a CoffFile into an
82 : // ImageLayout, and the corresponding block graph.
83 : //
84 : // The block graph contains all data from the COFF file as well as
85 : // references for all locations that will need to be relocated on
86 : // output. References include relocations in the code and data sections, as
87 : // well as internal file offset pointers in headers and metadata, such that
88 : // writing back the COFF file only require patching those specific
89 : // references.
90 m : class CoffDecomposer {
91 m : public:
92 : // The separator that is used between the section and COMDAT name in
93 : // a block name.
94 m : static const char kSectionComdatSep[];
95 :
96 : // Initialize the decomposer for the given image file.
97 : //
98 : // @param image_file the image file to decompose; must outlive the
99 : // instance of the decomposer.
100 m : explicit CoffDecomposer(const CoffFile& image_file);
101 :
102 : // Decompose the image file into an image layout, including a block
103 : // graph. The resulting block graph contains the breakdown of code and
104 : // data blocks with typed references; the remaining components of the
105 : // layout hold information on where the blocks resided in the original
106 : // image.
107 : //
108 : // @param image_layout the image layout to populate.
109 : // @returns true on success, false on failure.
110 : //
111 : // @note In COFF decomposition, the relative addresses in the block graph
112 : // and image layout are equal to the file offsets of the COFF file.
113 m : bool Decompose(ImageLayout* image_layout);
114 :
115 m : private:
116 m : typedef block_graph::BlockGraph BlockGraph;
117 m : typedef core::FileOffsetAddress FileOffsetAddress;
118 m : typedef core::RelativeAddress BlockGraphAddress;
119 :
120 : // A map from section indexes to the corresponding block in the
121 : // block graph.
122 m : typedef std::map<size_t, BlockGraph::Block*> SectionBlockMap;
123 :
124 : // @name Decomposition steps.
125 : // @{
126 : // Add non-section contents as blocks with associated references in the
127 : // block graph.
128 : //
129 : // @returns true on success, false on failure.
130 m : bool CreateBlocksAndReferencesFromNonSections();
131 :
132 : // Add header contents as blocks with associated references in the block
133 : // graph.
134 : //
135 : // @returns true on success, false on failure.
136 m : bool CreateBlocksAndReferencesFromHeaders();
137 :
138 : // Add the symbol table and string table as blocks with associated
139 : // references in the block graph.
140 : //
141 : // @returns true on success, false on failure.
142 m : bool CreateBlocksAndReferencesFromSymbolAndStringTables();
143 :
144 : // Add every relocation table as a block. We do not track references
145 : // originating from the relocation tables, as that information is already
146 : // stored as parsed references from the section data directly to the
147 : // destination.
148 : //
149 : // When needed, the relocation tables will have to be regenerated from the
150 : // references in each section, in accordance with an up-to-date symbol
151 : // table. Reference types and sizes contain all the information necessary
152 : // to infer relocation entries; addresses will need to be converted to
153 : // symbols through the symbol table.
154 : //
155 : // @returns true on success, false on failure.
156 m : bool CreateBlocksFromRelocationTables();
157 :
158 : // Add section contents as blocks in the block graph.
159 : //
160 : // @returns true on success, false on failure.
161 m : bool CreateBlocksFromSections();
162 :
163 : // Add references to section blocks created with
164 : // CreateBlocksFromSections(), computed from the relocation table
165 : // associated with each section.
166 : //
167 : // @returns true on success, false on failure.
168 m : bool CreateReferencesFromRelocations();
169 :
170 : // Add references to debug section blocks created with
171 : // CreateBlocksFromSections(), for debug symbol offsets and pointers not
172 : // covered by relocations.
173 : //
174 : // Also add attributes to blocks based on debug information.
175 : //
176 : // @returns true on success, false on failure.
177 m : bool CreateReferencesFromDebugInfo();
178 :
179 : // Add jump and case table labels to code blocks, based on STATIC entries
180 : // present in the COFF symbol table.
181 : //
182 : // @returns true on success, false on failure.
183 m : bool CreateLabelsFromSymbols();
184 : // @}
185 :
186 : // Create a new block with the given properties, and data read from the
187 : // image file.
188 : //
189 : // @param type the type of block to create.
190 : // @param addr the offset where the block starts in the COFF file.
191 : // @param size the size of data, in bytes.
192 : // @param name the name of the block, which needs not be unique, but
193 : // should be informative.
194 : // @returns the new block, or NULL if it would overlap with an existing
195 : // block.
196 m : BlockGraph::Block* CreateBlock(BlockGraph::BlockType type,
197 m : FileOffsetAddress addr,
198 m : BlockGraph::Size size,
199 m : const base::StringPiece& name);
200 :
201 : // Create a reference as specified, ignoring any existing identical
202 : // reference at the same source offset.
203 : //
204 : // @param src_addr the source offset where the reference is located.
205 : // @param ref_type the type of reference to create.
206 : // @param ref_size the size of the reference to create.
207 : // @param target the destination block of the reference.
208 : // @param offset the offset within @p target to the destination.
209 : // @returns true on success, false on failure.
210 m : bool CreateReference(FileOffsetAddress src_addr,
211 m : BlockGraph::ReferenceType ref_type,
212 m : BlockGraph::Size ref_size,
213 m : BlockGraph::Block* target,
214 m : BlockGraph::Offset offset);
215 :
216 : // Create a reference to the specified file offset, ignoring any existing
217 : // identical reference at the same source offset.
218 : //
219 : // @param src_addr the source offset where the reference is located.
220 : // @param ref_type the type of reference to create.
221 : // @param ref_size the size of the reference to create.
222 : // @param dst_addr the destination, as an offset within the COFF file.
223 : // @returns true on success, false on failure.
224 m : bool CreateFileOffsetReference(FileOffsetAddress src_addr,
225 m : BlockGraph::ReferenceType ref_type,
226 m : BlockGraph::Size ref_size,
227 m : FileOffsetAddress dst_addr);
228 :
229 : // Create a reference to the specified section offset, ignoring any
230 : // existing identical reference at the same source offset.
231 : //
232 : // @param src_addr the source offset where the reference is located.
233 : // @param ref_type the type of reference to create.
234 : // @param ref_size the size of the reference to create.
235 : // @param section_index the destination section of the reference.
236 : // @param section_offset the offset to the destination within the section.
237 : // @returns true on success, false on failure.
238 m : bool CreateSectionOffsetReference(FileOffsetAddress src_addr,
239 m : BlockGraph::ReferenceType ref_type,
240 m : BlockGraph::Size ref_size,
241 m : size_t section_index,
242 m : size_t section_offset);
243 :
244 : // Create a reference to the specified symbol, ignoring any existing
245 : // identical reference at the same source offset.
246 : //
247 : // References to symbols differ from normal references in that they may
248 : // point either to the actual target of the symbol, or to the symbol
249 : // itself if it is unbound in the object file (external symbol).
250 : //
251 : // If @p symbol is an external symbol, then @p offset is ignored and
252 : // should be set to either 0 (for a section reference) or the value of the
253 : // Value field of @p symbol (for a normal reference).
254 : //
255 : // @param src_addr the source offset where the reference is located.
256 : // @param ref_type the type of reference to create.
257 : // @param ref_size the size of the reference to create.
258 : // @param symbol the destination symbol of the reference.
259 : // @param offset the offset from @p symbol to the destination; ignored for
260 : // external symbols.
261 : // @returns true on success, false on failure.
262 m : bool CreateSymbolOffsetReference(FileOffsetAddress src_addr,
263 m : BlockGraph::ReferenceType ref_type,
264 m : BlockGraph::Size ref_size,
265 m : const IMAGE_SYMBOL* symbol,
266 m : size_t offset);
267 :
268 : // Translate a file offset to a block and offset within that block.
269 : // Translated offsets are always positive or zero and fall within
270 : // the boundaries of the block.
271 : //
272 : // @param addr the file offset to translate.
273 : // @param block where to store the resulting block pointer.
274 : // @param offset where to store the resulting offset.
275 : // @returns true on success, false on failure; on failure, the
276 : // contents of the output arguments is unspecified.
277 m : bool FileOffsetToBlockOffset(FileOffsetAddress addr,
278 m : BlockGraph::Block** block,
279 m : BlockGraph::Offset* offset);
280 :
281 : // Translate a section index and offset to a block and offset within that
282 : // block. Translated offsets are always positive or zero and fall within
283 : // the boundaries of the block.
284 : //
285 : // @param section_index the index of the section.
286 : // @param section_offset the offset within that section.
287 : // @param block where to store the resulting block pointer.
288 : // @param offset where to store the resulting offset.
289 : // @returns true on success, false on failure; on failure, the
290 : // contents of the output arguments is unspecified.
291 m : bool SectionOffsetToBlockOffset(size_t section_index, size_t section_offset,
292 m : BlockGraph::Block** block,
293 m : BlockGraph::Offset* offset);
294 :
295 : // Convert a file offset to a relative address suitable for use in the
296 : // block graph and associated structures. The values of the address
297 : // objects will be equal.
298 : //
299 : // @param addr the file offset to translate.
300 : // @returns a relative address with the same value as the file offset.
301 m : BlockGraphAddress FileOffsetToBlockGraphAddress(FileOffsetAddress addr);
302 :
303 : // The CoffFile that is being decomposed.
304 m : const CoffFile& image_file_;
305 :
306 : // A map from section indexes to the corresponding block in the block
307 : // graph.
308 m : SectionBlockMap section_block_map_;
309 :
310 : // @name Temporaries that are only valid while inside of Decompose().
311 : // @{
312 : // The image layout we are building.
313 m : ImageLayout* image_layout_;
314 :
315 : // The image address space we are decomposing to.
316 m : BlockGraph::AddressSpace* image_;
317 : // @}
318 m : };
319 :
320 m : } // namespace pe
321 :
322 : #endif // SYZYGY_PE_COFF_DECOMPOSER_H_
|