1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #ifndef SYZYGY_PE_PE_FILE_PARSER_H_
16 : #define SYZYGY_PE_PE_FILE_PARSER_H_
17 :
18 : #include "base/callback.h"
19 : #include "syzygy/block_graph/block_graph.h"
20 : #include "syzygy/core/address.h"
21 : #include "syzygy/pe/pe_file.h"
22 :
23 : namespace pe {
24 :
25 : // Forward declaration; this header only takes PEFileStructPtr by const reference.
26 : template <class ItemType> class PEFileStructPtr;
27 :
28 : // This class parses the PE image data in a PEFile instance, chunks out
29 : // the image header and various other PE image sections to an address space.
30 : class PEFileParser {
31 : public:
32 : typedef block_graph::BlockGraph BlockGraph;
33 : typedef core::RelativeAddress RelativeAddress;
34 :
35 : typedef base::Callback<bool(RelativeAddress,
36 : BlockGraph::ReferenceType,
37 : BlockGraph::Size,
38 : RelativeAddress)>  // Same argument types and order as AddReference().
39 : AddReferenceCallback;  // Invoked for all references encountered while parsing.
40 :
41 : // Callback that is invoked for every named import thunk that is parsed. The
42 : // arguments are as follows:
43 : // 1. const char* module_name
44 : // The name of the module being imported.
45 : // 2. const char* symbol_name
46 : // The name of the imported symbol.
47 : // 3. BlockGraph::Block* thunk
48 : // The block containing the thunk which will be initialized to point to
49 : // the symbol at runtime.
50 : typedef base::Callback<bool(const char*,
51 : const char*,
52 : BlockGraph::Block*)>
53 : OnImportThunkCallback;  // NOTE(review): meaning of the bool result is not stated here.
54 :
55 : PEFileParser(const PEFile& image_file,
56 : BlockGraph::AddressSpace* address_space,
57 : const AddReferenceCallback& add_reference);
58 :
59 : struct PEHeader {  // Block pointers for the image headers; zero-filled on construction.
60 E : PEHeader() {
61 E : memset(this, 0, sizeof(*this));  // Zero-fills every block pointer below.
62 E : }  // NOTE(review): memset presumably comes from a transitive include — confirm.
63 :
64 : // The block that describes the DOS header, including the DOS stub.
65 : BlockGraph::Block* dos_header;
66 :
67 : // The block that describes the NT and the section headers.
68 : BlockGraph::Block* nt_headers;
69 :
70 : // The blocks that describe the data directory chunks.
71 : BlockGraph::Block* data_directory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES];
72 : };
73 :
74 E : void set_on_import_thunk(const OnImportThunkCallback& on_import_thunk) {
75 E : on_import_thunk_ = on_import_thunk;  // Stores a copy of the callback.
76 E : }
77 :
78 : // Parses the image, chunks the various blocks it decomposes into and
79 : // invokes the AddReferenceCallback for all references encountered.
80 : bool ParseImage(PEHeader* pe_header);
81 :
82 : // Tables of thunks come in various flavours.
83 : enum ThunkTableType {
84 : // For parsing of normal imports.
85 : kImportNameTable,
86 : kImportAddressTable,
87 :
88 : // For parsing of delay-load imports.
89 : kDelayLoadImportNameTable,
90 : kDelayLoadImportAddressTable,
91 : kDelayLoadBoundImportAddressTable,
92 : };
93 :
94 : // Thunks in import tables may point to various discrete things. These depend
95 : // on the ThunkTableType and whether or not the table is bound.
96 : enum ThunkDataType {
97 : kNullThunkData, // Pointer values should all be NULL.
98 : kImageThunkData, // IMAGE_THUNK_DATA containing RelativeAddress to a string
99 : // in the image.
100 : kCodeInImageThunkData, // AbsoluteAddress in image.
101 : kCodeOutOfImageThunkData, // AbsoluteAddress out of image.
102 : kArbitraryThunkData, // Anything goes.
103 : };
104 :
105 : protected:
106 : // Parses the image header, chunks the various blocks it refers and
107 : // invokes the AddReferenceCallback for all references encountered.
108 : bool ParseImageHeader(PEHeader* pe_header);
109 :
110 : // These methods parse a given data directory section, chunk out the blocks
111 : // and create references as they're encountered.
112 : // @param dir the data directory entry.
113 : // @returns the chunked block for that directory, or NULL on error.
114 : // IMAGE_DIRECTORY_ENTRY_EXPORT
115 : BlockGraph::Block* ParseExportDir(const IMAGE_DATA_DIRECTORY& dir);
116 : // IMAGE_DIRECTORY_ENTRY_IMPORT
117 : BlockGraph::Block* ParseImportDir(const IMAGE_DATA_DIRECTORY& dir);
118 : // IMAGE_DIRECTORY_ENTRY_RESOURCE
119 : BlockGraph::Block* ParseResourceDir(const IMAGE_DATA_DIRECTORY& dir);
120 : // IMAGE_DIRECTORY_ENTRY_EXCEPTION
121 : BlockGraph::Block* ParseExceptionDir(const IMAGE_DATA_DIRECTORY& dir);
122 : // IMAGE_DIRECTORY_ENTRY_SECURITY
123 : BlockGraph::Block* ParseSecurityDir(const IMAGE_DATA_DIRECTORY& dir);
124 : // IMAGE_DIRECTORY_ENTRY_BASERELOC
125 : BlockGraph::Block* ParseRelocDir(const IMAGE_DATA_DIRECTORY& dir);
126 : // IMAGE_DIRECTORY_ENTRY_DEBUG
127 : BlockGraph::Block* ParseDebugDir(const IMAGE_DATA_DIRECTORY& dir);
128 : // IMAGE_DIRECTORY_ENTRY_ARCHITECTURE
129 : BlockGraph::Block* ParseArchitectureDir(const IMAGE_DATA_DIRECTORY& dir);
130 : // IMAGE_DIRECTORY_ENTRY_GLOBALPTR
131 : BlockGraph::Block* ParseGlobalDir(const IMAGE_DATA_DIRECTORY& dir);
132 : // IMAGE_DIRECTORY_ENTRY_TLS
133 : BlockGraph::Block* ParseTlsDir(const IMAGE_DATA_DIRECTORY& dir);
134 : // IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG
135 : BlockGraph::Block* ParseLoadConfigDir(const IMAGE_DATA_DIRECTORY& dir);
136 : // IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT
137 : BlockGraph::Block* ParseBoundImportDir(const IMAGE_DATA_DIRECTORY& dir);
138 : // IMAGE_DIRECTORY_ENTRY_IAT
139 : BlockGraph::Block* ParseIatDir(const IMAGE_DATA_DIRECTORY& dir);
140 : // IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT
141 : BlockGraph::Block* ParseDelayImportDir(const IMAGE_DATA_DIRECTORY& dir);
142 : // IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR
143 : BlockGraph::Block* ParseComDescriptorDir(const IMAGE_DATA_DIRECTORY& dir);
144 :
145 : private:
146 : // Counts the number of import thunks in the run starting at thunk_start.
147 : // @returns the number of thunks excluding the terminating sentinel, or
148 : // zero on error.
149 : size_t CountImportThunks(RelativeAddress thunk_start);
150 :
151 : // Parses the IAT/INT/BoundIAT starting at thunk_start.
152 : // @param thunk_start the RVA where the import thunks start.
153 : // @param is_bound true iff thunks are bound, in which case we tag absolute
154 : // references instead of relative. This may only be true if table_type is
155 : // kImportAddressTable or kDelayLoadImportAddressTable.
156 : // @param table_type the type of thunk table.
157 : // @param thunk_type human readable type for thunk.
158 : // @param import_name name of imported dll.
159 : // @returns true on success, false otherwise.
160 : bool ParseImportThunks(RelativeAddress thunk_start,
161 : size_t num_thunks,  // Undocumented above; presumably the thunk count (cf. CountImportThunks) — confirm.
162 : bool is_bound,
163 : ThunkTableType table_type,
164 : const char* thunk_type,
165 : const char* import_name);
166 : // Parse a single entry in an import table.
167 : // @param thunk_addr the address of the thunk.
168 : // @param thunk_data_type the type of data in the thunk.
169 : // @param thunk_type human readable type for thunk.
170 : // @param chunk_name if true, and thunk_data_type is kImageThunkData, will
171 : // parse out the referenced IMAGE_THUNK_DATA as a block.
172 : // @returns true on success, false otherwise.
173 : bool ParseImportThunk(RelativeAddress thunk_addr,
174 : ThunkDataType thunk_data_type,
175 : const char* thunk_type,
176 : const char* module_name,  // Undocumented above; presumably the imported module's name — confirm.
177 : bool chunk_name);
178 :
179 : // Special handling for delay-load bound import address tables. We've seen
180 : // cases in actual binaries where these are malformed. This is generally not
181 : // an issue, but we handle these a little more loosely so that we can complete
182 : // the decomposition process. If such malformed tables are encountered, this
183 : // will cause a warning to be logged.
184 : // @param iat_addr the beginning of the delay-load bound IAT.
185 : // @param iat_size the expected size of the delay-load bound IAT.
186 : // @param iat_name the name to give the chunked block, or the label to
187 : // associate with iat_addr in an existing block.
188 : // @returns the block containing the delay-load IAT table.
189 : BlockGraph::Block* ChunkDelayBoundIATBlock(RelativeAddress iat_addr,
190 : size_t iat_size,
191 : const char* iat_name);
192 :
193 : BlockGraph::Block* AddBlock(BlockGraph::BlockType type,  // NOTE(review): undocumented; presumably adds a block to address_space_ — confirm in the .cc.
194 : RelativeAddress addr,
195 : BlockGraph::Size size,
196 : const char* name);
197 :
198 : bool AddReference(RelativeAddress src,  // NOTE(review): undocumented; presumably forwards to add_reference_ — confirm in the .cc.
199 : BlockGraph::ReferenceType type,
200 : BlockGraph::Size size,
201 : RelativeAddress dst);
202 :
203 : template <typename ItemType>  // NOTE(review): the four Add*() helpers below are undocumented; all take (structure, item).
204 : bool AddRelative(const PEFileStructPtr<ItemType>& structure,
205 : const DWORD* item);
206 :
207 : template <typename ItemType>
208 : bool AddAbsolute(const PEFileStructPtr<ItemType>& structure,
209 : const DWORD* item);
210 :
211 : template <typename ItemType>
212 : bool MaybeAddAbsolute(const PEFileStructPtr<ItemType>& structure,
213 : const DWORD* item);
214 :
215 : template <typename ItemType>
216 : bool AddFileOffset(const PEFileStructPtr<ItemType>& structure,
217 : const DWORD* item);
218 :
219 : bool ParseResourceDirImpl(BlockGraph::Block* resource_block,
220 : size_t root_offset);
221 :
222 : // Pointer to a data dir parser function.
223 : typedef BlockGraph::Block* (PEFileParser::*ParseDirFunction)(
224 : const IMAGE_DATA_DIRECTORY& dir);
225 :
226 : struct DataDirParseEntry {
227 : int entry;  // Presumably an IMAGE_DIRECTORY_ENTRY_* index — confirm against parsers_.
228 : const char* name;
229 : ParseDirFunction parser;
230 : };
231 :
232 : // Array of data directory parser entries used to parse the
233 : // sundry data directory entries.
234 : static const DataDirParseEntry parsers_[];
235 :
236 : const PEFile& image_file_;  // Reference member: the PEFile presumably must outlive this parser — confirm.
237 : BlockGraph::AddressSpace* address_space_;
238 : AddReferenceCallback add_reference_;
239 : OnImportThunkCallback on_import_thunk_;  // Set via set_on_import_thunk().
240 : };
241 :
242 : } // namespace pe
243 :
244 : #endif // SYZYGY_PE_PE_FILE_PARSER_H_
|