1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #ifndef SYZYGY_PE_PE_FILE_H_
16 : #define SYZYGY_PE_PE_FILE_H_
17 :
18 : #include <windows.h>
19 : #include <winnt.h>
20 : #include <map>
21 : #include <set>
22 : #include <string>
23 : #include <vector>
24 :
25 : #include "base/logging.h"
26 : #include "base/files/file_path.h"
27 : #include "base/files/file_util.h"
28 : #include "syzygy/core/address.h"
29 : #include "syzygy/core/address_space.h"
30 : #include "syzygy/core/section_offset_address.h"
31 : #include "syzygy/core/serialization.h"
32 : #include "syzygy/pe/pe_coff_file.h"
33 :
34 : namespace pe {
35 :
36 : // Traits of the PE address space.
37 : struct PEAddressSpaceTraits {
38 : // Native addresses for PE files: relative virtual addresses (RVAs).
39 : typedef core::RelativeAddress AddressType;
40 :
41 : // Native sizes for PE files.
42 : typedef size_t SizeType;
43 :
44 : // @returns an address different from all valid addresses for the
45 : // specified address type.
46 E : static const AddressType invalid_address() {
47 E : return AddressType::kInvalidAddress;
48 E : }
49 :
50 : // @returns the address at which to insert global headers.
51 E : static const AddressType header_address() {
52 E : return AddressType(0);
53 E : }
54 :
55 : // Return the RVA to which the section will be mapped when the
56 : // program is loaded.
57 : //
58 : // @param header the section header.
59 : // @returns the RVA of the section.
60 E : static AddressType GetSectionAddress(const IMAGE_SECTION_HEADER& header) {
61 E : return AddressType(header.VirtualAddress);
62 E : }
63 :
64 : // Return the number of bytes that will be occupied by the section
65 : // when the program is loaded, including any run-time padding.
66 : //
67 : // @param header the section header.
68 : // @returns the run-time size of the section.
69 E : static SizeType GetSectionSize(const IMAGE_SECTION_HEADER& header) {
70 E : return SizeType(header.Misc.VirtualSize);
71 E : }
72 : };
73 :
74 : // A raw, sparse, representation of a PE file. It offers a view of the
75 : // contents of the file as would be mapped into memory, if the program
76 : // were loaded.
77 : template <typename ImageNtHeaders, DWORD MagicValidation>
78 : class PEFileBase : public PECoffFile<PEAddressSpaceTraits> {
79 : public:
80 : struct Signature;
81 :
82 : typedef core::AbsoluteAddress AbsoluteAddress;
83 : typedef core::FileOffsetAddress FileOffsetAddress;
84 : typedef core::RelativeAddress RelativeAddress;
85 : typedef core::SectionOffsetAddress SectionOffsetAddress;
86 :
87 : // A set of locations in the RVA address space where an address is
88 : // present and needs to be relocated.
89 : typedef std::set<RelativeAddress> RelocSet;
90 :
91 : // A map from locations in the RVA address space where an address is
92 : // present and needs to be relocated, to the absolute addresses they
93 : // refer to.
94 : typedef std::map<RelativeAddress, AbsoluteAddress> RelocMap;
95 :
96 : // Information about a single export.
97 : struct ExportInfo;
98 : typedef std::vector<ExportInfo> ExportInfoVector;
99 :
100 : // Information about a single import.
101 : struct ImportInfo;
102 : typedef std::vector<ImportInfo> ImportInfoVector;
103 :
104 : // Information about all imports for a given DLL.
105 : struct ImportDll;
106 : typedef std::vector<ImportDll> ImportDllVector;
107 :
108 : // Allow overloading of the following functions inherited from
109 : // PECoffFile.
110 : using PECoffFile<PEAddressSpaceTraits>::ReadImage;
111 : using PECoffFile<PEAddressSpaceTraits>::ReadImageString;
112 : using PECoffFile<PEAddressSpaceTraits>::GetImageData;
113 : using PECoffFile<PEAddressSpaceTraits>::Contains;
114 : using PECoffFile<PEAddressSpaceTraits>::GetSectionIndex;
115 : using PECoffFile<PEAddressSpaceTraits>::GetSectionHeader;
116 :
117 : // Construct a PEFileBase object not yet bound to any file.
118 E : PEFileBase() : dos_header_(NULL), nt_headers_(NULL) {}
119 :
120 : // Destroy this PEFileBase object, invalidating all pointers obtained
121 : // through GetImageData(), or headers returned by corresponding
122 : // accessor methods.
123 E : ~PEFileBase() {}
124 :
125 : // Read in the image file at @p path, making its data
126 : // available. A PE file reader may only read a single file.
127 : //
128 : // @param path the path to the file to read.
129 : // @returns true on success, false on error.
130 : bool Init(const base::FilePath& path);
131 :
132 : // Retrieve the signature of this PE file. May only be called after
133 : // a file has been read with Init().
134 : //
135 : // @param signature the object to copy the signature to.
136 : void GetSignature(Signature* signature) const;
137 :
138 : // Decode relocation information from the image, inserting the
139 : // results into @p relocs.
140 : //
141 : // TODO(siggi): Consider folding this member into ReadRelocs.
142 : //
143 : // @param relocs the set to which relocations are to be added.
144 : // @returns true on success, false on error.
145 : bool DecodeRelocs(RelocSet* relocs) const;
146 :
147 : // Retrieve relocation target addresses for the specified set of
148 : // relocations.
149 : //
150 : // @param relocs the set of relocations to look up.
151 : // @param reloc_values the map to which relocation--target pairs are
152 : // to be added.
153 : // @returns true on success, false on error.
154 : bool ReadRelocs(const RelocSet& relocs, RelocMap* reloc_values) const;
155 :
156 : // Decode import information from the image.
157 : //
158 : // @param imports where to place the decoded imports.
159 : // @returns true on success, false on error.
160 : bool DecodeImports(ImportDllVector* imports) const;
161 :
162 : // Decode export information from the image.
163 : //
164 : // @param exports where to place the decoded exports.
165 : // @returns true on success, false on error.
166 : bool DecodeExports(ExportInfoVector* exports) const;
167 :
168 : // Translate a relative address to an absolute address, based on the
169 : // preferred loading address of this PE file.
170 : //
171 : // @param rel the address to translate.
172 : // @param abs where to place the resulting address.
173 : // @returns true on success, false on error.
174 : bool Translate(RelativeAddress rel, AbsoluteAddress* abs) const;
175 :
176 : // Translate an absolute address to a relative address, based on the
177 : // preferred loading address of this PE file.
178 : //
179 : // @param abs the address to translate.
180 : // @param rel where to place the resulting address.
181 : // @returns true on success, false on error.
182 : bool Translate(AbsoluteAddress abs, RelativeAddress* rel) const;
183 :
184 : // Translate a file offset present in the on-disk file to the
185 : // relative address it maps to at run-time.
186 : //
187 : // @param offs the file offset to translate.
188 : // @param rel where to place the resulting address.
189 : // @returns true on success, false on error.
190 : bool Translate(FileOffsetAddress offs, RelativeAddress* rel) const;
191 :
192 : // Translate a relative address to the file offset it is mapped from
193 : // in the on-disk file.
194 : //
195 : // @param rel the address to translate.
196 : // @param offs where to place the resulting address.
197 : // @returns true on success, false on error.
198 : bool Translate(RelativeAddress rel, FileOffsetAddress* offs) const;
199 :
200 : // Translate a relative address to an offset in a section, based on the
201 : // preferred loading address of this PE file.
202 : //
203 : // @param relative_address the address to translate.
204 : // @param section_offset_address where to place the resulting address.
205 : // @returns true on success, false on error.
206 : bool Translate(RelativeAddress relative_address,
207 : SectionOffsetAddress* section_offset_address) const;
208 :
209 : // Absolute address wrappers around the same-named methods from
210 : // PECoffFile, which deal with relative addresses. Each of the
211 : // following method is equivalent to applying Translate() to the
212 : // absolute address then calling the corresponding RVA-based method.
213 : //
214 : // @see pe::PECoffFile @{
215 : bool ReadImage(AbsoluteAddress addr, void* data, size_t len) const;
216 : bool ReadImageString(AbsoluteAddress addr, std::string* str) const;
217 : const uint8* GetImageData(AbsoluteAddress addr, size_t len) const;
218 : uint8* GetImageData(AbsoluteAddress addr, size_t len);
219 : bool Contains(AbsoluteAddress addr, size_t len) const;
220 : size_t GetSectionIndex(AbsoluteAddress addr, size_t len) const;
221 : const IMAGE_SECTION_HEADER* GetSectionHeader(AbsoluteAddress addr,
222 : size_t len) const;
223 : // @}
224 :
225 : // Retrieve the index of the first section with the specified name.
226 : //
227 : // @param name the name of the section to look up.
228 : // @returns the index of the section, or kInvalidSection if none is
229 : // found.
230 : size_t GetSectionIndex(const char* name) const;
231 :
232 : // Retrieve a pointer to the header structure of the first section
233 : // with the specified name.
234 : //
235 : // @param name the name of the section to look up.
236 : // @returns a pointer to the header structure of the section, or
237 : // NULL if none is found.
238 : const IMAGE_SECTION_HEADER* GetSectionHeader(const char* name) const;
239 :
240 : // @returns a pointer to the DOS header structure of this PE file.
241 : const IMAGE_DOS_HEADER* dos_header() const;
242 :
243 : // @returns a pointer to the NT headers structure of this PE file.
244 : const ImageNtHeaders* nt_headers() const;
245 :
246 : // Subtract the preferred loading address of this PE file from the
247 : // specified displacement.
248 : //
249 : // @param abs_disp the value to translate.
250 : // @returns the new offset, relative to the preferred loading
251 : // address.
252 : size_t AbsToRelDisplacement(size_t abs_disp) const;
253 :
254 : private:
255 : // Read all NT headers, including common COFF headers. Insert
256 : // a range covering all headers.
257 : //
258 : // @returns true on success, false on error.
259 : bool ReadHeaders();
260 :
261 : const IMAGE_DOS_HEADER* dos_header_;
262 : const ImageNtHeaders* nt_headers_;
263 :
264 : DISALLOW_COPY_AND_ASSIGN(PEFileBase);
265 : };
266 :
267 : // A parsed PE file signature; a signature describes some module. It
268 : // offers access to the exploded components of the PE signature,
269 : // comparison, and serialization.
270 : template<class ImageNtHeaders, DWORD MagicValidation>
271 : struct PEFileBase<ImageNtHeaders, MagicValidation>::Signature {
272 : // Construct a default all-zero signature.
273 E : Signature() : module_size(0), module_checksum(0), module_time_date_stamp(0) {
274 E : }
275 :
276 : // Constructor with full initializer list.
277 : // @param path The path to the module.
278 : // @param base_address The base address of the module.
279 : // @param module_size The size of the module.
280 : // @param module_checksum The checksum of the module.
281 : // @param module_time_date_stamp The time-date stamp of the module.
282 : Signature(const base::StringPiece16& path,
283 : AbsoluteAddress base_address,
284 : size_t module_size,
285 : uint32 module_checksum,
286 : uint32 module_time_date_stamp)
287 : : path(path.begin(), path.end()),
288 : base_address(base_address),
289 : module_size(module_size),
290 : module_checksum(module_checksum),
291 E : module_time_date_stamp(module_time_date_stamp) {
292 E : }
293 :
294 : // Copy constructor.
295 : // @param rhs Object to copy.
296 : Signature(const Signature& rhs)
297 : : path(rhs.path),
298 : base_address(rhs.base_address),
299 : module_size(rhs.module_size),
300 : module_checksum(rhs.module_checksum),
301 E : module_time_date_stamp(rhs.module_time_date_stamp) {
302 E : }
303 :
304 : // The original module path, kept for convenience. This should
305 : // always be an absolute path.
306 : //
307 : // TODO(chrisha): Check that the path is absolute at all sites where this
308 : // path is used.
309 : std::wstring path;
310 :
311 : // The four signature components. The order of these fields is the same as
312 : // Sawbuck's ModuleInformation, and TraceModuleData, for consistency.
313 : // @{
314 : // The preferred loading address of the module.
315 : AbsoluteAddress base_address;
316 :
317 : // The on-disk size in bytes of the module file.
318 : size_t module_size;
319 :
320 : // A 32-bit checksum of the module file.
321 : uint32 module_checksum;
322 :
323 : // The on-disk modification time of the module file.
324 : uint32 module_time_date_stamp;
325 : // @}
326 :
327 : // Compare the specified signature with this one. Signatures are
328 : // consistent with one another if their four components match; paths
329 : // may differ.
330 : //
331 : // @param signature the signature to compare to.
332 : // @returns true if the signatures are consistent, false otherwise.
333 : bool IsConsistent(const Signature& signature) const;
334 :
335 : // Compare the specified signature with this one in the same way as
336 : // IsConsistent(), except that in addition signatures may differ.
337 : //
338 : // @param signature the signature to compare to.
339 : // @returns true if the signatures are consistent except possibly
340 : // for the signature, false otherwise.
341 : bool IsConsistentExceptForChecksum(const Signature& signature) const;
342 :
343 : // Compare the specified signature with this one. Signatures are
344 : // equal if their paths are the same and they are consistent.
345 : //
346 : // @param signature the signature to compare to.
347 : // @returns true if the signatures are equal, false otherwise.
348 : // @note We need an equality operator for serialization unittests.
349 : bool operator==(const Signature& signature) const;
350 :
351 : // Serialize this signature to @p out_archive.
352 : //
353 : // @param out_archive the archive to serialize to.
354 : // @returns true on success, false on error.
355 : bool Save(core::OutArchive* out_archive) const;
356 :
357 : // Deserializea a signature from @p in_archive, replacing the
358 : // contents of this structure.
359 : //
360 : // @param in_archive the archive to deserialize from.
361 : // @returns true on success, false on error.
362 : bool Load(core::InArchive* in_archive);
363 : };
364 :
365 : // A structure exposing information about a single export.
366 : template<class ImageNtHeaders, DWORD MagicValidation>
367 : struct PEFileBase<ImageNtHeaders, MagicValidation>::ExportInfo {
368 : // The address of the exported function.
369 : RelativeAddress function;
370 :
371 : // The name of the export, if any.
372 : std::string name;
373 :
374 : // The export forward string, if any.
375 : std::string forward;
376 :
377 : // The export ordinal.
378 : uint16 ordinal;
379 : };
380 :
381 : // A structure exposing information about a single import.
382 : template<class ImageNtHeaders, DWORD MagicValidation>
383 : struct PEFileBase<ImageNtHeaders, MagicValidation>::ImportInfo {
384 : // Construct an ImportInfo structure from its components.
385 : //
386 : // @param h the ordinal hint.
387 : // @param o the function ordinal.
388 : // @param n the function name.
389 : ImportInfo(uint16 h, uint16 o, const char* n)
390 : : hint(h),
391 : ordinal(o),
392 E : function(n) {
393 E : }
394 :
395 : // Construct an ImportInfo structure for a named function with no
396 : // ordinal information.
397 : //
398 : // @param function_name the function name.
399 : explicit ImportInfo(const char* function_name)
400 : : hint(0),
401 : ordinal(0),
402 : function(function_name) {
403 : }
404 :
405 : // Construct an ImportInfo structure for a function referenced by
406 : // ordinal.
407 : //
408 : // @param function_ordinal the function ordinal.
409 : explicit ImportInfo(uint16 function_ordinal)
410 : : hint(0),
411 : ordinal(function_ordinal) {
412 : }
413 :
414 : // Construct a default all-zero ImportInfo structure.
415 E : ImportInfo() : hint(0), ordinal(0) {
416 E : }
417 :
418 : // Compare the specified structure with this one. ImportInfo
419 : // structures are equal if their components are equal.
420 : //
421 : // @param o the structure to compare to.
422 : // @returns true if the signatures are equal, false otherwise.
423 E : bool operator==(const ImportInfo& o) const {
424 E : return hint == o.hint && ordinal == o.ordinal && function == o.function;
425 E : }
426 :
427 : // The loader ordinal hint for this import.
428 : uint16 hint;
429 :
430 : // The ordinal of the function if the function field is empty.
431 : uint16 ordinal;
432 :
433 : // The name of the function, or the empty string for imports by
434 : // ordinal.
435 : std::string function;
436 : };
437 :
438 : // A structure holding information about all imports from a given DLL.
439 : template<class ImageNtHeaders, DWORD MagicValidation>
440 : struct PEFileBase<ImageNtHeaders, MagicValidation>::ImportDll {
441 : // Construct a default empty ImportDll structure.
442 E : ImportDll() {
443 E : memset(&desc, 0, sizeof(desc));
444 E : desc.ForwarderChain = SIZE_MAX;
445 E : }
446 :
447 : // The import descriptor.
448 : IMAGE_IMPORT_DESCRIPTOR desc;
449 :
450 : // Name of the DLL imported.
451 : std::string name;
452 :
453 : // A vector of ImportInfo structures, one for each imported
454 : // function.
455 : ImportInfoVector functions;
456 : };
457 :
458 : typedef PEFileBase<IMAGE_NT_HEADERS32, IMAGE_NT_OPTIONAL_HDR32_MAGIC> PEFile;
459 :
460 : // Please note that 64-bit PE File support is only currently tested for
461 : // manipulation of imports.
462 : typedef PEFileBase<IMAGE_NT_HEADERS64, IMAGE_NT_OPTIONAL_HDR64_MAGIC> PEFile64;
463 :
464 : // We alias the PE file signature to ModuleInformation, which is used to track
465 : // modules in traces.
466 : typedef PEFile::Signature ModuleInformation;
467 : } // namespace pe
468 :
469 : #include "syzygy/pe/pe_file_impl.h"
470 :
471 : #endif // SYZYGY_PE_PE_FILE_H_
|