1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #ifndef SYZYGY_PDB_PDB_DATA_H_
16 : #define SYZYGY_PDB_PDB_DATA_H_
17 :
18 : #include <windows.h>
19 :
20 : #include <map>
21 : #include <string>
22 : #include <vector>
23 :
24 : #include "base/basictypes.h"
25 : #include "syzygy/common/assertions.h"
26 : #include "syzygy/pdb/pdb_constants.h"
27 :
28 m : namespace pdb {
29 :
30 : // Pdb Info Stream Header; this is found at the start of stream #1.
31 m : struct PdbInfoHeader70 {
32 : // Equal to kPdbCurrentVersion for PDBs seen from VS 9.0.
33 m : uint32 version;
34 : // This looks to be the time of the PDB file creation.
35 m : uint32 timestamp;
36 : // Updated every time the PDB file is written.
37 m : uint32 pdb_age;
38 : // This must match the GUID stored off the image's debug directory.
39 m : GUID signature;
40 m : };
41 m : COMPILE_ASSERT_IS_POD_OF_SIZE(PdbInfoHeader70, 28);  // 3 * uint32 (12 bytes) + GUID (16 bytes).
42 :
43 : // An offset/cb pair that we find in the type info hash header
44 : // (see TypeInfoHashHeader). Its meaning is not totally deciphered yet.
45 m : struct OffsetCb {
46 m : uint32 offset;  // NOTE(review): what this offset is relative to is not shown here.
47 m : uint32 cb;  // NOTE(review): presumably a "count of bytes"; confirm.
48 m : };
49 m : COMPILE_ASSERT_IS_POD_OF_SIZE(OffsetCb, 8);  // Two uint32 fields.
50 :
51 : // Type Info Stream Hash header, embedded in TypeInfoHeader as its
52 : // |type_info_hash| member. The field names are known, but the meaning of
53 : // their contents has not been deciphered yet.
54 m : struct TypeInfoHashHeader {
55 m : uint16 stream_number;
56 m : uint16 padding;  // Fills bytes 2-3, so |hash_key| starts at offset 4.
57 m : uint32 hash_key;
58 m : uint32 cb_hash_buckets;
59 m : OffsetCb offset_cb_hash_vals;
60 m : OffsetCb offset_cb_type_info_offset;
61 m : OffsetCb offset_cb_hash_adj;
62 m : };
63 m : COMPILE_ASSERT_IS_POD_OF_SIZE(TypeInfoHashHeader, 36);  // 2 + 2 + 4 + 4 + 3 * 8 bytes.
64 :
65 : // Type Info Stream Header; this is found at the beginning of stream #2.
66 : // See http://moyix.blogspot.ca/2007_10_01_archive.html
67 m : struct TypeInfoHeader {
68 m : uint32 version;
69 m : uint32 len;  // NOTE(review): presumably the length of this header; confirm.
70 m : uint32 type_min;  // NOTE(review): presumably the lowest type index in the stream; confirm.
71 m : uint32 type_max;  // NOTE(review): presumably the upper type index bound; inclusiveness unknown.
72 m : uint32 type_info_data_size;
73 m : TypeInfoHashHeader type_info_hash;  // 36-byte hash header, embedded by value.
74 m : };
75 : // We coerce a stream of bytes to this structure, so we require it to be
76 : // exactly 56 bytes in size: 5 * uint32 (20 bytes) + TypeInfoHashHeader (36).
77 m : COMPILE_ASSERT_IS_POD_OF_SIZE(TypeInfoHeader, 56);
78 :
79 : // Dbi Info Stream Header; this is found at the start of stream #3.
80 : // See http://code.google.com/p/pdbparser/wiki/DBI_Format
81 m : struct DbiHeader {
82 m : int32 signature;
83 m : uint32 version;
84 m : uint32 age;
85 m : int16 global_symbol_info_stream;  // NOTE(review): presumably a stream number; confirm.
86 m : uint16 pdb_dll_version;
87 m : int16 public_symbol_info_stream;  // NOTE(review): presumably a stream number; confirm.
88 m : uint16 pdb_dll_build_major;
89 m : int16 symbol_record_stream;  // NOTE(review): presumably a stream number; confirm.
90 m : uint16 pdb_dll_build_minor;
91 m : uint32 gp_modi_size;  // NOTE(review): the *_size fields presumably hold substream sizes in bytes; confirm.
92 m : uint32 section_contribution_size;
93 m : uint32 section_map_size;
94 m : uint32 file_info_size;
95 m : uint32 ts_map_size;
96 m : uint32 mfc_index;
97 m : uint32 dbg_header_size;
98 m : uint32 ec_info_size;
99 m : uint16 flags;
100 m : uint16 machine;
101 m : uint32 reserved;
102 m : };
103 : // We coerce a stream of bytes to this structure, so we require it to be
104 : // exactly 64 bytes in size (3 * 4 + 6 * 2 + 8 * 4 + 2 * 2 + 4 bytes).
105 m : COMPILE_ASSERT_IS_POD_OF_SIZE(DbiHeader, 64);
106 :
107 : // Dbi Debug Header
108 : // See http://ccimetadata.codeplex.com/SourceControl/changeset/view/52123#96529
109 : // From introspection, it looks like each field is a stream number, or -1 if
110 : // the corresponding stream is not defined.
111 m : struct DbiDbgHeader {
112 m : int16 fpo;
113 m : int16 exception;
114 m : int16 fixup;
115 m : int16 omap_to_src;
116 m : int16 omap_from_src;
117 m : int16 section_header;
118 m : int16 token_rid_map;
119 m : int16 x_data;
120 m : int16 p_data;
121 m : int16 new_fpo;
122 m : int16 section_header_origin;
123 m : };
124 : // We coerce a stream of bytes to this structure, so we require it to be
125 : // exactly 22 bytes in size (11 int16 fields).
126 m : COMPILE_ASSERT_IS_POD_OF_SIZE(DbiDbgHeader, 22);
127 :
128 : // Dbi Section Contrib
129 : // Represents an element of the section contrib substream of the Dbi stream.
130 m : struct DbiSectionContrib {
131 m : int16 section;
132 m : int16 pad1;  // Fills bytes 2-3, so |offset| starts at offset 4.
133 m : int32 offset;
134 m : int32 size;
135 m : uint32 flags;
136 m : int16 module;
137 m : int16 pad2;  // Fills bytes 18-19, so |data_crc| starts at offset 20.
138 m : uint32 data_crc;
139 m : uint32 reloc_crc;
140 m : };
141 : // We coerce a stream of bytes to this structure, so we require it to be
142 : // exactly 28 bytes in size (4 * 2 + 5 * 4 bytes).
143 m : COMPILE_ASSERT_IS_POD_OF_SIZE(DbiSectionContrib, 28);
144 :
145 : // Dbi Module Info
146 : // Represents the fixed-size part of an element of the module info substream
147 : // of the Dbi stream. It doesn't contain the full module and object name.
148 m : struct DbiModuleInfoBase {
149 m : uint32 opened;
150 m : DbiSectionContrib section;  // 28-byte section contrib record, embedded by value.
151 m : uint16 flags;
152 m : int16 stream;  // NOTE(review): presumably the module's stream number; confirm.
153 m : uint32 symbol_bytes;
154 m : uint32 old_lines_bytes;
155 m : uint32 lines_bytes;
156 m : int16 num_files;
157 m : uint16 padding;  // Fills bytes 50-51, so |offsets| starts at offset 52.
158 m : uint32 offsets;
159 m : uint32 num_source;
160 m : uint32 num_compiler;
161 : // There are two trailing null-terminated 8-bit strings, the first being the
162 : // module_name and the second being the object_name. Then this structure is
163 : // padded with zeros to have a length that is a multiple of 4.
164 m : };
165 : // We coerce a stream of bytes to this structure, so we require it to be
166 : // exactly 64 bytes in size (28 for |section| + 7 * 4 + 4 * 2 bytes).
167 m : COMPILE_ASSERT_IS_POD_OF_SIZE(DbiModuleInfoBase, 64);
168 :
169 : // Dbi Section Map
170 : // Represents an element of the section map substream of the Dbi stream.
171 m : struct DbiSectionMapItem {
172 m : uint8 flags;
173 m : uint8 section_type;
174 : // This field hasn't been deciphered but it is always 0x00000000 or 0xFFFFFFFF
175 : // and modifying it doesn't seem to invalidate the PDB.
176 m : uint16 unknown_data_1[2];  // 4 bytes at offset 2; NOTE(review): presumably split into uint16s to avoid alignment padding.
177 m : uint16 section_number;
178 : // Same thing as for unknown_data_1.
179 m : uint16 unknown_data_2[2];
180 : // Value added to the address offset when calculating the RVA.
181 m : uint32 rva_offset;
182 m : uint32 section_length;
183 m : };
184 : // We coerce a stream of bytes to this structure, so we require it to be
185 : // exactly 20 bytes in size (2 * 1 + 5 * 2 + 2 * 4 bytes).
186 m : COMPILE_ASSERT_IS_POD_OF_SIZE(DbiSectionMapItem, 20);
187 :
188 : // Header of the string table found in the name stream and in the EC info
189 : // header of the debug info stream.
190 m : struct StringTableHeader {
191 : // The signature, which is |kPdbStringTableSignature|.
192 m : uint32 signature;
193 :
194 : // The version, which is |kPdbStringTableVersion|.
195 m : uint32 version;
196 :
197 : // The total size, in bytes, of the concatenated null-terminated strings
198 : // that follow this header.
199 m : uint32 size;
200 m : };
201 m : COMPILE_ASSERT_IS_POD_OF_SIZE(StringTableHeader, 12);  // Three uint32 fields.
202 :
203 : // Header of a symbol record from the symbol record stream.
204 m : struct SymbolRecordHeader {
205 : // Length of the symbol record in bytes, without this field. The length
206 : // including this field is always a multiple of 4.
207 m : uint16 length;
208 :
209 : // Type of the symbol record. It must be a value from Microsoft_Cci_Pdb::SYM.
210 m : uint16 type;
211 m : };
212 m : COMPILE_ASSERT_IS_POD_OF_SIZE(SymbolRecordHeader, 4);  // Two uint16 fields.
213 :
214 : // Header of the public stream.
215 : // Its layout comes from observations made on multiple PDB files.
216 m : struct PublicStreamHeader {
217 : // The offset of the sorted table of public symbols, in bytes and relative
218 : // to the |unknown| field of this header.
219 m : uint32 sorted_symbols_offset;
220 :
221 : // The size of the sorted table of public symbols, in bytes.
222 : // This is equal to 4 times the number of public symbols.
223 m : uint32 sorted_symbols_size;
224 :
225 : // These fields are always equal to zero.
226 m : uint32 zero_0;
227 m : uint32 zero_1;
228 m : uint32 zero_2;
229 m : uint32 zero_3;
230 :
231 : // Padding field, which can have any value.
232 m : uint32 padding;
233 :
234 : // An unknown field that is always equal to -1 (0xFFFFFFFF, as it is unsigned).
235 m : uint32 unknown;
236 :
237 : // The signature of the stream, which is equal to |kPublicStreamSignature|.
238 m : uint32 signature;
239 :
240 : // The size of the table of public symbol offsets.
241 : // This is equal to 8 times the number of public symbols.
242 m : uint32 offset_table_size;
243 :
244 : // The size of the hash table of public symbols, in bytes. This includes
245 : // a 512-byte bitset with a 1 in used buckets followed by an array identifying
246 : // a representative of each bucket.
247 m : uint32 hash_table_size;
248 m : };
249 m : COMPILE_ASSERT_IS_POD_OF_SIZE(PublicStreamHeader, 44);  // Eleven uint32 fields.
250 :
251 : // Structure found in the public stream that gives the offset of a public
252 : // symbol in the symbol record stream. This comes from observations made on
253 : // multiple PDB files.
254 m : struct PublicStreamSymbolOffset {
255 : // Offset of the symbol in the symbol record stream.
256 m : uint32 offset;
257 :
258 : // An unknown field that is always equal to 1.
259 m : uint32 unknown;
260 m : };
261 m : COMPILE_ASSERT_IS_POD_OF_SIZE(PublicStreamSymbolOffset, 8);  // Two uint32 fields.
262 :
263 : // Multi-Stream Format (MSF) Header
264 : // See http://code.google.com/p/pdbparser/wiki/MSF_Format
265 m : struct PdbHeader {
266 m : uint8 magic_string[kPdbHeaderMagicStringSize];  // NOTE(review): constant not defined in this file; presumably from pdb_constants.h.
267 m : uint32 page_size;
268 m : uint32 free_page_map;
269 m : uint32 num_pages;
270 m : uint32 directory_size;
271 m : uint32 reserved;
272 m : uint32 root_pages[kPdbMaxDirPages];  // NOTE(review): constant not defined in this file; presumably from pdb_constants.h.
273 m : };
274 m : COMPILE_ASSERT_IS_POD_OF_SIZE(PdbHeader, 344);  // 20 bytes of scalar fields + the two arrays; assumes no padding.
275 :
276 : // This is for parsing the FIXUP stream in PDB files generated with the
277 : // '/PROFILE' flag. The form of this struct was inferred from looking at
278 : // binary dumps of FIXUP streams and correlating them with the disassembly
279 : // of the image they refer to. These efforts are documented here:
280 : // http://go/syzygy-fixups
281 m : struct PdbFixup {
282 m : enum Type {  // Values held by the |type| bit-field below.
283 m : TYPE_ABSOLUTE = 0x6,
284 m : TYPE_RELATIVE = 0x7,
285 m : TYPE_OFFSET_32BIT = 0xB,
286 m : TYPE_OFFSET_8BIT = 0xD,
287 m : TYPE_PC_RELATIVE = 0x14,
288 m : };
289 :
290 m : enum Flags {  // Bit masks applied to the |flags| bit-field below.
291 m : FLAG_IS_DATA = 0x4000,  // Tested by is_data().
292 m : FLAG_REFERS_TO_CODE = 0x8000,  // Tested by refers_to_code().
293 m : FLAG_UNKNOWN = 0x3fff,  // Mask of the 14 remaining bits of the 16-bit |flags|.
294 m : };
295 :
296 : // The fixup header, readable as one raw 32-bit value or as two 16-bit fields.
297 m : union {
298 m : uint32 header;
299 m : struct {
300 m : Type type:16;  // NOTE(review): bit-field placement is implementation-defined; presumably the low 16 bits of |header| - confirm.
301 m : unsigned int flags:16;
302 m : };
303 m : };
304 : // The location of the reference in the image, stored as an RVA. The reference
305 : // will always take 4-bytes in the image.
306 m : uint32 rva_location;
307 : // The base to which this reference is tied, stored as an RVA.
308 m : uint32 rva_base;
309 :
310 : // This validates that the fixup is of a known type. Any FIXUP that does not
311 : // conform to a type that we have already witnessed in sample data will cause
312 : // this to return false. Only declared here; the definition lives elsewhere.
313 m : bool ValidHeader() const;
314 :
315 : // Returns true if FLAG_REFERS_TO_CODE is set: refers to code as opposed to data.
316 m : bool refers_to_code() const { return (flags & FLAG_REFERS_TO_CODE) != 0; }
317 :
318 : // Returns true if FLAG_IS_DATA is set: the reference is stored in data as
319 : // opposed to being part of an instruction. This is not always reported properly, as
320 : // immediate operands to 'jmp' instructions in thunks (__imp__function_name) set this bit.
321 m : bool is_data() const { return (flags & FLAG_IS_DATA) != 0; }
322 :
323 : // Returns true if the fixup is an offset from some address. Only declared here.
324 m : bool is_offset() const;
325 :
326 : // This function returns the size of the reference as encoded at the
327 : // address 'rva_location'. Only declared here; the definition lives elsewhere.
328 m : size_t size() const;
329 m : };
330 : // We coerce a stream of bytes to this structure, so we require it to be
331 : // exactly 12 bytes in size (4-byte header union + two uint32 RVAs).
332 m : COMPILE_ASSERT_IS_POD_OF_SIZE(PdbFixup, 12);
333 :
334 m : } // namespace pdb
335 :
336 : #endif // SYZYGY_PDB_PDB_DATA_H_
|