1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #ifndef SYZYGY_PDB_PDB_DATA_H_
16 : #define SYZYGY_PDB_PDB_DATA_H_
17 :
18 : #include <windows.h>
19 :
20 : #include <map>
21 : #include <string>
22 : #include <vector>
23 :
24 : #include "base/basictypes.h"
25 : #include "syzygy/common/assertions.h"
26 : #include "syzygy/msf/msf_constants.h"
27 : #include "syzygy/pdb/pdb_constants.h"
28 :
29 m : namespace pdb {
30 :
31 : // Pdb Info Stream Header, this is at the start of stream #1.
32 m : struct PdbInfoHeader70 {
33 : // Equal to kPdbCurrentVersion for PDBs seen from VS 9.0.
34 m : uint32 version;
35 : // This looks to be the time of the PDB file creation.
36 m : uint32 timestamp;
37 : // Updated every time the PDB file is written.
38 m : uint32 pdb_age;
39 : // This must match the GUID stored off the image's debug directory.
40 m : GUID signature;
41 m : };
42 m : COMPILE_ASSERT_IS_POD_OF_SIZE(PdbInfoHeader70, 28);
43 :
44 : // A structure that we find in the type info hash header, this is not totally
45 : // deciphered yet.
46 m : struct OffsetCb {
47 m : uint32 offset;
48 m : uint32 cb;
49 m : };
50 m : COMPILE_ASSERT_IS_POD_OF_SIZE(OffsetCb, 8);
51 :
52 : // Type Info Stream Hash, this is contained in the type info header. This part
53 : // hasn't been deciphered yet (the field names are known but we still need to
54 : // find out what their content mean).
55 m : struct TypeInfoHashHeader {
56 m : uint16 stream_number;
57 m : uint16 padding;
58 m : uint32 hash_key;
59 m : uint32 cb_hash_buckets;
60 m : OffsetCb offset_cb_hash_vals;
61 m : OffsetCb offset_cb_type_info_offset;
62 m : OffsetCb offset_cb_hash_adj;
63 m : };
64 m : COMPILE_ASSERT_IS_POD_OF_SIZE(TypeInfoHashHeader, 36);
65 :
66 : // Type Info Stream Header, this is at the beginning of stream #2.
67 : // See http://moyix.blogspot.ca/2007_10_01_archive.html
68 m : struct TypeInfoHeader {
69 m : uint32 version;
70 m : uint32 len;
71 m : uint32 type_min;
72 m : uint32 type_max;
73 m : uint32 type_info_data_size;
74 m : TypeInfoHashHeader type_info_hash;
75 m : };
76 : // We coerce a stream of bytes to this structure, so we require it to be
77 : // exactly 56 bytes in size.
78 m : COMPILE_ASSERT_IS_POD_OF_SIZE(TypeInfoHeader, 56);
79 :
80 : // Dbi Info Stream Header, this is at the start of stream #3.
81 : // See http://code.google.com/p/pdbparser/wiki/DBI_Format
82 m : struct DbiHeader {
83 m : int32 signature;
84 m : uint32 version;
85 m : uint32 age;
86 m : int16 global_symbol_info_stream;
87 m : uint16 pdb_dll_version;
88 m : int16 public_symbol_info_stream;
89 m : uint16 pdb_dll_build_major;
90 m : int16 symbol_record_stream;
91 m : uint16 pdb_dll_build_minor;
92 m : uint32 gp_modi_size;
93 m : uint32 section_contribution_size;
94 m : uint32 section_map_size;
95 m : uint32 file_info_size;
96 m : uint32 ts_map_size;
97 m : uint32 mfc_index;
98 m : uint32 dbg_header_size;
99 m : uint32 ec_info_size;
100 m : uint16 flags;
101 m : uint16 machine;
102 m : uint32 reserved;
103 m : };
104 : // We coerce a stream of bytes to this structure, so we require it to be
105 : // exactly 64 bytes in size.
106 m : COMPILE_ASSERT_IS_POD_OF_SIZE(DbiHeader, 64);
107 :
108 : // Dbi Debug Header
109 : // See http://ccimetadata.codeplex.com/SourceControl/changeset/view/52123#96529
110 : // From introspection, it looks like these are stream numbers or -1 if not
111 : // defined.
112 m : struct DbiDbgHeader {
113 m : int16 fpo;
114 m : int16 exception;
115 m : int16 fixup;
116 m : int16 omap_to_src;
117 m : int16 omap_from_src;
118 m : int16 section_header;
119 m : int16 token_rid_map;
120 m : int16 x_data;
121 m : int16 p_data;
122 m : int16 new_fpo;
123 m : int16 section_header_origin;
124 m : };
125 : // We coerce a stream of bytes to this structure, so we require it to be
126 : // exactly 22 bytes in size.
127 m : COMPILE_ASSERT_IS_POD_OF_SIZE(DbiDbgHeader, 22);
128 :
129 : // Dbi Section Contrib
130 : // Represent an element for the section contrib substream of the Dbi stream.
131 m : struct DbiSectionContrib {
132 m : int16 section;
133 m : int16 pad1;
134 m : int32 offset;
135 m : int32 size;
136 m : uint32 flags;
137 m : int16 module;
138 m : int16 pad2;
139 m : uint32 data_crc;
140 m : uint32 reloc_crc;
141 m : };
142 : // We coerce a stream of bytes to this structure, so we require it to be
143 : // exactly 28 bytes in size.
144 m : COMPILE_ASSERT_IS_POD_OF_SIZE(DbiSectionContrib, 28);
145 :
146 : // Dbi Module Info
147 : // Represent an element for the module info substream of the Dbi stream. This
148 : // struct doesn't contain the full module and object name.
149 m : struct DbiModuleInfoBase {
150 m : uint32 opened;
151 m : DbiSectionContrib section;
152 m : uint16 flags;
153 m : int16 stream;
154 m : uint32 symbol_bytes;
155 m : uint32 old_lines_bytes;
156 m : uint32 lines_bytes;
157 m : int16 num_files;
158 m : uint16 padding;
159 m : uint32 offsets;
160 m : uint32 num_source;
161 m : uint32 num_compiler;
162 : // There are two trailing null-terminated 8-bit strings, the first being the
163 : // module_name and the second being the object_name. Then this structure is
164 : // padded with zeros to have a length that is a multiple of 4.
165 m : };
166 : // We coerce a stream of bytes to this structure, so we require it to be
167 : // exactly 64 bytes in size.
168 m : COMPILE_ASSERT_IS_POD_OF_SIZE(DbiModuleInfoBase, 64);
169 :
170 : // Dbi Section Map
171 : // Represent an element for the section map substream of the Dbi stream.
172 m : struct DbiSectionMapItem {
173 m : uint8 flags;
174 m : uint8 section_type;
175 : // This field hasn't been deciphered but it is always 0x00000000 or 0xFFFFFFFF
176 : // and modifying it doesn't seem to invalidate the PDB.
177 m : uint16 unknown_data_1[2];
178 m : uint16 section_number;
179 : // Same thing as for unknown_data_1.
180 m : uint16 unknown_data_2[2];
181 : // Value added to the address offset when calculating the RVA.
182 m : uint32 rva_offset;
183 m : uint32 section_length;
184 m : };
185 : // We coerce a stream of bytes to this structure, so we require it to be
186 : // exactly 20 bytes in size.
187 m : COMPILE_ASSERT_IS_POD_OF_SIZE(DbiSectionMapItem, 20);
188 :
189 : // Header of the string table found in the name stream and in the EC info header
190 : // of the debug info stream.
191 m : struct StringTableHeader {
192 : // The signature, which is |kPdbStringTableSignature|.
193 m : uint32 signature;
194 :
195 : // The version, which is |kPdbStringTableVersion|.
196 m : uint32 version;
197 :
198 : // The size of the concatenated null-terminated strings that follow,
199 : // in bytes.
200 m : uint32 size;
201 m : };
202 m : COMPILE_ASSERT_IS_POD_OF_SIZE(StringTableHeader, 12);
203 :
204 : // Header of a symbol record from the symbol record stream.
205 m : struct SymbolRecordHeader {
206 : // Length of the symbol record in bytes, without this field. The length
207 : // including this field is always a multiple of 4.
208 m : uint16 length;
209 :
210 : // Type of the symbol record. If must be a value from Microsoft_Cci_Pdb::SYM.
211 m : uint16 type;
212 m : };
213 m : COMPILE_ASSERT_IS_POD_OF_SIZE(SymbolRecordHeader, 4);
214 :
215 : // Header of the public stream.
216 : // This comes from observations made on multiple PDB files.
217 m : struct PublicStreamHeader {
218 : // The offset of the sorted table of public symbols, in bytes and relative
219 : // to the |unknown| field of this header.
220 m : uint32 sorted_symbols_offset;
221 :
222 : // The size of the sorted table of public symbols, in bytes.
223 : // This is equal to 4 times the number of public symbols.
224 m : uint32 sorted_symbols_size;
225 :
226 : // These fields are always equal to zero.
227 m : uint32 zero_0;
228 m : uint32 zero_1;
229 m : uint32 zero_2;
230 m : uint32 zero_3;
231 :
232 : // Padding field, which can have any value.
233 m : uint32 padding;
234 :
235 : // An unknown field that is always equal to -1.
236 m : uint32 unknown;
237 :
238 : // The signature of the stream, which is equal to |kPublicStreamSignature|.
239 m : uint32 signature;
240 :
241 : // The size of the table of public symbol offsets.
242 : // This is equal to 8 times the number of public symbols.
243 m : uint32 offset_table_size;
244 :
245 : // The size of the hash table of public symbols, in bytes. This includes
246 : // a 512-byte bitset with a 1 in used buckets followed by an array identifying
247 : // a representative of each bucket.
248 m : uint32 hash_table_size;
249 m : };
250 m : COMPILE_ASSERT_IS_POD_OF_SIZE(PublicStreamHeader, 44);
251 :
252 : // Structure found in the public stream to define the offset of a public symbol
253 : // in the symbol record stream. This comes from observations made on multiple
254 : // PDB files.
255 m : struct PublicStreamSymbolOffset {
256 : // Offset of the symbol in the symbol record stream.
257 m : uint32 offset;
258 :
259 : // An unknown field that is always equal to 1.
260 m : uint32 unknown;
261 m : };
262 m : COMPILE_ASSERT_IS_POD_OF_SIZE(PublicStreamSymbolOffset, 8);
263 :
264 : // Multi-Stream Format (MSF) Header
265 : // See http://code.google.com/p/pdbparser/wiki/MSF_Format
266 m : struct PdbHeader {
267 m : uint8 magic_string[msf::kMsfHeaderMagicStringSize];
268 m : uint32 page_size;
269 m : uint32 free_page_map;
270 m : uint32 num_pages;
271 m : uint32 directory_size;
272 m : uint32 reserved;
273 m : uint32 root_pages[msf::kMsfMaxDirPages];
274 m : };
275 m : COMPILE_ASSERT_IS_POD_OF_SIZE(PdbHeader, 344);
276 :
277 : // This is for parsing the FIXUP stream in PDB files generated with the
278 : // '/PROFILE' flag. The form of this struct was inferred from looking at
279 : // binary dumps of FIXUP streams and correlating them with the disassembly
280 : // of the image they refer to. These efforts are documented here:
281 : // http://go/syzygy-fixups
282 m : struct PdbFixup {
283 m : enum Type {
284 m : TYPE_ABSOLUTE = 0x6,
285 m : TYPE_RELATIVE = 0x7,
286 m : TYPE_OFFSET_32BIT = 0xB,
287 m : TYPE_OFFSET_8BIT = 0xD,
288 m : TYPE_PC_RELATIVE = 0x14,
289 m : };
290 :
291 m : enum Flags {
292 m : FLAG_IS_DATA = 0x4000,
293 m : FLAG_REFERS_TO_CODE = 0x8000,
294 m : FLAG_UNKNOWN = 0x3fff,
295 m : };
296 :
297 : // The fixup header.
298 m : union {
299 m : uint32 header;
300 m : struct {
301 m : Type type:16;
302 m : unsigned int flags:16;
303 m : };
304 m : };
305 : // The location of the reference in the image, stored as an RVA. The reference
306 : // will always take 4-bytes in the image.
307 m : uint32 rva_location;
308 : // The base to which this reference is tied, stored as an RVA.
309 m : uint32 rva_base;
310 :
311 : // This validates that the fixup is of a known type. Any FIXUP that does not
312 : // conform to a type that we have already witnessed in sample data will cause
313 : // this to return false.
314 m : bool ValidHeader() const;
315 :
316 : // Refers to code as opposed to data.
317 m : bool refers_to_code() const { return (flags & FLAG_REFERS_TO_CODE) != 0; }
318 :
319 : // Is stored in data as opposed to being part of an instruction. This is
320 : // not always reported properly, as immediate operands to 'jmp'
321 : // instructions in thunks (__imp__function_name) set this bit.
322 m : bool is_data() const { return (flags & FLAG_IS_DATA) != 0; }
323 :
324 : // Returns true if the fixup is an offset from some address.
325 m : bool is_offset() const;
326 :
327 : // This function returns the size of the reference as encoded at the
328 : // address 'rva_location'.
329 m : size_t size() const;
330 m : };
331 : // We coerce a stream of bytes to this structure, so we require it to be
332 : // exactly 12 bytes in size.
333 m : COMPILE_ASSERT_IS_POD_OF_SIZE(PdbFixup, 12);
334 :
335 m : } // namespace pdb
336 :
337 : #endif // SYZYGY_PDB_PDB_DATA_H_
|