1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/pe_utils.h"
16 :
17 : #include "base/strings/string_split.h"
18 : #include "base/strings/string_util.h"
19 : #include "syzygy/block_graph/typed_block.h"
20 : #include "syzygy/pe/dos_stub.h"
21 :
22 : namespace pe {
23 :
24 : using block_graph::BlockGraph;
25 : using block_graph::ConstTypedBlock;
26 : using block_graph::TypedBlock;
27 : using core::RelativeAddress;
28 :
29 : namespace {
30 :
31 : // A simple struct that can be used to let us access strings using TypedBlock.
32 : struct StringStruct {
33 : const char string[1];
34 : };
35 :
36 : typedef TypedBlock<IMAGE_DOS_HEADER> DosHeader;
37 : typedef TypedBlock<IMAGE_IMPORT_DESCRIPTOR> ImageImportDescriptor;
38 : typedef TypedBlock<IMAGE_NT_HEADERS> NtHeaders;
39 : typedef TypedBlock<StringStruct> String;
40 :
41 : template <typename BlockPtr>
42 : BlockPtr UncheckedGetNtHeadersBlockFromDosHeaderBlock(
43 E : BlockPtr dos_header_block) {
44 E : BlockGraph::Reference ref;
45 : if (!dos_header_block->GetReference(offsetof(IMAGE_DOS_HEADER, e_lfanew),
46 E : &ref)) {
47 : // No NT headers reference.
48 E : return NULL;
49 : }
50 :
51 : if (ref.offset() != 0 ||
52 : ref.type() != BlockGraph::RELATIVE_REF ||
53 E : ref.size() != sizeof(RelativeAddress)) {
54 : // The reference is of incorrect type.
55 E : return NULL;
56 : }
57 :
58 E : return ref.referenced();
59 E : }
60 :
// Returns the NT headers block referenced by |dos_header_block|, but only if
// that block passes IsValidNtHeadersBlock; otherwise returns NULL. The DOS
// header block itself is expected to be valid (checked in debug builds only).
template <typename BlockPtr>
BlockPtr CheckedGetNtHeadersBlockFromDosHeaderBlock(
    BlockPtr dos_header_block) {
  DCHECK(IsValidDosHeaderBlock(dos_header_block));

  BlockPtr candidate =
      UncheckedGetNtHeadersBlockFromDosHeaderBlock(dos_header_block);
  if (candidate != NULL && IsValidNtHeadersBlock(candidate))
    return candidate;

  return NULL;
}
75 :
76 : } // namespace
77 :
78 : const char kCodeSectionName[] = ".text";
79 : const char kReadOnlyDataSectionName[] = ".rdata";
80 : const char kReadWriteDataSectionName[] = ".data";
81 : const char kRelocSectionName[] = ".reloc";
82 : const char kResourceSectionName[] = ".rsrc";
83 : const char kTlsSectionName[] = ".tls";
84 :
85 : // These constants reflect what we see in MSVS-produced PE files. They do not
86 : // exhaustively cover all possibilities and there are very likely other valid
87 : // combinations of characteristics.
88 : const DWORD kCodeCharacteristics =
89 : IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_EXECUTE;
90 : const DWORD kReadOnlyDataCharacteristics =
91 : IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
92 : const DWORD kReadWriteDataCharacteristics =
93 : IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE;
94 : const DWORD kRelocCharacteristics =
95 : IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_DISCARDABLE |
96 : IMAGE_SCN_MEM_READ;
97 :
98 E : bool IsValidDosHeaderBlock(const BlockGraph::Block* dos_header_block) {
99 E : ConstTypedBlock<IMAGE_DOS_HEADER> dos_header;
100 :
101 E : if (!dos_header.Init(0, dos_header_block)) {
102 : // Too small or no data.
103 E : return false;
104 : }
105 :
106 E : if (dos_header->e_magic != IMAGE_DOS_SIGNATURE) {
107 : // Wrong signature.
108 E : return false;
109 : }
110 :
111 : // The "DOS file size" is encoded in a rather wonky manner.
112 : // - e_cp is the number of "pages" in the file, but
113 : // - e_cblp is the number of bytes used on the last page.
114 E : size_t dos_file_size = 512 * dos_header->e_cp;
115 E : if (dos_header->e_cblp != 0) {
116 : // Let's not go below zero size.
117 E : if (dos_file_size < 512)
118 E : return false;
119 :
120 E : dos_file_size -= 512;
121 E : dos_file_size += dos_header->e_cblp;
122 : }
123 : // The VC linker yields a DOS header with a size that's larger than
124 : // the DOS header and the NT headers combined. I wonder if anyone cares
125 : // about these sizes anymore.
126 E : if (dos_file_size < dos_header_block->size())
127 E : return false;
128 :
129 : // Check the paragraph size of the header.
130 E : if (dos_header->e_cparhdr * 16 < sizeof(IMAGE_DOS_HEADER))
131 E : return false;
132 :
133 : // Retrieve the NT headers.
134 : const BlockGraph::Block* nt_headers =
135 E : UncheckedGetNtHeadersBlockFromDosHeaderBlock(dos_header_block);
136 E : if (nt_headers == NULL) {
137 : // No DOS header reference.
138 E : return false;
139 : }
140 :
141 E : return true;
142 E : }
143 :
144 E : bool IsValidNtHeadersBlock(const BlockGraph::Block* nt_headers_block) {
145 : // Check the signatures.
146 E : ConstTypedBlock<IMAGE_NT_HEADERS> nt_headers;
147 :
148 E : if (!nt_headers.Init(0, nt_headers_block)) {
149 : // Short or no data.
150 i : return false;
151 : }
152 :
153 E : if (nt_headers->Signature!= IMAGE_NT_SIGNATURE) {
154 : // Wrong signature.
155 E : return false;
156 : }
157 : if (nt_headers->FileHeader.SizeOfOptionalHeader !=
158 E : sizeof(IMAGE_OPTIONAL_HEADER)) {
159 : // Wrong optional header size.
160 E : return false;
161 : }
162 E : if (nt_headers->OptionalHeader.Magic != IMAGE_NT_OPTIONAL_HDR_MAGIC) {
163 : // Wrong magic for optional header.
164 E : return false;
165 : }
166 :
167 : // Calculate the minimum size for the NT headers and the section header.
168 : size_t header_size = sizeof(IMAGE_NT_HEADERS) +
169 E : sizeof(IMAGE_SECTION_HEADER) * nt_headers->FileHeader.NumberOfSections;
170 :
171 : if (nt_headers_block->size() < header_size ||
172 E : nt_headers_block->data_size() < header_size) {
173 : // The block's size isn't large enough for the section headers.
174 i : return false;
175 : }
176 :
177 E : return true;
178 E : }
179 :
180 : const BlockGraph::Block* GetNtHeadersBlockFromDosHeaderBlock(
181 E : const BlockGraph::Block* dos_header_block) {
182 E : return CheckedGetNtHeadersBlockFromDosHeaderBlock(dos_header_block);
183 E : }
184 :
185 : BlockGraph::Block* GetNtHeadersBlockFromDosHeaderBlock(
186 E : BlockGraph::Block* dos_header_block) {
187 E : return CheckedGetNtHeadersBlockFromDosHeaderBlock(dos_header_block);
188 E : }
189 :
190 E : bool UpdateDosHeader(BlockGraph::Block* dos_header_block) {
191 E : DCHECK(dos_header_block != NULL);
192 :
193 : // The DOS header has to be a multiple of 16 bytes for historic reasons.
194 : size_t dos_header_size = common::AlignUp(
195 E : sizeof(IMAGE_DOS_HEADER) + pe::kDosStubSize, 16);
196 :
197 : // If the new header block is shorter than it was, go ahead and
198 : // trim the source ranges to match the new, shorter size.
199 E : if (dos_header_block->size() > dos_header_size) {
200 : BlockGraph::Block::DataRange range(
201 E : dos_header_size, dos_header_block->size() - dos_header_size);
202 E : dos_header_block->source_ranges().RemoveMappedRange(range);
203 : }
204 :
205 E : dos_header_block->ResizeData(dos_header_size);
206 E : dos_header_block->set_size(dos_header_size);
207 E : DCHECK_EQ(dos_header_size, dos_header_block->size());
208 E : DCHECK_EQ(dos_header_size, dos_header_block->data_size());
209 :
210 E : TypedBlock<IMAGE_DOS_HEADER> dos_header;
211 E : if (!dos_header.InitWithSize(0, dos_header_size, dos_header_block)) {
212 i : LOG(ERROR) << "Unable to cast IMAGE_DOS_HEADER.";
213 i : return false;
214 : }
215 :
216 : // Wipe the DOS header and fill in the stub.
217 E : memset(dos_header.Get(), 0, sizeof(IMAGE_DOS_HEADER));
218 E : memcpy(dos_header.Get() + 1, pe::kDosStub, pe::kDosStubSize);
219 :
220 E : dos_header->e_magic = IMAGE_DOS_SIGNATURE;
221 : // Calculate the number of bytes used on the last DOS executable "page".
222 E : dos_header->e_cblp = dos_header_size % 512;
223 : // Calculate the number of pages used by the DOS executable.
224 E : dos_header->e_cp = dos_header_size / 512;
225 : // Count the last page if we didn't have an even multiple
226 E : if (dos_header->e_cblp != 0)
227 E : dos_header->e_cp++;
228 :
229 : // Header length in "paragraphs".
230 E : dos_header->e_cparhdr = sizeof(*dos_header) / 16;
231 :
232 : // Set this to max allowed, just because.
233 E : dos_header->e_maxalloc = 0xFFFF;
234 :
235 : // Location of relocs - our header has zero relocs, but we set this anyway.
236 E : dos_header->e_lfarlc = sizeof(*dos_header);
237 :
238 E : DCHECK(IsValidDosHeaderBlock(dos_header_block));
239 :
240 E : return true;
241 E : }
242 :
243 E : SectionType GetSectionType(const IMAGE_SECTION_HEADER& header) {
244 E : if ((header.Characteristics & IMAGE_SCN_CNT_CODE) != 0)
245 E : return kSectionCode;
246 E : if ((header.Characteristics & kReadOnlyDataCharacteristics) != 0)
247 E : return kSectionData;
248 E : return kSectionUnknown;
249 E : }
250 :
251 : // We use ", " as a separator between symbol names. We sometimes see commas
252 : // in symbol names but do not see whitespace. Thus, this provides a useful
253 : // separator that is also human friendly to read.
254 : const char kLabelNameSep[] = ", ";
255 :
256 : bool AddLabelToBlock(BlockGraph::Offset offset,
257 : const base::StringPiece& name,
258 : BlockGraph::LabelAttributes label_attributes,
259 E : BlockGraph::Block* block) {
260 E : DCHECK(block != NULL);
261 :
262 : // It is possible for labels to be attached to the first byte past a block
263 : // (things like debug end, scope end, etc). It is up to the caller to be more
264 : // strict about the offset if need be.
265 E : DCHECK_LE(0, offset);
266 E : DCHECK_LE(offset, static_cast<BlockGraph::Offset>(block->size()));
267 :
268 : // Try to create the label.
269 E : if (block->SetLabel(offset, name, label_attributes)) {
270 : // If there was no label at offset 0, then this block has not yet been
271 : // renamed, and still has its section contribution as a name. Update it to
272 : // the first symbol we get for it. We parse symbols from most useful
273 : // (undecorated function names) to least useful (mangled public symbols), so
274 : // this ensures a block has the most useful name.
275 E : if (offset == 0)
276 E : block->set_name(name);
277 :
278 E : return true;
279 : }
280 :
281 : // If we get here there's an already existing label. Update it.
282 E : BlockGraph::Label label;
283 E : CHECK(block->GetLabel(offset, &label));
284 :
285 : // Merge the names if this isn't a repeated name.
286 E : std::string name_str = name.as_string();
287 E : std::string new_name = label.name();
288 E : std::vector<std::string> names;
289 E : base::SplitStringUsingSubstr(label.name(), kLabelNameSep, &names);
290 E : if (std::find(names.begin(), names.end(), name_str) == names.end()) {
291 E : names.push_back(name_str);
292 E : new_name.append(kLabelNameSep);
293 E : new_name.append(name_str);
294 : }
295 :
296 : // Merge the attributes.
297 : BlockGraph::LabelAttributes new_label_attr = label.attributes() |
298 E : label_attributes;
299 :
300 : // We often see code labels that coincide with data labels, as a terminating
301 : // label of a switch statement. Data labels take priority.
302 : if ((new_label_attr & BlockGraph::DATA_LABEL) &&
303 E : (new_label_attr & BlockGraph::CODE_LABEL)) {
304 E : new_label_attr ^= BlockGraph::CODE_LABEL;
305 : }
306 :
307 : // Update the label.
308 E : label = BlockGraph::Label(new_name, new_label_attr);
309 E : CHECK(block->RemoveLabel(offset));
310 E : CHECK(block->SetLabel(offset, label));
311 :
312 E : return true;
313 E : }
314 :
315 : namespace {
316 :
317 : enum EntryPointTypeEnum { kExeEntryPoint, kDllEntryPoint };
318 :
319 : bool GetImageEntryPoint(BlockGraph::Block* dos_header_block,
320 : EntryPointTypeEnum desired_entry_point_type,
321 E : EntryPoint* entry_point) {
322 E : DCHECK(dos_header_block != NULL);
323 E : DCHECK(entry_point != NULL);
324 :
325 E : *entry_point = EntryPoint(static_cast<BlockGraph::Block*>(NULL), 0);
326 :
327 : BlockGraph::Block* nt_headers_block =
328 E : pe::GetNtHeadersBlockFromDosHeaderBlock(dos_header_block);
329 :
330 E : TypedBlock<IMAGE_NT_HEADERS> nt_headers;
331 E : if (nt_headers_block == NULL || !nt_headers.Init(0, nt_headers_block)) {
332 i : LOG(ERROR) << "Unable to retrieve NT Headers.";
333 i : return false;
334 : }
335 :
336 E : EntryPointTypeEnum entry_point_type = kExeEntryPoint;
337 E : if ((nt_headers->FileHeader.Characteristics & IMAGE_FILE_DLL) != 0)
338 E : entry_point_type = kDllEntryPoint;
339 :
340 E : if (entry_point_type != desired_entry_point_type)
341 E : return true;
342 :
343 E : BlockGraph::Reference entry_point_ref;
344 : bool found = nt_headers.block()->GetReference(
345 : offsetof(IMAGE_NT_HEADERS, OptionalHeader.AddressOfEntryPoint),
346 E : &entry_point_ref);
347 :
348 E : if (!found && entry_point_type == kExeEntryPoint) {
349 E : LOG(ERROR) << "Malformed PE Headers: No entry point found for executable.";
350 E : return false;
351 : }
352 :
353 E : if (found) {
354 : *entry_point = EntryPoint(entry_point_ref.referenced(),
355 E : entry_point_ref.offset());
356 : }
357 :
358 E : return true;
359 E : }
360 :
361 : } // namespace
362 :
363 : bool GetExeEntryPoint(BlockGraph::Block* dos_header_block,
364 E : EntryPoint* entry_point) {
365 E : return GetImageEntryPoint(dos_header_block, kExeEntryPoint, entry_point);
366 E : }
367 :
368 : bool GetDllEntryPoint(BlockGraph::Block* dos_header_block,
369 E : EntryPoint* entry_point) {
370 E : return GetImageEntryPoint(dos_header_block, kDllEntryPoint, entry_point);
371 E : }
372 :
373 : bool GetTlsInitializers(BlockGraph::Block* dos_header_block,
374 E : EntryPointSet* entry_points) {
375 E : DCHECK(dos_header_block != NULL);
376 E : DCHECK(entry_points != NULL);
377 :
378 : BlockGraph::Block* nt_headers_block =
379 E : pe::GetNtHeadersBlockFromDosHeaderBlock(dos_header_block);
380 :
381 E : TypedBlock<IMAGE_NT_HEADERS> nt_headers;
382 E : if (nt_headers_block == NULL || !nt_headers.Init(0, nt_headers_block)) {
383 i : LOG(ERROR) << "Unable to retrieve NT Headers.";
384 i : return false;
385 : }
386 :
387 : // If the module has no TLS directory then there are no TLS initializers
388 : // and hence nothing to do.
389 : const IMAGE_DATA_DIRECTORY& data_dir =
390 E : nt_headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_TLS];
391 E : if (data_dir.Size == 0 || !nt_headers.HasReference(data_dir.VirtualAddress)) {
392 E : return true;
393 : }
394 :
395 : // Find the TLS directory.
396 E : TypedBlock<IMAGE_TLS_DIRECTORY> tls_dir;
397 E : if (!nt_headers.Dereference(data_dir.VirtualAddress, &tls_dir)) {
398 i : LOG(ERROR) << "Failed to cast TLS directory.";
399 i : return false;
400 : }
401 :
402 : // Get the TLS initializer callbacks. We manually lookup the reference
403 : // because it is an indirect reference, which can't be dereferenced by
404 : // TypedBlock.
405 : typedef BlockGraph::Block::ReferenceMap ReferenceMap;
406 : ReferenceMap::const_iterator callback_ref =
407 : tls_dir.block()->references().find(
408 E : tls_dir.OffsetOf(tls_dir->AddressOfCallBacks));
409 E : if (callback_ref == tls_dir.block()->references().end()) {
410 i : LOG(ERROR) << "Failed to locate TLS initializers.";
411 i : return false;
412 : }
413 :
414 : // Note each of the TLS entry points.
415 E : const BlockGraph::Block* callbacks_block = callback_ref->second.referenced();
416 E : const ReferenceMap& ref_map = callbacks_block->references();
417 E : ReferenceMap::const_iterator iter = ref_map.begin();
418 E : for (; iter != ref_map.end(); ++iter) {
419 E : const BlockGraph::Reference& ref = iter->second;
420 E : DCHECK(ref.size() == sizeof(core::AbsoluteAddress));
421 : entry_points->insert(
422 E : std::make_pair(ref.referenced(), ref.offset()));
423 E : }
424 :
425 E : return true;
426 E : }
427 :
428 : bool HasImportEntry(block_graph::BlockGraph::Block* header_block,
429 : const base::StringPiece& dll_name,
430 E : bool* has_import_entry) {
431 E : DCHECK(header_block != NULL);
432 E : DCHECK(dll_name != NULL);
433 E : DCHECK(!dll_name.empty());
434 E : DCHECK(has_import_entry != NULL);
435 :
436 E : *has_import_entry = false;
437 :
438 E : DosHeader dos_header;
439 E : NtHeaders nt_headers;
440 : if (!dos_header.Init(0, header_block) ||
441 E : !dos_header.Dereference(dos_header->e_lfanew, &nt_headers)) {
442 i : LOG(ERROR) << "Unable to cast image headers.";
443 i : return false;
444 : }
445 :
446 : BlockGraph::Block* image_import_descriptor_block;
447 : IMAGE_DATA_DIRECTORY* import_directory =
448 E : nt_headers->OptionalHeader.DataDirectory + IMAGE_DIRECTORY_ENTRY_IMPORT;
449 E : DCHECK(nt_headers.HasReference(import_directory->VirtualAddress));
450 :
451 E : ImageImportDescriptor image_import_descriptor;
452 : if (!nt_headers.Dereference(import_directory->VirtualAddress,
453 E : &image_import_descriptor)) {
454 : // This could happen if the image import descriptor array is empty, and
455 : // terminated by a *partial* null entry. However, we've not yet seen that.
456 i : LOG(ERROR) << "Failed to dereference Image Import Descriptor Array.";
457 i : return false;
458 : }
459 :
460 E : image_import_descriptor_block = image_import_descriptor.block();
461 :
462 E : ImageImportDescriptor iida;
463 E : if (!iida.Init(0, image_import_descriptor_block)) {
464 i : LOG(ERROR) << "Unable to cast Image Import Descriptor.";
465 i : return false;
466 : }
467 :
468 : // The array is NULL terminated with a potentially incomplete descriptor so
469 : // we can't use ElementCount - 1.
470 E : DCHECK_GT(image_import_descriptor_block->size(), 0U);
471 : size_t descriptor_count =
472 : (common::AlignUp(image_import_descriptor_block->size(),
473 : sizeof(IMAGE_IMPORT_DESCRIPTOR)) /
474 E : sizeof(IMAGE_IMPORT_DESCRIPTOR)) - 1;
475 :
476 E : for (size_t iida_index = 0; iida_index < descriptor_count; ++iida_index) {
477 E : String ref_dll_name;
478 E : if (!iida.Dereference(iida[iida_index].Name, &ref_dll_name)) {
479 i : LOG(ERROR) << "Unable to dereference DLL name.";
480 i : return false;
481 : }
482 :
483 E : size_t max_len = ref_dll_name.ElementCount();
484 : if (base::CompareCaseInsensitiveASCII(
485 : base::StringPiece(ref_dll_name->string,
486 : std::min(max_len, dll_name.size())),
487 E : dll_name.data()) == 0) {
488 E : *has_import_entry = true;
489 E : break;
490 : }
491 E : }
492 :
493 E : return true;
494 E : }
495 :
496 E : void RedirectReferences(const ReferenceMap& redirects) {
497 E : std::set<BlockGraph::Block*> visited_referred;
498 E : std::set<BlockGraph::Block*> visited_referrer;
499 :
500 : // Iterate over the original destinations. We'll redirect their referrers.
501 E : ReferenceMap::const_iterator dst_block_it = redirects.begin();
502 E : for (; dst_block_it != redirects.end(); ++dst_block_it) {
503 : // Process each referred block only once. We keep track of already visited
504 : // blocks because a block may occur multiple times in |redirects|.
505 E : BlockGraph::Block* referred = dst_block_it->first.first;
506 E : bool already_visited = !visited_referred.insert(referred).second;
507 E : if (already_visited)
508 E : continue;
509 :
510 : // Iterate over all their referrers.
511 E : BlockGraph::Block::ReferrerSet referrers = referred->referrers();
512 E : BlockGraph::Block::ReferrerSet::iterator referrer_it = referrers.begin();
513 E : for (; referrer_it != referrers.end(); ++referrer_it) {
514 : // Don't redirect references from PE parsed blocks. This actually ends up
515 : // redirecting the IAT entries as well in the worst case.
516 E : BlockGraph::Block* referrer = referrer_it->first;
517 E : if (referrer->attributes() & BlockGraph::PE_PARSED)
518 E : continue;
519 :
520 : // Process each referrer block only once.
521 E : already_visited = !visited_referrer.insert(referrer).second;
522 E : if (already_visited)
523 E : continue;
524 :
525 : // Iterate over all references originating from the referring block.
526 : BlockGraph::Block::ReferenceMap::const_iterator reference_it =
527 E : referrer->references().begin();
528 E : for (; reference_it != referrer->references().end(); ++reference_it) {
529 : // Look for an original destination to be redirected.
530 E : const BlockGraph::Reference& ref(reference_it->second);
531 E : ReferenceDest dest(std::make_pair(ref.referenced(), ref.offset()));
532 E : ReferenceMap::const_iterator it(redirects.find(dest));
533 E : if (it == redirects.end())
534 E : continue;
535 :
536 : // Perform the redirection, preserving the gap between the base and the
537 : // offset.
538 E : BlockGraph::Offset delta = ref.base() - ref.offset();
539 : BlockGraph::Reference new_reference(ref.type(),
540 : ref.size(),
541 : it->second.first,
542 : it->second.second,
543 E : it->second.second + delta);
544 E : referrer->SetReference(reference_it->first, new_reference);
545 E : }
546 E : }
547 E : }
548 E : }
549 :
550 : } // namespace pe
|