1 : // Copyright 2012 Google Inc.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/decomposer.h"
16 :
17 : #include <cvconst.h>
18 : #include <algorithm>
19 :
20 : #include "base/bind.h"
21 : #include "base/file_path.h"
22 : #include "base/logging.h"
23 : #include "base/path_service.h"
24 : #include "base/string_util.h"
25 : #include "base/stringprintf.h"
26 : #include "base/utf_string_conversions.h"
27 : #include "base/memory/scoped_ptr.h"
28 : #include "base/win/scoped_bstr.h"
29 : #include "base/win/scoped_comptr.h"
30 : #include "sawbuck/common/com_utils.h"
31 : #include "sawbuck/sym_util/types.h"
32 : #include "syzygy/block_graph/block_util.h"
33 : #include "syzygy/block_graph/typed_block.h"
34 : #include "syzygy/core/disassembler_util.h"
35 : #include "syzygy/core/zstream.h"
36 : #include "syzygy/pdb/omap.h"
37 : #include "syzygy/pdb/pdb_byte_stream.h"
38 : #include "syzygy/pdb/pdb_util.h"
39 : #include "syzygy/pe/dia_util.h"
40 : #include "syzygy/pe/find.h"
41 : #include "syzygy/pe/metadata.h"
42 : #include "syzygy/pe/pdb_info.h"
43 : #include "syzygy/pe/pe_file_parser.h"
44 : #include "syzygy/pe/pe_utils.h"
45 : #include "syzygy/pe/serialization.h"
46 :
47 : namespace pe {
48 : namespace {
49 :
50 : using base::win::ScopedBstr;
51 : using base::win::ScopedComPtr;
52 : using block_graph::BlockGraph;
53 : using block_graph::ConstTypedBlock;
54 : using builder::Opt;
55 : using builder::Seq;
56 : using builder::Star;
57 : using core::AbsoluteAddress;
58 : using core::Disassembler;
59 : using core::RelativeAddress;
60 :
61 : typedef Disassembler::CallbackDirective CallbackDirective;
62 :
63 : const size_t kPointerSize = sizeof(AbsoluteAddress);
64 :
65 : // Converts from PdbFixup::Type to BlockGraph::ReferenceType.
66 : BlockGraph::ReferenceType PdbFixupTypeToReferenceType(
67 E : pdb::PdbFixup::Type type) {
68 E : switch (type) {
69 : case pdb::PdbFixup::TYPE_ABSOLUTE:
70 E : return BlockGraph::ABSOLUTE_REF;
71 :
72 : case pdb::PdbFixup::TYPE_RELATIVE:
73 E : return BlockGraph::RELATIVE_REF;
74 :
75 : case pdb::PdbFixup::TYPE_PC_RELATIVE:
76 E : return BlockGraph::PC_RELATIVE_REF;
77 :
78 : default:
79 i : NOTREACHED() << "Invalid PdbFixup::Type.";
80 : // The return type here is meaningless.
81 i : return BlockGraph::ABSOLUTE_REF;
82 : }
83 E : }
84 :
85 : // Adds a reference to the provided intermediate reference map. If one already
86 : // exists, will validate that they are consistent.
87 : bool AddReference(RelativeAddress src_addr,
88 : BlockGraph::ReferenceType type,
89 : BlockGraph::Size size,
90 : RelativeAddress dst_base,
91 : BlockGraph::Offset dst_offset,
92 E : Decomposer::IntermediateReferenceMap* references) {
93 E : DCHECK(references != NULL);
94 :
95 : // If we get an iterator to a reference and it has the same source address
96 : // then ensure that we are consistent with it.
97 : Decomposer::IntermediateReferenceMap::iterator it =
98 E : references->lower_bound(src_addr);
99 E : if (it != references->end() && it->first == src_addr) {
100 : if (type != it->second.type || size != it->second.size ||
101 E : dst_base != it->second.base || dst_offset != it->second.offset) {
102 i : LOG(ERROR) << "Trying to insert inconsistent and colliding intermediate "
103 : "references.";
104 i : return false;
105 : }
106 : }
107 :
108 E : Decomposer::IntermediateReference ref = { type,
109 E : size,
110 E : dst_base,
111 E : dst_offset };
112 :
113 : // Since we used lower_bound above, we can use it as a hint for the
114 : // insertion. This saves us from incurring the lookup cost twice.
115 E : references->insert(it, std::make_pair(src_addr, ref));
116 E : return true;
117 E : }
118 :
119 : // Validates the given reference against the given fixup map entry. If they
120 : // are consistent, marks the fixup as having been visited.
121 : bool ValidateReference(RelativeAddress src_addr,
122 : BlockGraph::ReferenceType type,
123 : BlockGraph::Size size,
124 E : Decomposer::FixupMap::iterator fixup_it) {
125 E : if (type != fixup_it->second.type || size != kPointerSize) {
126 i : LOG(ERROR) << "Reference at " << src_addr
127 : << " not consistent with corresponding fixup.";
128 i : return false;
129 : }
130 :
131 : // Mark this fixup as having been visited.
132 E : fixup_it->second.visited = true;
133 :
134 E : return true;
135 E : }
136 :
137 : enum ValidateOrAddReferenceMode {
138 : // Look for an existing fixup. If we find one, validate against it,
139 : // otherwise create a new intermediate reference.
140 : FIXUP_MAY_EXIST,
141 : // Compare against an existing fixup, bailing if there is none. Does not
142 : // create a new intermediate reference.
143 : FIXUP_MUST_EXIST,
144 : // Look for an existing fixup, and fail if one exists. Otherwise, create
145 : // a new intermediate reference.
146 : FIXUP_MUST_NOT_EXIST
147 : };
148 : bool ValidateOrAddReference(ValidateOrAddReferenceMode mode,
149 : RelativeAddress src_addr,
150 : BlockGraph::ReferenceType type,
151 : BlockGraph::Size size,
152 : RelativeAddress dst_base,
153 : BlockGraph::Offset dst_offset,
154 : Decomposer::FixupMap* fixup_map,
155 E : Decomposer::IntermediateReferenceMap* references) {
156 E : DCHECK(fixup_map != NULL);
157 E : DCHECK(references != NULL);
158 :
159 E : Decomposer::FixupMap::iterator it = fixup_map->find(src_addr);
160 :
161 E : switch (mode) {
162 : case FIXUP_MAY_EXIST: {
163 : if (it != fixup_map->end() &&
164 E : !ValidateReference(src_addr, type, size, it))
165 i : return false;
166 : return AddReference(src_addr, type, size, dst_base, dst_offset,
167 E : references);
168 : }
169 :
170 : case FIXUP_MUST_EXIST: {
171 E : if (it == fixup_map->end()) {
172 i : LOG(ERROR) << "Reference at " << src_addr << " has no matching fixup.";
173 i : return false;
174 : }
175 E : if (!ValidateReference(src_addr, type, size, it))
176 i : return false;
177 : // Do not create a new intermediate reference.
178 E : return true;
179 : }
180 :
181 : case FIXUP_MUST_NOT_EXIST: {
182 E : if (it != fixup_map->end()) {
183 i : LOG(ERROR) << "Reference at " << src_addr
184 : << " collides with an existing fixup.";
185 i : return false;
186 : }
187 : return AddReference(src_addr, type, size, dst_base, dst_offset,
188 E : references);
189 : }
190 :
191 : default: {
192 i : NOTREACHED() << "Invalid ValidateOrAddReferenceMode.";
193 i : return false;
194 : }
195 : }
196 E : }
197 :
198 E : bool GetSymTag(IDiaSymbol* symbol, DWORD* sym_tag) {
199 E : DCHECK(sym_tag != NULL);
200 E : *sym_tag = SymTagNull;
201 E : HRESULT hr = symbol->get_symTag(sym_tag);
202 E : if (hr != S_OK) {
203 i : LOG(ERROR) << "Error getting sym tag: " << com::LogHr(hr) << ".";
204 i : return false;
205 : }
206 E : return true;
207 E : }
208 :
209 E : bool GetTypeInfo(IDiaSymbol* symbol, size_t* length) {
210 E : DCHECK(symbol != NULL);
211 E : DCHECK(length != NULL);
212 :
213 E : *length = 0;
214 E : ScopedComPtr<IDiaSymbol> type;
215 E : HRESULT hr = symbol->get_type(type.Receive());
216 : // This happens if the symbol has no type information.
217 E : if (hr == S_FALSE)
218 E : return true;
219 E : if (hr != S_OK) {
220 i : LOG(ERROR) << "Failed to get type symbol: " << com::LogHr(hr) << ".";
221 i : return false;
222 : }
223 :
224 E : ULONGLONG ull_length = 0;
225 E : hr = type->get_length(&ull_length);
226 E : if (hr != S_OK) {
227 i : LOG(ERROR) << "Failed to retrieve type length properties: "
228 : << com::LogHr(hr) << ".";
229 i : return false;
230 : }
231 E : *length = ull_length;
232 :
233 E : return true;
234 E : }
235 :
236 : enum SectionType {
237 : kSectionCode,
238 : kSectionData,
239 : kSectionUnknown
240 : };
241 :
242 E : SectionType GetSectionType(const IMAGE_SECTION_HEADER* header) {
243 E : DCHECK(header != NULL);
244 E : if ((header->Characteristics & IMAGE_SCN_CNT_CODE) != 0)
245 E : return kSectionCode;
246 E : if ((header->Characteristics & kReadOnlyDataCharacteristics) != 0)
247 E : return kSectionData;
248 i : return kSectionUnknown;
249 E : }
250 :
251 E : bool IsSymTag(IDiaSymbol* symbol, DWORD expected_sym_tag) {
252 E : DWORD sym_tag = SymTagNull;
253 E : if (!GetSymTag(symbol, &sym_tag))
254 i : return false;
255 :
256 E : return sym_tag == expected_sym_tag;
257 E : }
258 :
259 E : size_t GuessAddressAlignment(RelativeAddress address) {
260 : // Count the trailing zeros in the original address. We only care
261 : // about alignment up to 16, so only have to check the first 4 bits.
262 : // TODO(chrisha): This can be done quite efficiently using various bit
263 : // twiddling tricks, and there may very well be a library implementation
264 : // of this somewhere (typically named ctz for 'count training zeros').
265 E : size_t i = address.value();
266 E : if ((i & ((1 << 4) - 1)) == 0)
267 E : return (1 << 4); // 16.
268 :
269 E : if ((i & ((1 << 3) - 1)) == 0)
270 E : return (1 << 3); // 8.
271 :
272 E : if ((i & ((1 << 2) - 1)) == 0)
273 E : return (1 << 2); // 4.
274 :
275 E : if ((i & ((1 << 1) - 1)) == 0)
276 E : return (1 << 1); // 2.
277 :
278 E : return 1;
279 E : }
280 :
281 E : void GuessDataBlockAlignment(BlockGraph::Block* block) {
282 E : DCHECK(block != NULL);
283 E : block->set_alignment(GuessAddressAlignment(block->addr()));
284 E : }
285 :
286 : bool AreMatchedBlockAndLabelAttributes(
287 : BlockGraph::BlockType bt,
288 : BlockGraph::LabelAttributes la) {
289 : return (bt == BlockGraph::CODE_BLOCK && (la & BlockGraph::CODE_LABEL) != 0) ||
290 : (bt == BlockGraph::DATA_BLOCK && (la & BlockGraph::DATA_LABEL) != 0);
291 : }
292 :
293 E : BlockGraph::LabelAttributes SymTagToLabelAttributes(enum SymTagEnum sym_tag) {
294 E : switch (sym_tag) {
295 : case SymTagData:
296 E : return BlockGraph::DATA_LABEL;
297 : case SymTagLabel:
298 E : return BlockGraph::CODE_LABEL;
299 : case SymTagFuncDebugStart:
300 E : return BlockGraph::DEBUG_START_LABEL;
301 : case SymTagFuncDebugEnd:
302 E : return BlockGraph::DEBUG_END_LABEL;
303 : case SymTagBlock:
304 E : return BlockGraph::SCOPE_START_LABEL;
305 : #if _MSC_VER >= 1600
306 : // The DIA SDK shipping with MSVS 2010 includes additional symbol types.
307 : case SymTagCallSite:
308 : return BlockGraph::CALL_SITE_LABEL;
309 : #endif
310 : }
311 :
312 i : NOTREACHED();
313 i : return 0;
314 E : }
315 :
316 : bool AddLabelToBlock(RelativeAddress addr,
317 : const base::StringPiece& name,
318 : BlockGraph::LabelAttributes label_attributes,
319 E : BlockGraph::Block* block) {
320 E : DCHECK(block != NULL);
321 E : DCHECK_LE(block->addr(), addr);
322 E : DCHECK_GT(block->addr() + block->size(), addr);
323 :
324 E : BlockGraph::Offset offset = addr - block->addr();
325 :
326 : // Try to create the label.
327 E : if (block->SetLabel(offset, name, label_attributes)) {
328 : // If there was no label at offset 0, then this block has not yet been
329 : // renamed, and still has its section contribution as a name. Update it to
330 : // the first symbol we get for it. We parse symbols from most useful
331 : // (undecorated function names) to least useful (mangled public symbols), so
332 : // this ensures a block has the most useful name.
333 E : if (offset == 0)
334 E : block->set_name(name);
335 :
336 E : return true;
337 : }
338 :
339 : // If we get here there's an already existing label. Update it.
340 E : BlockGraph::Label label;
341 E : CHECK(block->GetLabel(offset, &label));
342 :
343 : // It is conceivable that there could be more than one scope with either the
344 : // same beginning or the same ending. However, this doesn't appear to happen
345 : // in any version of Chrome up to 20. We add this check so that we'd at least
346 : // be made aware of the situation. (We don't rely on these labels, so we
347 : // merely output a warning rather than an error.)
348 : {
349 : const BlockGraph::LabelAttributes kScopeAttributes =
350 : BlockGraph::SCOPE_START_LABEL |
351 E : BlockGraph::SCOPE_END_LABEL;
352 : BlockGraph::LabelAttributes scope_attributes =
353 E : label_attributes & kScopeAttributes;
354 E : if (scope_attributes != 0) {
355 E : if (label.has_any_attributes(scope_attributes)) {
356 i : LOG(WARNING) << "Detected colliding scope labels at offset "
357 : << offset << " of block \"" << block->name() << "\".";
358 : }
359 : }
360 : }
361 :
362 : // Merge the names if this isn't a repeated name.
363 E : std::string new_name = label.name();
364 E : if (new_name.find(name.data()) == new_name.npos) {
365 E : new_name.append(", ");
366 E : name.AppendToString(&new_name);
367 : }
368 :
369 : // Merge the attributes.
370 : BlockGraph::LabelAttributes new_label_attr = label.attributes() |
371 E : label_attributes;
372 E : if (!BlockGraph::Label::AreValidAttributes(new_label_attr)) {
373 : // It's not clear which attributes should be the winner here, so we log an
374 : // error.
375 i : LOG(ERROR) << "Trying to merge conflicting label attributes \""
376 : << BlockGraph::LabelAttributesToString(label_attributes)
377 : << "\" for label \"" << label.ToString() << "\" at offset "
378 : << offset << " of block \"" << block->name() << "\".";
379 i : return false;
380 : }
381 :
382 : // Update the label.
383 E : label = BlockGraph::Label(new_name, new_label_attr);
384 E : CHECK(block->RemoveLabel(offset));
385 E : CHECK(block->SetLabel(offset, label));
386 :
387 E : return true;
388 E : }
389 :
390 : // The MS linker pads between code blocks with int3s.
391 : static const uint8 kInt3 = 0xCC;
392 :
393 : // If the given run of bytes consists of a single value repeated, returns that
394 : // value. Otherwise, returns -1.
395 E : int RepeatedValue(const uint8* data, size_t size) {
396 E : DCHECK(data != NULL);
397 E : const uint8* data_end = data + size;
398 E : uint8 value = *(data++);
399 E : for (; data < data_end; ++data) {
400 E : if (*data != value)
401 i : return -1;
402 E : }
403 E : return value;
404 E : }
405 :
406 : const BlockGraph::BlockId kNullBlockId(-1);
407 :
408 : void GetDisassemblyStartingPoints(
409 : const BlockGraph::Block* block,
410 : AbsoluteAddress abs_block_addr,
411 : const PEFile::RelocSet& reloc_set,
412 E : Disassembler::AddressSet* addresses) {
413 E : DCHECK(block != NULL);
414 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
415 E : DCHECK(addresses != NULL);
416 :
417 E : addresses->clear();
418 :
419 : // Use code labels as starting points.
420 E : BlockGraph::Block::LabelMap::const_iterator it(block->labels().begin());
421 E : for (; it != block->labels().end(); ++it) {
422 E : BlockGraph::Offset offset = it->first;
423 E : DCHECK_LE(0, offset);
424 E : DCHECK_GT(block->size(), static_cast<size_t>(offset));
425 :
426 E : if (it->second.has_attributes(BlockGraph::CODE_LABEL)) {
427 : // We sometimes receive code labels that land on lookup tables; we can
428 : // detect these because the label will point directly to a reloc. These
429 : // should have already been marked as data by now. DCHECK to validate.
430 : // TODO(chrisha): Get rid of this DCHECK, and allow mixed CODE and DATA
431 : // labels. Simply only use ones that are DATA only.
432 E : DCHECK_EQ(0u, reloc_set.count(block->addr() + offset));
433 :
434 E : addresses->insert(abs_block_addr + offset);
435 : }
436 E : }
437 E : }
438 :
439 : // Determines if the provided code block has the expected layout of code first,
440 : // data second. Returns true if so, false otherwise. Also returns the size of
441 : // the code portion of the block by trimming off any data labels.
442 : bool BlockHasExpectedCodeDataLayout(const BlockGraph::Block* block,
443 E : size_t* code_size) {
444 E : DCHECK(block != NULL);
445 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
446 E : DCHECK(code_size != NULL);
447 :
448 E : *code_size = block->data_size();
449 :
450 : BlockGraph::Block::LabelMap::const_reverse_iterator label_it =
451 E : block->labels().rbegin();
452 : BlockGraph::Block::LabelMap::const_reverse_iterator label_end =
453 E : block->labels().rend();
454 :
455 E : bool seen_non_data = false;
456 :
457 : // Walk through the labels in reverse order (by decreasing offset). Trim
458 : // any data labels from this blocks data_size.
459 E : for (; label_it != label_end; ++label_it) {
460 E : if (label_it->second.has_attributes(BlockGraph::DATA_LABEL)) {
461 : // We've encountered data not strictly at the end of the block. This
462 : // violates assumptions about code generated by cl.exe.
463 E : if (seen_non_data)
464 E : return false;
465 :
466 : // Otherwise, we're still in a run of data labels at the tail of the
467 : // block. Keep trimming the code size.
468 E : size_t offset = static_cast<size_t>(label_it->first);
469 E : if (offset < *code_size)
470 E : *code_size = offset;
471 E : } else {
472 E : seen_non_data = true;
473 : }
474 E : }
475 :
476 E : return true;
477 E : }
478 :
479 : // Given a compiland, returns its compiland details.
480 : bool GetCompilandDetailsForCompiland(IDiaSymbol* compiland,
481 E : IDiaSymbol** compiland_details) {
482 E : DCHECK(compiland != NULL);
483 E : DCHECK(compiland_details != NULL);
484 E : DCHECK(IsSymTag(compiland, SymTagCompiland));
485 :
486 E : *compiland_details = NULL;
487 :
488 : // Get the enumeration of compiland details.
489 E : ScopedComPtr<IDiaEnumSymbols> enum_symbols;
490 : HRESULT hr = compiland->findChildren(SymTagCompilandDetails, NULL, 0,
491 E : enum_symbols.Receive());
492 E : DCHECK_EQ(S_OK, hr);
493 :
494 : // We expect there to be compiland details. For compilands built by
495 : // non-standard toolchains, there usually aren't any.
496 E : LONG count = 0;
497 E : hr = enum_symbols->get_Count(&count);
498 E : DCHECK_EQ(S_OK, hr);
499 E : if (count == 0)
500 i : return false;
501 :
502 : // Get the compiland details.
503 E : ULONG fetched = 0;
504 E : hr = enum_symbols->Next(1, compiland_details, &fetched);
505 E : DCHECK_EQ(S_OK, hr);
506 E : DCHECK_EQ(1u, fetched);
507 E : return true;
508 E : }
509 :
// Stores information regarding known compilers.
struct KnownCompilerInfo {
  // Compiler name to be compared against the name reported for a compiland.
  // Points at a string literal, hence const.
  const wchar_t* compiler_name;
  // Whether compilands produced by this compiler are supported.
  bool supported;
};

// A list of known compilers, and their status as being supported or not.
// The table is read-only.
const KnownCompilerInfo kKnownCompilerInfos[] = {
  { L"Microsoft (R) Macro Assembler", false },
  { L"Microsoft (R) Optimizing Compiler", true },
  { L"Microsoft (R) LINK", false }
};
522 :
523 : // Given a compiland, determines whether the compiler used is one of those that
524 : // we whitelist.
525 E : bool IsBuiltBySupportedCompiler(IDiaSymbol* compiland) {
526 E : DCHECK(compiland != NULL);
527 E : DCHECK(IsSymTag(compiland, SymTagCompiland));
528 :
529 E : ScopedComPtr<IDiaSymbol> compiland_details;
530 : if (!GetCompilandDetailsForCompiland(compiland,
531 E : compiland_details.Receive())) {
532 : // If the compiland has no compiland details we assume the compiler is not
533 : // supported.
534 i : ScopedBstr compiland_name;
535 i : if (compiland->get_name(compiland_name.Receive()) == S_OK) {
536 i : VLOG(1) << "Compiland has no compiland details: "
537 : << com::ToString(compiland_name);
538 : }
539 i : return false;
540 : }
541 E : DCHECK(compiland_details.get() != NULL);
542 :
543 : // Get the compiler name.
544 E : ScopedBstr compiler_name;
545 E : HRESULT hr = compiland_details->get_compilerName(compiler_name.Receive());
546 E : DCHECK_EQ(S_OK, hr);
547 :
548 : // Check the compiler name against the list of known compilers.
549 E : for (size_t i = 0; i < arraysize(kKnownCompilerInfos); ++i) {
550 E : if (::wcscmp(kKnownCompilerInfos[i].compiler_name, compiler_name) == 0) {
551 E : return kKnownCompilerInfos[i].supported;
552 : }
553 E : }
554 :
555 : // Anything we don't explicitly know about is not supported.
556 i : VLOG(1) << "Encountered unknown compiler: " << compiler_name;
557 i : return false;
558 E : }
559 :
// Logs an error if @p error is true, a verbose logging message otherwise.
// The severity passed to LogMessage is LOG_ERROR or -1 accordingly, and the
// stream is only evaluated when the corresponding logging is enabled.
// @p error is parenthesized at every use so that compound expressions can be
// passed safely.
#define LOG_ERROR_OR_VLOG1(error) LAZY_STREAM( \
    ::logging::LogMessage(__FILE__, \
                          __LINE__, \
                          (error) ? ::logging::LOG_ERROR : -1).stream(), \
    ((error) ? LOG_IS_ON(ERROR) : VLOG_IS_ON(1)))
566 :
// Logs a warning if @p warn is true, a verbose logging message otherwise.
// The severity passed to LogMessage is LOG_WARNING or -1 accordingly, and the
// stream is only evaluated when the corresponding logging is enabled.
// @p warn is parenthesized at every use so that compound expressions can be
// passed safely.
#define LOG_WARNING_OR_VLOG1(warn) LAZY_STREAM( \
    ::logging::LogMessage(__FILE__, \
                          __LINE__, \
                          (warn) ? ::logging::LOG_WARNING : -1).stream(), \
    ((warn) ? LOG_IS_ON(WARNING) : VLOG_IS_ON(1)))
573 :
574 : // Sets the disassembler directive to an error if @p strict is true, otherwise
575 : // sets it to an early termination.
576 E : CallbackDirective AbortOrTerminateDisassembly(bool strict) {
577 E : if (strict)
578 i : return Disassembler::kDirectiveAbort;
579 : else
580 E : return Disassembler::kDirectiveTerminateWalk;
581 E : }
582 :
583 : // Returns true if the callback-directive is an early termination that should be
584 : // returned immediately.
585 E : bool IsFatalCallbackDirective(CallbackDirective directive) {
586 E : switch (directive) {
587 : case Disassembler::kDirectiveContinue:
588 : case Disassembler::kDirectiveTerminatePath:
589 E : return false;
590 :
591 : case Disassembler::kDirectiveTerminateWalk:
592 : case Disassembler::kDirectiveAbort:
593 i : return true;
594 :
595 : default:
596 i : NOTREACHED();
597 : }
598 :
599 i : return true;
600 E : }
601 :
602 : // Combines two callback directives. Higher codes supersede lower ones.
603 : CallbackDirective CombineCallbackDirectives(CallbackDirective d1,
604 E : CallbackDirective d2) {
605 : // This ensures that this logic remains valid. This should prevent people
606 : // from tinkering with CallbackDirective and breaking this code.
607 : COMPILE_ASSERT(Disassembler::kDirectiveContinue <
608 : Disassembler::kDirectiveTerminatePath &&
609 : Disassembler::kDirectiveTerminatePath <
610 : Disassembler::kDirectiveTerminateWalk &&
611 : Disassembler::kDirectiveTerminateWalk <
612 : Disassembler::kDirectiveAbort,
613 : callback_directive_enum_is_not_sorted);
614 E : return std::max(d1, d2);
615 E : }
616 :
617 : // Determines if the given block has a data label in the given range of bytes.
618 : bool HasDataLabelInRange(const BlockGraph::Block* block,
619 : BlockGraph::Offset offset,
620 E : BlockGraph::Size size) {
621 : BlockGraph::Block::LabelMap::const_iterator it =
622 E : block->labels().lower_bound(offset);
623 : BlockGraph::Block::LabelMap::const_iterator end =
624 E : block->labels().lower_bound(offset + size);
625 :
626 E : for (; it != end; ++it) {
627 i : if (it->second.has_attributes(BlockGraph::DATA_LABEL))
628 i : return true;
629 i : }
630 :
631 E : return false;
632 E : }
633 :
634 : void ReportPotentialNonReturningFunction(
635 : const Decomposer::IntermediateReferenceMap& refs,
636 : const BlockGraph::AddressSpace& image,
637 : const BlockGraph::Block* block,
638 : BlockGraph::Offset call_ref_offset,
639 i : const char* reason) {
640 : typedef Decomposer::IntermediateReferenceMap::const_iterator RefIter;
641 :
642 : // Try and track down the block being pointed at by the call. If this is a
643 : // computed address there will be no reference.
644 i : RefIter ref_it = refs.find(block->addr() + call_ref_offset);
645 i : if (ref_it == refs.end()) {
646 i : LOG(WARNING) << "Suspected non-returning function call from offset "
647 : << call_ref_offset << " (followed by " << reason
648 : << ") of block \"" << block->name()
649 : << "\", but target can not be tracked down.";
650 i : return;
651 : }
652 :
653 i : BlockGraph::Block* target = image.GetBlockByAddress(ref_it->second.base);
654 i : DCHECK(target != NULL);
655 :
656 : // If this was marked as non-returning, then its not suspicious.
657 i : if ((target->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
658 i : return;
659 :
660 : // If the target is a code block then this is a direct call.
661 i : if (target->type() == BlockGraph::CODE_BLOCK) {
662 i : LOG(WARNING) << "Suspected non-returning call from offset "
663 : << call_ref_offset << " (followed by " << reason
664 : << ") of block \"" << block->name() << "\" to code block \""
665 : << target->name() << "\".";
666 i : return;
667 : }
668 : // Otherwise the target is a data block and this is a memory indirect call
669 : // to a thunk.
670 i : DCHECK_EQ(BlockGraph::DATA_BLOCK, target->type());
671 :
672 : // Track down the import thunk.
673 i : RefIter thunk_ref_it = refs.find(ref_it->second.base);
674 i : DCHECK(thunk_ref_it != refs.end());
675 i : BlockGraph::Block* thunk = image.GetBlockByAddress(thunk_ref_it->second.base);
676 :
677 : // If this was marked as non-returning, then its not suspicious.
678 i : if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
679 i : return;
680 :
681 : // Otherwise, this is an import thunk. Get the module and symbol names.
682 i : LOG(WARNING) << "Suspected non-returning call from offset "
683 : << call_ref_offset << " (followed by " << reason
684 : << ") of block \"" << block->name() << "\" to import thunk \""
685 : << thunk->name() << "\".";
686 i : }
687 :
688 : void LookForNonReturningFunctions(
689 : const Decomposer::IntermediateReferenceMap& refs,
690 : const BlockGraph::AddressSpace& image,
691 : const BlockGraph::Block* block,
692 E : const Disassembler& disasm) {
693 E : bool saw_call = false;
694 E : bool saw_call_then_nop = false;
695 E : BlockGraph::Offset call_ref_offset = 0;
696 :
697 E : AbsoluteAddress end_of_last_inst;
698 : Disassembler::VisitedSpace::const_iterator inst_it =
699 E : disasm.visited().begin();
700 E : for (; inst_it != disasm.visited().end(); ++inst_it) {
701 : // Not contiguous with the last instruction? Then we're spanning a gap. If
702 : // it's an instruction then we didn't parse it; thus, we already know that
703 : // if the last instruction is a call it's to a non-returning function. So,
704 : // we only need to check for data.
705 E : if (inst_it->first.start() != end_of_last_inst) {
706 E : if (saw_call || saw_call_then_nop) {
707 E : BlockGraph::Offset offset = end_of_last_inst - disasm.code_addr();
708 E : BlockGraph::Size size = inst_it->first.start() - end_of_last_inst;
709 E : if (HasDataLabelInRange(block, offset, size))
710 : // We do not expect this to ever occur in cl.exe generated code.
711 : // However, it is entirely possible in hand-written assembly.
712 : ReportPotentialNonReturningFunction(
713 : refs, image, block, call_ref_offset,
714 i : saw_call ? "data" : "nop(s) and data");
715 : }
716 :
717 E : saw_call = false;
718 E : saw_call_then_nop = false;
719 : }
720 :
721 E : _DInst inst = { 0 };
722 E : BlockGraph::Offset offset = inst_it->first.start() - disasm.code_addr();
723 E : const uint8* code = disasm.code() + offset;
724 E : CHECK(core::DecodeOneInstruction(code, inst_it->first.size(), &inst));
725 :
726 : // Previous instruction was a call?
727 E : if (saw_call) {
728 E : if (core::IsNop(inst)) {
729 i : saw_call_then_nop = true;
730 E : } else if (core::IsDebugInterrupt(inst)) {
731 : ReportPotentialNonReturningFunction(
732 i : refs, image, block, call_ref_offset, "int3");
733 : }
734 E : saw_call = false;
735 E : } else if (saw_call_then_nop) {
736 : // The previous instructions we've seen have been a call followed by
737 : // arbitrary many nops. Look for another nop to continue the pattern.
738 i : saw_call_then_nop = core::IsNop(inst);
739 i : } else {
740 : // The previous instruction was not a call, so we're looking for one.
741 : // If this instruction is a call, remember that fact and also remember
742 : // the offset of its operand (the call target).
743 E : if (core::IsCall(inst)) {
744 E : saw_call = true;
745 : call_ref_offset = offset + inst_it->first.size() -
746 E : BlockGraph::Reference::kMaximumSize;
747 : }
748 : }
749 :
750 : // Remember the end of the last instruction we processed.
751 E : end_of_last_inst = inst_it->first.end();
752 E : }
753 :
754 : // If the last instruction was a call and we've marked that we've disassembled
755 : // past the end, then this is also a suspected non-returning function.
756 : if ((saw_call || saw_call_then_nop) &&
757 E : (block->attributes() & BlockGraph::DISASSEMBLED_PAST_END) != 0) {
758 i : const char* reason = saw_call ? "end of block" : "nop(s) and end of block";
759 : ReportPotentialNonReturningFunction(
760 i : refs, image, block, call_ref_offset, reason);
761 : }
762 E : }
763 :
764 : } // namespace
765 :
766 : Decomposer::Decomposer(const PEFile& image_file)
767 : : image_(NULL),
768 : image_file_(image_file),
769 : current_block_(NULL),
770 E : be_strict_with_current_block_(true) {
771 : // Register static initializer patterns that we know are always present.
772 : // CRT C/C++/etc initializers.
773 E : CHECK(RegisterStaticInitializerPatterns("(__x.*)_a", "(__x.*)_z"));
774 : // RTC (run-time checks) initializers (part of CRT).
775 E : CHECK(RegisterStaticInitializerPatterns("(__rtc_[it])aa", "(__rtc_[it])zz"));
776 : // ATL object map initializers.
777 : CHECK(RegisterStaticInitializerPatterns("(__pobjMapEntry)First",
778 E : "(__pobjMapEntry)Last"));
779 : // Thread-local storage template.
780 E : CHECK(RegisterStaticInitializerPatterns("(_tls_)start", "(_tls_)end"));
781 :
782 : // Register non-returning functions that for some reason the symbols lie to
783 : // us about.
784 E : CHECK(RegisterNonReturningFunction("_CxxThrowException"));
785 E : CHECK(RegisterNonReturningFunction("_longjmp"));
786 :
787 : // Register non-returning imports that we know about.
788 E : CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitProcess"));
789 E : CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitThread"));
790 E : CHECK(RegisterNonReturningImport("KERNEL32.dll", "RaiseException"));
791 E : }
792 :
793 E : bool Decomposer::Decompose(ImageLayout* image_layout) {
794 : // We start by finding the PDB path.
795 E : if (!FindAndValidatePdbPath())
796 E : return false;
797 E : DCHECK(!pdb_path_.empty());
798 :
799 : // Check if the block-graph has already been serialized into the PDB and load
800 : // it from here in this case. This allows round-trip decomposition.
801 E : bool stream_exists = false;
802 : if (LoadBlockGraphFromPdb(pdb_path_, image_file_, image_layout,
803 E : &stream_exists)) {
804 E : return true;
805 : } else {
806 : // If the stream exists but hasn't been loaded we return an error. At this
807 : // point an error message has already been logged if there was one.
808 E : if (stream_exists)
809 i : return false;
810 : }
811 :
812 : // Move on to instantiating and initializing our Debug Interface Access
813 : // session.
814 E : ScopedComPtr<IDiaDataSource> dia_source;
815 E : if (!CreateDiaSource(dia_source.Receive()))
816 i : return false;
817 :
818 : // We create the session using the PDB file directly, as we've already
819 : // validated that it matches the module.
820 E : ScopedComPtr<IDiaSession> dia_session;
821 : if (!CreateDiaSession(pdb_path_,
822 : dia_source.get(),
823 E : dia_session.Receive())) {
824 i : return false;
825 : }
826 :
827 : HRESULT hr = dia_session->put_loadAddress(
828 E : image_file_.nt_headers()->OptionalHeader.ImageBase);
829 E : if (hr != S_OK) {
830 i : LOG(ERROR) << "Failed to set the DIA load address: "
831 : << com::LogHr(hr) << ".";
832 i : return false;
833 : }
834 :
835 E : ScopedComPtr<IDiaSymbol> global;
836 E : hr = dia_session->get_globalScope(global.Receive());
837 E : if (hr != S_OK) {
838 i : LOG(ERROR) << "Failed to get the DIA global scope: "
839 : << com::LogHr(hr) << ".";
840 i : return false;
841 : }
842 :
843 E : image_ = &image_layout->blocks;
844 :
845 : // Create the sections for the image.
846 E : bool success = CreateSections();
847 :
848 : // Load FIXUP information from the PDB file. We do this early on so that we
849 : // can do accounting with references that are created later on.
850 E : if (success)
851 E : success = LoadDebugStreams(dia_session);
852 :
853 : // Create intermediate references for each fixup entry.
854 E : if (success)
855 E : success = CreateReferencesFromFixups();
856 :
857 : // Chunk out important PE image structures, like the headers and such.
858 E : PEFileParser::PEHeader header;
859 E : if (success)
860 E : success = CreatePEImageBlocksAndReferences(&header);
861 :
862 : // Parse and validate the relocation entries.
863 E : if (success)
864 E : success = ParseRelocs();
865 :
866 : // Our first round of parsing is using section contributions. This creates
867 : // both code and data blocks.
868 E : if (success)
869 E : success = CreateBlocksFromSectionContribs(dia_session);
870 :
871 : // Process the function and thunk symbols in the image. This does not create
872 : // any blocks, as all functions are covered by section contributions.
873 E : if (success)
874 E : success = ProcessCodeSymbols(global);
875 :
876 : // Process data symbols. This can cause the creation of some blocks as the
877 : // data sections are not fully covered by section contributions.
878 E : if (success)
879 E : success = ProcessDataSymbols(global);
880 :
881 : // Create labels in code blocks.
882 E : if (success)
883 E : success = CreateGlobalLabels(global);
884 :
885 : // Create gap blocks. This ensures that we have complete coverage of the
886 : // entire image.
887 E : if (success)
888 E : success = CreateGapBlocks();
889 :
890 : // Parse public symbols, augmenting code and data labels where possible.
891 : // Some public symbols land on gap blocks, so they need to have been parsed
892 : // already.
893 E : if (success)
894 E : success = ProcessPublicSymbols(global);
895 :
896 : // Parse initialization bracketing symbols. This needs to happen after
897 : // PublicSymbols have been parsed.
898 E : if (success)
899 E : success = ProcessStaticInitializers();
900 :
901 : // We know that some data blocks need to have alignment precisely preserved.
902 : // For now, we very conservatively (guaranteed to be correct, but causes many
903 : // blocks to be aligned that don't strictly need alignment) guess alignment
904 : // for each block. This must be run after static initializers have been
905 : // parsed.
906 E : if (success)
907 E : success = GuessDataBlockAlignments();
908 :
909 : // Disassemble code blocks and create PC-relative references
910 E : if (success)
911 E : success = CreateCodeReferences();
912 :
913 : // Turn the address->address format references we've created into
914 : // block->block references on the blocks in the image.
915 E : if (success)
916 E : success = FinalizeIntermediateReferences();
917 :
918 : // Everything called after this points requires the references to have been
919 : // finalized.
920 :
921 : // One way of ensuring full coverage is to check that all of the fixups
922 : // were visited during decomposition.
923 E : if (success)
924 E : success = ConfirmFixupsVisited();
925 :
926 : // Now, find and label any padding blocks.
927 E : if (success)
928 E : success = FindPaddingBlocks();
929 :
930 : // Finally, copy the image headers over to the layout.
931 E : if (success)
932 E : success = CopyHeaderToImageLayout(header.nt_headers, image_layout);
933 :
934 E : image_ = NULL;
935 :
936 E : return success;
937 E : }
938 :
939 E : bool Decomposer::FindAndValidatePdbPath() {
940 : // Manually find the PDB path if it is not specified.
941 E : if (pdb_path_.empty()) {
942 : if (!FindPdbForModule(image_file_.path(), &pdb_path_) ||
943 E : pdb_path_.empty()) {
944 i : LOG(ERROR) << "Unable to find PDB file for module: "
945 : << image_file_.path().value();
946 i : return false;
947 : }
948 : }
949 E : DCHECK(!pdb_path_.empty());
950 :
951 E : if (!file_util::PathExists(pdb_path_)) {
952 E : LOG(ERROR) << "Path not found: " << pdb_path_.value();
953 E : return false;
954 : }
955 :
956 : // Get the PDB info from the PDB file.
957 : pdb::PdbInfoHeader70 pdb_info_header;
958 E : if (!pdb::ReadPdbHeader(pdb_path_, &pdb_info_header)) {
959 i : LOG(ERROR) << "Unable to read PDB info header from PDB file: "
960 : << pdb_path_.value();
961 i : return false;
962 : }
963 :
964 : // Get the PDB info from the module.
965 E : PdbInfo pdb_info;
966 E : if (!pdb_info.Init(image_file_)) {
967 i : LOG(ERROR) << "Unable to read PDB info from PE file: "
968 : << image_file_.path().value();
969 i : return false;
970 : }
971 :
972 : // Ensure that they are consistent.
973 E : if (!pdb_info.IsConsistent(pdb_info_header)) {
974 i : LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
975 : << "module \"" << image_file_.path().value() << "\".";
976 i : return false;
977 : }
978 :
979 E : return true;
980 E : }
981 :
982 E : bool Decomposer::ProcessCodeSymbols(IDiaSymbol* global) {
983 E : if (!ProcessFunctionSymbols(global))
984 i : return false;
985 E : if (!ProcessThunkSymbols(global))
986 i : return false;
987 :
988 E : return true;
989 E : }
990 :
991 E : bool Decomposer::ProcessFunctionSymbols(IDiaSymbol* global) {
992 E : DCHECK(IsSymTag(global, SymTagExe));
993 :
994 : // Otherwise enumerate its offspring.
995 E : ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
996 : HRESULT hr = global->findChildren(SymTagFunction,
997 : NULL,
998 : nsNone,
999 E : dia_enum_symbols.Receive());
1000 E : if (hr != S_OK) {
1001 i : LOG(ERROR) << "Failed to get the DIA function enumerator: "
1002 : << com::LogHr(hr) << ".";
1003 i : return false;
1004 : }
1005 :
1006 E : LONG count = 0;
1007 E : if (dia_enum_symbols->get_Count(&count) != S_OK) {
1008 i : LOG(ERROR) << "Failed to get function enumeration length.";
1009 i : return false;
1010 : }
1011 :
1012 E : for (LONG visited = 0; visited < count; ++visited) {
1013 E : ScopedComPtr<IDiaSymbol> function;
1014 E : ULONG fetched = 0;
1015 E : hr = dia_enum_symbols->Next(1, function.Receive(), &fetched);
1016 E : if (hr != S_OK) {
1017 i : LOG(ERROR) << "Failed to enumerate functions: " << com::LogHr(hr) << ".";
1018 i : return false;
1019 : }
1020 E : if (fetched == 0)
1021 i : break;
1022 :
1023 : // Create the block representing the function.
1024 E : DCHECK(IsSymTag(function, SymTagFunction));
1025 E : if (!ProcessFunctionOrThunkSymbol(function))
1026 i : return false;
1027 E : }
1028 :
1029 E : return true;
1030 E : }
1031 :
1032 E : bool Decomposer::ProcessFunctionOrThunkSymbol(IDiaSymbol* function) {
1033 E : DCHECK(IsSymTag(function, SymTagFunction) || IsSymTag(function, SymTagThunk));
1034 :
1035 E : DWORD location_type = LocIsNull;
1036 E : HRESULT hr = E_FAIL;
1037 E : if (FAILED(hr = function->get_locationType(&location_type))) {
1038 i : LOG(ERROR) << "Failed to retrieve function address type: "
1039 : << com::LogHr(hr) << ".";
1040 i : return false;
1041 : }
1042 E : if (location_type != LocIsStatic) {
1043 i : DCHECK_EQ(static_cast<DWORD>(LocIsNull), location_type);
1044 i : return true;
1045 : }
1046 :
1047 E : DWORD rva = 0;
1048 E : ULONGLONG length = 0;
1049 E : ScopedBstr name;
1050 : if ((hr = function->get_relativeVirtualAddress(&rva)) != S_OK ||
1051 : (hr = function->get_length(&length)) != S_OK ||
1052 E : (hr = function->get_name(name.Receive())) != S_OK) {
1053 i : LOG(ERROR) << "Failed to retrieve function information: "
1054 : << com::LogHr(hr) << ".";
1055 i : return false;
1056 : }
1057 :
1058 : // Certain properties are not defined on all blocks, so the following calls
1059 : // may return S_FALSE.
1060 E : BOOL no_return = FALSE;
1061 E : if (function->get_noReturn(&no_return) != S_OK)
1062 E : no_return = FALSE;
1063 :
1064 E : BOOL has_inl_asm = FALSE;
1065 E : if (function->get_hasInlAsm(&has_inl_asm) != S_OK)
1066 E : has_inl_asm = FALSE;
1067 :
1068 E : BOOL has_eh = FALSE;
1069 E : if (function->get_hasEH(&has_eh) != S_OK)
1070 E : has_eh = FALSE;
1071 :
1072 E : BOOL has_seh = FALSE;
1073 E : if (function->get_hasSEH(&has_seh) != S_OK)
1074 E : has_seh = FALSE;
1075 :
1076 E : std::string block_name;
1077 E : if (!WideToUTF8(name, name.Length(), &block_name)) {
1078 i : LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1079 i : return false;
1080 : }
1081 :
1082 : // Find the block to which this symbol maps, and ensure it fully covers the
1083 : // symbol.
1084 E : RelativeAddress block_addr(rva);
1085 E : BlockGraph::Block* block = image_->GetBlockByAddress(block_addr);
1086 E : if (block == NULL) {
1087 i : LOG(ERROR) << "No block found for function/thunk symbol \""
1088 : << block_name << "\".";
1089 i : return false;
1090 : }
1091 E : if (block->addr() + block->size() < block_addr + length) {
1092 i : LOG(ERROR) << "Section contribution \"" << block->name() << "\" does not "
1093 : << "fully cover function/thunk symbol \"" << block_name << "\".";
1094 i : return false;
1095 : }
1096 :
1097 : // Annotate the block with a label, as this is an entry point to it. This is
1098 : // the routine that adds labels, so there should never be any collisions.
1099 E : CHECK(AddLabelToBlock(block_addr, block_name, BlockGraph::CODE_LABEL, block));
1100 :
1101 : // If we didn't get an explicit no-return flag from the symbols check our
1102 : // list of exceptions.
1103 E : if (no_return == FALSE && non_returning_functions_.count(block->name()) > 0) {
1104 E : VLOG(1) << "Forcing non-returning attribute on function \""
1105 : << block->name() << "\".";
1106 E : no_return = TRUE;
1107 : }
1108 :
1109 : // Set the block attributes.
1110 E : if (no_return == TRUE)
1111 E : block->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
1112 E : if (has_inl_asm == TRUE)
1113 E : block->set_attribute(BlockGraph::HAS_INLINE_ASSEMBLY);
1114 E : if (has_eh || has_seh)
1115 E : block->set_attribute(BlockGraph::HAS_EXCEPTION_HANDLING);
1116 :
1117 E : if (!CreateLabelsForFunction(function, block)) {
1118 i : LOG(ERROR) << "Failed to create labels for '" << block->name() << "'.";
1119 i : return false;
1120 : }
1121 :
1122 E : return true;
1123 E : }
1124 :
1125 : bool Decomposer::CreateLabelsForFunction(IDiaSymbol* function,
1126 E : BlockGraph::Block* block) {
1127 E : DCHECK(function != NULL);
1128 E : DCHECK(block != NULL);
1129 :
1130 : // Lookup the block address.
1131 E : RelativeAddress block_addr;
1132 E : if (!image_->GetAddressOf(block, &block_addr)) {
1133 i : NOTREACHED() << "Block " << block->name() << " has no address.";
1134 i : return false;
1135 : }
1136 :
1137 : // Enumerate all symbols which are children of function.
1138 E : ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
1139 : HRESULT hr = function->findChildren(SymTagNull,
1140 : NULL,
1141 : nsNone,
1142 E : dia_enum_symbols.Receive());
1143 E : if (FAILED(hr)) {
1144 i : LOG(ERROR) << "Failed to get the DIA label enumerator: "
1145 : << com::LogHr(hr) << ".";
1146 i : return false;
1147 : }
1148 :
1149 E : while (true) {
1150 E : ScopedComPtr<IDiaSymbol> symbol;
1151 E : ULONG fetched = 0;
1152 E : hr = dia_enum_symbols->Next(1, symbol.Receive(), &fetched);
1153 E : if (FAILED(hr)) {
1154 i : LOG(ERROR) << "Failed to enumerate the DIA symbol: "
1155 : << com::LogHr(hr) << ".";
1156 i : return false;
1157 : }
1158 E : if (hr != S_OK || fetched == 0)
1159 E : break;
1160 :
1161 : // If it doesn't have an RVA then it's not interesting to us.
1162 E : DWORD temp_rva = 0;
1163 E : if (symbol->get_relativeVirtualAddress(&temp_rva) != S_OK)
1164 E : continue;
1165 :
1166 : // Get the type of symbol we're looking at.
1167 E : DWORD temp_sym_tag = 0;
1168 E : if (symbol->get_symTag(&temp_sym_tag) != S_OK) {
1169 i : LOG(ERROR) << "Failed to retrieve label information.";
1170 i : return false;
1171 : }
1172 :
1173 E : enum SymTagEnum sym_tag = static_cast<enum SymTagEnum>(temp_sym_tag);
1174 E : BlockGraph::LabelAttributes label_attr = SymTagToLabelAttributes(sym_tag);
1175 :
1176 : // TODO(rogerm): Add a flag to include/exclude the symbol types that are
1177 : // interesting for debugging purposes, but not actually needed for
1178 : // decomposition: FuncDebugStart/End, Block, etc.
1179 :
1180 : // We ignore labels that fall outside of the code block. We sometimes
1181 : // get labels at the end of a code block, and if the binary has any OMAP
1182 : // information these follow the original successor block, and they can
1183 : // end up most anywhere in the binary.
1184 E : RelativeAddress label_rva(temp_rva);
1185 E : if (label_rva < block_addr || label_rva >= block_addr + block->size())
1186 E : continue;
1187 :
1188 : // Extract the symbol's name.
1189 E : std::string label_name;
1190 : {
1191 E : ScopedBstr temp_name;
1192 : if (symbol->get_name(temp_name.Receive()) == S_OK &&
1193 E : !WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1194 i : LOG(ERROR) << "Failed to convert label name to UTF8.";
1195 i : return false;
1196 : }
1197 E : }
1198 :
1199 : // Not all symbols have a name, if we've found one without a name, make
1200 : // one up.
1201 E : BlockGraph::Offset offset = label_rva - block_addr;
1202 E : if (label_name.empty()) {
1203 E : switch (sym_tag) {
1204 : case SymTagFuncDebugStart: {
1205 E : label_name = "<debug-start>";
1206 E : break;
1207 : }
1208 :
1209 : case SymTagFuncDebugEnd: {
1210 E : label_name = "<debug-end>";
1211 E : break;
1212 : }
1213 :
1214 : case SymTagData: {
1215 E : if (reloc_set_.count(label_rva)) {
1216 E : label_name = base::StringPrintf("<jump-table-%d>", offset);
1217 E : label_attr |= BlockGraph::JUMP_TABLE_LABEL;
1218 E : } else {
1219 E : label_name = base::StringPrintf("<case-table-%d>", offset);
1220 E : label_attr |= BlockGraph::CASE_TABLE_LABEL;
1221 : }
1222 E : break;
1223 : }
1224 :
1225 : case SymTagBlock: {
1226 E : label_name = "<scope-start>";
1227 E : break;
1228 : }
1229 :
1230 : #if _MSC_VER >= 1600
1231 : // The DIA SDK shipping with MSVS 2010 includes additional symbol types.
1232 : case SymTagCallSite: {
1233 : label_name = "<call-site>";
1234 : break;
1235 : }
1236 : #endif
1237 :
1238 : default: {
1239 i : LOG(WARNING) << "Unexpected symbol type " << sym_tag << " in "
1240 : << block->name() << " at "
1241 : << base::StringPrintf("0x%08X.", label_rva.value());
1242 i : label_name = base::StringPrintf("<anonymous-%d>", sym_tag);
1243 : }
1244 : }
1245 : }
1246 :
1247 : // We expect that we'll never see a code label that refers to a reloc.
1248 : // This happens sometimes, however, as we generally get a code label for
1249 : // the first byte after a switch statement. This can sometimes land on the
1250 : // following jump table.
1251 E : if ((label_attr & BlockGraph::CODE_LABEL) && reloc_set_.count(label_rva)) {
1252 E : VLOG(1) << "Collision between reloc and code label in "
1253 : << block->name() << " at " << label_name
1254 : << base::StringPrintf(" (0x%08X).", label_rva.value())
1255 : << " Falling back to data label.";
1256 E : label_attr = BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL;
1257 E : DCHECK_EQ(block_addr, block->addr());
1258 E : BlockGraph::Label label;
1259 : if (block->GetLabel(offset, &label) &&
1260 E : !label.has_attributes(BlockGraph::DATA_LABEL)) {
1261 i : VLOG(1) << block->name() << ": Replacing label " << label.name()
1262 : << " ("
1263 : << BlockGraph::LabelAttributesToString(label.attributes())
1264 : << ") at offset " << offset << ".";
1265 i : block->RemoveLabel(offset);
1266 : }
1267 E : }
1268 :
1269 : // Add the label to the block.
1270 E : if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1271 i : LOG(ERROR) << "Failed to add label to code block.";
1272 i : return false;
1273 : }
1274 :
1275 : // Is this a scope? Then it also has a length. Use it to create the matching
1276 : // scope end.
1277 E : if (sym_tag == SymTagBlock) {
1278 E : ULONGLONG length = 0;
1279 E : if (symbol->get_length(&length) != S_OK) {
1280 i : LOG(ERROR) << "Failed to extract code scope length for "
1281 : << block->name();
1282 i : return false;
1283 : }
1284 E : label_rva += length;
1285 E : label_name = "<scope-end>";
1286 E : label_attr = BlockGraph::SCOPE_END_LABEL;
1287 E : if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1288 i : LOG(ERROR) << "Failed to add label to code block.";
1289 i : return false;
1290 : }
1291 : }
1292 E : }
1293 :
1294 E : return true;
1295 E : }
1296 :
1297 E : bool Decomposer::ProcessThunkSymbols(IDiaSymbol* globals) {
1298 E : ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1299 : HRESULT hr = globals->findChildren(SymTagCompiland,
1300 : NULL,
1301 : nsNone,
1302 E : enum_compilands.Receive());
1303 E : if (FAILED(hr)) {
1304 i : LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1305 : << com::LogHr(hr) << ".";
1306 i : return false;
1307 : }
1308 :
1309 E : while (true) {
1310 E : ScopedComPtr<IDiaSymbol> compiland;
1311 E : ULONG fetched = 0;
1312 E : hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1313 E : if (FAILED(hr)) {
1314 i : LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1315 : << com::LogHr(hr) << ".";
1316 i : return false;
1317 : }
1318 E : if (hr != S_OK || fetched == 0)
1319 E : break;
1320 :
1321 E : ScopedComPtr<IDiaEnumSymbols> enum_thunks;
1322 : hr = compiland->findChildren(SymTagThunk,
1323 : NULL,
1324 : nsNone,
1325 E : enum_thunks.Receive());
1326 E : if (FAILED(hr)) {
1327 i : LOG(ERROR) << "Failed to retrieve thunk enumerator: "
1328 : << com::LogHr(hr) << ".";
1329 i : return false;
1330 : }
1331 :
1332 E : while (true) {
1333 E : ScopedComPtr<IDiaSymbol> thunk;
1334 E : hr = enum_thunks->Next(1, thunk.Receive(), &fetched);
1335 E : if (FAILED(hr)) {
1336 i : LOG(ERROR) << "Failed to enumerate thunk enumerator: "
1337 : << com::LogHr(hr) << ".";
1338 i : return false;
1339 : }
1340 E : if (hr != S_OK || fetched == 0)
1341 E : break;
1342 :
1343 :
1344 E : DCHECK(IsSymTag(thunk, SymTagThunk));
1345 :
1346 E : if (!ProcessFunctionOrThunkSymbol(thunk))
1347 i : return false;
1348 E : }
1349 E : }
1350 :
1351 E : return true;
1352 E : }
1353 :
1354 E : bool Decomposer::CreateGlobalLabels(IDiaSymbol* globals) {
1355 E : ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1356 : HRESULT hr = globals->findChildren(SymTagCompiland,
1357 : NULL,
1358 : nsNone,
1359 E : enum_compilands.Receive());
1360 E : if (FAILED(hr)) {
1361 i : LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1362 : << com::LogHr(hr) << ".";
1363 i : return false;
1364 : }
1365 :
1366 E : while (true) {
1367 E : ScopedComPtr<IDiaSymbol> compiland;
1368 E : ULONG fetched = 0;
1369 E : hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1370 E : if (FAILED(hr)) {
1371 i : LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1372 : << com::LogHr(hr) << ".";
1373 i : return false;
1374 : }
1375 E : if (hr != S_OK || fetched == 0)
1376 E : break;
1377 :
1378 E : ScopedComPtr<IDiaEnumSymbols> enum_labels;
1379 : hr = compiland->findChildren(SymTagLabel,
1380 : NULL,
1381 : nsNone,
1382 E : enum_labels.Receive());
1383 E : if (FAILED(hr)) {
1384 i : LOG(ERROR) << "Failed to retrieve label enumerator: "
1385 : << com::LogHr(hr) << ".";
1386 i : return false;
1387 : }
1388 :
1389 E : while (true) {
1390 E : ScopedComPtr<IDiaSymbol> label;
1391 E : hr = enum_labels->Next(1, label.Receive(), &fetched);
1392 E : if (FAILED(hr)) {
1393 i : LOG(ERROR) << "Failed to enumerate label enumerator: "
1394 : << com::LogHr(hr) << ".";
1395 i : return false;
1396 : }
1397 E : if (hr != S_OK || fetched == 0)
1398 E : break;
1399 :
1400 E : DCHECK(IsSymTag(label, SymTagLabel));
1401 :
1402 E : DWORD addr = 0;
1403 E : ScopedBstr temp_name;
1404 : if (label->get_relativeVirtualAddress(&addr) != S_OK ||
1405 E : label->get_name(temp_name.Receive()) != S_OK) {
1406 i : LOG(ERROR) << "Failed to retrieve label address or name.";
1407 i : return false;
1408 : }
1409 :
1410 E : std::string label_name;
1411 E : if (!WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1412 i : LOG(ERROR) << "Failed to convert label name to UTF8.";
1413 i : return false;
1414 : }
1415 :
1416 E : RelativeAddress label_addr(addr);
1417 E : BlockGraph::Block* block = image_->GetBlockByAddress(label_addr);
1418 E : if (block == NULL) {
1419 i : LOG(ERROR) << "No block for label " << label_name << " at " << addr;
1420 i : return false;
1421 : }
1422 :
1423 : if (!AddLabelToBlock(label_addr,
1424 : label_name,
1425 : BlockGraph::CODE_LABEL,
1426 E : block)) {
1427 i : LOG(ERROR) << "Failed to add label to code block.";
1428 i : return false;
1429 : }
1430 E : }
1431 E : }
1432 :
1433 E : return true;
1434 E : }
1435 :
1436 : bool Decomposer::CreateGapBlock(BlockGraph::BlockType block_type,
1437 : RelativeAddress address,
1438 E : BlockGraph::Size size) {
1439 : BlockGraph::Block* block = FindOrCreateBlock(block_type, address, size,
1440 : StringPrintf("Gap Block 0x%08X", address.value()).c_str(),
1441 E : kExpectNoBlock);
1442 E : if (block == NULL) {
1443 i : LOG(ERROR) << "Unable to create gap block.";
1444 i : return false;
1445 : }
1446 E : block->set_attribute(BlockGraph::GAP_BLOCK);
1447 :
1448 E : return true;
1449 E : }
1450 :
1451 : bool Decomposer::CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
1452 E : BlockGraph::BlockType block_type) {
1453 E : RelativeAddress section_begin(header->VirtualAddress);
1454 E : RelativeAddress section_end(section_begin + header->Misc.VirtualSize);
1455 : RelativeAddress image_end(
1456 E : image_file_.nt_headers()->OptionalHeader.SizeOfImage);
1457 :
1458 : // Search for the first and last blocks interesting from the start and end
1459 : // of the section to the end of the image.
1460 : BlockGraph::AddressSpace::RangeMap::const_iterator it(
1461 : image_->address_space_impl().FindFirstIntersection(
1462 : BlockGraph::AddressSpace::Range(section_begin,
1463 E : image_end - section_begin)));
1464 : BlockGraph::AddressSpace::RangeMap::const_iterator end(
1465 : image_->address_space_impl().FindFirstIntersection(
1466 : BlockGraph::AddressSpace::Range(section_end,
1467 E : image_end - section_end)));
1468 :
1469 : // The whole section is missing. Cover it with one gap block.
1470 E : if (it == end)
1471 : return CreateGapBlock(
1472 i : block_type, section_begin, section_end - section_begin);
1473 :
1474 : // Create the head gap block if need be.
1475 E : if (section_begin < it->first.start())
1476 : if (!CreateGapBlock(
1477 i : block_type, section_begin, it->first.start() - section_begin))
1478 i : return false;
1479 :
1480 : // Now iterate the blocks and fill in gaps.
1481 E : for (; it != end; ++it) {
1482 E : const BlockGraph::Block* block = it->second;
1483 E : DCHECK(block != NULL);
1484 E : RelativeAddress block_end = it->first.start() + block->size();
1485 E : if (block_end >= section_end)
1486 E : break;
1487 :
1488 : // Walk to the next address in turn.
1489 E : BlockGraph::AddressSpace::RangeMap::const_iterator next = it;
1490 E : ++next;
1491 E : if (next == end) {
1492 : // We're at the end of the list. Create the tail gap block.
1493 E : DCHECK_GT(section_end, block_end);
1494 E : if (!CreateGapBlock(block_type, block_end, section_end - block_end))
1495 i : return false;
1496 E : break;
1497 : }
1498 :
1499 : // Create the interstitial gap block.
1500 E : if (block_end < next->first.start())
1501 : if (!CreateGapBlock(
1502 E : block_type, block_end, next->first.start() - block_end))
1503 i : return false;
1504 E : }
1505 :
1506 E : return true;
1507 E : }
1508 :
1509 E : bool Decomposer::CreateGapBlocks() {
1510 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1511 :
1512 : // Iterate through all the image sections.
1513 E : for (size_t i = 0; i < num_sections; ++i) {
1514 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1515 E : DCHECK(header != NULL);
1516 :
1517 E : BlockGraph::BlockType type = BlockGraph::CODE_BLOCK;
1518 E : const char* section_type = NULL;
1519 E : switch (GetSectionType(header)) {
1520 : case kSectionCode:
1521 E : type = BlockGraph::CODE_BLOCK;
1522 E : section_type = "code";
1523 E : break;
1524 :
1525 : case kSectionData:
1526 E : type = BlockGraph::DATA_BLOCK;
1527 E : section_type = "data";
1528 E : break;
1529 :
1530 : default:
1531 i : continue;
1532 : }
1533 :
1534 E : if (!CreateSectionGapBlocks(header, type)) {
1535 i : LOG(ERROR) << "Unable to create gap blocks for " << section_type
1536 : << " section \"" << header->Name << "\".";
1537 i : return false;
1538 : }
1539 E : }
1540 :
1541 E : return true;
1542 E : }
1543 :
1544 : bool Decomposer::AddReferenceCallback(RelativeAddress src_addr,
1545 : BlockGraph::ReferenceType type,
1546 : BlockGraph::Size size,
1547 E : RelativeAddress dst_addr) {
1548 : // This is only called by the PEFileParser, and it creates some references
1549 : // for which there are no corresponding fixup entries.
1550 : return ValidateOrAddReference(FIXUP_MAY_EXIST, src_addr, type, size, dst_addr,
1551 E : 0, &fixup_map_, &references_);
1552 E : }
1553 :
1554 E : bool Decomposer::ParseRelocs() {
1555 E : if (!image_file_.DecodeRelocs(&reloc_set_)) {
1556 i : LOG(ERROR) << "Unable to decode image relocs.";
1557 i : return false;
1558 : }
1559 :
1560 E : PEFile::RelocMap reloc_map;
1561 E : if (!image_file_.ReadRelocs(reloc_set_, &reloc_map)) {
1562 i : LOG(ERROR) << "Unable to read image relocs.";
1563 i : return false;
1564 : }
1565 :
1566 : // Get a set of relocation destinations. These are effectively 'references'
1567 : // to labels, and will be used to weed out unreferenced labels.
1568 E : PEFile::RelocMap::const_iterator it = reloc_map.begin();
1569 E : for (; it != reloc_map.end(); ++it) {
1570 E : RelativeAddress rva;
1571 E : if (!image_file_.Translate(it->second, &rva)) {
1572 i : LOG(ERROR) << "Unable to translate absolute address to relative: "
1573 : << it->second;
1574 i : return false;
1575 : }
1576 E : reloc_refs_.insert(rva);
1577 E : }
1578 :
1579 : // Validate each relocation entry against the corresponding fixup entry.
1580 E : if (!ValidateRelocs(reloc_map))
1581 i : return false;
1582 :
1583 E : return true;
1584 E : }
1585 :
1586 E : bool Decomposer::CreateReferencesFromFixups() {
1587 E : FixupMap::const_iterator it(fixup_map_.begin());
1588 E : for (; it != fixup_map_.end(); ++it) {
1589 E : RelativeAddress src_addr(it->second.location);
1590 E : uint32 data = 0;
1591 E : if (!image_file_.ReadImage(src_addr, &data, sizeof(data))) {
1592 i : LOG(ERROR) << "Unable to read image data for fixup with source at "
1593 : << src_addr;
1594 i : return false;
1595 : }
1596 :
1597 E : RelativeAddress dst_addr;
1598 E : switch (it->second.type) {
1599 : case BlockGraph::PC_RELATIVE_REF: {
1600 E : dst_addr = src_addr + kPointerSize + data;
1601 E : break;
1602 : }
1603 :
1604 : case BlockGraph::ABSOLUTE_REF: {
1605 E : AbsoluteAddress dst_addr_abs(data);
1606 E : bool success = image_file_.Translate(dst_addr_abs, &dst_addr);
1607 E : DCHECK_EQ(true, success);
1608 E : break;
1609 : }
1610 :
1611 : case BlockGraph::RELATIVE_REF: {
1612 E : dst_addr = RelativeAddress(data);
1613 E : break;
1614 : }
1615 :
1616 : default: {
1617 i : NOTREACHED() << "Invalid reference type.";
1618 : break;
1619 : }
1620 : }
1621 :
1622 E : RelativeAddress dst_base(it->second.base);
1623 E : BlockGraph::Offset dst_offset = dst_addr - dst_base;
1624 : if (!AddReference(src_addr, it->second.type, kPointerSize, dst_base,
1625 E : dst_offset, &references_)) {
1626 i : return false;
1627 : }
1628 E : }
1629 :
1630 E : return true;
1631 E : }
1632 :
1633 E : bool Decomposer::ValidateRelocs(const PEFile::RelocMap& reloc_map) {
1634 E : PEFile::RelocMap::const_iterator it(reloc_map.begin());
1635 E : PEFile::RelocMap::const_iterator end(reloc_map.end());
1636 E : for (; it != end; ++it) {
1637 E : RelativeAddress src(it->first);
1638 E : RelativeAddress dst;
1639 E : if (!image_file_.Translate(it->second, &dst)) {
1640 i : LOG(ERROR) << "Unable to translate relocation destination.";
1641 i : return false;
1642 : }
1643 :
1644 : if (!ValidateOrAddReference(FIXUP_MUST_EXIST, src, BlockGraph::ABSOLUTE_REF,
1645 E : sizeof(dst), dst, 0, &fixup_map_, &references_))
1646 i : return false;
1647 E : }
1648 :
1649 E : return true;
1650 E : }
1651 :
1652 E : bool Decomposer::CreateBlocksFromSectionContribs(IDiaSession* session) {
1653 E : ScopedComPtr<IDiaEnumSectionContribs> section_contribs;
1654 : SearchResult search_result = FindDiaTable(session,
1655 E : section_contribs.Receive());
1656 E : if (search_result != kSearchSucceeded) {
1657 i : if (search_result == kSearchFailed)
1658 i : LOG(ERROR) << "No section contribution table found.";
1659 i : return false;
1660 : }
1661 :
1662 E : size_t rsrc_id = image_file_.GetSectionIndex(kResourceSectionName);
1663 :
1664 E : LONG count = 0;
1665 E : if (section_contribs->get_Count(&count) != S_OK) {
1666 i : LOG(ERROR) << "Failed to get section contributions enumeration length.";
1667 i : return false;
1668 : }
1669 :
1670 E : for (LONG visited = 0; visited < count; ++visited) {
1671 E : ScopedComPtr<IDiaSectionContrib> section_contrib;
1672 E : ULONG fetched = 0;
1673 E : HRESULT hr = section_contribs->Next(1, section_contrib.Receive(), &fetched);
1674 E : if (hr != S_OK) {
1675 i : LOG(ERROR) << "Failed to get DIA section contribution: "
1676 : << com::LogHr(hr) << ".";
1677 i : return false;
1678 : }
1679 E : if (fetched == 0)
1680 i : break;
1681 :
1682 E : hr = E_FAIL;
1683 E : DWORD rva = 0;
1684 E : DWORD length = 0;
1685 E : DWORD section_id = 0;
1686 E : BOOL code = FALSE;
1687 E : ScopedComPtr<IDiaSymbol> compiland;
1688 E : ScopedBstr bstr_name;
1689 : if ((hr = section_contrib->get_relativeVirtualAddress(&rva)) != S_OK ||
1690 : (hr = section_contrib->get_length(&length)) != S_OK ||
1691 : (hr = section_contrib->get_addressSection(§ion_id)) != S_OK ||
1692 : (hr = section_contrib->get_code(&code)) != S_OK ||
1693 : (hr = section_contrib->get_compiland(compiland.Receive())) != S_OK ||
1694 E : (hr = compiland->get_name(bstr_name.Receive())) != S_OK) {
1695 i : LOG(ERROR) << "Failed to get section contribution properties: "
1696 : << com::LogHr(hr) << ".";
1697 i : return false;
1698 : }
1699 :
1700 : // Determine if this function was built by a supported compiler.
1701 : bool is_built_by_supported_compiler =
1702 E : IsBuiltBySupportedCompiler(compiland.get());
1703 :
1704 : // DIA numbers sections from 1 to n, while we do 0 to n - 1.
1705 E : DCHECK_LT(0u, section_id);
1706 E : --section_id;
1707 :
1708 : // We don't parse the resource section, as it is parsed by the PEFileParser.
1709 E : if (section_id == rsrc_id)
1710 i : continue;
1711 :
1712 E : std::string name;
1713 E : if (!WideToUTF8(bstr_name, bstr_name.Length(), &name)) {
1714 i : LOG(ERROR) << "Failed to convert compiland name to UTF8.";
1715 i : return false;
1716 : }
1717 :
1718 : // Create the block.
1719 : BlockGraph::BlockType block_type =
1720 E : code ? BlockGraph::CODE_BLOCK : BlockGraph::DATA_BLOCK;
1721 : BlockGraph::Block* block = FindOrCreateBlock(block_type,
1722 : RelativeAddress(rva),
1723 : length,
1724 : name.c_str(),
1725 E : kExpectNoBlock);
1726 E : if (block == NULL) {
1727 i : LOG(ERROR) << "Unable to create block.";
1728 i : return false;
1729 : }
1730 :
1731 : // Set the block attributes.
1732 E : block->set_attribute(BlockGraph::SECTION_CONTRIB);
1733 E : if (!is_built_by_supported_compiler)
1734 E : block->set_attribute(BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER);
1735 E : }
1736 :
1737 E : return true;
1738 E : }
1739 :
1740 : DiaBrowser::BrowserDirective Decomposer::OnDataSymbol(
1741 : const DiaBrowser& dia_browser,
1742 : const DiaBrowser::SymTagVector& sym_tags,
1743 E : const DiaBrowser::SymbolPtrVector& symbols) {
1744 E : DCHECK_LT(0u, sym_tags.size());
1745 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1746 E : DCHECK_EQ(SymTagData, sym_tags.back());
1747 :
1748 E : const DiaBrowser::SymbolPtr& data(symbols.back());
1749 :
1750 E : HRESULT hr = E_FAIL;
1751 E : DWORD location_type = LocIsNull;
1752 E : DWORD rva = 0;
1753 E : ScopedBstr name_bstr;
1754 : if (FAILED(hr = data->get_locationType(&location_type)) ||
1755 : FAILED(hr = data->get_relativeVirtualAddress(&rva)) ||
1756 E : FAILED(hr = data->get_name(name_bstr.Receive()))) {
1757 i : LOG(ERROR) << "Failed to get data properties: " << com::LogHr(hr) << ".";
1758 i : return DiaBrowser::kBrowserAbort;
1759 : }
1760 :
1761 : // We only parse data symbols with static storage.
1762 E : if (location_type != LocIsStatic)
1763 E : return DiaBrowser::kBrowserContinue;
1764 :
1765 : // Symbols with an address of zero are essentially invalid. They appear to
1766 : // have been optimized away by the compiler, but they are still reported.
1767 E : if (rva == 0)
1768 E : return DiaBrowser::kBrowserContinue;
1769 :
1770 : // TODO(chrisha): We eventually want to get alignment info from the type
1771 : // information. This is strictly a lower bound, however, as certain
1772 : // data may be used in instructions that impose stricter alignment
1773 : // requirements.
1774 E : size_t length = 0;
1775 E : if (!GetTypeInfo(data, &length)) {
1776 i : return DiaBrowser::kBrowserAbort;
1777 : }
1778 : // Zero-length data symbols act as 'forward declares' in some sense. They
1779 : // are always followed by a non-zero length data symbol with the same name
1780 : // and location.
1781 E : if (length == 0)
1782 E : return DiaBrowser::kBrowserContinue;
1783 :
1784 E : RelativeAddress addr(rva);
1785 E : std::string name;
1786 E : if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1787 i : LOG(ERROR) << "Failed to convert data symbol name to UTF8.";
1788 i : return DiaBrowser::kBrowserAbort;
1789 : }
1790 :
1791 : BlockGraph::Block* block = FindOrCreateBlock(BlockGraph::DATA_BLOCK,
1792 : addr, length, name.c_str(),
1793 E : kAllowCoveringBlock);
1794 :
1795 E : if (block->type() == BlockGraph::CODE_BLOCK) {
1796 : // The NativeClient bits of chrome.dll consists of hand-written assembly
1797 : // that is compiled using a custom non-Microsoft toolchain. Unfortunately
1798 : // for us this toolchain emits 1-byte data symbols instead of code labels.
1799 : static const char kNaClPrefix[] = "NaCl";
1800 : if (length == 1 &&
1801 E : name.compare(0, arraysize(kNaClPrefix) - 1, kNaClPrefix) == 0) {
1802 i : if (!AddLabelToBlock(addr, name, BlockGraph::CODE_LABEL, block)) {
1803 i : LOG(ERROR) << "Failed to add label to code block.";
1804 i : return DiaBrowser::kBrowserAbort;
1805 : }
1806 :
1807 i : return DiaBrowser::kBrowserContinue;
1808 : }
1809 : }
1810 :
1811 E : if (!AddLabelToBlock(addr, name, BlockGraph::DATA_LABEL, block)) {
1812 i : LOG(ERROR) << "Failed to add data label to block.";
1813 i : return DiaBrowser::kBrowserAbort;
1814 : }
1815 :
1816 E : return DiaBrowser::kBrowserContinue;
1817 E : }
1818 :
1819 : DiaBrowser::BrowserDirective Decomposer::OnPublicSymbol(
1820 : const DiaBrowser& dia_browser,
1821 : const DiaBrowser::SymTagVector& sym_tags,
1822 E : const DiaBrowser::SymbolPtrVector& symbols) {
1823 E : DCHECK_LT(0u, sym_tags.size());
1824 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1825 E : DCHECK_EQ(SymTagPublicSymbol, sym_tags.back());
1826 E : const DiaBrowser::SymbolPtr& symbol(symbols.back());
1827 :
1828 : // We don't care about symbols that don't have addresses.
1829 E : DWORD rva = 0;
1830 E : if (S_OK != symbol->get_relativeVirtualAddress(&rva))
1831 E : return DiaBrowser::kBrowserContinue;
1832 :
1833 E : ScopedBstr name_bstr;
1834 E : if (S_OK != symbol->get_name(name_bstr.Receive())) {
1835 i : LOG(ERROR) << "Failed to get public symbol name.";
1836 i : return DiaBrowser::kBrowserAbort;
1837 : }
1838 :
1839 E : std::string name;
1840 E : if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1841 i : LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1842 i : return DiaBrowser::kBrowserAbort;
1843 : }
1844 :
1845 E : RelativeAddress addr(rva);
1846 E : BlockGraph::Block* block = image_->GetBlockByAddress(addr);
1847 E : if (block == NULL) {
1848 i : LOG(ERROR) << "No block found for public symbol \"" << name << "\".";
1849 i : return DiaBrowser::kBrowserAbort;
1850 : }
1851 :
1852 : // Public symbol names are mangled. Remove leading '_' as per
1853 : // http://msdn.microsoft.com/en-us/library/00kh39zz(v=vs.80).aspx
1854 E : if (name[0] == '_')
1855 E : name = name.substr(1);
1856 :
1857 : // Set the block name or add a label. For code blocks these are entry points,
1858 : // while for data blocks these are simply to aid debugging.
1859 : BlockGraph::LabelAttributes label_attributes =
1860 : block->type() == BlockGraph::CODE_BLOCK ? BlockGraph::CODE_LABEL :
1861 E : BlockGraph::DATA_LABEL;
1862 E : if (!AddLabelToBlock(addr, name, label_attributes, block))
1863 i : return DiaBrowser::kBrowserAbort;
1864 :
1865 E : return DiaBrowser::kBrowserContinue;
1866 E : }
1867 :
1868 E : bool Decomposer::ProcessStaticInitializers() {
1869 : typedef std::pair<RelativeAddress, RelativeAddress> AddressPair;
1870 : typedef std::map<std::string, AddressPair> AddressPairMap;
1871 :
1872 E : const RelativeAddress kNull(0);
1873 :
1874 : // This stores pairs of addresses, representing the beginning and the end
1875 : // of each static initializer block. It is keyed with a string, which is
1876 : // returned by the match group of the corresponding initializer pattern.
1877 : // The key is necessary to correlate matching labels (as multiple pairs
1878 : // of labels may match through a single pattern).
1879 E : AddressPairMap addr_pair_map;
1880 :
1881 : // Used for keeping track of which label, if any, we matched.
1882 : enum MatchType {
1883 : kMatchNone,
1884 : kMatchBeginLabel,
1885 : kMatchEndLabel
1886 : };
1887 :
1888 : // Iterate through all data blocks, looking for known initializer labels.
1889 E : BlockGraph::AddressSpace::RangeMapConstIter block_it = image_->begin();
1890 E : for (; block_it != image_->end(); ++block_it) {
1891 E : const BlockGraph::Block* block = block_it->second;
1892 : // Skip non-data blocks.
1893 E : if (block->type() != BlockGraph::DATA_BLOCK)
1894 E : continue;
1895 :
1896 : // Check the block name against each of the initializer patterns.
1897 E : MatchType match = kMatchNone;
1898 E : std::string block_name = block->name();
1899 E : std::string name;
1900 E : for (size_t i = 0; i < static_initializer_patterns_.size(); ++i) {
1901 E : REPair& re_pair(static_initializer_patterns_[i]);
1902 E : if (re_pair.first.FullMatch(block_name, &name))
1903 E : match = kMatchBeginLabel;
1904 E : else if (re_pair.second.FullMatch(block_name, &name))
1905 E : match = kMatchEndLabel;
1906 :
1907 E : if (match != kMatchNone)
1908 E : break;
1909 E : }
1910 :
1911 : // No pattern matched this symbol? Continue to the next one.
1912 E : if (match == kMatchNone)
1913 E : continue;
1914 :
1915 : // Ensure this symbol exists in the map. Thankfully, addresses default
1916 : // construct to NULL.
1917 E : AddressPair& addr_pair = addr_pair_map[name];
1918 :
1919 : // Update the bracketing symbol endpoint. Make sure each symbol endpoint
1920 : // is only seen once.
1921 E : RelativeAddress* addr = NULL;
1922 E : RelativeAddress new_addr;
1923 E : if (match == kMatchBeginLabel) {
1924 E : addr = &addr_pair.first;
1925 E : new_addr = block->addr();
1926 E : } else {
1927 E : addr = &addr_pair.second;
1928 E : new_addr = block->addr() + block->size();
1929 : }
1930 E : if (*addr != kNull) {
1931 i : LOG(ERROR) << "Bracketing symbol appears multiple times: "
1932 : << block_name;
1933 i : return false;
1934 : }
1935 E : *addr = new_addr;
1936 E : }
1937 :
1938 : // Use the bracketing symbols to make the initializers contiguous.
1939 E : AddressPairMap::const_iterator init_it = addr_pair_map.begin();
1940 E : for (; init_it != addr_pair_map.end(); ++init_it) {
1941 E : RelativeAddress begin_addr = init_it->second.first;
1942 E : if (begin_addr == kNull) {
1943 i : LOG(ERROR) << "Bracketing start symbol missing: " << init_it->first;
1944 i : return false;
1945 : }
1946 :
1947 E : RelativeAddress end_addr = init_it->second.second;
1948 E : if (end_addr == kNull) {
1949 i : LOG(ERROR) << "Bracketing end symbol missing: " << init_it->first;
1950 i : return false;
1951 : }
1952 :
1953 E : if (begin_addr > end_addr) {
1954 i : LOG(ERROR) << "Bracketing symbols out of order: " << init_it->first;
1955 i : return false;
1956 : }
1957 :
1958 : // Merge the initializers.
1959 E : DataSpace::Range range(begin_addr, end_addr - begin_addr);
1960 E : BlockGraph::Block* merged = image_->MergeIntersectingBlocks(range);
1961 : std::string name = StringPrintf("Bracketed Initializers: %s",
1962 E : init_it->first.c_str());
1963 E : merged->set_name(name);
1964 E : DCHECK(merged != NULL);
1965 E : }
1966 :
1967 E : return true;
1968 E : }
1969 :
1970 E : bool Decomposer::ProcessDataSymbols(IDiaSymbol* root) {
1971 : DiaBrowser::MatchCallback on_data_symbol(
1972 E : base::Bind(&Decomposer::OnDataSymbol, base::Unretained(this)));
1973 :
1974 E : DiaBrowser dia_browser;
1975 : dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagData),
1976 E : on_data_symbol);
1977 : dia_browser.AddPattern(Seq(SymTagCompiland, SymTagFunction,
1978 : Star(SymTagBlock), SymTagData),
1979 E : on_data_symbol);
1980 :
1981 E : return dia_browser.Browse(root);
1982 E : }
1983 :
1984 E : bool Decomposer::ProcessPublicSymbols(IDiaSymbol* root) {
1985 : DiaBrowser::MatchCallback on_public_symbol(
1986 E : base::Bind(&Decomposer::OnPublicSymbol, base::Unretained(this)));
1987 :
1988 E : DiaBrowser dia_browser;
1989 E : dia_browser.AddPattern(SymTagPublicSymbol, on_public_symbol);
1990 :
1991 E : return dia_browser.Browse(root);
1992 E : }
1993 :
1994 E : bool Decomposer::GuessDataBlockAlignments() {
1995 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1996 : // Iterate through all the image sections.
1997 E : for (size_t i = 0; i < num_sections; ++i) {
1998 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1999 E : DCHECK(header != NULL);
2000 :
2001 : // Only iterate through data sections.
2002 E : if (GetSectionType(header) != kSectionData)
2003 E : continue;
2004 :
2005 E : RelativeAddress section_begin(header->VirtualAddress);
2006 E : size_t section_length = header->Misc.VirtualSize;
2007 :
2008 : // Get the range of blocks in this section.
2009 : BlockGraph::AddressSpace::RangeMapIterPair it_pair =
2010 E : image_->GetIntersectingBlocks(section_begin, section_length);
2011 :
2012 : // Iterate through the blocks in the section, setting their alignment.
2013 E : BlockGraph::AddressSpace::RangeMapIter it = it_pair.first;
2014 E : for (; it != it_pair.second; ++it) {
2015 E : BlockGraph::Block* block = it->second;
2016 E : GuessDataBlockAlignment(block);
2017 E : }
2018 E : }
2019 :
2020 E : return true;
2021 E : }
2022 :
2023 E : bool Decomposer::CreateCodeReferences() {
2024 E : BlockGraph::BlockMap::iterator it(image_->graph()->blocks_mutable().begin());
2025 E : BlockGraph::BlockMap::iterator end(image_->graph()->blocks_mutable().end());
2026 E : for (; it != end; ++it) {
2027 E : BlockGraph::Block* block = &it->second;
2028 E : if (block->type() != BlockGraph::CODE_BLOCK)
2029 E : continue;
2030 :
2031 E : if (!CreateCodeReferencesForBlock(block))
2032 i : return false;
2033 E : }
2034 :
2035 E : return true;
2036 E : }
2037 :
2038 E : bool Decomposer::CreateCodeReferencesForBlock(BlockGraph::Block* block) {
2039 E : DCHECK(current_block_ == NULL);
2040 E : current_block_ = block;
2041 :
2042 E : RelativeAddress block_addr;
2043 E : if (!image_->GetAddressOf(block, &block_addr)) {
2044 i : LOG(ERROR) << "Block \"" << block->name() << "\" has no address.";
2045 i : return false;
2046 : }
2047 :
2048 E : AbsoluteAddress abs_block_addr;
2049 E : if (!image_file_.Translate(block_addr, &abs_block_addr)) {
2050 i : LOG(ERROR) << "Unable to get absolute address for " << block_addr;
2051 i : return false;
2052 : }
2053 :
2054 : Disassembler::InstructionCallback on_instruction(
2055 E : base::Bind(&Decomposer::OnInstruction, base::Unretained(this)));
2056 :
2057 : // Use block labels and code references as starting points for disassembly.
2058 E : Disassembler::AddressSet starting_points;
2059 : GetDisassemblyStartingPoints(block, abs_block_addr, reloc_set_,
2060 E : &starting_points);
2061 :
2062 : // Determine whether or not we are being strict during disassembly.
2063 E : bool strict = block_graph::CodeBlockAttributesAreBasicBlockSafe(block);
2064 E : be_strict_with_current_block_ = strict;
2065 :
2066 : // Determine the length of the code portion of the block by trimming off any
2067 : // known trailing data. Also, if we're in strict mode, ensure that our
2068 : // assumption regarding code/data layout is met.
2069 E : size_t code_size = 0;
2070 : if (!BlockHasExpectedCodeDataLayout(block, &code_size) &&
2071 E : be_strict_with_current_block_) {
2072 i : LOG(ERROR) << "Block \"" << block->name() << "\" has unexpected code/data "
2073 : << "layout.";
2074 i : return false;
2075 : }
2076 :
2077 : // Disassemble the block.
2078 : Disassembler disasm(block->data(),
2079 : code_size,
2080 : abs_block_addr,
2081 : starting_points,
2082 E : on_instruction);
2083 E : Disassembler::WalkResult result = disasm.Walk();
2084 :
2085 : // If we're strict (that is, we're confident that the block was produced by
2086 : // cl.exe), then we can use that knowledge to look for calls that appear to be
2087 : // to non-returning functions that we may not have symbol info for.
2088 E : if (be_strict_with_current_block_)
2089 E : LookForNonReturningFunctions(references_, *image_, current_block_, disasm);
2090 :
2091 E : DCHECK_EQ(block, current_block_);
2092 E : current_block_ = NULL;
2093 E : be_strict_with_current_block_ = true;
2094 :
2095 E : switch (result) {
2096 : case Disassembler::kWalkIncomplete:
2097 : // There were computed branches that couldn't be chased down.
2098 E : block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2099 E : return true;
2100 :
2101 : case Disassembler::kWalkTerminated:
2102 : // This exit condition should only ever occur for non-strict disassembly.
2103 : // If strict, we should always get kWalkError.
2104 E : DCHECK(!strict);
2105 : // This means that they code was malformed, or broke some expected
2106 : // conventions. This code is not safe for basic block disassembly.
2107 E : block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
2108 E : return true;
2109 :
2110 : case Disassembler::kWalkSuccess:
2111 : // Were any bytes in the block not accounted for? This generally means
2112 : // unreachable code, which we see quite often, especially in debug builds.
2113 E : if (disasm.code_size() != disasm.disassembled_bytes())
2114 E : block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2115 E : return true;
2116 :
2117 : case Disassembler::kWalkError:
2118 i : return false;
2119 :
2120 : default:
2121 i : NOTREACHED() << "Unhandled Disassembler WalkResult.";
2122 i : return false;
2123 : }
2124 E : }
2125 :
2126 : BlockGraph::Block* Decomposer::CreateBlock(BlockGraph::BlockType type,
2127 : RelativeAddress address,
2128 : BlockGraph::Size size,
2129 E : const base::StringPiece& name) {
2130 E : BlockGraph::Block* block = image_->AddBlock(type, address, size, name);
2131 E : if (block == NULL) {
2132 i : LOG(ERROR) << "Unable to add block at " << address << " with size "
2133 : << size << ".";
2134 i : return NULL;
2135 : }
2136 :
2137 : // Mark the source range from whence this block originates.
2138 : bool pushed = block->source_ranges().Push(
2139 : BlockGraph::Block::DataRange(0, size),
2140 E : BlockGraph::Block::SourceRange(address, size));
2141 E : DCHECK(pushed);
2142 :
2143 E : BlockGraph::SectionId section = image_file_.GetSectionIndex(address, size);
2144 E : if (section == BlockGraph::kInvalidSectionId) {
2145 i : LOG(ERROR) << "Block at " << address << " with size " << size
2146 : << " lies outside of all sections.";
2147 i : return NULL;
2148 : }
2149 E : block->set_section(section);
2150 :
2151 E : const uint8* data = image_file_.GetImageData(address, size);
2152 E : if (data != NULL)
2153 E : block->SetData(data, size);
2154 :
2155 E : return block;
2156 E : }
2157 :
2158 : BlockGraph::Block* Decomposer::FindOrCreateBlock(
2159 : BlockGraph::BlockType type,
2160 : RelativeAddress addr,
2161 : BlockGraph::Size size,
2162 : const base::StringPiece& name,
2163 E : FindOrCreateBlockDirective directive) {
2164 E : BlockGraph::Block* block = image_->GetBlockByAddress(addr);
2165 E : if (block != NULL) {
2166 : // Always allow collisions where the new block is a proper subset of
2167 : // an existing PE parsed block. The PE parser often knows more than we do
2168 : // about blocks that need to stick together.
2169 E : if (block->attributes() & BlockGraph::PE_PARSED)
2170 E : directive = kAllowCoveringBlock;
2171 :
2172 E : bool collision = false;
2173 E : switch (directive) {
2174 : case kExpectNoBlock: {
2175 i : collision = true;
2176 i : break;
2177 : }
2178 : case kAllowIdenticalBlock: {
2179 i : collision = (block->addr() != addr || block->size() != size);
2180 i : break;
2181 : }
2182 : default: {
2183 E : DCHECK(directive == kAllowCoveringBlock);
2184 : collision = block->addr() > addr ||
2185 E : (block->addr() + block->size()) < addr + size;
2186 : break;
2187 : }
2188 : }
2189 :
2190 E : if (collision) {
2191 i : LOG(ERROR) << "Block collision for function at "
2192 : << addr.value() << "(" << size << ") with " << block->name();
2193 i : return NULL;
2194 : }
2195 :
2196 E : return block;
2197 : }
2198 E : DCHECK(block == NULL);
2199 :
2200 E : return CreateBlock(type, addr, size, name);
2201 E : }
2202 :
2203 : CallbackDirective Decomposer::LookPastInstructionForData(
2204 E : RelativeAddress instr_end) {
2205 : // If this instruction terminates at a data boundary (ie: the *next*
2206 : // instruction will be data or a reloc), we can be certain that a new
2207 : // lookup table is starting at this address.
2208 E : if (reloc_set_.find(instr_end) == reloc_set_.end())
2209 E : return Disassembler::kDirectiveContinue;
2210 :
2211 : // Find the block housing the reloc. We expect the reloc to be contained
2212 : // completely within this block.
2213 E : BlockGraph::Block* block = image_->GetContainingBlock(instr_end, 4);
2214 E : if (block != current_block_) {
2215 i : CHECK(block != NULL);
2216 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2217 : << "Found an instruction/data boundary between blocks: "
2218 : << current_block_->name() << " and " << block->name();
2219 i : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2220 : }
2221 :
2222 E : BlockGraph::Offset offset = instr_end - block->addr();
2223 :
2224 : // We expect there to be a jump-table data label already.
2225 E : BlockGraph::Label label;
2226 E : bool have_label = block->GetLabel(offset, &label);
2227 : if (!have_label || !label.has_attributes(
2228 E : BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)) {
2229 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2230 : << "Expected there to be a data label marking the jump "
2231 : << "table at " << block->name() << " + " << offset << ".";
2232 :
2233 : // If we're in strict mode, we're a block that obeys standard conventions.
2234 : // Which means we should already be aware of any jump tables in this block.
2235 i : if (be_strict_with_current_block_)
2236 i : return Disassembler::kDirectiveAbort;
2237 :
2238 : // If we're not in strict mode, add the jump-table label.
2239 i : if (have_label) {
2240 i : CHECK(block->RemoveLabel(offset));
2241 : }
2242 :
2243 : CHECK(block->SetLabel(offset, BlockGraph::Label(
2244 : base::StringPrintf("<JUMP-TABLE-%d>", offset),
2245 i : BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)));
2246 : }
2247 :
2248 E : return Disassembler::kDirectiveTerminatePath;
2249 E : }
2250 :
2251 i : void Decomposer::MarkDisassembledPastEnd() {
2252 : static size_t count = 0;
2253 i : DCHECK(current_block_ != NULL);
2254 i : current_block_->set_attribute(BlockGraph::DISASSEMBLED_PAST_END);
2255 : // TODO(chrisha): The entire "disassembled past end" and non-returning
2256 : // function infrastructure can be ripped out once we rework the BB
2257 : // disassembler to be straight path, and remove the disassembly phase
2258 : // from the decomposer (where it's no longer needed). In the meantime
2259 : // we simply crank down this log verbosity due to all of the false
2260 : // positives.
2261 i : VLOG(1) << "Disassembled past end of block or into known data for block \""
2262 : << current_block_->name() << "\" at " << current_block_->addr()
2263 : << ".";
2264 i : }
2265 :
2266 : CallbackDirective Decomposer::VisitNonFlowControlInstruction(
2267 E : RelativeAddress instr_start, RelativeAddress instr_end) {
2268 : // TODO(chrisha): We could walk the operands and follow references
2269 : // explicitly. If any of them are of reference type and there's no
2270 : // matching reference, this would be cause to blow up and die (we
2271 : // should get all of these as relocs and/or fixups).
2272 :
2273 : IntermediateReferenceMap::const_iterator ref_it =
2274 E : references_.upper_bound(instr_start);
2275 : IntermediateReferenceMap::const_iterator ref_end =
2276 E : references_.lower_bound(instr_end);
2277 :
2278 E : for (; ref_it != ref_end; ++ref_it) {
2279 : BlockGraph::Block* ref_block = image_->GetContainingBlock(
2280 E : ref_it->second.base, 1);
2281 E : DCHECK(ref_block != NULL);
2282 :
2283 : // This is an inter-block reference.
2284 E : if (ref_block != current_block_) {
2285 : // There should be no cross-block references to the middle of other
2286 : // code blocks (to the top is fine, as we could be passing around a
2287 : // function pointer). The exception is if the remote block is not
2288 : // generated by cl.exe. In this case, there could be arbitrary labels
2289 : // that act like functions within the body of that block, and referring
2290 : // to them is perfectly fine.
2291 : if (ref_block->type() == BlockGraph::CODE_BLOCK &&
2292 : ref_it->second.base != ref_block->addr() &&
2293 E : block_graph::CodeBlockAttributesAreBasicBlockSafe(ref_block)) {
2294 E : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2295 : << "Found a non-control-flow code-block to middle-of-code-block "
2296 : << "reference from block \"" << current_block_->name()
2297 : << "\" to block \"" << ref_block->name() << "\".";
2298 E : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2299 : }
2300 E : } else {
2301 : // This is an intra-block reference.
2302 : BlockGraph::Offset ref_offset =
2303 E : ref_it->second.base - current_block_->addr();
2304 :
2305 : // If this is to offset zero, we assume we are taking a pointer to
2306 : // ourself, which is safe.
2307 E : if (ref_offset != 0) {
2308 : // If this is 'clean' code it should be to data, and there should be a
2309 : // label.
2310 E : BlockGraph::Label label;
2311 E : if (!current_block_->GetLabel(ref_offset, &label)) {
2312 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2313 : << "Found an intra-block data-reference with no label.";
2314 i : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2315 : } else {
2316 : if (!label.has_attributes(BlockGraph::DATA_LABEL) ||
2317 E : label.has_attributes(BlockGraph::CODE_LABEL)) {
2318 E : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2319 : << "Found an intra-block data-like reference to a non-data "
2320 : << "or code label in block \"" << current_block_->name()
2321 : << "\".";
2322 E : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2323 : }
2324 : }
2325 E : }
2326 : }
2327 E : }
2328 :
2329 E : return Disassembler::kDirectiveContinue;
2330 E : }
2331 :
2332 : CallbackDirective Decomposer::VisitPcRelativeFlowControlInstruction(
2333 : AbsoluteAddress instr_abs,
2334 : RelativeAddress instr_rel,
2335 : const _DInst& instruction,
2336 E : bool end_of_code) {
2337 E : int fc = META_GET_FC(instruction.meta);
2338 E : DCHECK(fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH);
2339 E : DCHECK_EQ(O_PC, instruction.ops[0].type);
2340 E : DCHECK_EQ(O_NONE, instruction.ops[1].type);
2341 E : DCHECK_EQ(O_NONE, instruction.ops[2].type);
2342 E : DCHECK_EQ(O_NONE, instruction.ops[3].type);
2343 : DCHECK(instruction.ops[0].size == 8 ||
2344 : instruction.ops[0].size == 16 ||
2345 E : instruction.ops[0].size == 32);
2346 : // Distorm gives us size in bits, we want bytes.
2347 E : BlockGraph::Size size = instruction.ops[0].size / 8;
2348 :
2349 : // Get the reference's address. Note we assume it's in the instruction's
2350 : // tail end - I don't know of a case where a PC-relative offset in a branch
2351 : // or call is not the very last thing in an x86 instruction.
2352 E : AbsoluteAddress abs_src = instr_abs + instruction.size - size;
2353 : AbsoluteAddress abs_dst = instr_abs + instruction.size +
2354 E : static_cast<size_t>(instruction.imm.addr);
2355 :
2356 E : RelativeAddress src, dst;
2357 : if (!image_file_.Translate(abs_src, &src) ||
2358 E : !image_file_.Translate(abs_dst, &dst)) {
2359 i : LOG(ERROR) << "Unable to translate absolute to relative addresses.";
2360 i : return Disassembler::kDirectiveAbort;
2361 : }
2362 :
2363 : // Get the block associated with the destination address. It must exist
2364 : // and be a code block.
2365 E : BlockGraph::Block* block = image_->GetContainingBlock(dst, 1);
2366 E : DCHECK(block != NULL);
2367 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
2368 :
2369 : // For short references, we should not see a fixup.
2370 E : ValidateOrAddReferenceMode mode = FIXUP_MUST_NOT_EXIST;
2371 E : if (size == kPointerSize) {
2372 : // Long PC_RELATIVE reference within a single block? FIXUPs aren't
2373 : // strictly necessary.
2374 E : if (block->Contains(src, kPointerSize))
2375 E : mode = FIXUP_MAY_EXIST;
2376 E : else
2377 : // But if they're between blocks (section contributions), we expect to
2378 : // find them.
2379 E : mode = FIXUP_MUST_EXIST;
2380 E : } else {
2381 : // Since we slice by section contributions we no longer see short
2382 : // references across blocks. If we do, bail!
2383 E : if (block != current_block_) {
2384 i : LOG(ERROR) << "Found a short PC-relative reference out of block \""
2385 : << current_block_->name() << "\".";
2386 i : return Disassembler::kDirectiveAbort;
2387 : }
2388 : }
2389 :
2390 : // Validate or create the reference, as necessary.
2391 : if (!ValidateOrAddReference(mode, src, BlockGraph::PC_RELATIVE_REF, size,
2392 E : dst, 0, &fixup_map_, &references_)) {
2393 i : LOG(ERROR) << "Failed to validate/create reference originating from "
2394 : << "block \"" << current_block_->name() << "\".";
2395 i : return Disassembler::kDirectiveAbort;
2396 : }
2397 :
2398 : // If this is a call and the destination is a non-returning function,
2399 : // then indicate that we should terminate this disassembly path.
2400 : if (fc == FC_CALL &&
2401 E : (block->attributes() & BlockGraph::NON_RETURN_FUNCTION)) {
2402 : // TODO(chrisha): For now, we enforce that the call be to the beginning
2403 : // of the function. This may not be necessary, but better safe than
2404 : // sorry for now.
2405 E : if (block->addr() != dst) {
2406 i : LOG(ERROR) << "Calling inside the body of a non-returning function: "
2407 : << block->name();
2408 i : return Disassembler::kDirectiveAbort;
2409 : }
2410 :
2411 E : return Disassembler::kDirectiveTerminatePath;
2412 : }
2413 :
2414 : // If we get here, then we don't think it's a non-returning call. If it's
2415 : // not an unconditional jump and we're at the end of the code for this block
2416 : // then we consider this as disassembling past the end.
2417 E : if (fc != FC_UNC_BRANCH && end_of_code)
2418 i : MarkDisassembledPastEnd();
2419 :
2420 E : return Disassembler::kDirectiveContinue;
2421 E : }
2422 :
2423 : CallbackDirective Decomposer::VisitIndirectMemoryCallInstruction(
2424 E : const _DInst& instruction, bool end_of_code) {
2425 E : DCHECK_EQ(FC_CALL, META_GET_FC(instruction.meta));
2426 E : DCHECK_EQ(O_DISP, instruction.ops[0].type);
2427 :
2428 E : AbsoluteAddress disp_addr_abs(static_cast<uint32>(instruction.disp));
2429 E : RelativeAddress disp_addr_rel;
2430 E : if (!image_file_.Translate(disp_addr_abs, &disp_addr_rel)) {
2431 i : LOG(ERROR) << "Unable to translate call address.";
2432 i : return Disassembler::kDirectiveAbort;
2433 : }
2434 :
2435 : // Try to dereference the address of the call instruction. This can fail
2436 : // for blocks that are only initialized at runtime, so we don't fail if
2437 : // we don't find a reference.
2438 : IntermediateReferenceMap::const_iterator ref_it =
2439 E : references_.find(disp_addr_rel);
2440 E : if (ref_it == references_.end())
2441 E : return Disassembler::kDirectiveContinue;
2442 :
2443 : // NOTE: This process derails for bound import tables. In this case the
2444 : // attempted dereference above will fail, but we could still actually
2445 : // find the import name thunk by inspecting the offset of the memory
2446 : // location.
2447 :
2448 : // The reference must be direct and 32-bit.
2449 E : const IntermediateReference& ref = ref_it->second;
2450 E : DCHECK_EQ(BlockGraph::Reference::kMaximumSize, ref.size);
2451 E : DCHECK_EQ(0, ref.offset);
2452 :
2453 : // Look up the thunk this refers to.
2454 E : BlockGraph::Block* thunk = image_->GetBlockByAddress(ref.base);
2455 E : if (thunk == NULL) {
2456 i : LOG(ERROR) << "Unable to dereference intermediate reference at "
2457 : << disp_addr_rel << " to " << ref.base << ".";
2458 i : return Disassembler::kDirectiveAbort;
2459 : }
2460 :
2461 E : if (ref.type == BlockGraph::RELATIVE_REF) {
2462 : // If this is a relative reference it must be part of an import address
2463 : // table (during runtime this address would be patched up with an absolute
2464 : // reference). Thus we expect the referenced block to be data, an import
2465 : // name thunk.
2466 E : DCHECK_EQ(BlockGraph::DATA_BLOCK, thunk->type());
2467 E : } else {
2468 : // If this is an absolute address it should actually point directly to
2469 : // code.
2470 E : DCHECK_EQ(BlockGraph::ABSOLUTE_REF, ref.type);
2471 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, thunk->type());
2472 : }
2473 :
2474 : // Either way, if the block is non-returning we terminate this path of
2475 : // disassembly.
2476 E : if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
2477 E : return Disassembler::kDirectiveTerminatePath;
2478 :
2479 E : if (end_of_code)
2480 i : MarkDisassembledPastEnd();
2481 :
2482 E : return Disassembler::kDirectiveContinue;
2483 E : }
2484 :
2485 : CallbackDirective Decomposer::OnInstruction(const Disassembler& walker,
2486 E : const _DInst& instruction) {
2487 : // Get the relative address of this instruction.
2488 E : AbsoluteAddress instr_abs(static_cast<uint32>(instruction.addr));
2489 E : RelativeAddress instr_rel;
2490 E : if (!image_file_.Translate(instr_abs, &instr_rel)) {
2491 i : LOG(ERROR) << "Unable to translate instruction address.";
2492 i : return Disassembler::kDirectiveAbort;
2493 : }
2494 E : RelativeAddress after_instr_rel = instr_rel + instruction.size;
2495 :
2496 : #ifndef NDEBUG
2497 : // If we're in debug mode, it's helpful to have a pointer directly to the
2498 : // beginning of this instruction in memory.
2499 E : BlockGraph::Offset instr_offset = instr_rel - current_block_->addr();
2500 E : const uint8* instr_data = current_block_->data() + instr_offset;
2501 : #endif
2502 :
2503 : // TODO(chrisha): Certain instructions require aligned data (ie: MMX/SSE
2504 : // instructions). We need to follow the data that these instructions
2505 : // refer to, and set their alignment appropriately. For now, alignment
2506 : // is simply preserved from the original image.
2507 :
2508 E : CallbackDirective directive = LookPastInstructionForData(after_instr_rel);
2509 E : if (IsFatalCallbackDirective(directive))
2510 i : return directive;
2511 :
2512 : // We're at the end of code in this block if we encountered data, or this is
2513 : // the last intruction to be processed.
2514 E : RelativeAddress block_end(current_block_->addr() + current_block_->size());
2515 : bool end_of_code = (directive == Disassembler::kDirectiveTerminatePath) ||
2516 E : (after_instr_rel >= block_end);
2517 :
2518 E : int fc = META_GET_FC(instruction.meta);
2519 :
2520 E : if (fc == FC_NONE) {
2521 : // There's no control flow and we're at the end of the block. Mark the
2522 : // block as dirty.
2523 E : if (end_of_code)
2524 i : MarkDisassembledPastEnd();
2525 :
2526 : return CombineCallbackDirectives(directive,
2527 E : VisitNonFlowControlInstruction(instr_rel, after_instr_rel));
2528 : }
2529 :
2530 : if ((fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH) &&
2531 E : instruction.ops[0].type == O_PC) {
2532 : // For all branches, calls and conditional branches to PC-relative
2533 : // addresses, record a PC-relative reference.
2534 : return CombineCallbackDirectives(directive,
2535 : VisitPcRelativeFlowControlInstruction(instr_abs,
2536 : instr_rel,
2537 : instruction,
2538 E : end_of_code));
2539 : }
2540 :
2541 : // We explicitly handle indirect memory call instructions. These can often
2542 : // be tracked down as pointing to a block in this image, or to an import
2543 : // name thunk from another module.
2544 E : if (fc == FC_CALL && instruction.ops[0].type == O_DISP) {
2545 : return CombineCallbackDirectives(directive,
2546 E : VisitIndirectMemoryCallInstruction(instruction, end_of_code));
2547 : }
2548 :
2549 : // Look out for blocks where disassembly seems to run off the end of the
2550 : // block. We do not treat interrupts as flow control as execution can
2551 : // continue past the interrupt.
2552 E : if (fc != FC_RET && fc != FC_UNC_BRANCH && end_of_code)
2553 i : MarkDisassembledPastEnd();
2554 :
2555 E : return directive;
2556 E : }
2557 :
2558 : bool Decomposer::CreatePEImageBlocksAndReferences(
2559 E : PEFileParser::PEHeader* header) {
2560 : PEFileParser::AddReferenceCallback add_reference(
2561 E : base::Bind(&Decomposer::AddReferenceCallback, base::Unretained(this)));
2562 E : PEFileParser parser(image_file_, image_, add_reference);
2563 : parser.set_on_import_thunk(
2564 E : base::Bind(&Decomposer::OnImportThunkCallback, base::Unretained(this)));
2565 :
2566 E : if (!parser.ParseImage(header)) {
2567 i : LOG(ERROR) << "Unable to parse PE image.";
2568 i : return false;
2569 : }
2570 :
2571 E : return true;
2572 E : }
2573 :
2574 E : bool Decomposer::FinalizeIntermediateReferences() {
2575 E : IntermediateReferenceMap::const_iterator it(references_.begin());
2576 E : IntermediateReferenceMap::const_iterator end(references_.end());
2577 :
2578 E : for (; it != end; ++it) {
2579 E : RelativeAddress src_addr(it->first);
2580 E : BlockGraph::Block* src = image_->GetBlockByAddress(src_addr);
2581 E : RelativeAddress dst_base_addr(it->second.base);
2582 E : RelativeAddress dst_addr(dst_base_addr + it->second.offset);
2583 E : BlockGraph::Block* dst = image_->GetBlockByAddress(dst_base_addr);
2584 :
2585 E : if (src == NULL || dst == NULL) {
2586 i : LOG(ERROR) << "Reference source or base destination address is out of "
2587 : << "range, src: " << src << ", dst: " << dst;
2588 i : return false;
2589 : }
2590 :
2591 E : RelativeAddress src_start = src->addr();
2592 E : RelativeAddress dst_start = dst->addr();
2593 :
2594 : // Get the offset of the ultimate destination relative to the start of the
2595 : // destination block.
2596 E : BlockGraph::Offset dst_offset = dst_addr - dst_start;
2597 :
2598 : // Get the offset of the actual referenced object relative to the start of
2599 : // the destination block.
2600 E : BlockGraph::Offset dst_base = dst_base_addr - dst_start;
2601 :
2602 : BlockGraph::Reference ref(it->second.type,
2603 : it->second.size,
2604 : dst,
2605 : dst_offset,
2606 E : dst_base);
2607 E : src->SetReference(src_addr - src_start, ref);
2608 E : }
2609 :
2610 E : references_.clear();
2611 :
2612 E : return true;
2613 E : }
2614 :
2615 E : bool Decomposer::ConfirmFixupsVisited() const {
2616 E : bool success = true;
2617 :
2618 : // Ideally, all fixups should have been visited during decomposition.
2619 : // TODO(chrisha): Address the root problems underlying the following
2620 : // temporary fix.
2621 E : FixupMap::const_iterator fixup_it = fixup_map_.begin();
2622 E : for (; fixup_it != fixup_map_.end(); ++fixup_it) {
2623 E : if (fixup_it->second.visited)
2624 E : continue;
2625 :
2626 : const BlockGraph::Block* block =
2627 E : image_->GetContainingBlock(fixup_it->first, kPointerSize);
2628 E : DCHECK(block != NULL);
2629 :
2630 : // We know that we currently do not have full disassembly coverage as there
2631 : // are several orphaned pieces of apparently unreachable code in the CRT
2632 : // that we do not disassemble, but which may contain jmp or call commands.
2633 : // Thus, we expect that missed fixups are all PC-relative and lie within
2634 : // code blocks.
2635 : if (block->type() == BlockGraph::CODE_BLOCK &&
2636 E : fixup_it->second.type == BlockGraph::PC_RELATIVE_REF)
2637 E : continue;
2638 :
2639 i : success = false;
2640 i : LOG(ERROR) << "Unexpected unseen fixup at " << fixup_it->second.location;
2641 i : }
2642 :
2643 E : return success;
2644 E : }
2645 :
2646 E : bool Decomposer::FindPaddingBlocks() {
2647 E : DCHECK(image_ != NULL);
2648 E : DCHECK(image_->graph() != NULL);
2649 :
2650 : BlockGraph::BlockMap::iterator block_it =
2651 E : image_->graph()->blocks_mutable().begin();
2652 E : for (; block_it != image_->graph()->blocks_mutable().end(); ++block_it) {
2653 E : BlockGraph::Block& block = block_it->second;
2654 :
2655 : // Padding blocks must not have any symbol information: no labels,
2656 : // no references, no referrers, and they must be a gap block.
2657 : if (block.labels().size() != 0 ||
2658 : block.references().size() != 0 ||
2659 : block.referrers().size() != 0 ||
2660 E : (block.attributes() & BlockGraph::GAP_BLOCK) == 0)
2661 E : continue;
2662 :
2663 E : switch (block.type()) {
2664 : // Code blocks should be fully defined and consist of only int3s.
2665 : case BlockGraph::CODE_BLOCK: {
2666 : if (block.data_size() != block.size() ||
2667 E : RepeatedValue(block.data(), block.data_size()) != kInt3)
2668 i : continue;
2669 E : break;
2670 : }
2671 :
2672 : // Data blocks should be uninitialized or have fully defined data
2673 : // consisting only of zeros.
2674 : default: {
2675 E : DCHECK_EQ(BlockGraph::DATA_BLOCK, block.type());
2676 E : if (block.data_size() == 0) // Uninitialized data blocks are padding.
2677 E : break;
2678 : if (block.data_size() != block.size() ||
2679 E : RepeatedValue(block.data(), block.data_size()) != 0)
2680 i : continue;
2681 : }
2682 : }
2683 :
2684 : // If we fall through to this point, then the block is a padding block.
2685 E : block.set_attribute(BlockGraph::PADDING_BLOCK);
2686 E : }
2687 :
2688 E : return true;
2689 E : }
2690 :
2691 E : bool Decomposer::CreateSections() {
2692 : // Iterate through the image sections, and create sections in the BlockGraph.
2693 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2694 E : for (size_t i = 0; i < num_sections; ++i) {
2695 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2696 E : std::string name = pe::PEFile::GetSectionName(*header);
2697 : BlockGraph::Section* section = image_->graph()->AddSection(
2698 E : name, header->Characteristics);
2699 E : DCHECK(section != NULL);
2700 :
2701 : // For now, we expect them to have been created with the same IDs as those
2702 : // in the original image.
2703 E : if (section->id() != i) {
2704 i : LOG(ERROR) << "Unexpected section ID.";
2705 i : return false;
2706 : }
2707 E : }
2708 :
2709 E : return true;
2710 E : }
2711 :
2712 E : bool Decomposer::LoadDebugStreams(IDiaSession* dia_session) {
2713 E : DCHECK(dia_session != NULL);
2714 :
2715 : // Load the fixups. These must exist.
2716 E : PdbFixups pdb_fixups;
2717 : SearchResult search_result = FindAndLoadDiaDebugStreamByName(
2718 E : kFixupDiaDebugStreamName, dia_session, &pdb_fixups);
2719 E : if (search_result != kSearchSucceeded) {
2720 i : if (search_result == kSearchFailed) {
2721 i : LOG(ERROR) << "PDB file does not contain a FIXUP stream. Module must be "
2722 : "linked with '/PROFILE' or '/DEBUGINFO:FIXUP' flag.";
2723 : }
2724 i : return false;
2725 : }
2726 :
2727 : // Load the omap_from table. It is not necessary that one exist.
2728 E : std::vector<OMAP> omap_from;
2729 : search_result = FindAndLoadDiaDebugStreamByName(
2730 E : kOmapFromDiaDebugStreamName, dia_session, &omap_from);
2731 E : if (search_result == kSearchErrored)
2732 i : return false;
2733 :
2734 : // Translate and validate fixups.
2735 E : if (!OmapAndValidateFixups(omap_from, pdb_fixups))
2736 i : return false;
2737 :
2738 E : return true;
2739 E : }
2740 :
2741 : bool Decomposer::OmapAndValidateFixups(const std::vector<OMAP>& omap_from,
2742 E : const PdbFixups& pdb_fixups) {
2743 E : bool have_omap = omap_from.size() != 0;
2744 :
2745 : // The resource section in Chrome is modified post-link by a tool that adds a
2746 : // manifest to it. This causes all of the fixups in the resource section (and
2747 : // anything beyond it) to be invalid. As long as the resource section is the
2748 : // last section in the image, this is not a problem (we can safely ignore the
2749 : // .rsrc fixups, which we know how to parse without them). However, if there
2750 : // is a section after the resource section, things will have been shifted
2751 : // and potentially crucial fixups will be invalid.
2752 E : RelativeAddress rsrc_start(0xffffffff), max_start;
2753 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2754 E : for (size_t i = 0; i < num_sections; ++i) {
2755 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2756 E : RelativeAddress start(header->VirtualAddress);
2757 E : if (start > max_start)
2758 E : max_start = start;
2759 : if (strncmp(kResourceSectionName,
2760 : reinterpret_cast<const char*>(header->Name),
2761 E : IMAGE_SIZEOF_SHORT_NAME) == 0) {
2762 E : rsrc_start = start;
2763 E : break;
2764 : }
2765 E : }
2766 :
2767 : // Ensure there are no sections after the resource section.
2768 E : if (max_start > rsrc_start) {
2769 i : LOG(ERROR) << kResourceSectionName << " section is not the last section.";
2770 i : return false;
2771 : }
2772 :
2773 : // Ensure the fixups are all valid, and populate the fixup map.
2774 E : size_t skipped = 0;
2775 E : for (size_t i = 0; i < pdb_fixups.size(); ++i) {
2776 E : if (!pdb_fixups[i].ValidHeader()) {
2777 i : LOG(ERROR) << "Unknown fixup header: "
2778 : << StringPrintf("0x%08X.", pdb_fixups[i].header);
2779 i : return false;
2780 : }
2781 :
2782 : // For now, we skip any offset fixups. We've only seen this in the context
2783 : // of TLS data access, and we don't mess with TLS structures.
2784 E : if (pdb_fixups[i].is_offset())
2785 E : continue;
2786 :
2787 : // All fixups we handle should be full size pointers.
2788 E : DCHECK_EQ(kPointerSize, pdb_fixups[i].size());
2789 :
2790 : // Get the original addresses, and map them through OMAP information.
2791 : // Normally DIA takes care of this for us, but there is no API for
2792 : // getting DIA to give us FIXUP information, so we have to do it manually.
2793 E : RelativeAddress rva_location(pdb_fixups[i].rva_location);
2794 E : RelativeAddress rva_base(pdb_fixups[i].rva_base);
2795 E : if (have_omap) {
2796 i : rva_location = pdb::TranslateAddressViaOmap(omap_from, rva_location);
2797 i : rva_base = pdb::TranslateAddressViaOmap(omap_from, rva_base);
2798 : }
2799 :
2800 : // If these are part of the .rsrc section, ignore them.
2801 E : if (rva_location >= rsrc_start)
2802 i : continue;
2803 :
2804 : // Ensure they live within the image, and refer to things within the
2805 : // image.
2806 : if (!image_file_.Contains(rva_location, kPointerSize) ||
2807 E : !image_file_.Contains(rva_base, 1)) {
2808 i : LOG(ERROR) << "Fixup refers to addresses outside of image.";
2809 i : return false;
2810 : }
2811 :
2812 : // Add the fix up, and ensure the source address is unique.
2813 E : Fixup fixup = { PdbFixupTypeToReferenceType(pdb_fixups[i].type),
2814 E : pdb_fixups[i].refers_to_code(),
2815 E : pdb_fixups[i].is_data(),
2816 E : false,
2817 E : rva_location,
2818 E : rva_base };
2819 E : bool added = fixup_map_.insert(std::make_pair(rva_location, fixup)).second;
2820 E : if (!added) {
2821 i : LOG(ERROR) << "Colliding fixups at " << rva_location;
2822 i : return false;
2823 : }
2824 E : }
2825 :
2826 E : return true;
2827 E : }
2828 :
2829 : bool Decomposer::RegisterStaticInitializerPatterns(
2830 E : const base::StringPiece& begin, const base::StringPiece& end) {
2831 : // Ensuring the patterns each have exactly one capturing group.
2832 : REPair re_pair = std::make_pair(RE(begin.as_string()),
2833 E : RE(end.as_string()));
2834 : if (re_pair.first.NumberOfCapturingGroups() != 1 ||
2835 E : re_pair.second.NumberOfCapturingGroups() != 1)
2836 i : return false;
2837 :
2838 E : static_initializer_patterns_.push_back(re_pair);
2839 :
2840 E : return true;
2841 E : }
2842 :
2843 : bool Decomposer::RegisterNonReturningFunction(
2844 E : const base::StringPiece& function_name) {
2845 E : return non_returning_functions_.insert(function_name.as_string()).second;
2846 E : }
2847 :
2848 : bool Decomposer::RegisterNonReturningImport(
2849 : const base::StringPiece& module_name,
2850 E : const base::StringPiece& function_name) {
2851 E : StringSet& module_set = non_returning_imports_[module_name.as_string()];
2852 E : return module_set.insert(function_name.as_string()).second;
2853 E : }
2854 :
2855 : bool Decomposer::LoadBlockGraphFromPdbStream(const PEFile& image_file,
2856 : pdb::PdbStream* block_graph_stream,
2857 E : ImageLayout* image_layout) {
2858 E : DCHECK(block_graph_stream != NULL);
2859 E : DCHECK(image_layout != NULL);
2860 E : LOG(INFO) << "Reading block-graph and image layout from the PDB.";
2861 :
2862 : // Initialize an input archive pointing to the stream.
2863 E : scoped_refptr<pdb::PdbByteStream> byte_stream = new pdb::PdbByteStream();
2864 E : if (!byte_stream->Init(block_graph_stream))
2865 i : return false;
2866 E : DCHECK(byte_stream.get() != NULL);
2867 :
2868 E : core::ScopedInStreamPtr pdb_in_stream;
2869 : pdb_in_stream.reset(core::CreateByteInStream(
2870 E : byte_stream->data(), byte_stream->data() + byte_stream->length()));
2871 :
2872 : // Read the header.
2873 E : uint32 stream_version = 0;
2874 E : unsigned char compressed = 0;
2875 : if (!pdb_in_stream->Read(sizeof(stream_version),
2876 : reinterpret_cast<core::Byte*>(&stream_version)) ||
2877 : !pdb_in_stream->Read(sizeof(compressed),
2878 E : reinterpret_cast<core::Byte*>(&compressed))) {
2879 i : LOG(ERROR) << "Failed to read existing Syzygy block-graph stream header.";
2880 i : return false;
2881 : }
2882 :
2883 : // Check the stream version.
2884 E : if (stream_version != pdb::kSyzygyBlockGraphStreamVersion) {
2885 E : LOG(ERROR) << "PDB contains an unsupported Syzygy block-graph stream"
2886 : << " version (got " << stream_version << ", expected "
2887 : << pdb::kSyzygyBlockGraphStreamVersion << ").";
2888 E : return false;
2889 : }
2890 :
2891 : // If the stream is compressed insert the decompression filter.
2892 E : core::InStream* in_stream = pdb_in_stream.get();
2893 E : scoped_ptr<core::ZInStream> zip_in_stream;
2894 E : if (compressed != 0) {
2895 E : zip_in_stream.reset(new core::ZInStream(in_stream));
2896 E : if (!zip_in_stream->Init()) {
2897 i : LOG(ERROR) << "Unable to initialize ZInStream.";
2898 i : return false;
2899 : }
2900 E : in_stream = zip_in_stream.get();
2901 : }
2902 :
2903 : // Deserialize the image-layout.
2904 E : core::NativeBinaryInArchive in_archive(in_stream);
2905 E : block_graph::BlockGraphSerializer::Attributes attributes = 0;
2906 : if (!LoadBlockGraphAndImageLayout(
2907 E : image_file, &attributes, image_layout, &in_archive)) {
2908 i : LOG(ERROR) << "Failed to deserialize block-graph and image layout.";
2909 i : return false;
2910 : }
2911 :
2912 E : return true;
2913 E : }
2914 :
2915 : bool Decomposer::LoadBlockGraphFromPdb(const FilePath& pdb_path,
2916 : const PEFile& image_file,
2917 : ImageLayout* image_layout,
2918 E : bool* stream_exists) {
2919 E : DCHECK(image_layout != NULL);
2920 E : DCHECK(stream_exists != NULL);
2921 :
2922 E : pdb::PdbFile pdb_file;
2923 E : pdb::PdbReader pdb_reader;
2924 E : if (!pdb_reader.Read(pdb_path, &pdb_file)) {
2925 i : LOG(ERROR) << "Unable to read the PDB named \"" << pdb_path.value()
2926 : << "\".";
2927 i : return NULL;
2928 : }
2929 :
2930 : // Try to get the block-graph stream from the PDB.
2931 : scoped_refptr<pdb::PdbStream> block_graph_stream =
2932 E : GetBlockGraphStreamFromPdb(&pdb_file);
2933 E : if (block_graph_stream.get() == NULL) {
2934 E : *stream_exists = false;
2935 E : return false;
2936 : }
2937 :
2938 : // The PDB contains a block-graph stream, the block-graph and the image layout
2939 : // will be read from this stream.
2940 E : *stream_exists = true;
2941 : if (!LoadBlockGraphFromPdbStream(image_file, block_graph_stream.get(),
2942 E : image_layout)) {
2943 i : return false;
2944 : }
2945 :
2946 E : return true;
2947 E : }
2948 :
2949 : scoped_refptr<pdb::PdbStream> Decomposer::GetBlockGraphStreamFromPdb(
2950 E : pdb::PdbFile* pdb_file) {
2951 E : scoped_refptr<pdb::PdbStream> block_graph_stream;
2952 : // Get the PDB header and try to get the block-graph ID stream from it.
2953 E : pdb::PdbInfoHeader70 pdb_header = {0};
2954 E : pdb::NameStreamMap name_stream_map;
2955 : if (!ReadHeaderInfoStream(pdb_file->GetStream(pdb::kPdbHeaderInfoStream),
2956 : &pdb_header,
2957 E : &name_stream_map)) {
2958 i : LOG(ERROR) << "Failed to read header info stream.";
2959 i : return block_graph_stream;
2960 : }
2961 : pdb::NameStreamMap::const_iterator name_it = name_stream_map.find(
2962 E : pdb::kSyzygyBlockGraphStreamName);
2963 E : if (name_it == name_stream_map.end()) {
2964 E : return block_graph_stream;
2965 : }
2966 :
2967 : // Get the block-graph stream and ensure that it's not empty.
2968 E : block_graph_stream = pdb_file->GetStream(name_it->second);
2969 E : if (block_graph_stream.get() == NULL) {
2970 i : LOG(ERROR) << "Failed to read the block-graph stream from the PDB.";
2971 i : return block_graph_stream;
2972 : }
2973 E : if (block_graph_stream->length() == 0) {
2974 i : LOG(ERROR) << "The block-graph stream is empty.";
2975 i : return block_graph_stream;
2976 : }
2977 :
2978 E : return block_graph_stream;
2979 E : }
2980 :
2981 : bool Decomposer::OnImportThunkCallback(const char* module_name,
2982 : const char* symbol_name,
2983 E : BlockGraph::Block* thunk) {
2984 E : DCHECK(module_name != NULL);
2985 E : DCHECK(symbol_name != NULL);
2986 E : DCHECK(thunk != NULL);
2987 :
2988 : // Look for the module first.
2989 : StringSetMap::const_iterator module_it =
2990 E : non_returning_imports_.find(std::string(module_name));
2991 E : if (module_it == non_returning_imports_.end())
2992 E : return true;
2993 :
2994 : // Look for the symbol within the module.
2995 E : if (module_it->second.count(std::string(symbol_name)) == 0)
2996 E : return true;
2997 :
2998 : // If we get here then the imported symbol is found. Decorate the thunk.
2999 E : thunk->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
3000 E : VLOG(1) << "Forcing non-returning attribute on imported symbol \""
3001 : << symbol_name << "\" from module \"" << module_name << "\".";
3002 :
3003 E : return true;
3004 E : }
3005 :
3006 : } // namespace pe
|