1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/decomposer.h"
16 :
17 : #include <cvconst.h>
18 : #include <algorithm>
19 :
20 : #include "base/bind.h"
21 : #include "base/logging.h"
22 : #include "base/path_service.h"
23 : #include "base/string_util.h"
24 : #include "base/stringprintf.h"
25 : #include "base/utf_string_conversions.h"
26 : #include "base/files/file_path.h"
27 : #include "base/memory/scoped_ptr.h"
28 : #include "base/win/scoped_bstr.h"
29 : #include "base/win/scoped_comptr.h"
30 : #include "sawbuck/common/com_utils.h"
31 : #include "sawbuck/sym_util/types.h"
32 : #include "syzygy/block_graph/block_util.h"
33 : #include "syzygy/block_graph/typed_block.h"
34 : #include "syzygy/core/disassembler_util.h"
35 : #include "syzygy/core/zstream.h"
36 : #include "syzygy/pdb/omap.h"
37 : #include "syzygy/pdb/pdb_byte_stream.h"
38 : #include "syzygy/pdb/pdb_util.h"
39 : #include "syzygy/pe/dia_util.h"
40 : #include "syzygy/pe/find.h"
41 : #include "syzygy/pe/metadata.h"
42 : #include "syzygy/pe/pdb_info.h"
43 : #include "syzygy/pe/pe_file_parser.h"
44 : #include "syzygy/pe/pe_utils.h"
45 : #include "syzygy/pe/serialization.h"
46 :
47 : namespace pe {
48 : namespace {
49 :
50 : using base::win::ScopedBstr;
51 : using base::win::ScopedComPtr;
52 : using block_graph::BlockGraph;
53 : using block_graph::ConstTypedBlock;
54 : using builder::Opt;
55 : using builder::Seq;
56 : using builder::Star;
57 : using core::AbsoluteAddress;
58 : using core::Disassembler;
59 : using core::RelativeAddress;
60 :
61 : typedef Disassembler::CallbackDirective CallbackDirective;
62 :
63 : const size_t kPointerSize = sizeof(AbsoluteAddress);
64 :
65 : // Converts from PdbFixup::Type to BlockGraph::ReferenceType.
66 : BlockGraph::ReferenceType PdbFixupTypeToReferenceType(
67 E : pdb::PdbFixup::Type type) {
68 E : switch (type) {
69 : case pdb::PdbFixup::TYPE_ABSOLUTE:
70 E : return BlockGraph::ABSOLUTE_REF;
71 :
72 : case pdb::PdbFixup::TYPE_RELATIVE:
73 E : return BlockGraph::RELATIVE_REF;
74 :
75 : case pdb::PdbFixup::TYPE_PC_RELATIVE:
76 E : return BlockGraph::PC_RELATIVE_REF;
77 :
78 : default:
79 i : NOTREACHED() << "Invalid PdbFixup::Type.";
80 : // The return type here is meaningless.
81 i : return BlockGraph::ABSOLUTE_REF;
82 : }
83 E : }
84 :
85 : // Adds a reference to the provided intermediate reference map. If one already
86 : // exists, will validate that they are consistent.
87 : bool AddReference(RelativeAddress src_addr,
88 : BlockGraph::ReferenceType type,
89 : BlockGraph::Size size,
90 : RelativeAddress dst_base,
91 : BlockGraph::Offset dst_offset,
92 E : Decomposer::IntermediateReferenceMap* references) {
93 E : DCHECK(references != NULL);
94 :
95 : // If we get an iterator to a reference and it has the same source address
96 : // then ensure that we are consistent with it.
97 : Decomposer::IntermediateReferenceMap::iterator it =
98 E : references->lower_bound(src_addr);
99 E : if (it != references->end() && it->first == src_addr) {
100 : if (type != it->second.type || size != it->second.size ||
101 E : dst_base != it->second.base || dst_offset != it->second.offset) {
102 i : LOG(ERROR) << "Trying to insert inconsistent and colliding intermediate "
103 : "references.";
104 i : return false;
105 : }
106 : }
107 :
108 E : Decomposer::IntermediateReference ref = { type,
109 E : size,
110 E : dst_base,
111 E : dst_offset };
112 :
113 : // Since we used lower_bound above, we can use it as a hint for the
114 : // insertion. This saves us from incurring the lookup cost twice.
115 E : references->insert(it, std::make_pair(src_addr, ref));
116 E : return true;
117 E : }
118 :
119 : // Validates the given reference against the given fixup map entry. If they
120 : // are consistent, marks the fixup as having been visited.
121 : bool ValidateReference(RelativeAddress src_addr,
122 : BlockGraph::ReferenceType type,
123 : BlockGraph::Size size,
124 E : Decomposer::FixupMap::iterator fixup_it) {
125 E : if (type != fixup_it->second.type || size != kPointerSize) {
126 i : LOG(ERROR) << "Reference at " << src_addr
127 : << " not consistent with corresponding fixup.";
128 i : return false;
129 : }
130 :
131 : // Mark this fixup as having been visited.
132 E : fixup_it->second.visited = true;
133 :
134 E : return true;
135 E : }
136 :
// Selects how ValidateOrAddReference treats the fixup map entry (if any) found
// at the source address of the reference.
enum ValidateOrAddReferenceMode {
  // A fixup is optional: if one is present the reference is validated against
  // it. In either case an intermediate reference is then created.
  FIXUP_MAY_EXIST,
  // A fixup is required: the reference is validated against it, and the
  // absence of a fixup is a failure. No intermediate reference is created.
  FIXUP_MUST_EXIST,
  // A fixup is forbidden: finding one is a failure. Otherwise an intermediate
  // reference is created.
  FIXUP_MUST_NOT_EXIST
};
148 : bool ValidateOrAddReference(ValidateOrAddReferenceMode mode,
149 : RelativeAddress src_addr,
150 : BlockGraph::ReferenceType type,
151 : BlockGraph::Size size,
152 : RelativeAddress dst_base,
153 : BlockGraph::Offset dst_offset,
154 : Decomposer::FixupMap* fixup_map,
155 E : Decomposer::IntermediateReferenceMap* references) {
156 E : DCHECK(fixup_map != NULL);
157 E : DCHECK(references != NULL);
158 :
159 E : Decomposer::FixupMap::iterator it = fixup_map->find(src_addr);
160 :
161 E : switch (mode) {
162 : case FIXUP_MAY_EXIST: {
163 : if (it != fixup_map->end() &&
164 E : !ValidateReference(src_addr, type, size, it))
165 i : return false;
166 : return AddReference(src_addr, type, size, dst_base, dst_offset,
167 E : references);
168 : }
169 :
170 : case FIXUP_MUST_EXIST: {
171 E : if (it == fixup_map->end()) {
172 i : LOG(ERROR) << "Reference at " << src_addr << " has no matching fixup.";
173 i : return false;
174 : }
175 E : if (!ValidateReference(src_addr, type, size, it))
176 i : return false;
177 : // Do not create a new intermediate reference.
178 E : return true;
179 : }
180 :
181 : case FIXUP_MUST_NOT_EXIST: {
182 E : if (it != fixup_map->end()) {
183 i : LOG(ERROR) << "Reference at " << src_addr
184 : << " collides with an existing fixup.";
185 i : return false;
186 : }
187 : return AddReference(src_addr, type, size, dst_base, dst_offset,
188 E : references);
189 : }
190 :
191 : default: {
192 i : NOTREACHED() << "Invalid ValidateOrAddReferenceMode.";
193 i : return false;
194 : }
195 : }
196 E : }
197 :
198 E : bool GetTypeInfo(IDiaSymbol* symbol, size_t* length) {
199 E : DCHECK(symbol != NULL);
200 E : DCHECK(length != NULL);
201 :
202 E : *length = 0;
203 E : ScopedComPtr<IDiaSymbol> type;
204 E : HRESULT hr = symbol->get_type(type.Receive());
205 : // This happens if the symbol has no type information.
206 E : if (hr == S_FALSE)
207 E : return true;
208 E : if (hr != S_OK) {
209 i : LOG(ERROR) << "Failed to get type symbol: " << com::LogHr(hr) << ".";
210 i : return false;
211 : }
212 :
213 E : ULONGLONG ull_length = 0;
214 E : hr = type->get_length(&ull_length);
215 E : if (hr != S_OK) {
216 i : LOG(ERROR) << "Failed to retrieve type length properties: "
217 : << com::LogHr(hr) << ".";
218 i : return false;
219 : }
220 E : *length = ull_length;
221 :
222 E : return true;
223 E : }
224 :
// Coarse classification of an image section, as computed by GetSectionType
// from the section header characteristics.
enum SectionType {
  kSectionCode,
  kSectionData,
  kSectionUnknown
};
230 :
231 E : SectionType GetSectionType(const IMAGE_SECTION_HEADER* header) {
232 E : DCHECK(header != NULL);
233 E : if ((header->Characteristics & IMAGE_SCN_CNT_CODE) != 0)
234 E : return kSectionCode;
235 E : if ((header->Characteristics & kReadOnlyDataCharacteristics) != 0)
236 E : return kSectionData;
237 i : return kSectionUnknown;
238 E : }
239 :
240 E : void GuessDataBlockAlignment(BlockGraph::Block* block, uint32 max_alignment) {
241 E : DCHECK(block != NULL);
242 E : uint32 alignment = block->addr().GetAlignment();
243 : // Cap the alignment.
244 E : if (alignment > max_alignment)
245 E : alignment = max_alignment;
246 E : block->set_alignment(alignment);
247 E : }
248 :
249 : bool AreMatchedBlockAndLabelAttributes(
250 : BlockGraph::BlockType bt,
251 : BlockGraph::LabelAttributes la) {
252 : return (bt == BlockGraph::CODE_BLOCK && (la & BlockGraph::CODE_LABEL) != 0) ||
253 : (bt == BlockGraph::DATA_BLOCK && (la & BlockGraph::DATA_LABEL) != 0);
254 : }
255 :
256 E : BlockGraph::LabelAttributes SymTagToLabelAttributes(enum SymTagEnum sym_tag) {
257 E : switch (sym_tag) {
258 : case SymTagData:
259 E : return BlockGraph::DATA_LABEL;
260 : case SymTagLabel:
261 E : return BlockGraph::CODE_LABEL;
262 : case SymTagFuncDebugStart:
263 E : return BlockGraph::DEBUG_START_LABEL;
264 : case SymTagFuncDebugEnd:
265 E : return BlockGraph::DEBUG_END_LABEL;
266 : case SymTagBlock:
267 E : return BlockGraph::SCOPE_START_LABEL;
268 : case SymTagCallSite:
269 E : return BlockGraph::CALL_SITE_LABEL;
270 : }
271 :
272 i : NOTREACHED();
273 i : return 0;
274 E : }
275 :
276 : bool AddLabelToBlock(RelativeAddress addr,
277 : const base::StringPiece& name,
278 : BlockGraph::LabelAttributes label_attributes,
279 E : BlockGraph::Block* block) {
280 E : DCHECK(block != NULL);
281 E : DCHECK_LE(block->addr(), addr);
282 E : DCHECK_GT(block->addr() + block->size(), addr);
283 :
284 E : BlockGraph::Offset offset = addr - block->addr();
285 :
286 : // Try to create the label.
287 E : if (block->SetLabel(offset, name, label_attributes)) {
288 : // If there was no label at offset 0, then this block has not yet been
289 : // renamed, and still has its section contribution as a name. Update it to
290 : // the first symbol we get for it. We parse symbols from most useful
291 : // (undecorated function names) to least useful (mangled public symbols), so
292 : // this ensures a block has the most useful name.
293 E : if (offset == 0)
294 E : block->set_name(name);
295 :
296 E : return true;
297 : }
298 :
299 : // If we get here there's an already existing label. Update it.
300 E : BlockGraph::Label label;
301 E : CHECK(block->GetLabel(offset, &label));
302 :
303 : // It is conceivable that there could be more than one scope with either the
304 : // same beginning or the same ending. However, this doesn't appear to happen
305 : // in any version of Chrome up to 20. We add this check so that we'd at least
306 : // be made aware of the situation. (We don't rely on these labels, so we
307 : // merely output a warning rather than an error.)
308 : {
309 : const BlockGraph::LabelAttributes kScopeAttributes =
310 : BlockGraph::SCOPE_START_LABEL |
311 E : BlockGraph::SCOPE_END_LABEL;
312 : BlockGraph::LabelAttributes scope_attributes =
313 E : label_attributes & kScopeAttributes;
314 E : if (scope_attributes != 0) {
315 E : if (label.has_any_attributes(scope_attributes)) {
316 i : LOG(WARNING) << "Detected colliding scope labels at offset "
317 : << offset << " of block \"" << block->name() << "\".";
318 : }
319 : }
320 : }
321 :
322 : // Merge the names if this isn't a repeated name.
323 E : std::string new_name = label.name();
324 E : if (new_name.find(name.data()) == new_name.npos) {
325 E : new_name.append(", ");
326 E : name.AppendToString(&new_name);
327 : }
328 :
329 : // Merge the attributes.
330 : BlockGraph::LabelAttributes new_label_attr = label.attributes() |
331 E : label_attributes;
332 E : if (!BlockGraph::Label::AreValidAttributes(new_label_attr)) {
333 : // It's not clear which attributes should be the winner here, so we log an
334 : // error.
335 i : LOG(ERROR) << "Trying to merge conflicting label attributes \""
336 : << BlockGraph::LabelAttributesToString(label_attributes)
337 : << "\" for label \"" << label.ToString() << "\" at offset "
338 : << offset << " of block \"" << block->name() << "\".";
339 i : return false;
340 : }
341 :
342 : // Update the label.
343 E : label = BlockGraph::Label(new_name, new_label_attr);
344 E : CHECK(block->RemoveLabel(offset));
345 E : CHECK(block->SetLabel(offset, label));
346 :
347 E : return true;
348 E : }
349 :
350 : // The MS linker pads between code blocks with int3s.
351 : static const uint8 kInt3 = 0xCC;
352 :
353 : // If the given run of bytes consists of a single value repeated, returns that
354 : // value. Otherwise, returns -1.
355 E : int RepeatedValue(const uint8* data, size_t size) {
356 E : DCHECK(data != NULL);
357 E : const uint8* data_end = data + size;
358 E : uint8 value = *(data++);
359 E : for (; data < data_end; ++data) {
360 E : if (*data != value)
361 i : return -1;
362 E : }
363 E : return value;
364 E : }
365 :
366 : const BlockGraph::BlockId kNullBlockId(-1);
367 :
368 : void GetDisassemblyStartingPoints(
369 : const BlockGraph::Block* block,
370 : AbsoluteAddress abs_block_addr,
371 : const PEFile::RelocSet& reloc_set,
372 E : Disassembler::AddressSet* addresses) {
373 E : DCHECK(block != NULL);
374 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
375 E : DCHECK(addresses != NULL);
376 :
377 E : addresses->clear();
378 :
379 : // Use code labels as starting points.
380 E : BlockGraph::Block::LabelMap::const_iterator it(block->labels().begin());
381 E : for (; it != block->labels().end(); ++it) {
382 E : BlockGraph::Offset offset = it->first;
383 E : DCHECK_LE(0, offset);
384 E : DCHECK_GT(block->size(), static_cast<size_t>(offset));
385 :
386 E : if (it->second.has_attributes(BlockGraph::CODE_LABEL)) {
387 : // We sometimes receive code labels that land on lookup tables; we can
388 : // detect these because the label will point directly to a reloc. These
389 : // should have already been marked as data by now. DCHECK to validate.
390 : // TODO(chrisha): Get rid of this DCHECK, and allow mixed CODE and DATA
391 : // labels. Simply only use ones that are DATA only.
392 E : DCHECK_EQ(0u, reloc_set.count(block->addr() + offset));
393 :
394 E : addresses->insert(abs_block_addr + offset);
395 : }
396 E : }
397 E : }
398 :
399 : // Determines if the provided code block has the expected layout of code first,
400 : // data second. Returns true if so, false otherwise. Also returns the size of
401 : // the code portion of the block by trimming off any data labels.
402 : bool BlockHasExpectedCodeDataLayout(const BlockGraph::Block* block,
403 E : size_t* code_size) {
404 E : DCHECK(block != NULL);
405 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
406 E : DCHECK(code_size != NULL);
407 :
408 E : *code_size = block->data_size();
409 :
410 : BlockGraph::Block::LabelMap::const_reverse_iterator label_it =
411 E : block->labels().rbegin();
412 : BlockGraph::Block::LabelMap::const_reverse_iterator label_end =
413 E : block->labels().rend();
414 :
415 E : bool seen_non_data = false;
416 :
417 : // Walk through the labels in reverse order (by decreasing offset). Trim
418 : // any data labels from this blocks data_size.
419 E : for (; label_it != label_end; ++label_it) {
420 E : if (label_it->second.has_attributes(BlockGraph::DATA_LABEL)) {
421 : // We've encountered data not strictly at the end of the block. This
422 : // violates assumptions about code generated by cl.exe.
423 E : if (seen_non_data)
424 E : return false;
425 :
426 : // Otherwise, we're still in a run of data labels at the tail of the
427 : // block. Keep trimming the code size.
428 E : size_t offset = static_cast<size_t>(label_it->first);
429 E : if (offset < *code_size)
430 E : *code_size = offset;
431 E : } else {
432 E : seen_non_data = true;
433 : }
434 E : }
435 :
436 E : return true;
437 E : }
438 :
439 : // Given a compiland, returns its compiland details.
440 : bool GetCompilandDetailsForCompiland(IDiaSymbol* compiland,
441 E : IDiaSymbol** compiland_details) {
442 E : DCHECK(compiland != NULL);
443 E : DCHECK(compiland_details != NULL);
444 E : DCHECK(IsSymTag(compiland, SymTagCompiland));
445 :
446 E : *compiland_details = NULL;
447 :
448 : // Get the enumeration of compiland details.
449 E : ScopedComPtr<IDiaEnumSymbols> enum_symbols;
450 : HRESULT hr = compiland->findChildren(SymTagCompilandDetails, NULL, 0,
451 E : enum_symbols.Receive());
452 E : DCHECK_EQ(S_OK, hr);
453 :
454 : // We expect there to be compiland details. For compilands built by
455 : // non-standard toolchains, there usually aren't any.
456 E : LONG count = 0;
457 E : hr = enum_symbols->get_Count(&count);
458 E : DCHECK_EQ(S_OK, hr);
459 E : if (count == 0)
460 i : return false;
461 :
462 : // Get the compiland details.
463 E : ULONG fetched = 0;
464 E : hr = enum_symbols->Next(1, compiland_details, &fetched);
465 E : DCHECK_EQ(S_OK, hr);
466 E : DCHECK_EQ(1u, fetched);
467 E : return true;
468 E : }
469 :
// Stores information regarding known compilers.
struct KnownCompilerInfo {
  // The compiler name to match, as a wide string. This points at a string
  // literal, hence the pointer-to-const.
  const wchar_t* compiler_name;
  // Whether compilands produced by this compiler are supported.
  bool supported;
};

// A list of known compilers, and their status as being supported or not. The
// table is read-only.
const KnownCompilerInfo kKnownCompilerInfos[] = {
  { L"Microsoft (R) Macro Assembler", false },
  { L"Microsoft (R) Optimizing Compiler", true },
  { L"Microsoft (R) LINK", false }
};
482 :
483 : // Given a compiland, determines whether the compiler used is one of those that
484 : // we whitelist.
485 E : bool IsBuiltBySupportedCompiler(IDiaSymbol* compiland) {
486 E : DCHECK(compiland != NULL);
487 E : DCHECK(IsSymTag(compiland, SymTagCompiland));
488 :
489 E : ScopedComPtr<IDiaSymbol> compiland_details;
490 : if (!GetCompilandDetailsForCompiland(compiland,
491 E : compiland_details.Receive())) {
492 : // If the compiland has no compiland details we assume the compiler is not
493 : // supported.
494 i : ScopedBstr compiland_name;
495 i : if (compiland->get_name(compiland_name.Receive()) == S_OK) {
496 i : VLOG(1) << "Compiland has no compiland details: "
497 : << com::ToString(compiland_name);
498 : }
499 i : return false;
500 : }
501 E : DCHECK(compiland_details.get() != NULL);
502 :
503 : // Get the compiler name.
504 E : ScopedBstr compiler_name;
505 E : HRESULT hr = compiland_details->get_compilerName(compiler_name.Receive());
506 E : DCHECK_EQ(S_OK, hr);
507 :
508 : // Check the compiler name against the list of known compilers.
509 E : for (size_t i = 0; i < arraysize(kKnownCompilerInfos); ++i) {
510 E : if (::wcscmp(kKnownCompilerInfos[i].compiler_name, compiler_name) == 0) {
511 E : return kKnownCompilerInfos[i].supported;
512 : }
513 E : }
514 :
515 : // Anything we don't explicitly know about is not supported.
516 E : VLOG(1) << "Encountered unknown compiler: " << compiler_name;
517 E : return false;
518 E : }
519 :
// Logs an error if @p error is true, a verbose logging message otherwise.
// @p error appears twice in the expansion, so it should be free of side
// effects; every use is parenthesized so that any expression may be passed.
#define LOG_ERROR_OR_VLOG1(error) LAZY_STREAM( \
    ::logging::LogMessage(__FILE__, \
                          __LINE__, \
                          (error) ? ::logging::LOG_ERROR : -1).stream(), \
    ((error) ? LOG_IS_ON(ERROR) : VLOG_IS_ON(1)))
526 :
// Logs a warning if @p warn is true, a verbose logging message otherwise.
// @p warn appears twice in the expansion, so it should be free of side
// effects; every use is parenthesized so that any expression may be passed.
#define LOG_WARNING_OR_VLOG1(warn) LAZY_STREAM( \
    ::logging::LogMessage(__FILE__, \
                          __LINE__, \
                          (warn) ? ::logging::LOG_WARNING : -1).stream(), \
    ((warn) ? LOG_IS_ON(WARNING) : VLOG_IS_ON(1)))
533 :
534 : // Sets the disassembler directive to an error if @p strict is true, otherwise
535 : // sets it to an early termination.
536 E : CallbackDirective AbortOrTerminateDisassembly(bool strict) {
537 E : if (strict)
538 i : return Disassembler::kDirectiveAbort;
539 i : else
540 E : return Disassembler::kDirectiveTerminateWalk;
541 E : }
542 :
543 : // Returns true if the callback-directive is an early termination that should be
544 : // returned immediately.
545 E : bool IsFatalCallbackDirective(CallbackDirective directive) {
546 E : switch (directive) {
547 : case Disassembler::kDirectiveContinue:
548 : case Disassembler::kDirectiveTerminatePath:
549 E : return false;
550 :
551 : case Disassembler::kDirectiveTerminateWalk:
552 : case Disassembler::kDirectiveAbort:
553 i : return true;
554 :
555 : default:
556 i : NOTREACHED();
557 : }
558 :
559 i : return true;
560 E : }
561 :
562 : // Combines two callback directives. Higher codes supersede lower ones.
563 : CallbackDirective CombineCallbackDirectives(CallbackDirective d1,
564 E : CallbackDirective d2) {
565 : // This ensures that this logic remains valid. This should prevent people
566 : // from tinkering with CallbackDirective and breaking this code.
567 : COMPILE_ASSERT(Disassembler::kDirectiveContinue <
568 : Disassembler::kDirectiveTerminatePath &&
569 : Disassembler::kDirectiveTerminatePath <
570 : Disassembler::kDirectiveTerminateWalk &&
571 : Disassembler::kDirectiveTerminateWalk <
572 : Disassembler::kDirectiveAbort,
573 : callback_directive_enum_is_not_sorted);
574 E : return std::max(d1, d2);
575 E : }
576 :
577 : // Determines if the given block has a data label in the given range of bytes.
578 : bool HasDataLabelInRange(const BlockGraph::Block* block,
579 : BlockGraph::Offset offset,
580 E : BlockGraph::Size size) {
581 : BlockGraph::Block::LabelMap::const_iterator it =
582 E : block->labels().lower_bound(offset);
583 : BlockGraph::Block::LabelMap::const_iterator end =
584 E : block->labels().lower_bound(offset + size);
585 :
586 E : for (; it != end; ++it) {
587 i : if (it->second.has_attributes(BlockGraph::DATA_LABEL))
588 i : return true;
589 i : }
590 :
591 E : return false;
592 E : }
593 :
594 : void ReportPotentialNonReturningFunction(
595 : const Decomposer::IntermediateReferenceMap& refs,
596 : const BlockGraph::AddressSpace& image,
597 : const BlockGraph::Block* block,
598 : BlockGraph::Offset call_ref_offset,
599 E : const char* reason) {
600 : typedef Decomposer::IntermediateReferenceMap::const_iterator RefIter;
601 :
602 : // Try and track down the block being pointed at by the call. If this is a
603 : // computed address there will be no reference.
604 E : RefIter ref_it = refs.find(block->addr() + call_ref_offset);
605 E : if (ref_it == refs.end()) {
606 i : LOG(WARNING) << "Suspected non-returning function call from offset "
607 : << call_ref_offset << " (followed by " << reason
608 : << ") of block \"" << block->name()
609 : << "\", but target can not be tracked down.";
610 i : return;
611 : }
612 :
613 E : BlockGraph::Block* target = image.GetBlockByAddress(ref_it->second.base);
614 E : DCHECK(target != NULL);
615 :
616 : // If this was marked as non-returning, then its not suspicious.
617 E : if ((target->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
618 E : return;
619 :
620 : // If the target is a code block then this is a direct call.
621 E : if (target->type() == BlockGraph::CODE_BLOCK) {
622 i : LOG(WARNING) << "Suspected non-returning call from offset "
623 : << call_ref_offset << " (followed by " << reason
624 : << ") of block \"" << block->name() << "\" to code block \""
625 : << target->name() << "\".";
626 i : return;
627 : }
628 : // Otherwise the target is a data block and this is a memory indirect call
629 : // to a thunk.
630 E : DCHECK_EQ(BlockGraph::DATA_BLOCK, target->type());
631 :
632 : // Track down the import thunk.
633 E : RefIter thunk_ref_it = refs.find(ref_it->second.base);
634 E : DCHECK(thunk_ref_it != refs.end());
635 E : BlockGraph::Block* thunk = image.GetBlockByAddress(thunk_ref_it->second.base);
636 :
637 : // If this was marked as non-returning, then its not suspicious.
638 E : if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
639 E : return;
640 :
641 : // Otherwise, this is an import thunk. Get the module and symbol names.
642 i : LOG(WARNING) << "Suspected non-returning call from offset "
643 : << call_ref_offset << " (followed by " << reason
644 : << ") of block \"" << block->name() << "\" to import thunk \""
645 : << thunk->name() << "\".";
646 E : }
647 :
648 : void LookForNonReturningFunctions(
649 : const Decomposer::IntermediateReferenceMap& refs,
650 : const BlockGraph::AddressSpace& image,
651 : const BlockGraph::Block* block,
652 E : const Disassembler& disasm) {
653 E : bool saw_call = false;
654 E : bool saw_call_then_nop = false;
655 E : BlockGraph::Offset call_ref_offset = 0;
656 :
657 E : AbsoluteAddress end_of_last_inst;
658 : Disassembler::VisitedSpace::const_iterator inst_it =
659 E : disasm.visited().begin();
660 E : for (; inst_it != disasm.visited().end(); ++inst_it) {
661 : // Not contiguous with the last instruction? Then we're spanning a gap. If
662 : // it's an instruction then we didn't parse it; thus, we already know that
663 : // if the last instruction is a call it's to a non-returning function. So,
664 : // we only need to check for data.
665 E : if (inst_it->first.start() != end_of_last_inst) {
666 E : if (saw_call || saw_call_then_nop) {
667 E : BlockGraph::Offset offset = end_of_last_inst - disasm.code_addr();
668 E : BlockGraph::Size size = inst_it->first.start() - end_of_last_inst;
669 E : if (HasDataLabelInRange(block, offset, size))
670 : // We do not expect this to ever occur in cl.exe generated code.
671 : // However, it is entirely possible in hand-written assembly.
672 : ReportPotentialNonReturningFunction(
673 : refs, image, block, call_ref_offset,
674 i : saw_call ? "data" : "nop(s) and data");
675 : }
676 :
677 E : saw_call = false;
678 E : saw_call_then_nop = false;
679 : }
680 :
681 E : _DInst inst = { 0 };
682 E : BlockGraph::Offset offset = inst_it->first.start() - disasm.code_addr();
683 E : const uint8* code = disasm.code() + offset;
684 E : CHECK(core::DecodeOneInstruction(code, inst_it->first.size(), &inst));
685 :
686 : // Previous instruction was a call?
687 E : if (saw_call) {
688 E : if (core::IsNop(inst)) {
689 i : saw_call_then_nop = true;
690 E : } else if (core::IsDebugInterrupt(inst)) {
691 : ReportPotentialNonReturningFunction(
692 E : refs, image, block, call_ref_offset, "int3");
693 : }
694 E : saw_call = false;
695 E : } else if (saw_call_then_nop) {
696 : // The previous instructions we've seen have been a call followed by
697 : // arbitrary many nops. Look for another nop to continue the pattern.
698 i : saw_call_then_nop = core::IsNop(inst);
699 i : } else {
700 : // The previous instruction was not a call, so we're looking for one.
701 : // If this instruction is a call, remember that fact and also remember
702 : // the offset of its operand (the call target).
703 E : if (core::IsCall(inst)) {
704 E : saw_call = true;
705 : call_ref_offset = offset + inst_it->first.size() -
706 E : BlockGraph::Reference::kMaximumSize;
707 : }
708 : }
709 :
710 : // Remember the end of the last instruction we processed.
711 E : end_of_last_inst = inst_it->first.end();
712 E : }
713 :
714 : // If the last instruction was a call and we've marked that we've disassembled
715 : // past the end, then this is also a suspected non-returning function.
716 : if ((saw_call || saw_call_then_nop) &&
717 E : (block->attributes() & BlockGraph::DISASSEMBLED_PAST_END) != 0) {
718 i : const char* reason = saw_call ? "end of block" : "nop(s) and end of block";
719 : ReportPotentialNonReturningFunction(
720 i : refs, image, block, call_ref_offset, reason);
721 : }
722 E : }
723 :
724 E : bool CodeBlockHasAlignedJumpTables(const BlockGraph::Block* block) {
725 E : DCHECK(block != NULL);
726 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
727 :
728 : // Iterate over the labels of this block looking for jump tables.
729 E : bool has_jump_tables = false;
730 : BlockGraph::Block::LabelMap::const_iterator label_it =
731 E : block->labels().begin();
732 E : for (; label_it != block->labels().end(); ++label_it) {
733 E : if (!label_it->second.has_attributes(BlockGraph::JUMP_TABLE_LABEL))
734 E : continue;
735 :
736 E : has_jump_tables = true;
737 :
738 : // If the jump table is misaligned we can return false immediately.
739 E : if (label_it->first % kPointerSize != 0)
740 i : return false;
741 E : }
742 :
743 E : return has_jump_tables;
744 E : }
745 :
746 E : bool AlignCodeBlocksWithJumpTables(ImageLayout* image_layout) {
747 E : DCHECK(image_layout != NULL);
748 :
749 : BlockGraph::AddressSpace::RangeMapConstIter block_it =
750 E : image_layout->blocks.begin();
751 E : for (; block_it != image_layout->blocks.end(); ++block_it) {
752 E : BlockGraph::Block* block = block_it->second;
753 :
754 : // We only care about code blocks that are already aligned 0 mod 4 but
755 : // whose explicit alignment is currently less than that.
756 E : if (block->type() != BlockGraph::CODE_BLOCK)
757 E : continue;
758 E : if (block->alignment() >= kPointerSize)
759 i : continue;
760 E : if (block_it->first.start().value() % kPointerSize != 0)
761 E : continue;
762 :
763 : // Inspect them to see if they have aligned jump tables. If they do,
764 : // set the alignment of the block itself.
765 E : if (CodeBlockHasAlignedJumpTables(block_it->second))
766 E : block->set_alignment(kPointerSize);
767 E : }
768 :
769 E : return true;
770 E : }
771 :
772 : } // namespace
773 :
774 : Decomposer::Decomposer(const PEFile& image_file)
775 : : image_(NULL),
776 : image_file_(image_file),
777 : current_block_(NULL),
778 E : be_strict_with_current_block_(true) {
779 : // Register static initializer patterns that we know are always present.
780 : // CRT C/C++/etc initializers.
781 E : CHECK(RegisterStaticInitializerPatterns("(__x.*)_a", "(__x.*)_z"));
782 : // RTC (run-time checks) initializers (part of CRT).
783 E : CHECK(RegisterStaticInitializerPatterns("(__rtc_[it])aa", "(__rtc_[it])zz"));
784 : // ATL object map initializers.
785 : CHECK(RegisterStaticInitializerPatterns("(__pobjMapEntry)First",
786 E : "(__pobjMapEntry)Last"));
787 : // Thread-local storage template.
788 E : CHECK(RegisterStaticInitializerPatterns("(_tls_)start", "(_tls_)end"));
789 :
790 : // Register non-returning functions that for some reason the symbols lie to
791 : // us about.
792 E : CHECK(RegisterNonReturningFunction("_CxxThrowException"));
793 E : CHECK(RegisterNonReturningFunction("_longjmp"));
794 :
795 : // Register non-returning imports that we know about.
796 E : CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitProcess"));
797 E : CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitThread"));
798 E : }
799 :
800 E : bool Decomposer::Decompose(ImageLayout* image_layout) {
801 : // We start by finding the PDB path.
802 E : if (!FindAndValidatePdbPath())
803 E : return false;
804 E : DCHECK(!pdb_path_.empty());
805 :
806 : // Check if the block-graph has already been serialized into the PDB and load
807 : // it from here in this case. This allows round-trip decomposition.
808 E : bool stream_exists = false;
809 : if (LoadBlockGraphFromPdb(pdb_path_, image_file_, image_layout,
810 E : &stream_exists)) {
811 E : return true;
812 i : } else {
813 : // If the stream exists but hasn't been loaded we return an error. At this
814 : // point an error message has already been logged if there was one.
815 E : if (stream_exists)
816 i : return false;
817 : }
818 :
819 : // Move on to instantiating and initializing our Debug Interface Access
820 : // session.
821 E : ScopedComPtr<IDiaDataSource> dia_source;
822 E : if (!CreateDiaSource(dia_source.Receive()))
823 i : return false;
824 :
825 : // We create the session using the PDB file directly, as we've already
826 : // validated that it matches the module.
827 E : ScopedComPtr<IDiaSession> dia_session;
828 : if (!CreateDiaSession(pdb_path_,
829 : dia_source.get(),
830 E : dia_session.Receive())) {
831 i : return false;
832 : }
833 :
834 : HRESULT hr = dia_session->put_loadAddress(
835 E : image_file_.nt_headers()->OptionalHeader.ImageBase);
836 E : if (hr != S_OK) {
837 i : LOG(ERROR) << "Failed to set the DIA load address: "
838 : << com::LogHr(hr) << ".";
839 i : return false;
840 : }
841 :
842 E : ScopedComPtr<IDiaSymbol> global;
843 E : hr = dia_session->get_globalScope(global.Receive());
844 E : if (hr != S_OK) {
845 i : LOG(ERROR) << "Failed to get the DIA global scope: "
846 : << com::LogHr(hr) << ".";
847 i : return false;
848 : }
849 :
850 E : image_ = &image_layout->blocks;
851 :
852 : // Create the sections for the image.
853 E : bool success = CreateSections();
854 :
855 : // Load FIXUP information from the PDB file. We do this early on so that we
856 : // can do accounting with references that are created later on.
857 E : if (success)
858 E : success = LoadDebugStreams(dia_session);
859 :
860 : // Create intermediate references for each fixup entry.
861 E : if (success)
862 E : success = CreateReferencesFromFixups();
863 :
864 : // Chunk out important PE image structures, like the headers and such.
865 E : PEFileParser::PEHeader header;
866 E : if (success)
867 E : success = CreatePEImageBlocksAndReferences(&header);
868 :
869 : // Parse and validate the relocation entries.
870 E : if (success)
871 E : success = ParseRelocs();
872 :
873 : // Our first round of parsing is using section contributions. This creates
874 : // both code and data blocks.
875 E : if (success)
876 E : success = CreateBlocksFromSectionContribs(dia_session);
877 :
878 : // Process the function and thunk symbols in the image. This does not create
879 : // any blocks, as all functions are covered by section contributions.
880 E : if (success)
881 E : success = ProcessCodeSymbols(global);
882 :
883 : // Process data symbols. This can cause the creation of some blocks as the
884 : // data sections are not fully covered by section contributions.
885 E : if (success)
886 E : success = ProcessDataSymbols(global);
887 :
888 : // Create labels in code blocks.
889 E : if (success)
890 E : success = CreateGlobalLabels(global);
891 :
892 : // Create gap blocks. This ensures that we have complete coverage of the
893 : // entire image.
894 E : if (success)
895 E : success = CreateGapBlocks();
896 :
897 : // Parse public symbols, augmenting code and data labels where possible.
898 : // Some public symbols land on gap blocks, so they need to have been parsed
899 : // already.
900 E : if (success)
901 E : success = ProcessPublicSymbols(global);
902 :
903 : // Parse initialization bracketing symbols. This needs to happen after
904 : // PublicSymbols have been parsed.
905 E : if (success)
906 E : success = ProcessStaticInitializers();
907 :
908 : // We know that some data blocks need to have alignment precisely preserved.
909 : // For now, we very conservatively (guaranteed to be correct, but causes many
910 : // blocks to be aligned that don't strictly need alignment) guess alignment
911 : // for each block. This must be run after static initializers have been
912 : // parsed.
913 E : if (success)
914 E : success = GuessDataBlockAlignments();
915 :
916 : // Disassemble code blocks and create PC-relative references
917 E : if (success)
918 E : success = CreateCodeReferences();
919 :
920 : // Turn the address->address format references we've created into
921 : // block->block references on the blocks in the image.
922 E : if (success)
923 E : success = FinalizeIntermediateReferences();
924 :
925 : // Everything called after this points requires the references to have been
926 : // finalized.
927 :
928 : // One way of ensuring full coverage is to check that all of the fixups
929 : // were visited during decomposition.
930 E : if (success)
931 E : success = ConfirmFixupsVisited();
932 :
933 : // Now, find and label any padding blocks.
934 E : if (success)
935 E : success = FindPaddingBlocks();
936 :
937 : // Copy the image headers over to the layout.
938 E : if (success)
939 E : success = CopyHeaderToImageLayout(header.nt_headers, image_layout);
940 :
941 : // Set the alignment on code blocks with jump tables. This ensures that the
942 : // jump tables remain aligned post-transform.
943 E : if (success)
944 E : success = AlignCodeBlocksWithJumpTables(image_layout);
945 :
946 E : image_ = NULL;
947 :
948 E : return success;
949 E : }
950 :
951 E : bool Decomposer::FindAndValidatePdbPath() {
952 : // Manually find the PDB path if it is not specified.
953 E : if (pdb_path_.empty()) {
954 : if (!FindPdbForModule(image_file_.path(), &pdb_path_) ||
955 E : pdb_path_.empty()) {
956 i : LOG(ERROR) << "Unable to find PDB file for module: "
957 : << image_file_.path().value();
958 i : return false;
959 : }
960 : }
961 E : DCHECK(!pdb_path_.empty());
962 :
963 E : if (!file_util::PathExists(pdb_path_)) {
964 E : LOG(ERROR) << "Path not found: " << pdb_path_.value();
965 E : return false;
966 : }
967 :
968 : // Get the PDB info from the PDB file.
969 : pdb::PdbInfoHeader70 pdb_info_header;
970 E : if (!pdb::ReadPdbHeader(pdb_path_, &pdb_info_header)) {
971 i : LOG(ERROR) << "Unable to read PDB info header from PDB file: "
972 : << pdb_path_.value();
973 i : return false;
974 : }
975 :
976 : // Get the PDB info from the module.
977 E : PdbInfo pdb_info;
978 E : if (!pdb_info.Init(image_file_)) {
979 i : LOG(ERROR) << "Unable to read PDB info from PE file: "
980 : << image_file_.path().value();
981 i : return false;
982 : }
983 :
984 : // Ensure that they are consistent.
985 E : if (!pdb_info.IsConsistent(pdb_info_header)) {
986 i : LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
987 : << "module \"" << image_file_.path().value() << "\".";
988 i : return false;
989 : }
990 :
991 E : return true;
992 E : }
993 :
994 E : bool Decomposer::ProcessCodeSymbols(IDiaSymbol* global) {
995 E : if (!ProcessFunctionSymbols(global))
996 i : return false;
997 E : if (!ProcessThunkSymbols(global))
998 i : return false;
999 :
1000 E : return true;
1001 E : }
1002 :
1003 E : bool Decomposer::ProcessFunctionSymbols(IDiaSymbol* global) {
1004 E : DCHECK(IsSymTag(global, SymTagExe));
1005 :
1006 : // Otherwise enumerate its offspring.
1007 E : ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
1008 : HRESULT hr = global->findChildren(SymTagFunction,
1009 : NULL,
1010 : nsNone,
1011 E : dia_enum_symbols.Receive());
1012 E : if (hr != S_OK) {
1013 i : LOG(ERROR) << "Failed to get the DIA function enumerator: "
1014 : << com::LogHr(hr) << ".";
1015 i : return false;
1016 : }
1017 :
1018 E : LONG count = 0;
1019 E : if (dia_enum_symbols->get_Count(&count) != S_OK) {
1020 i : LOG(ERROR) << "Failed to get function enumeration length.";
1021 i : return false;
1022 : }
1023 :
1024 E : for (LONG visited = 0; visited < count; ++visited) {
1025 E : ScopedComPtr<IDiaSymbol> function;
1026 E : ULONG fetched = 0;
1027 E : hr = dia_enum_symbols->Next(1, function.Receive(), &fetched);
1028 E : if (hr != S_OK) {
1029 i : LOG(ERROR) << "Failed to enumerate functions: " << com::LogHr(hr) << ".";
1030 i : return false;
1031 : }
1032 E : if (fetched == 0)
1033 i : break;
1034 :
1035 : // Create the block representing the function.
1036 E : DCHECK(IsSymTag(function, SymTagFunction));
1037 E : if (!ProcessFunctionOrThunkSymbol(function))
1038 i : return false;
1039 E : }
1040 :
1041 E : return true;
1042 E : }
1043 :
1044 E : bool Decomposer::ProcessFunctionOrThunkSymbol(IDiaSymbol* function) {
1045 E : DCHECK(IsSymTag(function, SymTagFunction) || IsSymTag(function, SymTagThunk));
1046 :
1047 E : DWORD location_type = LocIsNull;
1048 E : HRESULT hr = E_FAIL;
1049 E : if (FAILED(hr = function->get_locationType(&location_type))) {
1050 i : LOG(ERROR) << "Failed to retrieve function address type: "
1051 : << com::LogHr(hr) << ".";
1052 i : return false;
1053 : }
1054 E : if (location_type != LocIsStatic) {
1055 i : DCHECK_EQ(static_cast<DWORD>(LocIsNull), location_type);
1056 i : return true;
1057 : }
1058 :
1059 E : DWORD rva = 0;
1060 E : ULONGLONG length = 0;
1061 E : ScopedBstr name;
1062 : if ((hr = function->get_relativeVirtualAddress(&rva)) != S_OK ||
1063 : (hr = function->get_length(&length)) != S_OK ||
1064 E : (hr = function->get_name(name.Receive())) != S_OK) {
1065 i : LOG(ERROR) << "Failed to retrieve function information: "
1066 : << com::LogHr(hr) << ".";
1067 i : return false;
1068 : }
1069 :
1070 : // Certain properties are not defined on all blocks, so the following calls
1071 : // may return S_FALSE.
1072 E : BOOL no_return = FALSE;
1073 E : if (function->get_noReturn(&no_return) != S_OK)
1074 E : no_return = FALSE;
1075 :
1076 E : BOOL has_inl_asm = FALSE;
1077 E : if (function->get_hasInlAsm(&has_inl_asm) != S_OK)
1078 E : has_inl_asm = FALSE;
1079 :
1080 E : BOOL has_eh = FALSE;
1081 E : if (function->get_hasEH(&has_eh) != S_OK)
1082 E : has_eh = FALSE;
1083 :
1084 E : BOOL has_seh = FALSE;
1085 E : if (function->get_hasSEH(&has_seh) != S_OK)
1086 E : has_seh = FALSE;
1087 :
1088 E : std::string block_name;
1089 E : if (!WideToUTF8(name, name.Length(), &block_name)) {
1090 i : LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1091 i : return false;
1092 : }
1093 :
1094 : // Find the block to which this symbol maps, and ensure it fully covers the
1095 : // symbol.
1096 E : RelativeAddress block_addr(rva);
1097 E : BlockGraph::Block* block = image_->GetBlockByAddress(block_addr);
1098 E : if (block == NULL) {
1099 i : LOG(ERROR) << "No block found for function/thunk symbol \""
1100 : << block_name << "\".";
1101 i : return false;
1102 : }
1103 E : if (block->addr() + block->size() < block_addr + length) {
1104 i : LOG(ERROR) << "Section contribution \"" << block->name() << "\" does not "
1105 : << "fully cover function/thunk symbol \"" << block_name << "\".";
1106 i : return false;
1107 : }
1108 :
1109 : // Annotate the block with a label, as this is an entry point to it. This is
1110 : // the routine that adds labels, so there should never be any collisions.
1111 E : CHECK(AddLabelToBlock(block_addr, block_name, BlockGraph::CODE_LABEL, block));
1112 :
1113 : // If we didn't get an explicit no-return flag from the symbols check our
1114 : // list of exceptions.
1115 E : if (no_return == FALSE && non_returning_functions_.count(block->name()) > 0) {
1116 E : VLOG(1) << "Forcing non-returning attribute on function \""
1117 : << block->name() << "\".";
1118 E : no_return = TRUE;
1119 : }
1120 :
1121 : // Set the block attributes.
1122 E : if (no_return == TRUE)
1123 E : block->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
1124 E : if (has_inl_asm == TRUE)
1125 E : block->set_attribute(BlockGraph::HAS_INLINE_ASSEMBLY);
1126 E : if (has_eh || has_seh)
1127 E : block->set_attribute(BlockGraph::HAS_EXCEPTION_HANDLING);
1128 E : if (IsSymTag(function, SymTagThunk))
1129 E : block->set_attribute(BlockGraph::THUNK);
1130 :
1131 E : if (!CreateLabelsForFunction(function, block)) {
1132 i : LOG(ERROR) << "Failed to create labels for '" << block->name() << "'.";
1133 i : return false;
1134 : }
1135 :
1136 E : return true;
1137 E : }
1138 :
1139 : bool Decomposer::CreateLabelsForFunction(IDiaSymbol* function,
1140 E : BlockGraph::Block* block) {
1141 E : DCHECK(function != NULL);
1142 E : DCHECK(block != NULL);
1143 :
1144 : // Lookup the block address.
1145 E : RelativeAddress block_addr;
1146 E : if (!image_->GetAddressOf(block, &block_addr)) {
1147 i : NOTREACHED() << "Block " << block->name() << " has no address.";
1148 i : return false;
1149 : }
1150 :
1151 : // Enumerate all symbols which are children of function.
1152 E : ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
1153 : HRESULT hr = function->findChildren(SymTagNull,
1154 : NULL,
1155 : nsNone,
1156 E : dia_enum_symbols.Receive());
1157 E : if (FAILED(hr)) {
1158 i : LOG(ERROR) << "Failed to get the DIA label enumerator: "
1159 : << com::LogHr(hr) << ".";
1160 i : return false;
1161 : }
1162 :
1163 E : while (true) {
1164 E : ScopedComPtr<IDiaSymbol> symbol;
1165 E : ULONG fetched = 0;
1166 E : hr = dia_enum_symbols->Next(1, symbol.Receive(), &fetched);
1167 E : if (FAILED(hr)) {
1168 i : LOG(ERROR) << "Failed to enumerate the DIA symbol: "
1169 : << com::LogHr(hr) << ".";
1170 i : return false;
1171 : }
1172 E : if (hr != S_OK || fetched == 0)
1173 E : break;
1174 :
1175 : // If it doesn't have an RVA then it's not interesting to us.
1176 E : DWORD temp_rva = 0;
1177 E : if (symbol->get_relativeVirtualAddress(&temp_rva) != S_OK)
1178 E : continue;
1179 :
1180 : // Get the type of symbol we're looking at.
1181 E : DWORD temp_sym_tag = 0;
1182 E : if (symbol->get_symTag(&temp_sym_tag) != S_OK) {
1183 i : LOG(ERROR) << "Failed to retrieve label information.";
1184 i : return false;
1185 : }
1186 :
1187 E : enum SymTagEnum sym_tag = static_cast<enum SymTagEnum>(temp_sym_tag);
1188 E : BlockGraph::LabelAttributes label_attr = SymTagToLabelAttributes(sym_tag);
1189 :
1190 : // TODO(rogerm): Add a flag to include/exclude the symbol types that are
1191 : // interesting for debugging purposes, but not actually needed for
1192 : // decomposition: FuncDebugStart/End, Block, etc.
1193 :
1194 : // We ignore labels that fall outside of the code block. We sometimes
1195 : // get labels at the end of a code block, and if the binary has any OMAP
1196 : // information these follow the original successor block, and they can
1197 : // end up most anywhere in the binary.
1198 E : RelativeAddress label_rva(temp_rva);
1199 E : if (label_rva < block_addr || label_rva >= block_addr + block->size())
1200 E : continue;
1201 :
1202 : // Extract the symbol's name.
1203 E : std::string label_name;
1204 : {
1205 E : ScopedBstr temp_name;
1206 : if (symbol->get_name(temp_name.Receive()) == S_OK &&
1207 E : !WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1208 i : LOG(ERROR) << "Failed to convert label name to UTF8.";
1209 i : return false;
1210 : }
1211 E : }
1212 :
1213 : // Not all symbols have a name, if we've found one without a name, make
1214 : // one up.
1215 E : BlockGraph::Offset offset = label_rva - block_addr;
1216 E : if (label_name.empty()) {
1217 E : switch (sym_tag) {
1218 : case SymTagFuncDebugStart: {
1219 E : label_name = "<debug-start>";
1220 E : break;
1221 : }
1222 :
1223 : case SymTagFuncDebugEnd: {
1224 E : label_name = "<debug-end>";
1225 E : break;
1226 : }
1227 :
1228 : case SymTagData: {
1229 E : if (reloc_set_.count(label_rva)) {
1230 E : label_name = base::StringPrintf("<jump-table-%d>", offset);
1231 E : label_attr |= BlockGraph::JUMP_TABLE_LABEL;
1232 E : } else {
1233 E : label_name = base::StringPrintf("<case-table-%d>", offset);
1234 E : label_attr |= BlockGraph::CASE_TABLE_LABEL;
1235 : }
1236 E : break;
1237 : }
1238 :
1239 : case SymTagBlock: {
1240 E : label_name = "<scope-start>";
1241 E : break;
1242 : }
1243 :
1244 : // The DIA SDK shipping with MSVS 2010 includes additional symbol types.
1245 : case SymTagCallSite: {
1246 E : label_name = "<call-site>";
1247 E : break;
1248 : }
1249 :
1250 : default: {
1251 i : LOG(WARNING) << "Unexpected symbol type " << sym_tag << " in "
1252 : << block->name() << " at "
1253 : << base::StringPrintf("0x%08X.", label_rva.value());
1254 i : label_name = base::StringPrintf("<anonymous-%d>", sym_tag);
1255 : }
1256 : }
1257 : }
1258 :
1259 : // We expect that we'll never see a code label that refers to a reloc.
1260 : // This happens sometimes, however, as we generally get a code label for
1261 : // the first byte after a switch statement. This can sometimes land on the
1262 : // following jump table.
1263 E : if ((label_attr & BlockGraph::CODE_LABEL) && reloc_set_.count(label_rva)) {
1264 E : VLOG(1) << "Collision between reloc and code label in "
1265 : << block->name() << " at " << label_name
1266 : << base::StringPrintf(" (0x%08X).", label_rva.value())
1267 : << " Falling back to data label.";
1268 E : label_attr = BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL;
1269 E : DCHECK_EQ(block_addr, block->addr());
1270 E : BlockGraph::Label label;
1271 : if (block->GetLabel(offset, &label) &&
1272 E : !label.has_attributes(BlockGraph::DATA_LABEL)) {
1273 i : VLOG(1) << block->name() << ": Replacing label " << label.name()
1274 : << " ("
1275 : << BlockGraph::LabelAttributesToString(label.attributes())
1276 : << ") at offset " << offset << ".";
1277 i : block->RemoveLabel(offset);
1278 : }
1279 E : }
1280 :
1281 : // Add the label to the block.
1282 E : if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1283 i : LOG(ERROR) << "Failed to add label to code block.";
1284 i : return false;
1285 : }
1286 :
1287 : // Is this a scope? Then it also has a length. Use it to create the matching
1288 : // scope end.
1289 E : if (sym_tag == SymTagBlock) {
1290 E : ULONGLONG length = 0;
1291 E : if (symbol->get_length(&length) != S_OK) {
1292 i : LOG(ERROR) << "Failed to extract code scope length for "
1293 : << block->name();
1294 i : return false;
1295 : }
1296 E : label_rva += length;
1297 E : label_name = "<scope-end>";
1298 E : label_attr = BlockGraph::SCOPE_END_LABEL;
1299 E : if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1300 i : LOG(ERROR) << "Failed to add label to code block.";
1301 i : return false;
1302 : }
1303 : }
1304 E : }
1305 :
1306 E : return true;
1307 E : }
1308 :
1309 E : bool Decomposer::ProcessThunkSymbols(IDiaSymbol* globals) {
1310 E : ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1311 : HRESULT hr = globals->findChildren(SymTagCompiland,
1312 : NULL,
1313 : nsNone,
1314 E : enum_compilands.Receive());
1315 E : if (FAILED(hr)) {
1316 i : LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1317 : << com::LogHr(hr) << ".";
1318 i : return false;
1319 : }
1320 :
1321 E : while (true) {
1322 E : ScopedComPtr<IDiaSymbol> compiland;
1323 E : ULONG fetched = 0;
1324 E : hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1325 E : if (FAILED(hr)) {
1326 i : LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1327 : << com::LogHr(hr) << ".";
1328 i : return false;
1329 : }
1330 E : if (hr != S_OK || fetched == 0)
1331 E : break;
1332 :
1333 E : ScopedComPtr<IDiaEnumSymbols> enum_thunks;
1334 : hr = compiland->findChildren(SymTagThunk,
1335 : NULL,
1336 : nsNone,
1337 E : enum_thunks.Receive());
1338 E : if (FAILED(hr)) {
1339 i : LOG(ERROR) << "Failed to retrieve thunk enumerator: "
1340 : << com::LogHr(hr) << ".";
1341 i : return false;
1342 : }
1343 :
1344 E : while (true) {
1345 E : ScopedComPtr<IDiaSymbol> thunk;
1346 E : hr = enum_thunks->Next(1, thunk.Receive(), &fetched);
1347 E : if (FAILED(hr)) {
1348 i : LOG(ERROR) << "Failed to enumerate thunk enumerator: "
1349 : << com::LogHr(hr) << ".";
1350 i : return false;
1351 : }
1352 E : if (hr != S_OK || fetched == 0)
1353 E : break;
1354 :
1355 E : DCHECK(IsSymTag(thunk, SymTagThunk));
1356 :
1357 E : if (!ProcessFunctionOrThunkSymbol(thunk))
1358 i : return false;
1359 E : }
1360 E : }
1361 :
1362 E : return true;
1363 E : }
1364 :
1365 E : bool Decomposer::CreateGlobalLabels(IDiaSymbol* globals) {
1366 E : ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1367 : HRESULT hr = globals->findChildren(SymTagCompiland,
1368 : NULL,
1369 : nsNone,
1370 E : enum_compilands.Receive());
1371 E : if (FAILED(hr)) {
1372 i : LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1373 : << com::LogHr(hr) << ".";
1374 i : return false;
1375 : }
1376 :
1377 E : while (true) {
1378 E : ScopedComPtr<IDiaSymbol> compiland;
1379 E : ULONG fetched = 0;
1380 E : hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1381 E : if (FAILED(hr)) {
1382 i : LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1383 : << com::LogHr(hr) << ".";
1384 i : return false;
1385 : }
1386 E : if (hr != S_OK || fetched == 0)
1387 E : break;
1388 :
1389 E : ScopedComPtr<IDiaEnumSymbols> enum_labels;
1390 : hr = compiland->findChildren(SymTagLabel,
1391 : NULL,
1392 : nsNone,
1393 E : enum_labels.Receive());
1394 E : if (FAILED(hr)) {
1395 i : LOG(ERROR) << "Failed to retrieve label enumerator: "
1396 : << com::LogHr(hr) << ".";
1397 i : return false;
1398 : }
1399 :
1400 E : while (true) {
1401 E : ScopedComPtr<IDiaSymbol> label;
1402 E : hr = enum_labels->Next(1, label.Receive(), &fetched);
1403 E : if (FAILED(hr)) {
1404 i : LOG(ERROR) << "Failed to enumerate label enumerator: "
1405 : << com::LogHr(hr) << ".";
1406 i : return false;
1407 : }
1408 E : if (hr != S_OK || fetched == 0)
1409 E : break;
1410 :
1411 E : DCHECK(IsSymTag(label, SymTagLabel));
1412 :
1413 E : DWORD addr = 0;
1414 E : ScopedBstr temp_name;
1415 : if (label->get_relativeVirtualAddress(&addr) != S_OK ||
1416 E : label->get_name(temp_name.Receive()) != S_OK) {
1417 i : LOG(ERROR) << "Failed to retrieve label address or name.";
1418 i : return false;
1419 : }
1420 :
1421 E : std::string label_name;
1422 E : if (!WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1423 i : LOG(ERROR) << "Failed to convert label name to UTF8.";
1424 i : return false;
1425 : }
1426 :
1427 E : RelativeAddress label_addr(addr);
1428 E : BlockGraph::Block* block = image_->GetBlockByAddress(label_addr);
1429 E : if (block == NULL) {
1430 i : LOG(ERROR) << "No block for label " << label_name << " at " << addr;
1431 i : return false;
1432 : }
1433 :
1434 : if (!AddLabelToBlock(label_addr,
1435 : label_name,
1436 : BlockGraph::CODE_LABEL,
1437 E : block)) {
1438 i : LOG(ERROR) << "Failed to add label to code block.";
1439 i : return false;
1440 : }
1441 E : }
1442 E : }
1443 :
1444 E : return true;
1445 E : }
1446 :
1447 : bool Decomposer::CreateGapBlock(BlockGraph::BlockType block_type,
1448 : RelativeAddress address,
1449 E : BlockGraph::Size size) {
1450 : BlockGraph::Block* block = FindOrCreateBlock(block_type, address, size,
1451 : base::StringPrintf("Gap Block 0x%08X", address.value()).c_str(),
1452 E : kExpectNoBlock);
1453 E : if (block == NULL) {
1454 i : LOG(ERROR) << "Unable to create gap block.";
1455 i : return false;
1456 : }
1457 E : block->set_attribute(BlockGraph::GAP_BLOCK);
1458 :
1459 E : return true;
1460 E : }
1461 :
1462 : bool Decomposer::CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
1463 E : BlockGraph::BlockType block_type) {
1464 E : RelativeAddress section_begin(header->VirtualAddress);
1465 E : RelativeAddress section_end(section_begin + header->Misc.VirtualSize);
1466 : RelativeAddress image_end(
1467 E : image_file_.nt_headers()->OptionalHeader.SizeOfImage);
1468 :
1469 : // Search for the first and last blocks interesting from the start and end
1470 : // of the section to the end of the image.
1471 : BlockGraph::AddressSpace::RangeMap::const_iterator it(
1472 : image_->address_space_impl().FindFirstIntersection(
1473 : BlockGraph::AddressSpace::Range(section_begin,
1474 E : image_end - section_begin)));
1475 :
1476 : BlockGraph::AddressSpace::RangeMap::const_iterator end =
1477 E : image_->address_space_impl().end();
1478 E : if (section_end < image_end) {
1479 : end = image_->address_space_impl().FindFirstIntersection(
1480 : BlockGraph::AddressSpace::Range(section_end,
1481 E : image_end - section_end));
1482 : }
1483 :
1484 : // The whole section is missing. Cover it with one gap block.
1485 E : if (it == end)
1486 : return CreateGapBlock(
1487 i : block_type, section_begin, section_end - section_begin);
1488 :
1489 : // Create the head gap block if need be.
1490 E : if (section_begin < it->first.start())
1491 : if (!CreateGapBlock(
1492 i : block_type, section_begin, it->first.start() - section_begin))
1493 i : return false;
1494 :
1495 : // Now iterate the blocks and fill in gaps.
1496 E : for (; it != end; ++it) {
1497 E : const BlockGraph::Block* block = it->second;
1498 E : DCHECK(block != NULL);
1499 E : RelativeAddress block_end = it->first.start() + block->size();
1500 E : if (block_end >= section_end)
1501 E : break;
1502 :
1503 : // Walk to the next address in turn.
1504 E : BlockGraph::AddressSpace::RangeMap::const_iterator next = it;
1505 E : ++next;
1506 E : if (next == end) {
1507 : // We're at the end of the list. Create the tail gap block.
1508 E : DCHECK_GT(section_end, block_end);
1509 E : if (!CreateGapBlock(block_type, block_end, section_end - block_end))
1510 i : return false;
1511 E : break;
1512 : }
1513 :
1514 : // Create the interstitial gap block.
1515 E : if (block_end < next->first.start())
1516 : if (!CreateGapBlock(
1517 E : block_type, block_end, next->first.start() - block_end))
1518 i : return false;
1519 E : }
1520 :
1521 E : return true;
1522 E : }
1523 :
1524 E : bool Decomposer::CreateGapBlocks() {
1525 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1526 :
1527 : // Iterate through all the image sections.
1528 E : for (size_t i = 0; i < num_sections; ++i) {
1529 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1530 E : DCHECK(header != NULL);
1531 :
1532 E : BlockGraph::BlockType type = BlockGraph::CODE_BLOCK;
1533 E : const char* section_type = NULL;
1534 E : switch (GetSectionType(header)) {
1535 : case kSectionCode:
1536 E : type = BlockGraph::CODE_BLOCK;
1537 E : section_type = "code";
1538 E : break;
1539 :
1540 : case kSectionData:
1541 E : type = BlockGraph::DATA_BLOCK;
1542 E : section_type = "data";
1543 E : break;
1544 :
1545 : default:
1546 i : continue;
1547 : }
1548 :
1549 E : if (!CreateSectionGapBlocks(header, type)) {
1550 i : LOG(ERROR) << "Unable to create gap blocks for " << section_type
1551 : << " section \"" << header->Name << "\".";
1552 i : return false;
1553 : }
1554 E : }
1555 :
1556 E : return true;
1557 E : }
1558 :
1559 : bool Decomposer::AddReferenceCallback(RelativeAddress src_addr,
1560 : BlockGraph::ReferenceType type,
1561 : BlockGraph::Size size,
1562 E : RelativeAddress dst_addr) {
1563 : // This is only called by the PEFileParser, and it creates some references
1564 : // for which there are no corresponding fixup entries.
1565 : return ValidateOrAddReference(FIXUP_MAY_EXIST, src_addr, type, size, dst_addr,
1566 E : 0, &fixup_map_, &references_);
1567 E : }
1568 :
1569 E : bool Decomposer::ParseRelocs() {
1570 E : if (!image_file_.DecodeRelocs(&reloc_set_)) {
1571 i : LOG(ERROR) << "Unable to decode image relocs.";
1572 i : return false;
1573 : }
1574 :
1575 E : PEFile::RelocMap reloc_map;
1576 E : if (!image_file_.ReadRelocs(reloc_set_, &reloc_map)) {
1577 i : LOG(ERROR) << "Unable to read image relocs.";
1578 i : return false;
1579 : }
1580 :
1581 : // Validate each relocation entry against the corresponding fixup entry.
1582 E : if (!ValidateRelocs(reloc_map))
1583 i : return false;
1584 :
1585 E : return true;
1586 E : }
1587 :
1588 E : bool Decomposer::CreateReferencesFromFixups() {
1589 E : FixupMap::const_iterator it(fixup_map_.begin());
1590 E : for (; it != fixup_map_.end(); ++it) {
1591 E : RelativeAddress src_addr(it->second.location);
1592 E : uint32 data = 0;
1593 E : if (!image_file_.ReadImage(src_addr, &data, sizeof(data))) {
1594 i : LOG(ERROR) << "Unable to read image data for fixup with source at "
1595 : << src_addr;
1596 i : return false;
1597 : }
1598 :
1599 E : RelativeAddress dst_base(it->second.base);
1600 E : BlockGraph::Offset dst_offset = 0;
1601 E : switch (it->second.type) {
1602 : case BlockGraph::PC_RELATIVE_REF: {
1603 E : dst_offset = src_addr + kPointerSize + data - dst_base;
1604 E : break;
1605 : }
1606 :
1607 : case BlockGraph::ABSOLUTE_REF: {
1608 E : dst_offset = image_file_.AbsToRelDisplacement(data) - dst_base.value();
1609 E : break;
1610 : }
1611 :
1612 : case BlockGraph::RELATIVE_REF: {
1613 E : dst_offset = data - dst_base.value();
1614 E : break;
1615 : }
1616 :
1617 : default: {
1618 i : NOTREACHED() << "Invalid reference type.";
1619 i : return false;
1620 : }
1621 : }
1622 :
1623 : if (!AddReference(src_addr, it->second.type, kPointerSize, dst_base,
1624 E : dst_offset, &references_)) {
1625 i : return false;
1626 : }
1627 E : }
1628 :
1629 E : return true;
1630 E : }
1631 :
1632 E : bool Decomposer::ValidateRelocs(const PEFile::RelocMap& reloc_map) {
1633 E : PEFile::RelocMap::const_iterator it(reloc_map.begin());
1634 E : PEFile::RelocMap::const_iterator end(reloc_map.end());
1635 E : for (; it != end; ++it) {
1636 E : RelativeAddress src(it->first);
1637 E : RelativeAddress dummy;
1638 :
1639 : if (!ValidateOrAddReference(
1640 : FIXUP_MUST_EXIST, src, BlockGraph::ABSOLUTE_REF,
1641 E : sizeof(dummy), dummy, 0, &fixup_map_, &references_)) {
1642 i : return false;
1643 : }
1644 E : }
1645 :
1646 E : return true;
1647 E : }
1648 :
1649 E : bool Decomposer::CreateBlocksFromSectionContribs(IDiaSession* session) {
1650 E : ScopedComPtr<IDiaEnumSectionContribs> section_contribs;
1651 : SearchResult search_result = FindDiaTable(session,
1652 E : section_contribs.Receive());
1653 E : if (search_result != kSearchSucceeded) {
1654 i : if (search_result == kSearchFailed)
1655 i : LOG(ERROR) << "No section contribution table found.";
1656 i : return false;
1657 : }
1658 :
1659 E : size_t rsrc_id = image_file_.GetSectionIndex(kResourceSectionName);
1660 :
1661 E : LONG count = 0;
1662 E : if (section_contribs->get_Count(&count) != S_OK) {
1663 i : LOG(ERROR) << "Failed to get section contributions enumeration length.";
1664 i : return false;
1665 : }
1666 :
1667 E : for (LONG visited = 0; visited < count; ++visited) {
1668 E : ScopedComPtr<IDiaSectionContrib> section_contrib;
1669 E : ULONG fetched = 0;
1670 E : HRESULT hr = section_contribs->Next(1, section_contrib.Receive(), &fetched);
1671 E : if (hr != S_OK) {
1672 i : LOG(ERROR) << "Failed to get DIA section contribution: "
1673 : << com::LogHr(hr) << ".";
1674 i : return false;
1675 : }
1676 E : if (fetched == 0)
1677 i : break;
1678 :
1679 E : hr = E_FAIL;
1680 E : DWORD rva = 0;
1681 E : DWORD length = 0;
1682 E : DWORD section_id = 0;
1683 E : BOOL code = FALSE;
1684 E : ScopedComPtr<IDiaSymbol> compiland;
1685 E : ScopedBstr bstr_name;
1686 : if ((hr = section_contrib->get_relativeVirtualAddress(&rva)) != S_OK ||
1687 : (hr = section_contrib->get_length(&length)) != S_OK ||
1688 : (hr = section_contrib->get_addressSection(§ion_id)) != S_OK ||
1689 : (hr = section_contrib->get_code(&code)) != S_OK ||
1690 : (hr = section_contrib->get_compiland(compiland.Receive())) != S_OK ||
1691 E : (hr = compiland->get_name(bstr_name.Receive())) != S_OK) {
1692 i : LOG(ERROR) << "Failed to get section contribution properties: "
1693 : << com::LogHr(hr) << ".";
1694 i : return false;
1695 : }
1696 :
1697 : // Determine if this function was built by a supported compiler.
1698 : bool is_built_by_supported_compiler =
1699 E : IsBuiltBySupportedCompiler(compiland.get());
1700 :
1701 : // DIA numbers sections from 1 to n, while we do 0 to n - 1.
1702 E : DCHECK_LT(0u, section_id);
1703 E : --section_id;
1704 :
1705 : // We don't parse the resource section, as it is parsed by the PEFileParser.
1706 E : if (section_id == rsrc_id)
1707 E : continue;
1708 :
1709 E : std::string name;
1710 E : if (!WideToUTF8(bstr_name, bstr_name.Length(), &name)) {
1711 i : LOG(ERROR) << "Failed to convert compiland name to UTF8.";
1712 i : return false;
1713 : }
1714 :
1715 : // Create the block.
1716 : BlockGraph::BlockType block_type =
1717 E : code ? BlockGraph::CODE_BLOCK : BlockGraph::DATA_BLOCK;
1718 : BlockGraph::Block* block = FindOrCreateBlock(block_type,
1719 : RelativeAddress(rva),
1720 : length,
1721 : name.c_str(),
1722 E : kExpectNoBlock);
1723 E : if (block == NULL) {
1724 i : LOG(ERROR) << "Unable to create block.";
1725 i : return false;
1726 : }
1727 :
1728 : // Set the block attributes.
1729 E : block->set_attribute(BlockGraph::SECTION_CONTRIB);
1730 E : if (!is_built_by_supported_compiler)
1731 E : block->set_attribute(BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER);
1732 E : }
1733 :
1734 E : return true;
1735 E : }
1736 :
// DiaBrowser match callback for SymTagData symbols; it is bound to the data
// symbol patterns in ProcessDataSymbols. For a statically located, non-zero
// sized data symbol with a non-zero RVA it finds (or creates) the data block
// covering the symbol and attaches a label carrying the symbol's name.
//
// |dia_browser| is not used by this function.
// |sym_tags| and |symbols| are parallel vectors; only the last entry (the
// data symbol itself) is inspected, plus sym_tags.size() to detect a global
// symbol with no parents.
//
// Returns kBrowserContinue when the symbol was handled or deliberately
// skipped, and kBrowserAbort on any failure (DIA property query, type info
// lookup, name conversion, block lookup/creation or label insertion).
1737 : DiaBrowser::BrowserDirective Decomposer::OnDataSymbol(
1738 : const DiaBrowser& dia_browser,
1739 : const DiaBrowser::SymTagVector& sym_tags,
1740 E : const DiaBrowser::SymbolPtrVector& symbols) {
1741 E : DCHECK_LT(0u, sym_tags.size());
1742 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1743 E : DCHECK_EQ(SymTagData, sym_tags.back());
1744 :
1745 E : const DiaBrowser::SymbolPtr& data(symbols.back());
1746 :
// Fetch all three properties in one short-circuited chain; |hr| holds the
// result of the first call that failed.
1747 E : HRESULT hr = E_FAIL;
1748 E : DWORD location_type = LocIsNull;
1749 E : DWORD rva = 0;
1750 E : ScopedBstr name_bstr;
1751 : if (FAILED(hr = data->get_locationType(&location_type)) ||
1752 : FAILED(hr = data->get_relativeVirtualAddress(&rva)) ||
1753 E : FAILED(hr = data->get_name(name_bstr.Receive()))) {
1754 i : LOG(ERROR) << "Failed to get data properties: " << com::LogHr(hr) << ".";
1755 i : return DiaBrowser::kBrowserAbort;
1756 : }
1757 :
1758 : // We only parse data symbols with static storage.
1759 E : if (location_type != LocIsStatic)
1760 E : return DiaBrowser::kBrowserContinue;
1761 :
1762 : // Symbols with an address of zero are essentially invalid. They appear to
1763 : // have been optimized away by the compiler, but they are still reported.
1764 E : if (rva == 0)
1765 E : return DiaBrowser::kBrowserContinue;
1766 :
1767 : // TODO(chrisha): We eventually want to get alignment info from the type
1768 : // information. This is strictly a lower bound, however, as certain
1769 : // data may be used in instructions that impose stricter alignment
1770 : // requirements.
1771 E : size_t length = 0;
1772 E : if (!GetTypeInfo(data, &length)) {
1773 i : return DiaBrowser::kBrowserAbort;
1774 : }
1775 : // Zero-length data symbols act as 'forward declares' in some sense. They
1776 : // are always followed by a non-zero length data symbol with the same name
1777 : // and location.
1778 E : if (length == 0)
1779 E : return DiaBrowser::kBrowserContinue;
1780 :
1781 E : RelativeAddress addr(rva);
1782 E : std::string name;
1783 E : if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1784 i : LOG(ERROR) << "Failed to convert data symbol name to UTF8.";
1785 i : return DiaBrowser::kBrowserAbort;
1786 : }
1787 :
1788 : // In general we expect data symbols to be completely contained by a block.
1789 : // The data symbol can exceed the size of the block in the case of data
1790 : // imports. For some reason the toolchain emits a global data symbol with
1791 : // type information equal to the type of the data *pointed* to by the import
1792 : // entry rather than the type of the entry itself. Thus, if the data type
1793 : // is bigger than the entire IAT this symbol will exceed it. To complicate
1794 : // matters even more, a poorly written module can import its own export in
1795 : // which case a linker generated pseudo-import-entry block will be
1796 : // generated. This won't be part of the IAT, so we can't even filter based
1797 : // on that. Instead, we simply ignore global data symbols that exceed the
1798 : // block size.
1799 E : FindOrCreateBlockDirective directive = kAllowCoveringBlock;
1800 E : base::StringPiece spname(name);
1801 E : if (sym_tags.size() == 1 && spname.starts_with("_imp_")) {
1802 : // For global data symbols (no parent symbols) to imported data ("_imp_"
1803 : // prefix) we allow partially covering blocks.
1804 E : directive = kAllowPartialCoveringBlock;
1805 : }
1806 :
1807 : BlockGraph::Block* block = FindOrCreateBlock(BlockGraph::DATA_BLOCK,
1808 : addr, length, spname,
1809 E : directive);
1810 :
1811 : // We've seen null blocks for some symbols in modules compiled using a custom
1812 : // non-Microsoft toolchain.
// A null block is treated as a hard error here: it aborts the browse.
1813 E : if (block == NULL) {
1814 i : LOG(ERROR) << "Failed to get a block for symbol named " << name << ".";
1815 i : return DiaBrowser::kBrowserAbort;
1816 : }
1817 :
1818 E : if (block->type() == BlockGraph::CODE_BLOCK) {
1819 : // The NativeClient bits of chrome.dll consists of hand-written assembly
1820 : // that is compiled using a custom non-Microsoft toolchain. Unfortunately
1821 : // for us this toolchain emits 1-byte data symbols instead of code labels.
// Such symbols (length 1, name starting with "NaCl") get a CODE_LABEL
// instead of a DATA_LABEL. Any other data symbol landing in a code block
// falls through and is labelled as data below.
1822 : static const char kNaClPrefix[] = "NaCl";
1823 : if (length == 1 &&
1824 E : name.compare(0, arraysize(kNaClPrefix) - 1, kNaClPrefix) == 0) {
1825 i : if (!AddLabelToBlock(addr, name, BlockGraph::CODE_LABEL, block)) {
1826 i : LOG(ERROR) << "Failed to add label to code block.";
1827 i : return DiaBrowser::kBrowserAbort;
1828 : }
1829 :
1830 i : return DiaBrowser::kBrowserContinue;
1831 : }
1832 : }
1833 :
1834 E : if (!AddLabelToBlock(addr, name, BlockGraph::DATA_LABEL, block)) {
1835 i : LOG(ERROR) << "Failed to add data label to block.";
1836 i : return DiaBrowser::kBrowserAbort;
1837 : }
1838 :
1839 E : return DiaBrowser::kBrowserContinue;
1840 E : }
1841 :
// DiaBrowser match callback for SymTagPublicSymbol symbols; it is bound in
// ProcessPublicSymbols. Looks up the existing block at the symbol's RVA and
// adds a PUBLIC_SYMBOL_LABEL named after the symbol, with one leading '_'
// removed.
//
// |dia_browser| is not used by this function. Only the last entry of
// |sym_tags| / |symbols| (the public symbol itself) is inspected.
//
// Returns kBrowserContinue if the symbol has no RVA or was labelled
// successfully. Returns kBrowserAbort if the name cannot be read or
// converted, no block exists at the address, or the label cannot be added.
1842 : DiaBrowser::BrowserDirective Decomposer::OnPublicSymbol(
1843 : const DiaBrowser& dia_browser,
1844 : const DiaBrowser::SymTagVector& sym_tags,
1845 E : const DiaBrowser::SymbolPtrVector& symbols) {
1846 E : DCHECK_LT(0u, sym_tags.size());
1847 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1848 E : DCHECK_EQ(SymTagPublicSymbol, sym_tags.back());
1849 E : const DiaBrowser::SymbolPtr& symbol(symbols.back());
1850 :
1851 : // We don't care about symbols that don't have addresses.
// Any result other than S_OK (including S_FALSE) is treated as "no address".
1852 E : DWORD rva = 0;
1853 E : if (S_OK != symbol->get_relativeVirtualAddress(&rva))
1854 E : return DiaBrowser::kBrowserContinue;
1855 :
1856 E : ScopedBstr name_bstr;
1857 E : if (S_OK != symbol->get_name(name_bstr.Receive())) {
1858 i : LOG(ERROR) << "Failed to get public symbol name.";
1859 i : return DiaBrowser::kBrowserAbort;
1860 : }
1861 :
1862 E : std::string name;
1863 E : if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1864 i : LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1865 i : return DiaBrowser::kBrowserAbort;
1866 : }
1867 :
// Unlike OnDataSymbol, this never creates a block: the public symbol must
// fall inside a block that already exists in the image.
1868 E : RelativeAddress addr(rva);
1869 E : BlockGraph::Block* block = image_->GetBlockByAddress(addr);
1870 E : if (block == NULL) {
1871 i : LOG(ERROR) << "No block found for public symbol \"" << name << "\".";
1872 i : return DiaBrowser::kBrowserAbort;
1873 : }
1874 :
1875 : // Public symbol names are mangled. Remove leading '_' as per
1876 : // http://msdn.microsoft.com/en-us/library/00kh39zz(v=vs.80).aspx
// NOTE(review): name[0] is read without checking that |name| is non-empty;
// confirm DIA never returns an empty public symbol name, or guard this.
1877 E : if (name[0] == '_')
1878 E : name = name.substr(1);
1879 :
1880 E : if (!AddLabelToBlock(addr, name, BlockGraph::PUBLIC_SYMBOL_LABEL, block))
1881 i : return DiaBrowser::kBrowserAbort;
1882 :
1883 E : return DiaBrowser::kBrowserContinue;
1884 E : }
1885 :
// Makes bracketed static-initializer ranges contiguous. It scans all data
// blocks in the image for names matching the begin/end regular expression
// pairs in static_initializer_patterns_, groups the matches by the string
// captured by the pattern, and merges every block intersecting
// [begin block start, end block end) into a single block. The merged block is
// renamed "Bracketed Initializers: <key>" and tagged COFF_GROUP.
//
// Returns false (after logging) if a begin or end label is seen more than
// once for the same key, if either endpoint of a pair is missing, or if the
// begin address lies after the end address. Returns true otherwise.
1886 E : bool Decomposer::ProcessStaticInitializers() {
1887 : typedef std::pair<RelativeAddress, RelativeAddress> AddressPair;
1888 : typedef std::map<std::string, AddressPair> AddressPairMap;
1889 :
// Address zero doubles as the "endpoint not seen yet" sentinel below.
1890 E : const RelativeAddress kNull(0);
1891 :
1892 : // This stores pairs of addresses, representing the beginning and the end
1893 : // of each static initializer block. It is keyed with a string, which is
1894 : // returned by the match group of the corresponding initializer pattern.
1895 : // The key is necessary to correlate matching labels (as multiple pairs
1896 : // of labels may match through a single pattern).
1897 E : AddressPairMap addr_pair_map;
1898 :
1899 : // Used for keeping track of which label, if any, we matched.
1900 : enum MatchType {
1901 : kMatchNone,
1902 : kMatchBeginLabel,
1903 : kMatchEndLabel
1904 : };
1905 :
1906 : // Iterate through all data blocks, looking for known initializer labels.
1907 E : BlockGraph::AddressSpace::RangeMapConstIter block_it = image_->begin();
1908 E : for (; block_it != image_->end(); ++block_it) {
1909 E : const BlockGraph::Block* block = block_it->second;
1910 : // Skip non-data blocks.
1911 E : if (block->type() != BlockGraph::DATA_BLOCK)
1912 E : continue;
1913 :
1914 : // Check the block name against each of the initializer patterns.
// The first pattern that matches wins; |name| receives the captured key.
1915 E : MatchType match = kMatchNone;
1916 E : std::string block_name = block->name();
1917 E : std::string name;
1918 E : for (size_t i = 0; i < static_initializer_patterns_.size(); ++i) {
1919 E : REPair& re_pair(static_initializer_patterns_[i]);
1920 E : if (re_pair.first.FullMatch(block_name, &name))
1921 E : match = kMatchBeginLabel;
1922 E : else if (re_pair.second.FullMatch(block_name, &name))
1923 E : match = kMatchEndLabel;
1924 :
1925 E : if (match != kMatchNone)
1926 E : break;
1927 E : }
1928 :
1929 : // No pattern matched this symbol? Continue to the next one.
1930 E : if (match == kMatchNone)
1931 E : continue;
1932 :
1933 : // Ensure this symbol exists in the map. Thankfully, addresses default
1934 : // construct to NULL.
1935 E : AddressPair& addr_pair = addr_pair_map[name];
1936 :
1937 : // Update the bracketing symbol endpoint. Make sure each symbol endpoint
1938 : // is only seen once.
// A begin label contributes the start of its block; an end label
// contributes the address one past the end of its block, so the end-label
// block itself is included in the merged range.
1939 E : RelativeAddress* addr = NULL;
1940 E : RelativeAddress new_addr;
1941 E : if (match == kMatchBeginLabel) {
1942 E : addr = &addr_pair.first;
1943 E : new_addr = block->addr();
1944 E : } else {
1945 E : addr = &addr_pair.second;
1946 E : new_addr = block->addr() + block->size();
1947 : }
1948 E : if (*addr != kNull) {
1949 i : LOG(ERROR) << "Bracketing symbol appears multiple times: "
1950 : << block_name;
1951 i : return false;
1952 : }
1953 E : *addr = new_addr;
1954 E : }
1955 :
1956 : // Use the bracketing symbols to make the initializers contiguous.
1957 E : AddressPairMap::const_iterator init_it = addr_pair_map.begin();
1958 E : for (; init_it != addr_pair_map.end(); ++init_it) {
1959 E : RelativeAddress begin_addr = init_it->second.first;
1960 E : if (begin_addr == kNull) {
1961 i : LOG(ERROR) << "Bracketing start symbol missing: " << init_it->first;
1962 i : return false;
1963 : }
1964 :
1965 E : RelativeAddress end_addr = init_it->second.second;
1966 E : if (end_addr == kNull) {
1967 i : LOG(ERROR) << "Bracketing end symbol missing: " << init_it->first;
1968 i : return false;
1969 : }
1970 :
1971 E : if (begin_addr > end_addr) {
1972 i : LOG(ERROR) << "Bracketing symbols out of order: " << init_it->first;
1973 i : return false;
1974 : }
1975 :
1976 : // Merge the initializers.
// The merge result is only DCHECKed; it is dereferenced unconditionally.
1977 E : DataSpace::Range range(begin_addr, end_addr - begin_addr);
1978 E : BlockGraph::Block* merged = image_->MergeIntersectingBlocks(range);
1979 : std::string name = base::StringPrintf("Bracketed Initializers: %s",
1980 E : init_it->first.c_str());
1981 E : DCHECK(merged != NULL);
1982 E : merged->set_name(name);
1983 E : merged->set_attribute(BlockGraph::COFF_GROUP);
1984 E : }
1985 :
1986 E : return true;
1987 E : }
1988 :
// Walks the DIA symbol tree under |root| and invokes OnDataSymbol for every
// data symbol matched by one of two patterns:
//   - a SymTagData symbol, optionally nested under a compiland;
//   - a SymTagData symbol under compiland -> function -> zero or more
//     nested SymTagBlock scopes.
// Returns the result of DiaBrowser::Browse.
1989 E : bool Decomposer::ProcessDataSymbols(IDiaSymbol* root) {
// base::Unretained(this): the callback is only used by the local
// |dia_browser| below, within this call.
1990 : DiaBrowser::MatchCallback on_data_symbol(
1991 E : base::Bind(&Decomposer::OnDataSymbol, base::Unretained(this)));
1992 :
1993 E : DiaBrowser dia_browser;
1994 : dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagData),
1995 E : on_data_symbol);
1996 : dia_browser.AddPattern(Seq(SymTagCompiland, SymTagFunction,
1997 : Star(SymTagBlock), SymTagData),
1998 E : on_data_symbol);
1999 :
2000 E : return dia_browser.Browse(root);
2001 E : }
2002 :
// Walks the DIA symbol tree under |root| and invokes OnPublicSymbol for every
// symbol matching the single-tag pattern SymTagPublicSymbol. Returns the
// result of DiaBrowser::Browse.
2003 E : bool Decomposer::ProcessPublicSymbols(IDiaSymbol* root) {
// base::Unretained(this): the callback is only used by the local
// |dia_browser| below, within this call.
2004 : DiaBrowser::MatchCallback on_public_symbol(
2005 E : base::Bind(&Decomposer::OnPublicSymbol, base::Unretained(this)));
2006 :
2007 E : DiaBrowser dia_browser;
2008 E : dia_browser.AddPattern(SymTagPublicSymbol, on_public_symbol);
2009 :
2010 E : return dia_browser.Browse(root);
2011 E : }
2012 :
// Calls GuessDataBlockAlignment on every block that intersects a data
// section of the image, passing the image's SectionAlignment as the second
// argument. Sections whose type is not kSectionData are skipped.
// Always returns true in the visible code.
2013 E : bool Decomposer::GuessDataBlockAlignments() {
2014 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2015 : // Iterate through all the image sections.
2016 E : for (size_t i = 0; i < num_sections; ++i) {
2017 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2018 E : DCHECK(header != NULL);
2019 :
2020 : // Only iterate through data sections.
2021 E : if (GetSectionType(header) != kSectionData)
2022 E : continue;
2023 :
// The section extent is taken from its virtual address and virtual size.
2024 E : RelativeAddress section_begin(header->VirtualAddress);
2025 E : size_t section_length = header->Misc.VirtualSize;
2026 :
2027 : // Get the range of blocks in this section.
2028 : BlockGraph::AddressSpace::RangeMapIterPair it_pair =
2029 E : image_->GetIntersectingBlocks(section_begin, section_length);
2030 :
2031 : // Iterate through the blocks in the section, setting their alignment.
2032 E : BlockGraph::AddressSpace::RangeMapIter it = it_pair.first;
2033 E : for (; it != it_pair.second; ++it) {
2034 E : BlockGraph::Block* block = it->second;
2035 : GuessDataBlockAlignment(block,
2036 E : image_file_.nt_headers()->OptionalHeader.SectionAlignment);
2037 E : }
2038 E : }
2039 :
2040 E : return true;
2041 E : }
2042 :
// Runs CreateCodeReferencesForBlock on every CODE_BLOCK in the block graph.
// Stops and returns false at the first block for which it fails; returns
// true once all code blocks have been processed.
2043 E : bool Decomposer::CreateCodeReferences() {
// Iterates the graph's mutable block map; |end| is captured once up front.
2044 E : BlockGraph::BlockMap::iterator it(image_->graph()->blocks_mutable().begin());
2045 E : BlockGraph::BlockMap::iterator end(image_->graph()->blocks_mutable().end());
2046 E : for (; it != end; ++it) {
2047 E : BlockGraph::Block* block = &it->second;
2048 E : if (block->type() != BlockGraph::CODE_BLOCK)
2049 E : continue;
2050 :
2051 E : if (!CreateCodeReferencesForBlock(block))
2052 i : return false;
2053 E : }
2054 :
2055 E : return true;
2056 E : }
2057 :
// Disassembles the code portion of |block|, with OnInstruction as the
// per-instruction callback, and records the outcome as block attributes.
//
// While the walk runs, current_block_ points at |block| and
// be_strict_with_current_block_ holds whether the block's attributes are
// basic-block safe. Both are reset (to NULL and true respectively) after the
// walk.
//
// Returns false if the block has no address, its address cannot be made
// absolute, a strict block has an unexpected code/data layout, or the walk
// reports kWalkError (or an unknown result). Returns true otherwise, possibly
// after setting INCOMPLETE_DISASSEMBLY or ERRORED_DISASSEMBLY on the block.
//
// NOTE(review): the three early error returns before the walk leave
// current_block_ set to |block|, so the DCHECK at the top would fire if this
// function were called again afterwards. Confirm callers always stop after a
// failure.
2058 E : bool Decomposer::CreateCodeReferencesForBlock(BlockGraph::Block* block) {
2059 E : DCHECK(current_block_ == NULL);
2060 E : current_block_ = block;
2061 :
2062 E : RelativeAddress block_addr;
2063 E : if (!image_->GetAddressOf(block, &block_addr)) {
2064 i : LOG(ERROR) << "Block \"" << block->name() << "\" has no address.";
2065 i : return false;
2066 : }
2067 :
2068 E : AbsoluteAddress abs_block_addr;
2069 E : if (!image_file_.Translate(block_addr, &abs_block_addr)) {
2070 i : LOG(ERROR) << "Unable to get absolute address for " << block_addr;
2071 i : return false;
2072 : }
2073 :
2074 : Disassembler::InstructionCallback on_instruction(
2075 E : base::Bind(&Decomposer::OnInstruction, base::Unretained(this)));
2076 :
2077 : // Use block labels and code references as starting points for disassembly.
2078 E : Disassembler::AddressSet starting_points;
2079 : GetDisassemblyStartingPoints(block, abs_block_addr, reloc_set_,
2080 E : &starting_points);
2081 :
2082 : // If the block has no starting points, then it has no private symbols and
2083 : // is not BB safe. We mark the block as not safe for basic-block disassembly.
// Blocks carrying the GAP_BLOCK attribute are exempt from this marking.
2084 : if (starting_points.empty() &&
2085 E : (block->attributes() & BlockGraph::GAP_BLOCK) == 0) {
2086 E : VLOG(1) << "Block \"" << block->name() << "\" has no private symbols.";
2087 E : block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
2088 : }
2089 :
2090 : // Determine whether or not we are being strict during disassembly.
// This is evaluated after the ERRORED_DISASSEMBLY marking above. The local
// |strict| copy survives the member reset below for the DCHECK in the switch.
2091 E : bool strict = block_graph::CodeBlockAttributesAreBasicBlockSafe(block);
2092 E : be_strict_with_current_block_ = strict;
2093 :
2094 : // Determine the length of the code portion of the block by trimming off any
2095 : // known trailing data. Also, if we're in strict mode, ensure that our
2096 : // assumption regarding code/data layout is met.
// In non-strict mode a failed layout check is tolerated and whatever value
// was written to |code_size| is used for the walk.
2097 E : size_t code_size = 0;
2098 : if (!BlockHasExpectedCodeDataLayout(block, &code_size) &&
2099 E : be_strict_with_current_block_) {
2100 i : LOG(ERROR) << "Block \"" << block->name() << "\" has unexpected code/data "
2101 : << "layout.";
2102 i : return false;
2103 : }
2104 :
2105 : // Disassemble the block.
2106 : Disassembler disasm(block->data(),
2107 : code_size,
2108 : abs_block_addr,
2109 : starting_points,
2110 E : on_instruction);
2111 E : Disassembler::WalkResult result = disasm.Walk();
2112 :
2113 : // If we're strict (that is, we're confident that the block was produced by
2114 : // cl.exe), then we can use that knowledge to look for calls that appear to be
2115 : // to non-returning functions that we may not have symbol info for.
2116 E : if (be_strict_with_current_block_)
2117 E : LookForNonReturningFunctions(references_, *image_, current_block_, disasm);
2118 :
// Restore the per-block state to its idle values before interpreting the
// walk result.
2119 E : DCHECK_EQ(block, current_block_);
2120 E : current_block_ = NULL;
2121 E : be_strict_with_current_block_ = true;
2122 :
2123 E : switch (result) {
2124 : case Disassembler::kWalkIncomplete:
2125 : // There were computed branches that couldn't be chased down.
2126 E : block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2127 E : return true;
2128 :
2129 : case Disassembler::kWalkTerminated:
2130 : // This exit condition should only ever occur for non-strict disassembly.
2131 : // If strict, we should always get kWalkError.
2132 E : DCHECK(!strict);
2133 : // This means that the code was malformed, or broke some expected
2134 : // conventions. This code is not safe for basic block disassembly.
2135 E : block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
2136 E : return true;
2137 :
2138 : case Disassembler::kWalkSuccess:
2139 : // Were any bytes in the block not accounted for? This generally means
2140 : // unreachable code, which we see quite often, especially in debug builds.
2141 E : if (disasm.code_size() != disasm.disassembled_bytes())
2142 E : block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2143 E : return true;
2144 :
2145 : case Disassembler::kWalkError:
2146 i : return false;
2147 :
2148 : default:
2149 i : NOTREACHED() << "Unhandled Disassembler WalkResult.";
2150 i : return false;
2151 : }
2152 E : }
2153 :
// Adds a new block of |type| named |name| covering [address, address + size)
// to the image, then fills in its source range, owning section and, when
// available, its data.
//
// Returns the new block, or NULL (after logging) if the image refuses the
// block or the range does not fall inside any section.
//
// NOTE(review): on the "lies outside of all sections" path the block has
// already been added to image_ and is not removed before NULL is returned.
// Confirm that callers treat this as fatal.
2154 : BlockGraph::Block* Decomposer::CreateBlock(BlockGraph::BlockType type,
2155 : RelativeAddress address,
2156 : BlockGraph::Size size,
2157 E : const base::StringPiece& name) {
2158 E : BlockGraph::Block* block = image_->AddBlock(type, address, size, name);
2159 E : if (block == NULL) {
2160 i : LOG(ERROR) << "Unable to add block at " << address << " with size "
2161 : << size << ".";
2162 i : return NULL;
2163 : }
2164 :
2165 : // Mark the source range from whence this block originates.
// Maps the whole block, [0, size), onto the same-sized range at |address|.
// The result is only DCHECKed.
2166 : bool pushed = block->source_ranges().Push(
2167 : BlockGraph::Block::DataRange(0, size),
2168 E : BlockGraph::Block::SourceRange(address, size));
2169 E : DCHECK(pushed);
2170 :
2171 E : BlockGraph::SectionId section = image_file_.GetSectionIndex(address, size);
2172 E : if (section == BlockGraph::kInvalidSectionId) {
2173 i : LOG(ERROR) << "Block at " << address << " with size " << size
2174 : << " lies outside of all sections.";
2175 i : return NULL;
2176 : }
2177 E : block->set_section(section);
2178 :
// A NULL data pointer is not an error: the block is simply left without
// data. (Presumably this covers ranges with no backing bytes in the image
// file - TODO confirm against GetImageData.)
2179 E : const uint8* data = image_file_.GetImageData(address, size);
2180 E : if (data != NULL)
2181 E : block->SetData(data, size);
2182 :
2183 E : return block;
2184 E : }
2185 :
// Returns the block at |addr|, creating one via CreateBlock(type, addr, size,
// name) if the image has no block there.
//
// When a block already exists, |directive| decides whether it is acceptable:
//   kAllowPartialCoveringBlock - the existing block is returned as is.
//   kExpectNoBlock             - any existing block is a collision.
//   kAllowIdenticalBlock       - the existing block must have exactly the
//                                same address and size.
//   kAllowCoveringBlock        - the existing block must fully contain
//                                [addr, addr + size).
// An existing block with the PE_PARSED attribute is always checked as
// kAllowCoveringBlock, whatever directive was passed (other than
// kAllowPartialCoveringBlock, which returns earlier).
//
// Returns NULL after logging on a collision, or if CreateBlock fails. |type|
// and |name| are only used for creation and for the collision log message.
2186 : BlockGraph::Block* Decomposer::FindOrCreateBlock(
2187 : BlockGraph::BlockType type,
2188 : RelativeAddress addr,
2189 : BlockGraph::Size size,
2190 : const base::StringPiece& name,
2191 E : FindOrCreateBlockDirective directive) {
2192 E : BlockGraph::Block* block = image_->GetBlockByAddress(addr);
2193 E : if (block != NULL) {
2194 : // If we got a block we're guaranteed that it at least partially covers
2195 : // the query range, so we can immediately return it in that case.
2196 E : if (directive == kAllowPartialCoveringBlock)
2197 E : return block;
2198 :
2199 : // Always allow collisions where the new block is a proper subset of
2200 : // an existing PE parsed block. The PE parser often knows more than we do
2201 : // about blocks that need to stick together.
2202 E : if (block->attributes() & BlockGraph::PE_PARSED)
2203 E : directive = kAllowCoveringBlock;
2204 :
2205 E : bool collision = false;
2206 E : switch (directive) {
2207 : case kExpectNoBlock: {
2208 i : collision = true;
2209 i : break;
2210 : }
2211 : case kAllowIdenticalBlock: {
2212 i : collision = (block->addr() != addr || block->size() != size);
2213 i : break;
2214 : }
2215 : default: {
// Collision unless the existing block spans the whole requested range.
2216 E : DCHECK(directive == kAllowCoveringBlock);
2217 : collision = block->addr() > addr ||
2218 E : (block->addr() + block->size()) < addr + size;
2219 : break;
2220 : }
2221 : }
2222 :
2223 E : if (collision) {
2224 i : LOG(ERROR) << "Block collision for \"" << name.as_string() << "\" at "
2225 : << addr << "(" << size << ") with existing block \""
2226 : << block->name() << "\" at " << block->addr() << " ("
2227 : << block->size() << ").";
2228 i : return NULL;
2229 : }
2230 :
2231 E : return block;
2232 : }
2233 E : DCHECK(block == NULL);
2234 :
2235 E : return CreateBlock(type, addr, size, name);
2236 E : }
2237 :
// Checks whether the address immediately after an instruction, |instr_end|,
// is the site of a relocation and, if so, treats it as the start of a jump
// table inside the current block.
//
// Returns:
//   kDirectiveContinue      - no reloc at |instr_end|.
//   kDirectiveTerminatePath - a reloc was found in the current block and a
//                             DATA_LABEL | JUMP_TABLE_LABEL label exists at
//                             that offset (or was added in non-strict mode).
//   kDirectiveAbort         - strict mode and the expected label is missing.
//   AbortOrTerminateDisassembly(strict) - the reloc lies in a different block.
2238 : CallbackDirective Decomposer::LookPastInstructionForData(
2239 E : RelativeAddress instr_end) {
2240 : // If this instruction terminates at a data boundary (ie: the *next*
2241 : // instruction will be data or a reloc), we can be certain that a new
2242 : // lookup table is starting at this address.
2243 E : if (reloc_set_.find(instr_end) == reloc_set_.end())
2244 E : return Disassembler::kDirectiveContinue;
2245 :
2246 : // Find the block housing the reloc. We expect the reloc to be contained
2247 : // completely within this block.
// The queried length of 4 is presumably the size of a 32-bit reloc - TODO
// confirm. A NULL result here is fatal (CHECK) in all build types.
2248 i : BlockGraph::Block* block = image_->GetContainingBlock(instr_end, 4);
2249 i : if (block != current_block_) {
2250 i : CHECK(block != NULL);
2251 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2252 : << "Found an instruction/data boundary between blocks: "
2253 : << current_block_->name() << " and " << block->name();
2254 i : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2255 : }
2256 :
2257 i : BlockGraph::Offset offset = instr_end - block->addr();
2258 :
2259 : // We expect there to be a jump-table data label already.
2260 i : BlockGraph::Label label;
2261 i : bool have_label = block->GetLabel(offset, &label);
2262 : if (!have_label || !label.has_attributes(
2263 i : BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)) {
2264 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2265 : << "Expected there to be a data label marking the jump "
2266 : << "table at " << block->name() << " + " << offset << ".";
2267 :
2268 : // If we're in strict mode, we're a block that obeys standard conventions.
2269 : // Which means we should already be aware of any jump tables in this block.
2270 i : if (be_strict_with_current_block_)
2271 i : return Disassembler::kDirectiveAbort;
2272 :
2273 : // If we're not in strict mode, add the jump-table label.
// Any existing label at this offset is removed first and replaced by a
// synthetic "<JUMP-TABLE-offset>" label; failures here are fatal (CHECK).
2274 i : if (have_label) {
2275 i : CHECK(block->RemoveLabel(offset));
2276 : }
2277 :
2278 : CHECK(block->SetLabel(offset, BlockGraph::Label(
2279 : base::StringPrintf("<JUMP-TABLE-%d>", offset),
2280 i : BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)));
2281 : }
2282 :
2283 i : return Disassembler::kDirectiveTerminatePath;
2284 E : }
2285 :
// Flags the block currently being disassembled (current_block_, which must be
// non-NULL) with DISASSEMBLED_PAST_END and emits a VLOG(1) message naming the
// block and its address.
2286 E : void Decomposer::MarkDisassembledPastEnd() {
// NOTE(review): |count| is never read or written in this function as shown;
// it looks like a leftover and is a candidate for removal in the source.
2287 : static size_t count = 0;
2288 E : DCHECK(current_block_ != NULL);
2289 E : current_block_->set_attribute(BlockGraph::DISASSEMBLED_PAST_END);
2290 : // TODO(chrisha): The entire "disassembled past end" and non-returning
2291 : // function infrastructure can be ripped out once we rework the BB
2292 : // disassembler to be straight path, and remove the disassembly phase
2293 : // from the decomposer (where it's no longer needed). In the meantime
2294 : // we simply crank down this log verbosity due to all of the false
2295 : // positives.
2296 E : VLOG(1) << "Disassembled past end of block or into known data for block \""
2297 : << current_block_->name() << "\" at " << current_block_->addr()
2298 : << ".";
2299 E : }
2300 :
// Validates the references recorded in references_ whose keys lie strictly
// between |instr_start| and |instr_end| (the instruction's start address
// itself is excluded by the upper_bound lookup).
//
// For each such reference:
//   - If it targets another block, that is only rejected when the target is
//     a code block, the reference points past its first byte, and the
//     target's attributes are basic-block safe.
//   - If it targets the current block at a non-zero offset, there must be a
//     label at that offset with DATA_LABEL and without CODE_LABEL.
//
// On a rejected reference the result of AbortOrTerminateDisassembly for the
// current strictness is returned; otherwise kDirectiveContinue.
2301 : CallbackDirective Decomposer::VisitNonFlowControlInstruction(
2302 E : RelativeAddress instr_start, RelativeAddress instr_end) {
2303 : // TODO(chrisha): We could walk the operands and follow references
2304 : // explicitly. If any of them are of reference type and there's no
2305 : // matching reference, this would be cause to blow up and die (we
2306 : // should get all of these as relocs and/or fixups).
2307 :
2308 : IntermediateReferenceMap::const_iterator ref_it =
2309 E : references_.upper_bound(instr_start);
2310 : IntermediateReferenceMap::const_iterator ref_end =
2311 E : references_.lower_bound(instr_end);
2312 :
2313 E : for (; ref_it != ref_end; ++ref_it) {
// The block holding the reference's target is only DCHECKed for NULL before
// being dereferenced below.
2314 : BlockGraph::Block* ref_block = image_->GetContainingBlock(
2315 E : ref_it->second.base, 1);
2316 E : DCHECK(ref_block != NULL);
2317 :
2318 : // This is an inter-block reference.
2319 E : if (ref_block != current_block_) {
2320 : // There should be no cross-block references to the middle of other
2321 : // code blocks (to the top is fine, as we could be passing around a
2322 : // function pointer). The exception is if the remote block is not
2323 : // generated by cl.exe. In this case, there could be arbitrary labels
2324 : // that act like functions within the body of that block, and referring
2325 : // to them is perfectly fine.
2326 : if (ref_block->type() == BlockGraph::CODE_BLOCK &&
2327 : ref_it->second.base != ref_block->addr() &&
2328 E : block_graph::CodeBlockAttributesAreBasicBlockSafe(ref_block)) {
2329 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2330 : << "Found a non-control-flow code-block to middle-of-code-block "
2331 : << "reference from block \"" << current_block_->name()
2332 : << "\" to block \"" << ref_block->name() << "\".";
2333 i : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2334 : }
2335 E : } else {
2336 : // This is an intra-block reference.
2337 : BlockGraph::Offset ref_offset =
2338 E : ref_it->second.base - current_block_->addr();
2339 :
2340 : // If this is to offset zero, we assume we are taking a pointer to
2341 : // ourself, which is safe.
2342 E : if (ref_offset != 0) {
2343 : // If this is 'clean' code it should be to data, and there should be a
2344 : // label.
2345 E : BlockGraph::Label label;
2346 E : if (!current_block_->GetLabel(ref_offset, &label)) {
2347 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2348 : << "Found an intra-block data-reference with no label.";
2349 i : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2350 i : } else {
// A label that is not data, or that is also marked as code, is rejected.
2351 : if (!label.has_attributes(BlockGraph::DATA_LABEL) ||
2352 E : label.has_attributes(BlockGraph::CODE_LABEL)) {
2353 E : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2354 : << "Found an intra-block data-like reference to a non-data "
2355 : << "or code label in block \"" << current_block_->name()
2356 : << "\".";
2357 E : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2358 : }
2359 : }
2360 E : }
2361 : }
2362 E : }
2363 :
2364 E : return Disassembler::kDirectiveContinue;
2365 E : }
2366 :
// Handles a branch or call whose single operand is PC-relative. It computes
// the address of the operand (|src|) and of the branch target (|dst|), then
// validates or records a PC_RELATIVE_REF between them via
// ValidateOrAddReference.
//
// |instr_abs| is the absolute address of the instruction; |instr_rel| is not
// used in this function. |instruction| is the decoded instruction.
// |end_of_code| is only consulted on the fall-through path at the bottom.
//
// Returns:
//   kDirectiveAbort         - address translation failed, a short reference
//                             leaves the current block, reference validation
//                             failed, or a call targets the interior of a
//                             NON_RETURN_FUNCTION block.
//   kDirectiveTerminatePath - a call to the start of a NON_RETURN_FUNCTION
//                             block.
//   kDirectiveContinue      - otherwise. The block is marked via
//                             MarkDisassembledPastEnd first when the
//                             instruction is not an unconditional branch and
//                             |end_of_code| is set.
2367 : CallbackDirective Decomposer::VisitPcRelativeFlowControlInstruction(
2368 : AbsoluteAddress instr_abs,
2369 : RelativeAddress instr_rel,
2370 : const _DInst& instruction,
2371 E : bool end_of_code) {
2372 E : int fc = META_GET_FC(instruction.meta);
2373 E : DCHECK(fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH);
2374 E : DCHECK_EQ(O_PC, instruction.ops[0].type);
2375 E : DCHECK_EQ(O_NONE, instruction.ops[1].type);
2376 E : DCHECK_EQ(O_NONE, instruction.ops[2].type);
2377 E : DCHECK_EQ(O_NONE, instruction.ops[3].type);
2378 : DCHECK(instruction.ops[0].size == 8 ||
2379 : instruction.ops[0].size == 16 ||
2380 E : instruction.ops[0].size == 32);
2381 : // Distorm gives us size in bits, we want bytes.
2382 E : BlockGraph::Size size = instruction.ops[0].size / 8;
2383 :
2384 : // Get the reference's address. Note we assume it's in the instruction's
2385 : // tail end - I don't know of a case where a PC-relative offset in a branch
2386 : // or call is not the very last thing in an x86 instruction.
// The destination is the displacement in imm.addr applied to the address of
// the next instruction (instr_abs + instruction.size).
2387 E : AbsoluteAddress abs_src = instr_abs + instruction.size - size;
2388 : AbsoluteAddress abs_dst = instr_abs + instruction.size +
2389 E : static_cast<size_t>(instruction.imm.addr);
2390 :
2391 E : RelativeAddress src, dst;
2392 : if (!image_file_.Translate(abs_src, &src) ||
2393 E : !image_file_.Translate(abs_dst, &dst)) {
2394 i : LOG(ERROR) << "Unable to translate absolute to relative addresses.";
2395 i : return Disassembler::kDirectiveAbort;
2396 : }
2397 :
2398 : // Get the block associated with the destination address. It must exist
2399 : // and be a code block.
// NOTE(review): both requirements are enforced with DCHECKs only, yet
// |block| is dereferenced unconditionally below. Confirm a destination
// outside any block cannot occur in release builds.
2400 E : BlockGraph::Block* block = image_->GetContainingBlock(dst, 1);
2401 E : DCHECK(block != NULL);
2402 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
2403 :
2404 : // For short references, we should not see a fixup.
2405 E : ValidateOrAddReferenceMode mode = FIXUP_MUST_NOT_EXIST;
2406 E : if (size == kPointerSize) {
2407 : // Long PC_RELATIVE reference within a single block? FIXUPs aren't
2408 : // strictly necessary.
2409 E : if (block->Contains(src, kPointerSize))
2410 E : mode = FIXUP_MAY_EXIST;
2411 E : else
2412 : // But if they're between blocks (section contributions), we expect to
2413 : // find them.
2414 E : mode = FIXUP_MUST_EXIST;
2415 E : } else {
2416 : // Since we slice by section contributions we no longer see short
2417 : // references across blocks. If we do, bail!
2418 E : if (block != current_block_) {
2419 i : LOG(ERROR) << "Found a short PC-relative reference out of block \""
2420 : << current_block_->name() << "\".";
2421 i : return Disassembler::kDirectiveAbort;
2422 : }
2423 : }
2424 :
2425 : // Validate or create the reference, as necessary.
2426 : if (!ValidateOrAddReference(mode, src, BlockGraph::PC_RELATIVE_REF, size,
2427 E : dst, 0, &fixup_map_, &references_)) {
2428 i : LOG(ERROR) << "Failed to validate/create reference originating from "
2429 : << "block \"" << current_block_->name() << "\".";
2430 i : return Disassembler::kDirectiveAbort;
2431 : }
2432 :
2433 : // If this is a call and the destination is a non-returning function,
2434 : // then indicate that we should terminate this disassembly path.
2435 : if (fc == FC_CALL &&
2436 E : (block->attributes() & BlockGraph::NON_RETURN_FUNCTION)) {
2437 : // TODO(chrisha): For now, we enforce that the call be to the beginning
2438 : // of the function. This may not be necessary, but better safe than
2439 : // sorry for now.
2440 E : if (block->addr() != dst) {
2441 i : LOG(ERROR) << "Calling inside the body of a non-returning function: "
2442 : << block->name();
2443 i : return Disassembler::kDirectiveAbort;
2444 : }
2445 :
2446 E : return Disassembler::kDirectiveTerminatePath;
2447 : }
2448 :
2449 : // If we get here, then we don't think it's a non-returning call. If it's
2450 : // not an unconditional jump and we're at the end of the code for this block
2451 : // then we consider this as disassembling past the end.
2452 E : if (fc != FC_UNC_BRANCH && end_of_code)
2453 i : MarkDisassembledPastEnd();
2454 :
2455 E : return Disassembler::kDirectiveContinue;
2456 E : }
2457 :
// Handles a call through a memory operand (first operand of type O_DISP). It
// looks up the reference recorded at the operand's displacement address and
// checks whether the block that reference points to is a non-returning
// function.
//
// Returns:
//   kDirectiveAbort         - the displacement cannot be translated to a
//                             relative address, or the reference's target has
//                             no block.
//   kDirectiveTerminatePath - the target block has NON_RETURN_FUNCTION set.
//   kDirectiveContinue      - no reference is recorded at the displacement,
//                             or the target is an ordinary block. In the
//                             latter case MarkDisassembledPastEnd is called
//                             first when |end_of_code| is set.
2458 : CallbackDirective Decomposer::VisitIndirectMemoryCallInstruction(
2459 E : const _DInst& instruction, bool end_of_code) {
2460 E : DCHECK_EQ(FC_CALL, META_GET_FC(instruction.meta));
2461 E : DCHECK_EQ(O_DISP, instruction.ops[0].type);
2462 :
2463 : // TODO(rogerm): Consider changing to image_file_.AbsToRelDisplacement()
2464 : // instead of translate. In theory, the indexing into a function-table
2465 : // could be statically offset such that the displacement falls outside
2466 : // of the image's address space. But, we have never seen the compiler
2467 : // generate code like that. This is left to use Translate, which will
2468 : // trigger an error in such a case.
// The displacement is truncated to 32 bits and treated as an absolute
// address.
2469 E : AbsoluteAddress disp_addr_abs(static_cast<uint32>(instruction.disp));
2470 E : RelativeAddress disp_addr_rel;
2471 E : if (!image_file_.Translate(disp_addr_abs, &disp_addr_rel)) {
2472 i : LOG(ERROR) << "Unable to translate call address.";
2473 i : return Disassembler::kDirectiveAbort;
2474 : }
2475 :
2476 : // Try to dereference the address of the call instruction. This can fail
2477 : // for blocks that are only initialized at runtime, so we don't fail if
2478 : // we don't find a reference.
// Note: this early return skips the |end_of_code| handling at the bottom.
2479 : IntermediateReferenceMap::const_iterator ref_it =
2480 E : references_.find(disp_addr_rel);
2481 E : if (ref_it == references_.end())
2482 E : return Disassembler::kDirectiveContinue;
2483 :
2484 : // NOTE: This process derails for bound import tables. In this case the
2485 : // attempted dereference above will fail, but we could still actually
2486 : // find the import name thunk by inspecting the offset of the memory
2487 : // location.
2488 :
2489 : // The reference must be direct and 32-bit.
2490 E : const IntermediateReference& ref = ref_it->second;
2491 E : DCHECK_EQ(BlockGraph::Reference::kMaximumSize, ref.size);
2492 E : DCHECK_EQ(0, ref.offset);
2493 :
2494 : // Look up the thunk this refers to.
2495 E : BlockGraph::Block* thunk = image_->GetBlockByAddress(ref.base);
2496 E : if (thunk == NULL) {
2497 i : LOG(ERROR) << "Unable to dereference intermediate reference at "
2498 : << disp_addr_rel << " to " << ref.base << ".";
2499 i : return Disassembler::kDirectiveAbort;
2500 : }
2501 :
// The type expectations below are debug-only checks; they do not change the
// returned directive.
2502 E : if (ref.type == BlockGraph::RELATIVE_REF) {
2503 : // If this is a relative reference it must be part of an import address
2504 : // table (during runtime this address would be patched up with an absolute
2505 : // reference). Thus we expect the referenced block to be data, an import
2506 : // name thunk.
2507 E : DCHECK_EQ(BlockGraph::DATA_BLOCK, thunk->type());
2508 E : } else {
2509 : // If this is an absolute address it should actually point directly to
2510 : // code.
2511 E : DCHECK_EQ(BlockGraph::ABSOLUTE_REF, ref.type);
2512 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, thunk->type());
2513 : }
2514 :
2515 : // Either way, if the block is non-returning we terminate this path of
2516 : // disassembly.
2517 E : if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
2518 E : return Disassembler::kDirectiveTerminatePath;
2519 :
2520 E : if (end_of_code)
2521 i : MarkDisassembledPastEnd();
2522 :
2523 E : return Disassembler::kDirectiveContinue;
2524 E : }
2525 :
2526 : CallbackDirective Decomposer::OnInstruction(const Disassembler& walker,
2527 E : const _DInst& instruction) {
2528 : // Get the relative address of this instruction.
2529 E : AbsoluteAddress instr_abs(static_cast<uint32>(instruction.addr));
2530 E : RelativeAddress instr_rel;
2531 E : if (!image_file_.Translate(instr_abs, &instr_rel)) {
2532 i : LOG(ERROR) << "Unable to translate instruction address.";
2533 i : return Disassembler::kDirectiveAbort;
2534 : }
2535 E : RelativeAddress after_instr_rel = instr_rel + instruction.size;
2536 :
2537 : #ifndef NDEBUG
2538 : // If we're in debug mode, it's helpful to have a pointer directly to the
2539 : // beginning of this instruction in memory.
2540 E : BlockGraph::Offset instr_offset = instr_rel - current_block_->addr();
2541 E : const uint8* instr_data = current_block_->data() + instr_offset;
2542 : #endif
2543 :
2544 : // TODO(chrisha): Certain instructions require aligned data (ie: MMX/SSE
2545 : // instructions). We need to follow the data that these instructions
2546 : // refer to, and set their alignment appropriately. For now, alignment
2547 : // is simply preserved from the original image.
2548 :
2549 E : CallbackDirective directive = LookPastInstructionForData(after_instr_rel);
2550 E : if (IsFatalCallbackDirective(directive))
2551 i : return directive;
2552 :
2553 : // We're at the end of code in this block if we encountered data, or this is
2554 : // the last instruction to be processed.
2555 E : RelativeAddress block_end(current_block_->addr() + current_block_->size());
2556 : bool end_of_code = (directive == Disassembler::kDirectiveTerminatePath) ||
2557 E : (after_instr_rel >= block_end);
2558 :
2559 E : int fc = META_GET_FC(instruction.meta);
2560 :
2561 E : if (fc == FC_NONE) {
2562 : // There's no control flow and we're at the end of the block. Mark the
2563 : // block as dirty.
2564 E : if (end_of_code)
2565 i : MarkDisassembledPastEnd();
2566 :
2567 : return CombineCallbackDirectives(directive,
2568 E : VisitNonFlowControlInstruction(instr_rel, after_instr_rel));
2569 : }
2570 :
2571 : if ((fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH) &&
2572 E : instruction.ops[0].type == O_PC) {
2573 : // For all branches, calls and conditional branches to PC-relative
2574 : // addresses, record a PC-relative reference.
2575 : return CombineCallbackDirectives(directive,
2576 : VisitPcRelativeFlowControlInstruction(instr_abs,
2577 : instr_rel,
2578 : instruction,
2579 E : end_of_code));
2580 : }
2581 :
2582 : // We explicitly handle indirect memory call instructions. These can often
2583 : // be tracked down as pointing to a block in this image, or to an import
2584 : // name thunk from another module.
2585 E : if (fc == FC_CALL && instruction.ops[0].type == O_DISP) {
2586 : return CombineCallbackDirectives(directive,
2587 E : VisitIndirectMemoryCallInstruction(instruction, end_of_code));
2588 : }
2589 :
2590 : // Look out for blocks where disassembly seems to run off the end of the
2591 : // block. We do not treat interrupts as flow control as execution can
2592 : // continue past the interrupt.
2593 E : if (fc != FC_RET && fc != FC_UNC_BRANCH && end_of_code)
2594 E : MarkDisassembledPastEnd();
2595 :
2596 E : return directive;
2597 E : }
2598 :
2599 : bool Decomposer::CreatePEImageBlocksAndReferences(
2600 E : PEFileParser::PEHeader* header) {
2601 : PEFileParser::AddReferenceCallback add_reference(
2602 E : base::Bind(&Decomposer::AddReferenceCallback, base::Unretained(this)));
2603 E : PEFileParser parser(image_file_, image_, add_reference);
2604 : parser.set_on_import_thunk(
2605 E : base::Bind(&Decomposer::OnImportThunkCallback, base::Unretained(this)));
2606 :
2607 E : if (!parser.ParseImage(header)) {
2608 i : LOG(ERROR) << "Unable to parse PE image.";
2609 i : return false;
2610 : }
2611 :
2612 E : return true;
2613 E : }
2614 :
2615 E : bool Decomposer::FinalizeIntermediateReferences() {
2616 E : IntermediateReferenceMap::const_iterator it(references_.begin());
2617 E : IntermediateReferenceMap::const_iterator end(references_.end());
2618 :
2619 E : for (; it != end; ++it) {
2620 E : RelativeAddress src_addr(it->first);
2621 E : BlockGraph::Block* src = image_->GetBlockByAddress(src_addr);
2622 E : RelativeAddress dst_base_addr(it->second.base);
2623 E : RelativeAddress dst_addr(dst_base_addr + it->second.offset);
2624 E : BlockGraph::Block* dst = image_->GetBlockByAddress(dst_base_addr);
2625 :
2626 E : if (src == NULL || dst == NULL) {
2627 i : LOG(ERROR) << "Reference source or base destination address is out of "
2628 : << "range, src: " << src << ", dst: " << dst;
2629 i : return false;
2630 : }
2631 :
2632 E : RelativeAddress src_start = src->addr();
2633 E : RelativeAddress dst_start = dst->addr();
2634 :
2635 : // Get the offset of the ultimate destination relative to the start of the
2636 : // destination block.
2637 E : BlockGraph::Offset dst_offset = dst_addr - dst_start;
2638 :
2639 : // Get the offset of the actual referenced object relative to the start of
2640 : // the destination block.
2641 E : BlockGraph::Offset dst_base = dst_base_addr - dst_start;
2642 :
2643 : BlockGraph::Reference ref(it->second.type,
2644 : it->second.size,
2645 : dst,
2646 : dst_offset,
2647 E : dst_base);
2648 E : src->SetReference(src_addr - src_start, ref);
2649 E : }
2650 :
2651 E : references_.clear();
2652 :
2653 E : return true;
2654 E : }
2655 :
2656 E : bool Decomposer::ConfirmFixupsVisited() const {
2657 E : bool success = true;
2658 :
2659 : // Ideally, all fixups should have been visited during decomposition.
2660 : // TODO(chrisha): Address the root problems underlying the following
2661 : // temporary fix.
2662 E : FixupMap::const_iterator fixup_it = fixup_map_.begin();
2663 E : for (; fixup_it != fixup_map_.end(); ++fixup_it) {
2664 E : if (fixup_it->second.visited)
2665 E : continue;
2666 :
2667 : const BlockGraph::Block* block =
2668 E : image_->GetContainingBlock(fixup_it->first, kPointerSize);
2669 E : DCHECK(block != NULL);
2670 :
2671 : // We know that we currently do not have full disassembly coverage as there
2672 : // are several orphaned pieces of apparently unreachable code in the CRT
2673 : // that we do not disassemble, but which may contain jmp or call commands.
2674 : // Thus, we expect that missed fixups are all PC-relative and lie within
2675 : // code blocks.
2676 : if (block->type() == BlockGraph::CODE_BLOCK &&
2677 E : fixup_it->second.type == BlockGraph::PC_RELATIVE_REF)
2678 E : continue;
2679 :
2680 i : success = false;
2681 i : LOG(ERROR) << "Unexpected unseen fixup at " << fixup_it->second.location;
2682 i : }
2683 :
2684 E : return success;
2685 E : }
2686 :
2687 E : bool Decomposer::FindPaddingBlocks() {
2688 E : DCHECK(image_ != NULL);
2689 E : DCHECK(image_->graph() != NULL);
2690 :
2691 : BlockGraph::BlockMap::iterator block_it =
2692 E : image_->graph()->blocks_mutable().begin();
2693 E : for (; block_it != image_->graph()->blocks_mutable().end(); ++block_it) {
2694 E : BlockGraph::Block& block = block_it->second;
2695 :
2696 : // Padding blocks must not have any symbol information: no labels,
2697 : // no references, no referrers, and they must be a gap block.
2698 : if (block.labels().size() != 0 ||
2699 : block.references().size() != 0 ||
2700 : block.referrers().size() != 0 ||
2701 E : (block.attributes() & BlockGraph::GAP_BLOCK) == 0)
2702 E : continue;
2703 :
2704 E : switch (block.type()) {
2705 : // Code blocks should be fully defined and consist of only int3s.
2706 : case BlockGraph::CODE_BLOCK: {
2707 : if (block.data_size() != block.size() ||
2708 E : RepeatedValue(block.data(), block.data_size()) != kInt3)
2709 i : continue;
2710 E : break;
2711 : }
2712 :
2713 : // Data blocks should be uninitialized or have fully defined data
2714 : // consisting only of zeros.
2715 : default: {
2716 E : DCHECK_EQ(BlockGraph::DATA_BLOCK, block.type());
2717 E : if (block.data_size() == 0) // Uninitialized data blocks are padding.
2718 E : break;
2719 : if (block.data_size() != block.size() ||
2720 E : RepeatedValue(block.data(), block.data_size()) != 0)
2721 i : continue;
2722 : }
2723 : }
2724 :
2725 : // If we fall through to this point, then the block is a padding block.
2726 E : block.set_attribute(BlockGraph::PADDING_BLOCK);
2727 E : }
2728 :
2729 E : return true;
2730 E : }
2731 :
2732 E : bool Decomposer::CreateSections() {
2733 : // Iterate through the image sections, and create sections in the BlockGraph.
2734 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2735 E : for (size_t i = 0; i < num_sections; ++i) {
2736 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2737 E : std::string name = pe::PEFile::GetSectionName(*header);
2738 : BlockGraph::Section* section = image_->graph()->AddSection(
2739 E : name, header->Characteristics);
2740 E : DCHECK(section != NULL);
2741 :
2742 : // For now, we expect them to have been created with the same IDs as those
2743 : // in the original image.
2744 E : if (section->id() != i) {
2745 i : LOG(ERROR) << "Unexpected section ID.";
2746 i : return false;
2747 : }
2748 E : }
2749 :
2750 E : return true;
2751 E : }
2752 :
2753 E : bool Decomposer::LoadDebugStreams(IDiaSession* dia_session) {
2754 E : DCHECK(dia_session != NULL);
2755 :
2756 : // Load the fixups. These must exist.
2757 E : PdbFixups pdb_fixups;
2758 : SearchResult search_result = FindAndLoadDiaDebugStreamByName(
2759 E : kFixupDiaDebugStreamName, dia_session, &pdb_fixups);
2760 E : if (search_result != kSearchSucceeded) {
2761 i : if (search_result == kSearchFailed) {
2762 i : LOG(ERROR) << "PDB file does not contain a FIXUP stream. Module must be "
2763 : "linked with '/PROFILE' or '/DEBUGINFO:FIXUP' flag.";
2764 : }
2765 i : return false;
2766 : }
2767 :
2768 : // Load the omap_from table. It is not necessary that one exist.
2769 E : std::vector<OMAP> omap_from;
2770 : search_result = FindAndLoadDiaDebugStreamByName(
2771 E : kOmapFromDiaDebugStreamName, dia_session, &omap_from);
2772 E : if (search_result == kSearchErrored)
2773 i : return false;
2774 :
2775 : // Translate and validate fixups.
2776 E : if (!OmapAndValidateFixups(omap_from, pdb_fixups))
2777 i : return false;
2778 :
2779 E : return true;
2780 E : }
2781 :
2782 : bool Decomposer::OmapAndValidateFixups(const std::vector<OMAP>& omap_from,
2783 E : const PdbFixups& pdb_fixups) {
2784 E : bool have_omap = omap_from.size() != 0;
2785 :
2786 : // The resource section in Chrome is modified post-link by a tool that adds a
2787 : // manifest to it. This causes all of the fixups in the resource section (and
2788 : // anything beyond it) to be invalid. As long as the resource section is the
2789 : // last section in the image, this is not a problem (we can safely ignore the
2790 : // .rsrc fixups, which we know how to parse without them). However, if there
2791 : // is a section after the resource section, things will have been shifted
2792 : // and potentially crucial fixups will be invalid.
2793 E : RelativeAddress rsrc_start(0xffffffff), max_start;
2794 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2795 E : for (size_t i = 0; i < num_sections; ++i) {
2796 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2797 E : RelativeAddress start(header->VirtualAddress);
2798 E : if (start > max_start)
2799 E : max_start = start;
2800 : if (strncmp(kResourceSectionName,
2801 : reinterpret_cast<const char*>(header->Name),
2802 E : IMAGE_SIZEOF_SHORT_NAME) == 0) {
2803 E : rsrc_start = start;
2804 E : break;
2805 : }
2806 E : }
2807 :
2808 : // Ensure there are no sections after the resource section.
2809 E : if (max_start > rsrc_start) {
2810 i : LOG(ERROR) << kResourceSectionName << " section is not the last section.";
2811 i : return false;
2812 : }
2813 :
2814 : // Ensure the fixups are all valid, and populate the fixup map.
2815 E : size_t skipped = 0;
2816 E : for (size_t i = 0; i < pdb_fixups.size(); ++i) {
2817 E : if (!pdb_fixups[i].ValidHeader()) {
2818 i : LOG(ERROR) << "Unknown fixup header: "
2819 : << base::StringPrintf("0x%08X.", pdb_fixups[i].header);
2820 i : return false;
2821 : }
2822 :
2823 : // For now, we skip any offset fixups. We've only seen this in the context
2824 : // of TLS data access, and we don't mess with TLS structures.
2825 E : if (pdb_fixups[i].is_offset())
2826 E : continue;
2827 :
2828 : // All fixups we handle should be full size pointers.
2829 E : DCHECK_EQ(kPointerSize, pdb_fixups[i].size());
2830 :
2831 : // Get the original addresses, and map them through OMAP information.
2832 : // Normally DIA takes care of this for us, but there is no API for
2833 : // getting DIA to give us FIXUP information, so we have to do it manually.
2834 E : RelativeAddress rva_location(pdb_fixups[i].rva_location);
2835 E : RelativeAddress rva_base(pdb_fixups[i].rva_base);
2836 E : if (have_omap) {
2837 i : rva_location = pdb::TranslateAddressViaOmap(omap_from, rva_location);
2838 i : rva_base = pdb::TranslateAddressViaOmap(omap_from, rva_base);
2839 : }
2840 :
2841 : // If these are part of the .rsrc section, ignore them.
2842 E : if (rva_location >= rsrc_start)
2843 E : continue;
2844 :
2845 : // Ensure they live within the image, and refer to things within the
2846 : // image.
2847 : if (!image_file_.Contains(rva_location, kPointerSize) ||
2848 E : !image_file_.Contains(rva_base, 1)) {
2849 i : LOG(ERROR) << "Fixup refers to addresses outside of image.";
2850 i : return false;
2851 : }
2852 :
2853 : // Add the fix up, and ensure the source address is unique.
2854 E : Fixup fixup = { PdbFixupTypeToReferenceType(pdb_fixups[i].type),
2855 E : pdb_fixups[i].refers_to_code(),
2856 E : pdb_fixups[i].is_data(),
2857 E : false,
2858 E : rva_location,
2859 E : rva_base };
2860 E : bool added = fixup_map_.insert(std::make_pair(rva_location, fixup)).second;
2861 E : if (!added) {
2862 i : LOG(ERROR) << "Colliding fixups at " << rva_location;
2863 i : return false;
2864 : }
2865 E : }
2866 :
2867 E : return true;
2868 E : }
2869 :
2870 : bool Decomposer::RegisterStaticInitializerPatterns(
2871 E : const base::StringPiece& begin, const base::StringPiece& end) {
2872 : // Ensuring the patterns each have exactly one capturing group.
2873 : REPair re_pair = std::make_pair(RE(begin.as_string()),
2874 E : RE(end.as_string()));
2875 : if (re_pair.first.NumberOfCapturingGroups() != 1 ||
2876 E : re_pair.second.NumberOfCapturingGroups() != 1)
2877 i : return false;
2878 :
2879 E : static_initializer_patterns_.push_back(re_pair);
2880 :
2881 E : return true;
2882 E : }
2883 :
2884 : bool Decomposer::RegisterNonReturningFunction(
2885 E : const base::StringPiece& function_name) {
2886 E : return non_returning_functions_.insert(function_name.as_string()).second;
2887 E : }
2888 :
2889 : bool Decomposer::RegisterNonReturningImport(
2890 : const base::StringPiece& module_name,
2891 E : const base::StringPiece& function_name) {
2892 E : StringSet& module_set = non_returning_imports_[module_name.as_string()];
2893 E : return module_set.insert(function_name.as_string()).second;
2894 E : }
2895 :
2896 : bool Decomposer::LoadBlockGraphFromPdbStream(const PEFile& image_file,
2897 : pdb::PdbStream* block_graph_stream,
2898 E : ImageLayout* image_layout) {
2899 E : DCHECK(block_graph_stream != NULL);
2900 E : DCHECK(image_layout != NULL);
2901 E : LOG(INFO) << "Reading block-graph and image layout from the PDB.";
2902 :
2903 : // Initialize an input archive pointing to the stream.
2904 E : scoped_refptr<pdb::PdbByteStream> byte_stream = new pdb::PdbByteStream();
2905 E : if (!byte_stream->Init(block_graph_stream))
2906 i : return false;
2907 E : DCHECK(byte_stream.get() != NULL);
2908 :
2909 E : core::ScopedInStreamPtr pdb_in_stream;
2910 : pdb_in_stream.reset(core::CreateByteInStream(
2911 E : byte_stream->data(), byte_stream->data() + byte_stream->length()));
2912 :
2913 : // Read the header.
2914 E : uint32 stream_version = 0;
2915 E : unsigned char compressed = 0;
2916 : if (!pdb_in_stream->Read(sizeof(stream_version),
2917 : reinterpret_cast<core::Byte*>(&stream_version)) ||
2918 : !pdb_in_stream->Read(sizeof(compressed),
2919 E : reinterpret_cast<core::Byte*>(&compressed))) {
2920 i : LOG(ERROR) << "Failed to read existing Syzygy block-graph stream header.";
2921 i : return false;
2922 : }
2923 :
2924 : // Check the stream version.
2925 E : if (stream_version != pdb::kSyzygyBlockGraphStreamVersion) {
2926 E : LOG(ERROR) << "PDB contains an unsupported Syzygy block-graph stream"
2927 : << " version (got " << stream_version << ", expected "
2928 : << pdb::kSyzygyBlockGraphStreamVersion << ").";
2929 E : return false;
2930 : }
2931 :
2932 : // If the stream is compressed insert the decompression filter.
2933 E : core::InStream* in_stream = pdb_in_stream.get();
2934 E : scoped_ptr<core::ZInStream> zip_in_stream;
2935 E : if (compressed != 0) {
2936 E : zip_in_stream.reset(new core::ZInStream(in_stream));
2937 E : if (!zip_in_stream->Init()) {
2938 i : LOG(ERROR) << "Unable to initialize ZInStream.";
2939 i : return false;
2940 : }
2941 E : in_stream = zip_in_stream.get();
2942 : }
2943 :
2944 : // Deserialize the image-layout.
2945 E : core::NativeBinaryInArchive in_archive(in_stream);
2946 E : block_graph::BlockGraphSerializer::Attributes attributes = 0;
2947 : if (!LoadBlockGraphAndImageLayout(
2948 E : image_file, &attributes, image_layout, &in_archive)) {
2949 i : LOG(ERROR) << "Failed to deserialize block-graph and image layout.";
2950 i : return false;
2951 : }
2952 :
2953 E : return true;
2954 E : }
2955 :
2956 : bool Decomposer::LoadBlockGraphFromPdb(const base::FilePath& pdb_path,
2957 : const PEFile& image_file,
2958 : ImageLayout* image_layout,
2959 E : bool* stream_exists) {
2960 E : DCHECK(image_layout != NULL);
2961 E : DCHECK(stream_exists != NULL);
2962 :
2963 E : pdb::PdbFile pdb_file;
2964 E : pdb::PdbReader pdb_reader;
2965 E : if (!pdb_reader.Read(pdb_path, &pdb_file)) {
2966 i : LOG(ERROR) << "Unable to read the PDB named \"" << pdb_path.value()
2967 : << "\".";
2968 i : return NULL;
2969 : }
2970 :
2971 : // Try to get the block-graph stream from the PDB.
2972 : scoped_refptr<pdb::PdbStream> block_graph_stream =
2973 E : GetBlockGraphStreamFromPdb(&pdb_file);
2974 E : if (block_graph_stream.get() == NULL) {
2975 E : *stream_exists = false;
2976 E : return false;
2977 : }
2978 :
2979 : // The PDB contains a block-graph stream, the block-graph and the image layout
2980 : // will be read from this stream.
2981 E : *stream_exists = true;
2982 : if (!LoadBlockGraphFromPdbStream(image_file, block_graph_stream.get(),
2983 E : image_layout)) {
2984 i : return false;
2985 : }
2986 :
2987 E : return true;
2988 E : }
2989 :
2990 : scoped_refptr<pdb::PdbStream> Decomposer::GetBlockGraphStreamFromPdb(
2991 E : pdb::PdbFile* pdb_file) {
2992 E : scoped_refptr<pdb::PdbStream> block_graph_stream;
2993 : // Get the PDB header and try to get the block-graph ID stream from it.
2994 E : pdb::PdbInfoHeader70 pdb_header = {0};
2995 E : pdb::NameStreamMap name_stream_map;
2996 : if (!ReadHeaderInfoStream(pdb_file->GetStream(pdb::kPdbHeaderInfoStream),
2997 : &pdb_header,
2998 E : &name_stream_map)) {
2999 i : LOG(ERROR) << "Failed to read header info stream.";
3000 i : return block_graph_stream;
3001 : }
3002 : pdb::NameStreamMap::const_iterator name_it = name_stream_map.find(
3003 E : pdb::kSyzygyBlockGraphStreamName);
3004 E : if (name_it == name_stream_map.end()) {
3005 E : return block_graph_stream;
3006 : }
3007 :
3008 : // Get the block-graph stream and ensure that it's not empty.
3009 E : block_graph_stream = pdb_file->GetStream(name_it->second);
3010 E : if (block_graph_stream.get() == NULL) {
3011 i : LOG(ERROR) << "Failed to read the block-graph stream from the PDB.";
3012 i : return block_graph_stream;
3013 : }
3014 E : if (block_graph_stream->length() == 0) {
3015 i : LOG(ERROR) << "The block-graph stream is empty.";
3016 i : return block_graph_stream;
3017 : }
3018 :
3019 E : return block_graph_stream;
3020 E : }
3021 :
3022 : bool Decomposer::OnImportThunkCallback(const char* module_name,
3023 : const char* symbol_name,
3024 E : BlockGraph::Block* thunk) {
3025 E : DCHECK(module_name != NULL);
3026 E : DCHECK(symbol_name != NULL);
3027 E : DCHECK(thunk != NULL);
3028 :
3029 : // Look for the module first.
3030 : StringSetMap::const_iterator module_it =
3031 E : non_returning_imports_.find(std::string(module_name));
3032 E : if (module_it == non_returning_imports_.end())
3033 E : return true;
3034 :
3035 : // Look for the symbol within the module.
3036 E : if (module_it->second.count(std::string(symbol_name)) == 0)
3037 E : return true;
3038 :
3039 : // If we get here then the imported symbol is found. Decorate the thunk.
3040 E : thunk->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
3041 E : VLOG(1) << "Forcing non-returning attribute on imported symbol \""
3042 : << symbol_name << "\" from module \"" << module_name << "\".";
3043 :
3044 E : return true;
3045 E : }
3046 :
3047 : } // namespace pe
|