1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/decomposer.h"
16 :
17 : #include <cvconst.h>
18 : #include <algorithm>
19 :
20 : #include "base/bind.h"
21 : #include "base/logging.h"
22 : #include "base/path_service.h"
23 : #include "base/string_util.h"
24 : #include "base/stringprintf.h"
25 : #include "base/utf_string_conversions.h"
26 : #include "base/files/file_path.h"
27 : #include "base/memory/scoped_ptr.h"
28 : #include "base/win/scoped_bstr.h"
29 : #include "base/win/scoped_comptr.h"
30 : #include "sawbuck/common/com_utils.h"
31 : #include "sawbuck/sym_util/types.h"
32 : #include "syzygy/block_graph/block_util.h"
33 : #include "syzygy/block_graph/typed_block.h"
34 : #include "syzygy/core/disassembler_util.h"
35 : #include "syzygy/core/zstream.h"
36 : #include "syzygy/pdb/omap.h"
37 : #include "syzygy/pdb/pdb_byte_stream.h"
38 : #include "syzygy/pdb/pdb_util.h"
39 : #include "syzygy/pe/dia_util.h"
40 : #include "syzygy/pe/find.h"
41 : #include "syzygy/pe/metadata.h"
42 : #include "syzygy/pe/pdb_info.h"
43 : #include "syzygy/pe/pe_file_parser.h"
44 : #include "syzygy/pe/pe_transform_policy.h"
45 : #include "syzygy/pe/pe_utils.h"
46 : #include "syzygy/pe/serialization.h"
47 :
48 : namespace pe {
49 : namespace {
50 :
51 : using base::win::ScopedBstr;
52 : using base::win::ScopedComPtr;
53 : using block_graph::BlockGraph;
54 : using block_graph::ConstTypedBlock;
55 : using builder::Opt;
56 : using builder::Seq;
57 : using builder::Star;
58 : using core::AbsoluteAddress;
59 : using core::Disassembler;
60 : using core::RelativeAddress;
61 :
62 : typedef Disassembler::CallbackDirective CallbackDirective;
63 :
64 : const size_t kPointerSize = sizeof(AbsoluteAddress);
65 :
66 : // Converts from PdbFixup::Type to BlockGraph::ReferenceType.
67 : BlockGraph::ReferenceType PdbFixupTypeToReferenceType(
68 E : pdb::PdbFixup::Type type) {
69 E : switch (type) {
70 : case pdb::PdbFixup::TYPE_ABSOLUTE:
71 E : return BlockGraph::ABSOLUTE_REF;
72 :
73 : case pdb::PdbFixup::TYPE_RELATIVE:
74 E : return BlockGraph::RELATIVE_REF;
75 :
76 : case pdb::PdbFixup::TYPE_PC_RELATIVE:
77 E : return BlockGraph::PC_RELATIVE_REF;
78 :
79 : default:
80 i : NOTREACHED() << "Invalid PdbFixup::Type.";
81 : // The return type here is meaningless.
82 i : return BlockGraph::ABSOLUTE_REF;
83 : }
84 E : }
85 :
86 : // Adds a reference to the provided intermediate reference map. If one already
87 : // exists, will validate that they are consistent.
88 : bool AddReference(RelativeAddress src_addr,
89 : BlockGraph::ReferenceType type,
90 : BlockGraph::Size size,
91 : RelativeAddress dst_base,
92 : BlockGraph::Offset dst_offset,
93 E : Decomposer::IntermediateReferenceMap* references) {
94 E : DCHECK(references != NULL);
95 :
96 : // If we get an iterator to a reference and it has the same source address
97 : // then ensure that we are consistent with it.
98 : Decomposer::IntermediateReferenceMap::iterator it =
99 E : references->lower_bound(src_addr);
100 E : if (it != references->end() && it->first == src_addr) {
101 : if (type != it->second.type || size != it->second.size ||
102 E : dst_base != it->second.base || dst_offset != it->second.offset) {
103 i : LOG(ERROR) << "Trying to insert inconsistent and colliding intermediate "
104 : "references.";
105 i : return false;
106 : }
107 : }
108 :
109 E : Decomposer::IntermediateReference ref = { type,
110 E : size,
111 E : dst_base,
112 E : dst_offset };
113 :
114 : // Since we used lower_bound above, we can use it as a hint for the
115 : // insertion. This saves us from incurring the lookup cost twice.
116 E : references->insert(it, std::make_pair(src_addr, ref));
117 E : return true;
118 E : }
119 :
120 : // Validates the given reference against the given fixup map entry. If they
121 : // are consistent, marks the fixup as having been visited.
122 : bool ValidateReference(RelativeAddress src_addr,
123 : BlockGraph::ReferenceType type,
124 : BlockGraph::Size size,
125 E : Decomposer::FixupMap::iterator fixup_it) {
126 E : if (type != fixup_it->second.type || size != kPointerSize) {
127 i : LOG(ERROR) << "Reference at " << src_addr
128 : << " not consistent with corresponding fixup.";
129 i : return false;
130 : }
131 :
132 : // Mark this fixup as having been visited.
133 E : fixup_it->second.visited = true;
134 :
135 E : return true;
136 E : }
137 :
138 : enum ValidateOrAddReferenceMode {
139 : // Look for an existing fixup. If we find one, validate against it,
140 : // otherwise create a new intermediate reference.
141 : FIXUP_MAY_EXIST,
142 : // Compare against an existing fixup, bailing if there is none. Does not
143 : // create a new intermediate reference.
144 : FIXUP_MUST_EXIST,
145 : // Look for an existing fixup, and fail if one exists. Otherwise, create
146 : // a new intermediate reference.
147 : FIXUP_MUST_NOT_EXIST
148 : };
149 : bool ValidateOrAddReference(ValidateOrAddReferenceMode mode,
150 : RelativeAddress src_addr,
151 : BlockGraph::ReferenceType type,
152 : BlockGraph::Size size,
153 : RelativeAddress dst_base,
154 : BlockGraph::Offset dst_offset,
155 : Decomposer::FixupMap* fixup_map,
156 E : Decomposer::IntermediateReferenceMap* references) {
157 E : DCHECK(fixup_map != NULL);
158 E : DCHECK(references != NULL);
159 :
160 E : Decomposer::FixupMap::iterator it = fixup_map->find(src_addr);
161 :
162 E : switch (mode) {
163 : case FIXUP_MAY_EXIST: {
164 : if (it != fixup_map->end() &&
165 E : !ValidateReference(src_addr, type, size, it))
166 i : return false;
167 : return AddReference(src_addr, type, size, dst_base, dst_offset,
168 E : references);
169 : }
170 :
171 : case FIXUP_MUST_EXIST: {
172 E : if (it == fixup_map->end()) {
173 i : LOG(ERROR) << "Reference at " << src_addr << " has no matching fixup.";
174 i : return false;
175 : }
176 E : if (!ValidateReference(src_addr, type, size, it))
177 i : return false;
178 : // Do not create a new intermediate reference.
179 E : return true;
180 : }
181 :
182 : case FIXUP_MUST_NOT_EXIST: {
183 E : if (it != fixup_map->end()) {
184 i : LOG(ERROR) << "Reference at " << src_addr
185 : << " collides with an existing fixup.";
186 i : return false;
187 : }
188 : return AddReference(src_addr, type, size, dst_base, dst_offset,
189 E : references);
190 : }
191 :
192 : default: {
193 i : NOTREACHED() << "Invalid ValidateOrAddReferenceMode.";
194 i : return false;
195 : }
196 : }
197 E : }
198 :
199 E : bool GetTypeInfo(IDiaSymbol* symbol, size_t* length) {
200 E : DCHECK(symbol != NULL);
201 E : DCHECK(length != NULL);
202 :
203 E : *length = 0;
204 E : ScopedComPtr<IDiaSymbol> type;
205 E : HRESULT hr = symbol->get_type(type.Receive());
206 : // This happens if the symbol has no type information.
207 E : if (hr == S_FALSE)
208 E : return true;
209 E : if (hr != S_OK) {
210 i : LOG(ERROR) << "Failed to get type symbol: " << com::LogHr(hr) << ".";
211 i : return false;
212 : }
213 :
214 E : ULONGLONG ull_length = 0;
215 E : hr = type->get_length(&ull_length);
216 E : if (hr != S_OK) {
217 i : LOG(ERROR) << "Failed to retrieve type length properties: "
218 : << com::LogHr(hr) << ".";
219 i : return false;
220 : }
221 E : *length = ull_length;
222 :
223 E : return true;
224 E : }
225 :
226 E : SectionType GetSectionType(const IMAGE_SECTION_HEADER* header) {
227 E : DCHECK(header != NULL);
228 E : if ((header->Characteristics & IMAGE_SCN_CNT_CODE) != 0)
229 E : return kSectionCode;
230 E : if ((header->Characteristics & kReadOnlyDataCharacteristics) != 0)
231 E : return kSectionData;
232 i : return kSectionUnknown;
233 E : }
234 :
235 E : void GuessDataBlockAlignment(BlockGraph::Block* block, uint32 max_alignment) {
236 E : DCHECK(block != NULL);
237 E : uint32 alignment = block->addr().GetAlignment();
238 : // Cap the alignment.
239 E : if (alignment > max_alignment)
240 E : alignment = max_alignment;
241 E : block->set_alignment(alignment);
242 E : }
243 :
244 : bool AreMatchedBlockAndLabelAttributes(
245 : BlockGraph::BlockType bt,
246 : BlockGraph::LabelAttributes la) {
247 : return (bt == BlockGraph::CODE_BLOCK && (la & BlockGraph::CODE_LABEL) != 0) ||
248 : (bt == BlockGraph::DATA_BLOCK && (la & BlockGraph::DATA_LABEL) != 0);
249 : }
250 :
251 E : BlockGraph::LabelAttributes SymTagToLabelAttributes(enum SymTagEnum sym_tag) {
252 E : switch (sym_tag) {
253 : case SymTagData:
254 E : return BlockGraph::DATA_LABEL;
255 : case SymTagLabel:
256 E : return BlockGraph::CODE_LABEL;
257 : case SymTagFuncDebugStart:
258 E : return BlockGraph::DEBUG_START_LABEL;
259 : case SymTagFuncDebugEnd:
260 E : return BlockGraph::DEBUG_END_LABEL;
261 : case SymTagBlock:
262 E : return BlockGraph::SCOPE_START_LABEL;
263 : case SymTagCallSite:
264 E : return BlockGraph::CALL_SITE_LABEL;
265 : }
266 :
267 i : NOTREACHED();
268 i : return 0;
269 E : }
270 :
271 : bool AddLabelToBlock(RelativeAddress addr,
272 : const base::StringPiece& name,
273 : BlockGraph::LabelAttributes label_attributes,
274 E : BlockGraph::Block* block) {
275 E : DCHECK(block != NULL);
276 E : DCHECK_LE(block->addr(), addr);
277 E : DCHECK_GT(block->addr() + block->size(), addr);
278 :
279 E : BlockGraph::Offset offset = addr - block->addr();
280 :
281 : // Try to create the label.
282 E : if (block->SetLabel(offset, name, label_attributes)) {
283 : // If there was no label at offset 0, then this block has not yet been
284 : // renamed, and still has its section contribution as a name. Update it to
285 : // the first symbol we get for it. We parse symbols from most useful
286 : // (undecorated function names) to least useful (mangled public symbols), so
287 : // this ensures a block has the most useful name.
288 E : if (offset == 0)
289 E : block->set_name(name);
290 :
291 E : return true;
292 : }
293 :
294 : // If we get here there's an already existing label. Update it.
295 E : BlockGraph::Label label;
296 E : CHECK(block->GetLabel(offset, &label));
297 :
298 : // It is conceivable that there could be more than one scope with either the
299 : // same beginning or the same ending. However, this doesn't appear to happen
300 : // in any version of Chrome up to 20. We add this check so that we'd at least
301 : // be made aware of the situation. (We don't rely on these labels, so we
302 : // merely output a warning rather than an error.)
303 : {
304 : const BlockGraph::LabelAttributes kScopeAttributes =
305 : BlockGraph::SCOPE_START_LABEL |
306 E : BlockGraph::SCOPE_END_LABEL;
307 : BlockGraph::LabelAttributes scope_attributes =
308 E : label_attributes & kScopeAttributes;
309 E : if (scope_attributes != 0) {
310 E : if (label.has_any_attributes(scope_attributes)) {
311 i : LOG(WARNING) << "Detected colliding scope labels at offset "
312 : << offset << " of block \"" << block->name() << "\".";
313 : }
314 : }
315 : }
316 :
317 : // Merge the names if this isn't a repeated name.
318 E : std::string new_name = label.name();
319 E : if (new_name.find(name.data()) == new_name.npos) {
320 E : new_name.append(", ");
321 E : name.AppendToString(&new_name);
322 : }
323 :
324 : // Merge the attributes.
325 : BlockGraph::LabelAttributes new_label_attr = label.attributes() |
326 E : label_attributes;
327 E : if (!BlockGraph::Label::AreValidAttributes(new_label_attr)) {
328 : // It's not clear which attributes should be the winner here, so we log an
329 : // error.
330 i : LOG(ERROR) << "Trying to merge conflicting label attributes \""
331 : << BlockGraph::LabelAttributesToString(label_attributes)
332 : << "\" for label \"" << label.ToString() << "\" at offset "
333 : << offset << " of block \"" << block->name() << "\".";
334 i : return false;
335 : }
336 :
337 : // Update the label.
338 E : label = BlockGraph::Label(new_name, new_label_attr);
339 E : CHECK(block->RemoveLabel(offset));
340 E : CHECK(block->SetLabel(offset, label));
341 :
342 E : return true;
343 E : }
344 :
345 : // The MS linker pads between code blocks with int3s.
346 : static const uint8 kInt3 = 0xCC;
347 :
348 : // If the given run of bytes consists of a single value repeated, returns that
349 : // value. Otherwise, returns -1.
350 E : int RepeatedValue(const uint8* data, size_t size) {
351 E : DCHECK(data != NULL);
352 E : const uint8* data_end = data + size;
353 E : uint8 value = *(data++);
354 E : for (; data < data_end; ++data) {
355 E : if (*data != value)
356 i : return -1;
357 E : }
358 E : return value;
359 E : }
360 :
361 : const BlockGraph::BlockId kNullBlockId(-1);
362 :
363 : void GetDisassemblyStartingPoints(
364 : const BlockGraph::Block* block,
365 : AbsoluteAddress abs_block_addr,
366 : const PEFile::RelocSet& reloc_set,
367 E : Disassembler::AddressSet* addresses) {
368 E : DCHECK(block != NULL);
369 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
370 E : DCHECK(addresses != NULL);
371 :
372 E : addresses->clear();
373 :
374 : // Use code labels as starting points.
375 E : BlockGraph::Block::LabelMap::const_iterator it(block->labels().begin());
376 E : for (; it != block->labels().end(); ++it) {
377 E : BlockGraph::Offset offset = it->first;
378 E : DCHECK_LE(0, offset);
379 E : DCHECK_GT(block->size(), static_cast<size_t>(offset));
380 :
381 E : if (it->second.has_attributes(BlockGraph::CODE_LABEL)) {
382 : // We sometimes receive code labels that land on lookup tables; we can
383 : // detect these because the label will point directly to a reloc. These
384 : // should have already been marked as data by now. DCHECK to validate.
385 : // TODO(chrisha): Get rid of this DCHECK, and allow mixed CODE and DATA
386 : // labels. Simply only use ones that are DATA only.
387 E : DCHECK_EQ(0u, reloc_set.count(block->addr() + offset));
388 :
389 E : addresses->insert(abs_block_addr + offset);
390 : }
391 E : }
392 E : }
393 :
394 : // Determines if the provided code block has the expected layout of code first,
395 : // data second. Returns true if so, false otherwise. Also returns the size of
396 : // the code portion of the block by trimming off any data labels.
397 : bool BlockHasExpectedCodeDataLayout(const BlockGraph::Block* block,
398 E : size_t* code_size) {
399 E : DCHECK(block != NULL);
400 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
401 E : DCHECK(code_size != NULL);
402 :
403 E : *code_size = block->data_size();
404 :
405 : BlockGraph::Block::LabelMap::const_reverse_iterator label_it =
406 E : block->labels().rbegin();
407 : BlockGraph::Block::LabelMap::const_reverse_iterator label_end =
408 E : block->labels().rend();
409 :
410 E : bool seen_non_data = false;
411 :
412 : // Walk through the labels in reverse order (by decreasing offset). Trim
413 : // any data labels from this blocks data_size.
414 E : for (; label_it != label_end; ++label_it) {
415 E : if (label_it->second.has_attributes(BlockGraph::DATA_LABEL)) {
416 : // We've encountered data not strictly at the end of the block. This
417 : // violates assumptions about code generated by cl.exe.
418 E : if (seen_non_data)
419 i : return false;
420 :
421 : // Otherwise, we're still in a run of data labels at the tail of the
422 : // block. Keep trimming the code size.
423 E : size_t offset = static_cast<size_t>(label_it->first);
424 E : if (offset < *code_size)
425 E : *code_size = offset;
426 E : } else {
427 E : seen_non_data = true;
428 : }
429 E : }
430 :
431 E : return true;
432 E : }
433 :
434 : // Given a compiland, returns its compiland details.
435 : bool GetCompilandDetailsForCompiland(IDiaSymbol* compiland,
436 E : IDiaSymbol** compiland_details) {
437 E : DCHECK(compiland != NULL);
438 E : DCHECK(compiland_details != NULL);
439 E : DCHECK(IsSymTag(compiland, SymTagCompiland));
440 :
441 E : *compiland_details = NULL;
442 :
443 : // Get the enumeration of compiland details.
444 E : ScopedComPtr<IDiaEnumSymbols> enum_symbols;
445 : HRESULT hr = compiland->findChildren(SymTagCompilandDetails, NULL, 0,
446 E : enum_symbols.Receive());
447 E : DCHECK_EQ(S_OK, hr);
448 :
449 : // We expect there to be compiland details. For compilands built by
450 : // non-standard toolchains, there usually aren't any.
451 E : LONG count = 0;
452 E : hr = enum_symbols->get_Count(&count);
453 E : DCHECK_EQ(S_OK, hr);
454 E : if (count == 0)
455 i : return false;
456 :
457 : // Get the compiland details.
458 E : ULONG fetched = 0;
459 E : hr = enum_symbols->Next(1, compiland_details, &fetched);
460 E : DCHECK_EQ(S_OK, hr);
461 E : DCHECK_EQ(1u, fetched);
462 E : return true;
463 E : }
464 :
// Stores information regarding known compilers.
struct KnownCompilerInfo {
  // The compiler name to compare against. Points at a string literal, hence
  // the pointee must be const.
  const wchar_t* compiler_name;
  // Whether compilands produced by this compiler are supported.
  bool supported;
};

// A list of known compilers, and their status as being supported or not.
// The table is read-only.
const KnownCompilerInfo kKnownCompilerInfos[] = {
  { L"Microsoft (R) Macro Assembler", false },
  { L"Microsoft (R) Optimizing Compiler", true },
  { L"Microsoft (R) LINK", false }
};
477 :
478 : // Given a compiland, determines whether the compiler used is one of those that
479 : // we whitelist.
480 E : bool IsBuiltBySupportedCompiler(IDiaSymbol* compiland) {
481 E : DCHECK(compiland != NULL);
482 E : DCHECK(IsSymTag(compiland, SymTagCompiland));
483 :
484 E : ScopedComPtr<IDiaSymbol> compiland_details;
485 : if (!GetCompilandDetailsForCompiland(compiland,
486 E : compiland_details.Receive())) {
487 : // If the compiland has no compiland details we assume the compiler is not
488 : // supported.
489 i : ScopedBstr compiland_name;
490 i : if (compiland->get_name(compiland_name.Receive()) == S_OK) {
491 i : VLOG(1) << "Compiland has no compiland details: "
492 : << com::ToString(compiland_name);
493 : }
494 i : return false;
495 : }
496 E : DCHECK(compiland_details.get() != NULL);
497 :
498 : // Get the compiler name.
499 E : ScopedBstr compiler_name;
500 E : HRESULT hr = compiland_details->get_compilerName(compiler_name.Receive());
501 E : DCHECK_EQ(S_OK, hr);
502 :
503 : // Check the compiler name against the list of known compilers.
504 E : for (size_t i = 0; i < arraysize(kKnownCompilerInfos); ++i) {
505 E : if (::wcscmp(kKnownCompilerInfos[i].compiler_name, compiler_name) == 0) {
506 E : return kKnownCompilerInfos[i].supported;
507 : }
508 E : }
509 :
510 : // Anything we don't explicitly know about is not supported.
511 E : VLOG(1) << "Encountered unknown compiler: " << compiler_name;
512 E : return false;
513 E : }
514 :
// Logs an error if @p error is true, a verbose logging message otherwise.
// The argument is parenthesized at every use so that compound expressions
// (e.g. 'a || b') expand with the intended precedence.
#define LOG_ERROR_OR_VLOG1(error) LAZY_STREAM( \
    ::logging::LogMessage(__FILE__, \
                          __LINE__, \
                          (error) ? ::logging::LOG_ERROR : -1).stream(), \
    ((error) ? LOG_IS_ON(ERROR) : VLOG_IS_ON(1)))
521 :
// Logs a warning if @p warn is true, a verbose logging message otherwise.
// The argument is parenthesized at every use so that compound expressions
// (e.g. 'a || b') expand with the intended precedence.
#define LOG_WARNING_OR_VLOG1(warn) LAZY_STREAM( \
    ::logging::LogMessage(__FILE__, \
                          __LINE__, \
                          (warn) ? ::logging::LOG_WARNING : -1).stream(), \
    ((warn) ? LOG_IS_ON(WARNING) : VLOG_IS_ON(1)))
528 :
529 : // Sets the disassembler directive to an error if @p strict is true, otherwise
530 : // sets it to an early termination.
531 i : CallbackDirective AbortOrTerminateDisassembly(bool strict) {
532 i : if (strict)
533 i : return Disassembler::kDirectiveAbort;
534 i : else
535 i : return Disassembler::kDirectiveTerminateWalk;
536 i : }
537 :
538 : // Returns true if the callback-directive is an early termination that should be
539 : // returned immediately.
540 E : bool IsFatalCallbackDirective(CallbackDirective directive) {
541 E : switch (directive) {
542 : case Disassembler::kDirectiveContinue:
543 : case Disassembler::kDirectiveTerminatePath:
544 E : return false;
545 :
546 : case Disassembler::kDirectiveTerminateWalk:
547 : case Disassembler::kDirectiveAbort:
548 i : return true;
549 :
550 : default:
551 i : NOTREACHED();
552 : }
553 :
554 i : return true;
555 E : }
556 :
557 : // Combines two callback directives. Higher codes supersede lower ones.
558 : CallbackDirective CombineCallbackDirectives(CallbackDirective d1,
559 E : CallbackDirective d2) {
560 : // This ensures that this logic remains valid. This should prevent people
561 : // from tinkering with CallbackDirective and breaking this code.
562 : COMPILE_ASSERT(Disassembler::kDirectiveContinue <
563 : Disassembler::kDirectiveTerminatePath &&
564 : Disassembler::kDirectiveTerminatePath <
565 : Disassembler::kDirectiveTerminateWalk &&
566 : Disassembler::kDirectiveTerminateWalk <
567 : Disassembler::kDirectiveAbort,
568 : callback_directive_enum_is_not_sorted);
569 E : return std::max(d1, d2);
570 E : }
571 :
572 : // Determines if the given block has a data label in the given range of bytes.
573 : bool HasDataLabelInRange(const BlockGraph::Block* block,
574 : BlockGraph::Offset offset,
575 i : BlockGraph::Size size) {
576 : BlockGraph::Block::LabelMap::const_iterator it =
577 i : block->labels().lower_bound(offset);
578 : BlockGraph::Block::LabelMap::const_iterator end =
579 i : block->labels().lower_bound(offset + size);
580 :
581 i : for (; it != end; ++it) {
582 i : if (it->second.has_attributes(BlockGraph::DATA_LABEL))
583 i : return true;
584 i : }
585 :
586 i : return false;
587 i : }
588 :
589 : void ReportPotentialNonReturningFunction(
590 : const Decomposer::IntermediateReferenceMap& refs,
591 : const BlockGraph::AddressSpace& image,
592 : const BlockGraph::Block* block,
593 : BlockGraph::Offset call_ref_offset,
594 i : const char* reason) {
595 : typedef Decomposer::IntermediateReferenceMap::const_iterator RefIter;
596 :
597 : // Try and track down the block being pointed at by the call. If this is a
598 : // computed address there will be no reference.
599 i : RefIter ref_it = refs.find(block->addr() + call_ref_offset);
600 i : if (ref_it == refs.end()) {
601 i : LOG(WARNING) << "Suspected non-returning function call from offset "
602 : << call_ref_offset << " (followed by " << reason
603 : << ") of block \"" << block->name()
604 : << "\", but target can not be tracked down.";
605 i : return;
606 : }
607 :
608 i : BlockGraph::Block* target = image.GetBlockByAddress(ref_it->second.base);
609 i : DCHECK(target != NULL);
610 :
611 : // If this was marked as non-returning, then its not suspicious.
612 i : if ((target->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
613 i : return;
614 :
615 : // If the target is a code block then this is a direct call.
616 i : if (target->type() == BlockGraph::CODE_BLOCK) {
617 i : LOG(WARNING) << "Suspected non-returning call from offset "
618 : << call_ref_offset << " (followed by " << reason
619 : << ") of block \"" << block->name() << "\" to code block \""
620 : << target->name() << "\".";
621 i : return;
622 : }
623 : // Otherwise the target is a data block and this is a memory indirect call
624 : // to a thunk.
625 i : DCHECK_EQ(BlockGraph::DATA_BLOCK, target->type());
626 :
627 : // Track down the import thunk.
628 i : RefIter thunk_ref_it = refs.find(ref_it->second.base);
629 i : DCHECK(thunk_ref_it != refs.end());
630 i : BlockGraph::Block* thunk = image.GetBlockByAddress(thunk_ref_it->second.base);
631 :
632 : // If this was marked as non-returning, then its not suspicious.
633 i : if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
634 i : return;
635 :
636 : // Otherwise, this is an import thunk. Get the module and symbol names.
637 i : LOG(WARNING) << "Suspected non-returning call from offset "
638 : << call_ref_offset << " (followed by " << reason
639 : << ") of block \"" << block->name() << "\" to import thunk \""
640 : << thunk->name() << "\".";
641 i : }
642 :
643 : void LookForNonReturningFunctions(
644 : const Decomposer::IntermediateReferenceMap& refs,
645 : const BlockGraph::AddressSpace& image,
646 : const BlockGraph::Block* block,
647 i : const Disassembler& disasm) {
648 i : bool saw_call = false;
649 i : bool saw_call_then_nop = false;
650 i : BlockGraph::Offset call_ref_offset = 0;
651 :
652 i : AbsoluteAddress end_of_last_inst;
653 : Disassembler::VisitedSpace::const_iterator inst_it =
654 i : disasm.visited().begin();
655 i : for (; inst_it != disasm.visited().end(); ++inst_it) {
656 : // Not contiguous with the last instruction? Then we're spanning a gap. If
657 : // it's an instruction then we didn't parse it; thus, we already know that
658 : // if the last instruction is a call it's to a non-returning function. So,
659 : // we only need to check for data.
660 i : if (inst_it->first.start() != end_of_last_inst) {
661 i : if (saw_call || saw_call_then_nop) {
662 i : BlockGraph::Offset offset = end_of_last_inst - disasm.code_addr();
663 i : BlockGraph::Size size = inst_it->first.start() - end_of_last_inst;
664 i : if (HasDataLabelInRange(block, offset, size))
665 : // We do not expect this to ever occur in cl.exe generated code.
666 : // However, it is entirely possible in hand-written assembly.
667 : ReportPotentialNonReturningFunction(
668 : refs, image, block, call_ref_offset,
669 i : saw_call ? "data" : "nop(s) and data");
670 : }
671 :
672 i : saw_call = false;
673 i : saw_call_then_nop = false;
674 : }
675 :
676 i : _DInst inst = { 0 };
677 i : BlockGraph::Offset offset = inst_it->first.start() - disasm.code_addr();
678 i : const uint8* code = disasm.code() + offset;
679 i : CHECK(core::DecodeOneInstruction(code, inst_it->first.size(), &inst));
680 :
681 : // Previous instruction was a call?
682 i : if (saw_call) {
683 i : if (core::IsNop(inst)) {
684 i : saw_call_then_nop = true;
685 i : } else if (core::IsDebugInterrupt(inst)) {
686 : ReportPotentialNonReturningFunction(
687 i : refs, image, block, call_ref_offset, "int3");
688 : }
689 i : saw_call = false;
690 i : } else if (saw_call_then_nop) {
691 : // The previous instructions we've seen have been a call followed by
692 : // arbitrary many nops. Look for another nop to continue the pattern.
693 i : saw_call_then_nop = core::IsNop(inst);
694 i : } else {
695 : // The previous instruction was not a call, so we're looking for one.
696 : // If this instruction is a call, remember that fact and also remember
697 : // the offset of its operand (the call target).
698 i : if (core::IsCall(inst)) {
699 i : saw_call = true;
700 : call_ref_offset = offset + inst_it->first.size() -
701 i : BlockGraph::Reference::kMaximumSize;
702 : }
703 : }
704 :
705 : // Remember the end of the last instruction we processed.
706 i : end_of_last_inst = inst_it->first.end();
707 i : }
708 :
709 : // If the last instruction was a call and we've marked that we've disassembled
710 : // past the end, then this is also a suspected non-returning function.
711 : if ((saw_call || saw_call_then_nop) &&
712 i : (block->attributes() & BlockGraph::DISASSEMBLED_PAST_END) != 0) {
713 i : const char* reason = saw_call ? "end of block" : "nop(s) and end of block";
714 : ReportPotentialNonReturningFunction(
715 i : refs, image, block, call_ref_offset, reason);
716 : }
717 i : }
718 :
719 E : bool CodeBlockHasAlignedJumpTables(const BlockGraph::Block* block) {
720 E : DCHECK(block != NULL);
721 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
722 :
723 : // Iterate over the labels of this block looking for jump tables.
724 E : bool has_jump_tables = false;
725 : BlockGraph::Block::LabelMap::const_iterator label_it =
726 E : block->labels().begin();
727 E : for (; label_it != block->labels().end(); ++label_it) {
728 E : if (!label_it->second.has_attributes(BlockGraph::JUMP_TABLE_LABEL))
729 E : continue;
730 :
731 E : has_jump_tables = true;
732 :
733 : // If the jump table is misaligned we can return false immediately.
734 E : if (label_it->first % kPointerSize != 0)
735 i : return false;
736 E : }
737 :
738 E : return has_jump_tables;
739 E : }
740 :
741 E : bool AlignCodeBlocksWithJumpTables(ImageLayout* image_layout) {
742 E : DCHECK(image_layout != NULL);
743 :
744 : BlockGraph::AddressSpace::RangeMapConstIter block_it =
745 E : image_layout->blocks.begin();
746 E : for (; block_it != image_layout->blocks.end(); ++block_it) {
747 E : BlockGraph::Block* block = block_it->second;
748 :
749 : // We only care about code blocks that are already aligned 0 mod 4 but
750 : // whose explicit alignment is currently less than that.
751 E : if (block->type() != BlockGraph::CODE_BLOCK)
752 E : continue;
753 E : if (block->alignment() >= kPointerSize)
754 i : continue;
755 E : if (block_it->first.start().value() % kPointerSize != 0)
756 E : continue;
757 :
758 : // Inspect them to see if they have aligned jump tables. If they do,
759 : // set the alignment of the block itself.
760 E : if (CodeBlockHasAlignedJumpTables(block_it->second))
761 E : block->set_alignment(kPointerSize);
762 E : }
763 :
764 E : return true;
765 E : }
766 :
767 : } // namespace
768 :
769 : Decomposer::Decomposer(const PEFile& image_file)
770 : : image_(NULL),
771 : image_file_(image_file),
772 : current_block_(NULL),
773 E : be_strict_with_current_block_(true) {
774 : // Register static initializer patterns that we know are always present.
775 : // CRT C/C++/etc initializers.
776 E : CHECK(RegisterStaticInitializerPatterns("(__x.*)_a", "(__x.*)_z"));
777 : // RTC (run-time checks) initializers (part of CRT).
778 E : CHECK(RegisterStaticInitializerPatterns("(__rtc_[it])aa", "(__rtc_[it])zz"));
779 : // ATL object map initializers.
780 : CHECK(RegisterStaticInitializerPatterns("(__pobjMapEntry)First",
781 E : "(__pobjMapEntry)Last"));
782 : // Thread-local storage template.
783 E : CHECK(RegisterStaticInitializerPatterns("(_tls_)start", "(_tls_)end"));
784 :
785 : // Register non-returning functions that for some reason the symbols lie to
786 : // us about.
787 E : CHECK(RegisterNonReturningFunction("_CxxThrowException"));
788 E : CHECK(RegisterNonReturningFunction("_longjmp"));
789 :
790 : // Register non-returning imports that we know about.
791 E : CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitProcess"));
792 E : CHECK(RegisterNonReturningImport("KERNEL32.dll", "ExitThread"));
793 E : }
794 :
795 E : bool Decomposer::Decompose(ImageLayout* image_layout) {
796 : // We start by finding the PDB path.
797 E : if (!FindAndValidatePdbPath())
798 E : return false;
799 E : DCHECK(!pdb_path_.empty());
800 :
801 : // Check if the block-graph has already been serialized into the PDB and load
802 : // it from here in this case. This allows round-trip decomposition.
803 E : bool stream_exists = false;
804 : if (LoadBlockGraphFromPdb(pdb_path_, image_file_, image_layout,
805 E : &stream_exists)) {
806 E : return true;
807 i : } else {
808 : // If the stream exists but hasn't been loaded we return an error. At this
809 : // point an error message has already been logged if there was one.
810 E : if (stream_exists)
811 i : return false;
812 : }
813 :
814 : // Move on to instantiating and initializing our Debug Interface Access
815 : // session.
816 E : ScopedComPtr<IDiaDataSource> dia_source;
817 E : if (!CreateDiaSource(dia_source.Receive()))
818 i : return false;
819 :
820 : // We create the session using the PDB file directly, as we've already
821 : // validated that it matches the module.
822 E : ScopedComPtr<IDiaSession> dia_session;
823 : if (!CreateDiaSession(pdb_path_,
824 : dia_source.get(),
825 E : dia_session.Receive())) {
826 i : return false;
827 : }
828 :
829 : HRESULT hr = dia_session->put_loadAddress(
830 E : image_file_.nt_headers()->OptionalHeader.ImageBase);
831 E : if (hr != S_OK) {
832 i : LOG(ERROR) << "Failed to set the DIA load address: "
833 : << com::LogHr(hr) << ".";
834 i : return false;
835 : }
836 :
837 E : ScopedComPtr<IDiaSymbol> global;
838 E : hr = dia_session->get_globalScope(global.Receive());
839 E : if (hr != S_OK) {
840 i : LOG(ERROR) << "Failed to get the DIA global scope: "
841 : << com::LogHr(hr) << ".";
842 i : return false;
843 : }
844 :
845 E : image_ = &image_layout->blocks;
846 :
847 : // Create the sections for the image.
848 E : bool success = CreateSections();
849 :
850 : // Load FIXUP information from the PDB file. We do this early on so that we
851 : // can do accounting with references that are created later on.
852 E : if (success)
853 E : success = LoadDebugStreams(dia_session);
854 :
855 : // Create intermediate references for each fixup entry.
856 E : if (success)
857 E : success = CreateReferencesFromFixups();
858 :
859 : // Chunk out important PE image structures, like the headers and such.
860 E : PEFileParser::PEHeader header;
861 E : if (success)
862 E : success = CreatePEImageBlocksAndReferences(&header);
863 :
864 : // Parse and validate the relocation entries.
865 E : if (success)
866 E : success = ParseRelocs();
867 :
868 : // Our first round of parsing is using section contributions. This creates
869 : // both code and data blocks.
870 E : if (success)
871 E : success = CreateBlocksFromSectionContribs(dia_session);
872 :
873 : // Process the function and thunk symbols in the image. This does not create
874 : // any blocks, as all functions are covered by section contributions.
875 E : if (success)
876 E : success = ProcessCodeSymbols(global);
877 :
878 : // Process data symbols. This can cause the creation of some blocks as the
879 : // data sections are not fully covered by section contributions.
880 E : if (success)
881 E : success = ProcessDataSymbols(global);
882 :
883 : // Create labels in code blocks.
884 E : if (success)
885 E : success = CreateGlobalLabels(global);
886 :
887 : // Create gap blocks. This ensures that we have complete coverage of the
888 : // entire image.
889 E : if (success)
890 E : success = CreateGapBlocks();
891 :
892 : // Parse public symbols, augmenting code and data labels where possible.
893 : // Some public symbols land on gap blocks, so they need to have been parsed
894 : // already.
895 E : if (success)
896 E : success = ProcessPublicSymbols(global);
897 :
898 : // Parse initialization bracketing symbols. This needs to happen after
899 : // PublicSymbols have been parsed.
900 E : if (success)
901 E : success = ProcessStaticInitializers();
902 :
903 : // We know that some data blocks need to have alignment precisely preserved.
904 : // For now, we very conservatively (guaranteed to be correct, but causes many
905 : // blocks to be aligned that don't strictly need alignment) guess alignment
906 : // for each block. This must be run after static initializers have been
907 : // parsed.
908 E : if (success)
909 E : success = GuessDataBlockAlignments();
910 :
911 : // Disassemble code blocks and create PC-relative references
912 E : if (success)
913 E : success = CreateCodeReferences();
914 :
915 : // Turn the address->address format references we've created into
916 : // block->block references on the blocks in the image.
917 E : if (success)
918 E : success = FinalizeIntermediateReferences();
919 :
920 : // Everything called after this points requires the references to have been
921 : // finalized.
922 :
923 : // One way of ensuring full coverage is to check that all of the fixups
924 : // were visited during decomposition.
925 E : if (success)
926 E : success = ConfirmFixupsVisited();
927 :
928 : // Now, find and label any padding blocks.
929 E : if (success)
930 E : success = FindPaddingBlocks();
931 :
932 : // Copy the image headers over to the layout.
933 E : if (success)
934 E : success = CopyHeaderToImageLayout(header.nt_headers, image_layout);
935 :
936 : // Set the alignment on code blocks with jump tables. This ensures that the
937 : // jump tables remain aligned post-transform.
938 E : if (success)
939 E : success = AlignCodeBlocksWithJumpTables(image_layout);
940 :
941 E : image_ = NULL;
942 :
943 E : return success;
944 E : }
945 :
946 E : bool Decomposer::FindAndValidatePdbPath() {
947 : // Manually find the PDB path if it is not specified.
948 E : if (pdb_path_.empty()) {
949 : if (!FindPdbForModule(image_file_.path(), &pdb_path_) ||
950 E : pdb_path_.empty()) {
951 i : LOG(ERROR) << "Unable to find PDB file for module: "
952 : << image_file_.path().value();
953 i : return false;
954 : }
955 : }
956 E : DCHECK(!pdb_path_.empty());
957 :
958 E : if (!file_util::PathExists(pdb_path_)) {
959 E : LOG(ERROR) << "Path not found: " << pdb_path_.value();
960 E : return false;
961 : }
962 :
963 : // Get the PDB info from the PDB file.
964 : pdb::PdbInfoHeader70 pdb_info_header;
965 E : if (!pdb::ReadPdbHeader(pdb_path_, &pdb_info_header)) {
966 i : LOG(ERROR) << "Unable to read PDB info header from PDB file: "
967 : << pdb_path_.value();
968 i : return false;
969 : }
970 :
971 : // Get the PDB info from the module.
972 E : PdbInfo pdb_info;
973 E : if (!pdb_info.Init(image_file_)) {
974 i : LOG(ERROR) << "Unable to read PDB info from PE file: "
975 : << image_file_.path().value();
976 i : return false;
977 : }
978 :
979 : // Ensure that they are consistent.
980 E : if (!pdb_info.IsConsistent(pdb_info_header)) {
981 i : LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
982 : << "module \"" << image_file_.path().value() << "\".";
983 i : return false;
984 : }
985 :
986 E : return true;
987 E : }
988 :
989 E : bool Decomposer::ProcessCodeSymbols(IDiaSymbol* global) {
990 E : if (!ProcessFunctionSymbols(global))
991 i : return false;
992 E : if (!ProcessThunkSymbols(global))
993 i : return false;
994 :
995 E : return true;
996 E : }
997 :
998 E : bool Decomposer::ProcessFunctionSymbols(IDiaSymbol* global) {
999 E : DCHECK(IsSymTag(global, SymTagExe));
1000 :
1001 : // Otherwise enumerate its offspring.
1002 E : ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
1003 : HRESULT hr = global->findChildren(SymTagFunction,
1004 : NULL,
1005 : nsNone,
1006 E : dia_enum_symbols.Receive());
1007 E : if (hr != S_OK) {
1008 i : LOG(ERROR) << "Failed to get the DIA function enumerator: "
1009 : << com::LogHr(hr) << ".";
1010 i : return false;
1011 : }
1012 :
1013 E : LONG count = 0;
1014 E : if (dia_enum_symbols->get_Count(&count) != S_OK) {
1015 i : LOG(ERROR) << "Failed to get function enumeration length.";
1016 i : return false;
1017 : }
1018 :
1019 E : for (LONG visited = 0; visited < count; ++visited) {
1020 E : ScopedComPtr<IDiaSymbol> function;
1021 E : ULONG fetched = 0;
1022 E : hr = dia_enum_symbols->Next(1, function.Receive(), &fetched);
1023 E : if (hr != S_OK) {
1024 i : LOG(ERROR) << "Failed to enumerate functions: " << com::LogHr(hr) << ".";
1025 i : return false;
1026 : }
1027 E : if (fetched == 0)
1028 i : break;
1029 :
1030 : // Create the block representing the function.
1031 E : DCHECK(IsSymTag(function, SymTagFunction));
1032 E : if (!ProcessFunctionOrThunkSymbol(function))
1033 i : return false;
1034 E : }
1035 :
1036 E : return true;
1037 E : }
1038 :
1039 E : bool Decomposer::ProcessFunctionOrThunkSymbol(IDiaSymbol* function) {
1040 E : DCHECK(IsSymTag(function, SymTagFunction) || IsSymTag(function, SymTagThunk));
1041 :
1042 E : DWORD location_type = LocIsNull;
1043 E : HRESULT hr = E_FAIL;
1044 E : if (FAILED(hr = function->get_locationType(&location_type))) {
1045 i : LOG(ERROR) << "Failed to retrieve function address type: "
1046 : << com::LogHr(hr) << ".";
1047 i : return false;
1048 : }
1049 E : if (location_type != LocIsStatic) {
1050 i : DCHECK_EQ(static_cast<DWORD>(LocIsNull), location_type);
1051 i : return true;
1052 : }
1053 :
1054 E : DWORD rva = 0;
1055 E : ULONGLONG length = 0;
1056 E : ScopedBstr name;
1057 : if ((hr = function->get_relativeVirtualAddress(&rva)) != S_OK ||
1058 : (hr = function->get_length(&length)) != S_OK ||
1059 E : (hr = function->get_name(name.Receive())) != S_OK) {
1060 i : LOG(ERROR) << "Failed to retrieve function information: "
1061 : << com::LogHr(hr) << ".";
1062 i : return false;
1063 : }
1064 :
1065 : // Certain properties are not defined on all blocks, so the following calls
1066 : // may return S_FALSE.
1067 E : BOOL no_return = FALSE;
1068 E : if (function->get_noReturn(&no_return) != S_OK)
1069 E : no_return = FALSE;
1070 :
1071 E : BOOL has_inl_asm = FALSE;
1072 E : if (function->get_hasInlAsm(&has_inl_asm) != S_OK)
1073 E : has_inl_asm = FALSE;
1074 :
1075 E : BOOL has_eh = FALSE;
1076 E : if (function->get_hasEH(&has_eh) != S_OK)
1077 E : has_eh = FALSE;
1078 :
1079 E : BOOL has_seh = FALSE;
1080 E : if (function->get_hasSEH(&has_seh) != S_OK)
1081 E : has_seh = FALSE;
1082 :
1083 E : std::string block_name;
1084 E : if (!WideToUTF8(name, name.Length(), &block_name)) {
1085 i : LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1086 i : return false;
1087 : }
1088 :
1089 : // Find the block to which this symbol maps, and ensure it fully covers the
1090 : // symbol.
1091 E : RelativeAddress block_addr(rva);
1092 E : BlockGraph::Block* block = image_->GetBlockByAddress(block_addr);
1093 E : if (block == NULL) {
1094 i : LOG(ERROR) << "No block found for function/thunk symbol \""
1095 : << block_name << "\".";
1096 i : return false;
1097 : }
1098 E : if (block->addr() + block->size() < block_addr + length) {
1099 i : LOG(ERROR) << "Section contribution \"" << block->name() << "\" does not "
1100 : << "fully cover function/thunk symbol \"" << block_name << "\".";
1101 i : return false;
1102 : }
1103 :
1104 : // Annotate the block with a label, as this is an entry point to it. This is
1105 : // the routine that adds labels, so there should never be any collisions.
1106 E : CHECK(AddLabelToBlock(block_addr, block_name, BlockGraph::CODE_LABEL, block));
1107 :
1108 : // If we didn't get an explicit no-return flag from the symbols check our
1109 : // list of exceptions.
1110 E : if (no_return == FALSE && non_returning_functions_.count(block->name()) > 0) {
1111 E : VLOG(1) << "Forcing non-returning attribute on function \""
1112 : << block->name() << "\".";
1113 E : no_return = TRUE;
1114 : }
1115 :
1116 : // Set the block attributes.
1117 E : if (no_return == TRUE)
1118 E : block->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
1119 E : if (has_inl_asm == TRUE)
1120 E : block->set_attribute(BlockGraph::HAS_INLINE_ASSEMBLY);
1121 E : if (has_eh || has_seh)
1122 E : block->set_attribute(BlockGraph::HAS_EXCEPTION_HANDLING);
1123 E : if (IsSymTag(function, SymTagThunk))
1124 E : block->set_attribute(BlockGraph::THUNK);
1125 :
1126 E : if (!CreateLabelsForFunction(function, block)) {
1127 i : LOG(ERROR) << "Failed to create labels for '" << block->name() << "'.";
1128 i : return false;
1129 : }
1130 :
1131 E : return true;
1132 E : }
1133 :
1134 : bool Decomposer::CreateLabelsForFunction(IDiaSymbol* function,
1135 E : BlockGraph::Block* block) {
1136 E : DCHECK(function != NULL);
1137 E : DCHECK(block != NULL);
1138 :
1139 : // Lookup the block address.
1140 E : RelativeAddress block_addr;
1141 E : if (!image_->GetAddressOf(block, &block_addr)) {
1142 i : NOTREACHED() << "Block " << block->name() << " has no address.";
1143 i : return false;
1144 : }
1145 :
1146 : // Enumerate all symbols which are children of function.
1147 E : ScopedComPtr<IDiaEnumSymbols> dia_enum_symbols;
1148 : HRESULT hr = function->findChildren(SymTagNull,
1149 : NULL,
1150 : nsNone,
1151 E : dia_enum_symbols.Receive());
1152 E : if (FAILED(hr)) {
1153 i : LOG(ERROR) << "Failed to get the DIA label enumerator: "
1154 : << com::LogHr(hr) << ".";
1155 i : return false;
1156 : }
1157 :
1158 E : while (true) {
1159 E : ScopedComPtr<IDiaSymbol> symbol;
1160 E : ULONG fetched = 0;
1161 E : hr = dia_enum_symbols->Next(1, symbol.Receive(), &fetched);
1162 E : if (FAILED(hr)) {
1163 i : LOG(ERROR) << "Failed to enumerate the DIA symbol: "
1164 : << com::LogHr(hr) << ".";
1165 i : return false;
1166 : }
1167 E : if (hr != S_OK || fetched == 0)
1168 E : break;
1169 :
1170 : // If it doesn't have an RVA then it's not interesting to us.
1171 E : DWORD temp_rva = 0;
1172 E : if (symbol->get_relativeVirtualAddress(&temp_rva) != S_OK)
1173 E : continue;
1174 :
1175 : // Get the type of symbol we're looking at.
1176 E : DWORD temp_sym_tag = 0;
1177 E : if (symbol->get_symTag(&temp_sym_tag) != S_OK) {
1178 i : LOG(ERROR) << "Failed to retrieve label information.";
1179 i : return false;
1180 : }
1181 :
1182 E : enum SymTagEnum sym_tag = static_cast<enum SymTagEnum>(temp_sym_tag);
1183 :
1184 : #if defined(_MSC_VER) && (_MSC_VER >= 1700)
1185 : // Since VS 2012 there's some new symbols exposed by DIA which are not
1186 : // handled at the moment.
1187 : // TODO(sebmarchand): Handle those symbols.
1188 : if (sym_tag == SymTagInlineSite)
1189 : continue;
1190 : #endif
1191 :
1192 E : BlockGraph::LabelAttributes label_attr = SymTagToLabelAttributes(sym_tag);
1193 :
1194 : // TODO(rogerm): Add a flag to include/exclude the symbol types that are
1195 : // interesting for debugging purposes, but not actually needed for
1196 : // decomposition: FuncDebugStart/End, Block, etc.
1197 :
1198 : // We ignore labels that fall outside of the code block. We sometimes
1199 : // get labels at the end of a code block, and if the binary has any OMAP
1200 : // information these follow the original successor block, and they can
1201 : // end up most anywhere in the binary.
1202 E : RelativeAddress label_rva(temp_rva);
1203 E : if (label_rva < block_addr || label_rva >= block_addr + block->size())
1204 E : continue;
1205 :
1206 : // Extract the symbol's name.
1207 E : std::string label_name;
1208 : {
1209 E : ScopedBstr temp_name;
1210 : if (symbol->get_name(temp_name.Receive()) == S_OK &&
1211 E : !WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1212 i : LOG(ERROR) << "Failed to convert label name to UTF8.";
1213 i : return false;
1214 : }
1215 E : }
1216 :
1217 : // Not all symbols have a name, if we've found one without a name, make
1218 : // one up.
1219 E : BlockGraph::Offset offset = label_rva - block_addr;
1220 E : if (label_name.empty()) {
1221 E : switch (sym_tag) {
1222 : case SymTagFuncDebugStart: {
1223 E : label_name = "<debug-start>";
1224 E : break;
1225 : }
1226 :
1227 : case SymTagFuncDebugEnd: {
1228 E : label_name = "<debug-end>";
1229 E : break;
1230 : }
1231 :
1232 : case SymTagData: {
1233 E : if (reloc_set_.count(label_rva)) {
1234 E : label_name = base::StringPrintf("<jump-table-%d>", offset);
1235 E : label_attr |= BlockGraph::JUMP_TABLE_LABEL;
1236 E : } else {
1237 E : label_name = base::StringPrintf("<case-table-%d>", offset);
1238 E : label_attr |= BlockGraph::CASE_TABLE_LABEL;
1239 : }
1240 E : break;
1241 : }
1242 :
1243 : case SymTagBlock: {
1244 E : label_name = "<scope-start>";
1245 E : break;
1246 : }
1247 :
1248 : // The DIA SDK shipping with MSVS 2010 includes additional symbol types.
1249 : case SymTagCallSite: {
1250 E : label_name = "<call-site>";
1251 E : break;
1252 : }
1253 :
1254 : default: {
1255 i : LOG(WARNING) << "Unexpected symbol type " << sym_tag << " in "
1256 : << block->name() << " at "
1257 : << base::StringPrintf("0x%08X.", label_rva.value());
1258 i : label_name = base::StringPrintf("<anonymous-%d>", sym_tag);
1259 : }
1260 : }
1261 : }
1262 :
1263 : // We expect that we'll never see a code label that refers to a reloc.
1264 : // This happens sometimes, however, as we generally get a code label for
1265 : // the first byte after a switch statement. This can sometimes land on the
1266 : // following jump table.
1267 E : if ((label_attr & BlockGraph::CODE_LABEL) && reloc_set_.count(label_rva)) {
1268 E : VLOG(1) << "Collision between reloc and code label in "
1269 : << block->name() << " at " << label_name
1270 : << base::StringPrintf(" (0x%08X).", label_rva.value())
1271 : << " Falling back to data label.";
1272 E : label_attr = BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL;
1273 E : DCHECK_EQ(block_addr, block->addr());
1274 E : BlockGraph::Label label;
1275 : if (block->GetLabel(offset, &label) &&
1276 E : !label.has_attributes(BlockGraph::DATA_LABEL)) {
1277 i : VLOG(1) << block->name() << ": Replacing label " << label.name()
1278 : << " ("
1279 : << BlockGraph::LabelAttributesToString(label.attributes())
1280 : << ") at offset " << offset << ".";
1281 i : block->RemoveLabel(offset);
1282 : }
1283 E : }
1284 :
1285 : // Add the label to the block.
1286 E : if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1287 i : LOG(ERROR) << "Failed to add label to code block.";
1288 i : return false;
1289 : }
1290 :
1291 : // Is this a scope? Then it also has a length. Use it to create the matching
1292 : // scope end.
1293 E : if (sym_tag == SymTagBlock) {
1294 E : ULONGLONG length = 0;
1295 E : if (symbol->get_length(&length) != S_OK) {
1296 i : LOG(ERROR) << "Failed to extract code scope length for "
1297 : << block->name();
1298 i : return false;
1299 : }
1300 E : label_rva += length;
1301 E : label_name = "<scope-end>";
1302 E : label_attr = BlockGraph::SCOPE_END_LABEL;
1303 E : if (!AddLabelToBlock(label_rva, label_name, label_attr, block)) {
1304 i : LOG(ERROR) << "Failed to add label to code block.";
1305 i : return false;
1306 : }
1307 : }
1308 E : }
1309 :
1310 E : return true;
1311 E : }
1312 :
1313 E : bool Decomposer::ProcessThunkSymbols(IDiaSymbol* globals) {
1314 E : ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1315 : HRESULT hr = globals->findChildren(SymTagCompiland,
1316 : NULL,
1317 : nsNone,
1318 E : enum_compilands.Receive());
1319 E : if (FAILED(hr)) {
1320 i : LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1321 : << com::LogHr(hr) << ".";
1322 i : return false;
1323 : }
1324 :
1325 E : while (true) {
1326 E : ScopedComPtr<IDiaSymbol> compiland;
1327 E : ULONG fetched = 0;
1328 E : hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1329 E : if (FAILED(hr)) {
1330 i : LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1331 : << com::LogHr(hr) << ".";
1332 i : return false;
1333 : }
1334 E : if (hr != S_OK || fetched == 0)
1335 E : break;
1336 :
1337 E : ScopedComPtr<IDiaEnumSymbols> enum_thunks;
1338 : hr = compiland->findChildren(SymTagThunk,
1339 : NULL,
1340 : nsNone,
1341 E : enum_thunks.Receive());
1342 E : if (FAILED(hr)) {
1343 i : LOG(ERROR) << "Failed to retrieve thunk enumerator: "
1344 : << com::LogHr(hr) << ".";
1345 i : return false;
1346 : }
1347 :
1348 E : while (true) {
1349 E : ScopedComPtr<IDiaSymbol> thunk;
1350 E : hr = enum_thunks->Next(1, thunk.Receive(), &fetched);
1351 E : if (FAILED(hr)) {
1352 i : LOG(ERROR) << "Failed to enumerate thunk enumerator: "
1353 : << com::LogHr(hr) << ".";
1354 i : return false;
1355 : }
1356 E : if (hr != S_OK || fetched == 0)
1357 E : break;
1358 :
1359 E : DCHECK(IsSymTag(thunk, SymTagThunk));
1360 :
1361 E : if (!ProcessFunctionOrThunkSymbol(thunk))
1362 i : return false;
1363 E : }
1364 E : }
1365 :
1366 E : return true;
1367 E : }
1368 :
1369 E : bool Decomposer::CreateGlobalLabels(IDiaSymbol* globals) {
1370 E : ScopedComPtr<IDiaEnumSymbols> enum_compilands;
1371 : HRESULT hr = globals->findChildren(SymTagCompiland,
1372 : NULL,
1373 : nsNone,
1374 E : enum_compilands.Receive());
1375 E : if (FAILED(hr)) {
1376 i : LOG(ERROR) << "Failed to retrieve compiland enumerator: "
1377 : << com::LogHr(hr) << ".";
1378 i : return false;
1379 : }
1380 :
1381 E : while (true) {
1382 E : ScopedComPtr<IDiaSymbol> compiland;
1383 E : ULONG fetched = 0;
1384 E : hr = enum_compilands->Next(1, compiland.Receive(), &fetched);
1385 E : if (FAILED(hr)) {
1386 i : LOG(ERROR) << "Failed to enumerate compiland enumerator: "
1387 : << com::LogHr(hr) << ".";
1388 i : return false;
1389 : }
1390 E : if (hr != S_OK || fetched == 0)
1391 E : break;
1392 :
1393 E : ScopedComPtr<IDiaEnumSymbols> enum_labels;
1394 : hr = compiland->findChildren(SymTagLabel,
1395 : NULL,
1396 : nsNone,
1397 E : enum_labels.Receive());
1398 E : if (FAILED(hr)) {
1399 i : LOG(ERROR) << "Failed to retrieve label enumerator: "
1400 : << com::LogHr(hr) << ".";
1401 i : return false;
1402 : }
1403 :
1404 E : while (true) {
1405 E : ScopedComPtr<IDiaSymbol> label;
1406 E : hr = enum_labels->Next(1, label.Receive(), &fetched);
1407 E : if (FAILED(hr)) {
1408 i : LOG(ERROR) << "Failed to enumerate label enumerator: "
1409 : << com::LogHr(hr) << ".";
1410 i : return false;
1411 : }
1412 E : if (hr != S_OK || fetched == 0)
1413 E : break;
1414 :
1415 E : DCHECK(IsSymTag(label, SymTagLabel));
1416 :
1417 E : DWORD addr = 0;
1418 E : ScopedBstr temp_name;
1419 : if (label->get_relativeVirtualAddress(&addr) != S_OK ||
1420 E : label->get_name(temp_name.Receive()) != S_OK) {
1421 i : LOG(ERROR) << "Failed to retrieve label address or name.";
1422 i : return false;
1423 : }
1424 :
1425 E : std::string label_name;
1426 E : if (!WideToUTF8(temp_name, temp_name.Length(), &label_name)) {
1427 i : LOG(ERROR) << "Failed to convert label name to UTF8.";
1428 i : return false;
1429 : }
1430 :
1431 E : RelativeAddress label_addr(addr);
1432 E : BlockGraph::Block* block = image_->GetBlockByAddress(label_addr);
1433 E : if (block == NULL) {
1434 i : LOG(ERROR) << "No block for label " << label_name << " at " << addr;
1435 i : return false;
1436 : }
1437 :
1438 : if (!AddLabelToBlock(label_addr,
1439 : label_name,
1440 : BlockGraph::CODE_LABEL,
1441 E : block)) {
1442 i : LOG(ERROR) << "Failed to add label to code block.";
1443 i : return false;
1444 : }
1445 E : }
1446 E : }
1447 :
1448 E : return true;
1449 E : }
1450 :
1451 : bool Decomposer::CreateGapBlock(BlockGraph::BlockType block_type,
1452 : RelativeAddress address,
1453 E : BlockGraph::Size size) {
1454 : BlockGraph::Block* block = FindOrCreateBlock(block_type, address, size,
1455 : base::StringPrintf("Gap Block 0x%08X", address.value()).c_str(),
1456 E : kExpectNoBlock);
1457 E : if (block == NULL) {
1458 i : LOG(ERROR) << "Unable to create gap block.";
1459 i : return false;
1460 : }
1461 E : block->set_attribute(BlockGraph::GAP_BLOCK);
1462 :
1463 E : return true;
1464 E : }
1465 :
1466 : bool Decomposer::CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
1467 E : BlockGraph::BlockType block_type) {
1468 E : RelativeAddress section_begin(header->VirtualAddress);
1469 E : RelativeAddress section_end(section_begin + header->Misc.VirtualSize);
1470 : RelativeAddress image_end(
1471 E : image_file_.nt_headers()->OptionalHeader.SizeOfImage);
1472 :
1473 : // Search for the first and last blocks interesting from the start and end
1474 : // of the section to the end of the image.
1475 : BlockGraph::AddressSpace::RangeMap::const_iterator it(
1476 : image_->address_space_impl().FindFirstIntersection(
1477 : BlockGraph::AddressSpace::Range(section_begin,
1478 E : image_end - section_begin)));
1479 :
1480 : BlockGraph::AddressSpace::RangeMap::const_iterator end =
1481 E : image_->address_space_impl().end();
1482 E : if (section_end < image_end) {
1483 : end = image_->address_space_impl().FindFirstIntersection(
1484 : BlockGraph::AddressSpace::Range(section_end,
1485 E : image_end - section_end));
1486 : }
1487 :
1488 : // The whole section is missing. Cover it with one gap block.
1489 E : if (it == end)
1490 : return CreateGapBlock(
1491 i : block_type, section_begin, section_end - section_begin);
1492 :
1493 : // Create the head gap block if need be.
1494 E : if (section_begin < it->first.start())
1495 : if (!CreateGapBlock(
1496 i : block_type, section_begin, it->first.start() - section_begin))
1497 i : return false;
1498 :
1499 : // Now iterate the blocks and fill in gaps.
1500 E : for (; it != end; ++it) {
1501 E : const BlockGraph::Block* block = it->second;
1502 E : DCHECK(block != NULL);
1503 E : RelativeAddress block_end = it->first.start() + block->size();
1504 E : if (block_end >= section_end)
1505 E : break;
1506 :
1507 : // Walk to the next address in turn.
1508 E : BlockGraph::AddressSpace::RangeMap::const_iterator next = it;
1509 E : ++next;
1510 E : if (next == end) {
1511 : // We're at the end of the list. Create the tail gap block.
1512 E : DCHECK_GT(section_end, block_end);
1513 E : if (!CreateGapBlock(block_type, block_end, section_end - block_end))
1514 i : return false;
1515 E : break;
1516 : }
1517 :
1518 : // Create the interstitial gap block.
1519 E : if (block_end < next->first.start())
1520 : if (!CreateGapBlock(
1521 E : block_type, block_end, next->first.start() - block_end))
1522 i : return false;
1523 E : }
1524 :
1525 E : return true;
1526 E : }
1527 :
1528 E : bool Decomposer::CreateGapBlocks() {
1529 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1530 :
1531 : // Iterate through all the image sections.
1532 E : for (size_t i = 0; i < num_sections; ++i) {
1533 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1534 E : DCHECK(header != NULL);
1535 :
1536 E : BlockGraph::BlockType type = BlockGraph::CODE_BLOCK;
1537 E : const char* section_type = NULL;
1538 E : switch (GetSectionType(header)) {
1539 : case kSectionCode:
1540 E : type = BlockGraph::CODE_BLOCK;
1541 E : section_type = "code";
1542 E : break;
1543 :
1544 : case kSectionData:
1545 E : type = BlockGraph::DATA_BLOCK;
1546 E : section_type = "data";
1547 E : break;
1548 :
1549 : default:
1550 i : continue;
1551 : }
1552 :
1553 E : if (!CreateSectionGapBlocks(header, type)) {
1554 i : LOG(ERROR) << "Unable to create gap blocks for " << section_type
1555 : << " section \"" << header->Name << "\".";
1556 i : return false;
1557 : }
1558 E : }
1559 :
1560 E : return true;
1561 E : }
1562 :
1563 : bool Decomposer::AddReferenceCallback(RelativeAddress src_addr,
1564 : BlockGraph::ReferenceType type,
1565 : BlockGraph::Size size,
1566 E : RelativeAddress dst_addr) {
1567 : // This is only called by the PEFileParser, and it creates some references
1568 : // for which there are no corresponding fixup entries.
1569 : return ValidateOrAddReference(FIXUP_MAY_EXIST, src_addr, type, size, dst_addr,
1570 E : 0, &fixup_map_, &references_);
1571 E : }
1572 :
1573 E : bool Decomposer::ParseRelocs() {
1574 E : if (!image_file_.DecodeRelocs(&reloc_set_)) {
1575 i : LOG(ERROR) << "Unable to decode image relocs.";
1576 i : return false;
1577 : }
1578 :
1579 E : PEFile::RelocMap reloc_map;
1580 E : if (!image_file_.ReadRelocs(reloc_set_, &reloc_map)) {
1581 i : LOG(ERROR) << "Unable to read image relocs.";
1582 i : return false;
1583 : }
1584 :
1585 : // Validate each relocation entry against the corresponding fixup entry.
1586 E : if (!ValidateRelocs(reloc_map))
1587 i : return false;
1588 :
1589 E : return true;
1590 E : }
1591 :
1592 E : bool Decomposer::CreateReferencesFromFixups() {
1593 E : FixupMap::const_iterator it(fixup_map_.begin());
1594 E : for (; it != fixup_map_.end(); ++it) {
1595 E : RelativeAddress src_addr(it->second.location);
1596 E : uint32 data = 0;
1597 E : if (!image_file_.ReadImage(src_addr, &data, sizeof(data))) {
1598 i : LOG(ERROR) << "Unable to read image data for fixup with source at "
1599 : << src_addr;
1600 i : return false;
1601 : }
1602 :
1603 E : RelativeAddress dst_base(it->second.base);
1604 E : BlockGraph::Offset dst_offset = 0;
1605 E : switch (it->second.type) {
1606 : case BlockGraph::PC_RELATIVE_REF: {
1607 E : dst_offset = src_addr + kPointerSize + data - dst_base;
1608 E : break;
1609 : }
1610 :
1611 : case BlockGraph::ABSOLUTE_REF: {
1612 E : dst_offset = image_file_.AbsToRelDisplacement(data) - dst_base.value();
1613 E : break;
1614 : }
1615 :
1616 : case BlockGraph::RELATIVE_REF: {
1617 E : dst_offset = data - dst_base.value();
1618 E : break;
1619 : }
1620 :
1621 : default: {
1622 i : NOTREACHED() << "Invalid reference type.";
1623 i : return false;
1624 : }
1625 : }
1626 :
1627 : if (!AddReference(src_addr, it->second.type, kPointerSize, dst_base,
1628 E : dst_offset, &references_)) {
1629 i : return false;
1630 : }
1631 E : }
1632 :
1633 E : return true;
1634 E : }
1635 :
1636 E : bool Decomposer::ValidateRelocs(const PEFile::RelocMap& reloc_map) {
1637 E : PEFile::RelocMap::const_iterator it(reloc_map.begin());
1638 E : PEFile::RelocMap::const_iterator end(reloc_map.end());
1639 E : for (; it != end; ++it) {
1640 E : RelativeAddress src(it->first);
1641 E : RelativeAddress dummy;
1642 :
1643 : if (!ValidateOrAddReference(
1644 : FIXUP_MUST_EXIST, src, BlockGraph::ABSOLUTE_REF,
1645 E : sizeof(dummy), dummy, 0, &fixup_map_, &references_)) {
1646 i : return false;
1647 : }
1648 E : }
1649 :
1650 E : return true;
1651 E : }
1652 :
1653 E : bool Decomposer::CreateBlocksFromSectionContribs(IDiaSession* session) {
1654 E : ScopedComPtr<IDiaEnumSectionContribs> section_contribs;
1655 : SearchResult search_result = FindDiaTable(session,
1656 E : section_contribs.Receive());
1657 E : if (search_result != kSearchSucceeded) {
1658 i : if (search_result == kSearchFailed)
1659 i : LOG(ERROR) << "No section contribution table found.";
1660 i : return false;
1661 : }
1662 :
1663 E : size_t rsrc_id = image_file_.GetSectionIndex(kResourceSectionName);
1664 :
1665 E : LONG count = 0;
1666 E : if (section_contribs->get_Count(&count) != S_OK) {
1667 i : LOG(ERROR) << "Failed to get section contributions enumeration length.";
1668 i : return false;
1669 : }
1670 :
1671 E : for (LONG visited = 0; visited < count; ++visited) {
1672 E : ScopedComPtr<IDiaSectionContrib> section_contrib;
1673 E : ULONG fetched = 0;
1674 E : HRESULT hr = section_contribs->Next(1, section_contrib.Receive(), &fetched);
1675 E : if (hr != S_OK) {
1676 i : LOG(ERROR) << "Failed to get DIA section contribution: "
1677 : << com::LogHr(hr) << ".";
1678 i : return false;
1679 : }
1680 E : if (fetched == 0)
1681 i : break;
1682 :
1683 E : hr = E_FAIL;
1684 E : DWORD rva = 0;
1685 E : DWORD length = 0;
1686 E : DWORD section_id = 0;
1687 E : BOOL code = FALSE;
1688 E : ScopedComPtr<IDiaSymbol> compiland;
1689 E : ScopedBstr bstr_name;
1690 : if ((hr = section_contrib->get_relativeVirtualAddress(&rva)) != S_OK ||
1691 : (hr = section_contrib->get_length(&length)) != S_OK ||
1692 : (hr = section_contrib->get_addressSection(§ion_id)) != S_OK ||
1693 : (hr = section_contrib->get_code(&code)) != S_OK ||
1694 : (hr = section_contrib->get_compiland(compiland.Receive())) != S_OK ||
1695 E : (hr = compiland->get_name(bstr_name.Receive())) != S_OK) {
1696 i : LOG(ERROR) << "Failed to get section contribution properties: "
1697 : << com::LogHr(hr) << ".";
1698 i : return false;
1699 : }
1700 :
1701 : // Determine if this function was built by a supported compiler.
1702 : bool is_built_by_supported_compiler =
1703 E : IsBuiltBySupportedCompiler(compiland.get());
1704 :
1705 : // DIA numbers sections from 1 to n, while we do 0 to n - 1.
1706 E : DCHECK_LT(0u, section_id);
1707 E : --section_id;
1708 :
1709 : // We don't parse the resource section, as it is parsed by the PEFileParser.
1710 E : if (section_id == rsrc_id)
1711 E : continue;
1712 :
1713 E : std::string name;
1714 E : if (!WideToUTF8(bstr_name, bstr_name.Length(), &name)) {
1715 i : LOG(ERROR) << "Failed to convert compiland name to UTF8.";
1716 i : return false;
1717 : }
1718 :
1719 : // Create the block.
1720 : BlockGraph::BlockType block_type =
1721 E : code ? BlockGraph::CODE_BLOCK : BlockGraph::DATA_BLOCK;
1722 : BlockGraph::Block* block = FindOrCreateBlock(block_type,
1723 : RelativeAddress(rva),
1724 : length,
1725 : name.c_str(),
1726 E : kExpectNoBlock);
1727 E : if (block == NULL) {
1728 i : LOG(ERROR) << "Unable to create block.";
1729 i : return false;
1730 : }
1731 :
1732 : // Set the block compiland name.
1733 E : block->set_compiland_name(name);
1734 :
1735 : // Set the block attributes.
1736 E : block->set_attribute(BlockGraph::SECTION_CONTRIB);
1737 E : if (!is_built_by_supported_compiler)
1738 E : block->set_attribute(BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER);
1739 E : }
1740 :
1741 E : return true;
1742 E : }
1743 :
1744 : DiaBrowser::BrowserDirective Decomposer::OnDataSymbol(
1745 : const DiaBrowser& dia_browser,
1746 : const DiaBrowser::SymTagVector& sym_tags,
1747 E : const DiaBrowser::SymbolPtrVector& symbols) {
1748 E : DCHECK_LT(0u, sym_tags.size());
1749 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1750 E : DCHECK_EQ(SymTagData, sym_tags.back());
1751 :
1752 E : const DiaBrowser::SymbolPtr& data(symbols.back());
1753 :
1754 E : HRESULT hr = E_FAIL;
1755 E : DWORD location_type = LocIsNull;
1756 E : DWORD rva = 0;
1757 E : ScopedBstr name_bstr;
1758 : if (FAILED(hr = data->get_locationType(&location_type)) ||
1759 : FAILED(hr = data->get_relativeVirtualAddress(&rva)) ||
1760 E : FAILED(hr = data->get_name(name_bstr.Receive()))) {
1761 i : LOG(ERROR) << "Failed to get data properties: " << com::LogHr(hr) << ".";
1762 i : return DiaBrowser::kBrowserAbort;
1763 : }
1764 :
1765 : // We only parse data symbols with static storage.
1766 E : if (location_type != LocIsStatic)
1767 E : return DiaBrowser::kBrowserContinue;
1768 :
1769 : // Symbols with an address of zero are essentially invalid. They appear to
1770 : // have been optimized away by the compiler, but they are still reported.
1771 E : if (rva == 0)
1772 E : return DiaBrowser::kBrowserContinue;
1773 :
1774 : // TODO(chrisha): We eventually want to get alignment info from the type
1775 : // information. This is strictly a lower bound, however, as certain
1776 : // data may be used in instructions that impose stricter alignment
1777 : // requirements.
1778 E : size_t length = 0;
1779 E : if (!GetTypeInfo(data, &length)) {
1780 i : return DiaBrowser::kBrowserAbort;
1781 : }
1782 : // Zero-length data symbols act as 'forward declares' in some sense. They
1783 : // are always followed by a non-zero length data symbol with the same name
1784 : // and location.
1785 E : if (length == 0)
1786 E : return DiaBrowser::kBrowserContinue;
1787 :
1788 E : RelativeAddress addr(rva);
1789 E : std::string name;
1790 E : if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1791 i : LOG(ERROR) << "Failed to convert data symbol name to UTF8.";
1792 i : return DiaBrowser::kBrowserAbort;
1793 : }
1794 :
1795 : // In general we expect data symbols to be completely contained by a block.
1796 : // The data symbol can exceed the size of the block in the case of data
1797 : // imports. For some reason the toolchain emits a global data symbol with
1798 : // type information equal to the type of the data *pointed* to by the import
1799 : // entry rather than the type of the entry itself. Thus, if the data type
1800 : // is bigger than the entire IAT this symbol will exceed it. To complicate
1801 : // matters even more, a poorly written module can import its own export in
1802 : // which case a linker generated pseudo-import-entry block will be
1803 : // generated. This won't be part of the IAT, so we can't even filter based
1804 : // on that. Instead, we simply ignore global data symbols that exceed the
1805 : // block size.
1806 E : FindOrCreateBlockDirective directive = kAllowCoveringBlock;
1807 E : base::StringPiece spname(name);
1808 E : if (sym_tags.size() == 1 && spname.starts_with("_imp_")) {
1809 : // For global data symbols (no parent symbols) to imported data ("_imp_"
1810 : // prefix) we allow partially covering blocks.
1811 E : directive = kAllowPartialCoveringBlock;
1812 : }
1813 :
1814 : BlockGraph::Block* block = FindOrCreateBlock(BlockGraph::DATA_BLOCK,
1815 : addr, length, spname,
1816 E : directive);
1817 :
1818 : // We've seen null blocks for some symbols in modules compiled using a custom
1819 : // non-Microsoft toolchain.
1820 E : if (block == NULL) {
1821 i : LOG(ERROR) << "Failed to get a block for symbol named " << name << ".";
1822 i : return DiaBrowser::kBrowserAbort;
1823 : }
1824 :
1825 E : if (block->type() == BlockGraph::CODE_BLOCK) {
1826 : // The NativeClient bits of chrome.dll consists of hand-written assembly
1827 : // that is compiled using a custom non-Microsoft toolchain. Unfortunately
1828 : // for us this toolchain emits 1-byte data symbols instead of code labels.
1829 : static const char kNaClPrefix[] = "NaCl";
1830 : if (length == 1 &&
1831 E : name.compare(0, arraysize(kNaClPrefix) - 1, kNaClPrefix) == 0) {
1832 i : if (!AddLabelToBlock(addr, name, BlockGraph::CODE_LABEL, block)) {
1833 i : LOG(ERROR) << "Failed to add label to code block.";
1834 i : return DiaBrowser::kBrowserAbort;
1835 : }
1836 :
1837 i : return DiaBrowser::kBrowserContinue;
1838 : }
1839 : }
1840 :
1841 E : if (!AddLabelToBlock(addr, name, BlockGraph::DATA_LABEL, block)) {
1842 i : LOG(ERROR) << "Failed to add data label to block.";
1843 i : return DiaBrowser::kBrowserAbort;
1844 : }
1845 :
1846 E : return DiaBrowser::kBrowserContinue;
1847 E : }
1848 :
1849 : DiaBrowser::BrowserDirective Decomposer::OnPublicSymbol(
1850 : const DiaBrowser& dia_browser,
1851 : const DiaBrowser::SymTagVector& sym_tags,
1852 E : const DiaBrowser::SymbolPtrVector& symbols) {
1853 E : DCHECK_LT(0u, sym_tags.size());
1854 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1855 E : DCHECK_EQ(SymTagPublicSymbol, sym_tags.back());
1856 E : const DiaBrowser::SymbolPtr& symbol(symbols.back());
1857 :
1858 : // We don't care about symbols that don't have addresses.
1859 E : DWORD rva = 0;
1860 E : if (S_OK != symbol->get_relativeVirtualAddress(&rva))
1861 E : return DiaBrowser::kBrowserContinue;
1862 :
1863 E : ScopedBstr name_bstr;
1864 E : if (S_OK != symbol->get_name(name_bstr.Receive())) {
1865 i : LOG(ERROR) << "Failed to get public symbol name.";
1866 i : return DiaBrowser::kBrowserAbort;
1867 : }
1868 :
1869 E : std::string name;
1870 E : if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1871 i : LOG(ERROR) << "Failed to convert symbol name to UTF8.";
1872 i : return DiaBrowser::kBrowserAbort;
1873 : }
1874 :
1875 E : RelativeAddress addr(rva);
1876 E : BlockGraph::Block* block = image_->GetBlockByAddress(addr);
1877 E : if (block == NULL) {
1878 i : LOG(ERROR) << "No block found for public symbol \"" << name << "\".";
1879 i : return DiaBrowser::kBrowserAbort;
1880 : }
1881 :
1882 : // Public symbol names are mangled. Remove leading '_' as per
1883 : // http://msdn.microsoft.com/en-us/library/00kh39zz(v=vs.80).aspx
1884 E : if (name[0] == '_')
1885 E : name = name.substr(1);
1886 :
1887 E : if (!AddLabelToBlock(addr, name, BlockGraph::PUBLIC_SYMBOL_LABEL, block))
1888 i : return DiaBrowser::kBrowserAbort;
1889 :
1890 E : return DiaBrowser::kBrowserContinue;
1891 E : }
1892 :
1893 E : bool Decomposer::ProcessStaticInitializers() {
1894 : typedef std::pair<RelativeAddress, RelativeAddress> AddressPair;
1895 : typedef std::map<std::string, AddressPair> AddressPairMap;
1896 :
1897 E : const RelativeAddress kNull(0);
1898 :
1899 : // This stores pairs of addresses, representing the beginning and the end
1900 : // of each static initializer block. It is keyed with a string, which is
1901 : // returned by the match group of the corresponding initializer pattern.
1902 : // The key is necessary to correlate matching labels (as multiple pairs
1903 : // of labels may match through a single pattern).
1904 E : AddressPairMap addr_pair_map;
1905 :
1906 : // Used for keeping track of which label, if any, we matched.
1907 : enum MatchType {
1908 : kMatchNone,
1909 : kMatchBeginLabel,
1910 : kMatchEndLabel
1911 : };
1912 :
1913 : // Iterate through all data blocks, looking for known initializer labels.
1914 E : BlockGraph::AddressSpace::RangeMapConstIter block_it = image_->begin();
1915 E : for (; block_it != image_->end(); ++block_it) {
1916 E : const BlockGraph::Block* block = block_it->second;
1917 : // Skip non-data blocks.
1918 E : if (block->type() != BlockGraph::DATA_BLOCK)
1919 E : continue;
1920 :
1921 : // Check the block name against each of the initializer patterns.
1922 E : MatchType match = kMatchNone;
1923 E : std::string block_name = block->name();
1924 E : std::string name;
1925 E : for (size_t i = 0; i < static_initializer_patterns_.size(); ++i) {
1926 E : REPair& re_pair(static_initializer_patterns_[i]);
1927 E : if (re_pair.first.FullMatch(block_name, &name))
1928 E : match = kMatchBeginLabel;
1929 E : else if (re_pair.second.FullMatch(block_name, &name))
1930 E : match = kMatchEndLabel;
1931 :
1932 E : if (match != kMatchNone)
1933 E : break;
1934 E : }
1935 :
1936 : // No pattern matched this symbol? Continue to the next one.
1937 E : if (match == kMatchNone)
1938 E : continue;
1939 :
1940 : // Ensure this symbol exists in the map. Thankfully, addresses default
1941 : // construct to NULL.
1942 E : AddressPair& addr_pair = addr_pair_map[name];
1943 :
1944 : // Update the bracketing symbol endpoint. Make sure each symbol endpoint
1945 : // is only seen once.
1946 E : RelativeAddress* addr = NULL;
1947 E : RelativeAddress new_addr;
1948 E : if (match == kMatchBeginLabel) {
1949 E : addr = &addr_pair.first;
1950 E : new_addr = block->addr();
1951 E : } else {
1952 E : addr = &addr_pair.second;
1953 E : new_addr = block->addr() + block->size();
1954 : }
1955 E : if (*addr != kNull) {
1956 i : LOG(ERROR) << "Bracketing symbol appears multiple times: "
1957 : << block_name;
1958 i : return false;
1959 : }
1960 E : *addr = new_addr;
1961 E : }
1962 :
1963 : // Use the bracketing symbols to make the initializers contiguous.
1964 E : AddressPairMap::const_iterator init_it = addr_pair_map.begin();
1965 E : for (; init_it != addr_pair_map.end(); ++init_it) {
1966 E : RelativeAddress begin_addr = init_it->second.first;
1967 E : if (begin_addr == kNull) {
1968 i : LOG(ERROR) << "Bracketing start symbol missing: " << init_it->first;
1969 i : return false;
1970 : }
1971 :
1972 E : RelativeAddress end_addr = init_it->second.second;
1973 E : if (end_addr == kNull) {
1974 i : LOG(ERROR) << "Bracketing end symbol missing: " << init_it->first;
1975 i : return false;
1976 : }
1977 :
1978 E : if (begin_addr > end_addr) {
1979 i : LOG(ERROR) << "Bracketing symbols out of order: " << init_it->first;
1980 i : return false;
1981 : }
1982 :
1983 : // Merge the initializers.
1984 E : DataSpace::Range range(begin_addr, end_addr - begin_addr);
1985 E : BlockGraph::Block* merged = image_->MergeIntersectingBlocks(range);
1986 : std::string name = base::StringPrintf("Bracketed Initializers: %s",
1987 E : init_it->first.c_str());
1988 E : DCHECK(merged != NULL);
1989 E : merged->set_name(name);
1990 E : merged->set_attribute(BlockGraph::COFF_GROUP);
1991 E : }
1992 :
1993 E : return true;
1994 E : }
1995 :
1996 E : bool Decomposer::ProcessDataSymbols(IDiaSymbol* root) {
1997 : DiaBrowser::MatchCallback on_data_symbol(
1998 E : base::Bind(&Decomposer::OnDataSymbol, base::Unretained(this)));
1999 :
2000 E : DiaBrowser dia_browser;
2001 : dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagData),
2002 E : on_data_symbol);
2003 : dia_browser.AddPattern(Seq(SymTagCompiland, SymTagFunction,
2004 : Star(SymTagBlock), SymTagData),
2005 E : on_data_symbol);
2006 :
2007 E : return dia_browser.Browse(root);
2008 E : }
2009 :
2010 E : bool Decomposer::ProcessPublicSymbols(IDiaSymbol* root) {
2011 : DiaBrowser::MatchCallback on_public_symbol(
2012 E : base::Bind(&Decomposer::OnPublicSymbol, base::Unretained(this)));
2013 :
2014 E : DiaBrowser dia_browser;
2015 E : dia_browser.AddPattern(SymTagPublicSymbol, on_public_symbol);
2016 :
2017 E : return dia_browser.Browse(root);
2018 E : }
2019 :
2020 E : bool Decomposer::GuessDataBlockAlignments() {
2021 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2022 : // Iterate through all the image sections.
2023 E : for (size_t i = 0; i < num_sections; ++i) {
2024 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2025 E : DCHECK(header != NULL);
2026 :
2027 : // Only iterate through data sections.
2028 E : if (GetSectionType(header) != kSectionData)
2029 E : continue;
2030 :
2031 E : RelativeAddress section_begin(header->VirtualAddress);
2032 E : size_t section_length = header->Misc.VirtualSize;
2033 :
2034 : // Get the range of blocks in this section.
2035 : BlockGraph::AddressSpace::RangeMapIterPair it_pair =
2036 E : image_->GetIntersectingBlocks(section_begin, section_length);
2037 :
2038 : // Iterate through the blocks in the section, setting their alignment.
2039 E : BlockGraph::AddressSpace::RangeMapIter it = it_pair.first;
2040 E : for (; it != it_pair.second; ++it) {
2041 E : BlockGraph::Block* block = it->second;
2042 : GuessDataBlockAlignment(block,
2043 E : image_file_.nt_headers()->OptionalHeader.SectionAlignment);
2044 E : }
2045 E : }
2046 :
2047 E : return true;
2048 E : }
2049 :
2050 E : bool Decomposer::CreateCodeReferences() {
2051 E : BlockGraph::BlockMap::iterator it(image_->graph()->blocks_mutable().begin());
2052 E : BlockGraph::BlockMap::iterator end(image_->graph()->blocks_mutable().end());
2053 E : for (; it != end; ++it) {
2054 E : BlockGraph::Block* block = &it->second;
2055 :
2056 E : if (block->type() != BlockGraph::CODE_BLOCK)
2057 E : continue;
2058 :
2059 : // We shouldn't attempt disassembly on unsafe blocks. The new decomposer
2060 : // has this fixed, but this is a workaround here for now.
2061 : if (!pe::PETransformPolicy::CodeBlockAttributesAreBasicBlockSafe(
2062 E : block, false)) {
2063 E : continue;
2064 : }
2065 :
2066 E : if (!CreateCodeReferencesForBlock(block))
2067 i : return false;
2068 E : }
2069 :
2070 E : return true;
2071 E : }
2072 :
2073 E : bool Decomposer::CreateCodeReferencesForBlock(BlockGraph::Block* block) {
2074 E : DCHECK(current_block_ == NULL);
2075 E : current_block_ = block;
2076 :
2077 E : RelativeAddress block_addr;
2078 E : if (!image_->GetAddressOf(block, &block_addr)) {
2079 i : LOG(ERROR) << "Block \"" << block->name() << "\" has no address.";
2080 i : return false;
2081 : }
2082 :
2083 E : AbsoluteAddress abs_block_addr;
2084 E : if (!image_file_.Translate(block_addr, &abs_block_addr)) {
2085 i : LOG(ERROR) << "Unable to get absolute address for " << block_addr;
2086 i : return false;
2087 : }
2088 :
2089 : Disassembler::InstructionCallback on_instruction(
2090 E : base::Bind(&Decomposer::OnInstruction, base::Unretained(this)));
2091 :
2092 : // Use block labels and code references as starting points for disassembly.
2093 E : Disassembler::AddressSet starting_points;
2094 : GetDisassemblyStartingPoints(block, abs_block_addr, reloc_set_,
2095 E : &starting_points);
2096 :
2097 : // If the block has no starting points, then it has no private symbols and
2098 : // is not BB safe. We mark the block as not safe for basic-block disassembly.
2099 : if (starting_points.empty() &&
2100 E : (block->attributes() & BlockGraph::GAP_BLOCK) == 0) {
2101 E : VLOG(1) << "Block \"" << block->name() << "\" has no private symbols.";
2102 E : block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
2103 : }
2104 :
2105 : // Determine whether or not we are being strict with disassembly.
2106 : // NOTE: This is particularly ugly. Decomposer should not depend on the
2107 : // transform policy object. In fact, Decomposer should not even be doing
2108 : // disassembly. This all disappears in the new decomposer.
2109 : bool strict = PETransformPolicy::CodeBlockAttributesAreBasicBlockSafe(
2110 E : block, false);
2111 E : be_strict_with_current_block_ = false;
2112 :
2113 : // Determine the length of the code portion of the block by trimming off any
2114 : // known trailing data. Also, if we're in strict mode, ensure that our
2115 : // assumption regarding code/data layout is met.
2116 E : size_t code_size = 0;
2117 : if (!BlockHasExpectedCodeDataLayout(block, &code_size) &&
2118 E : be_strict_with_current_block_) {
2119 i : LOG(ERROR) << "Block \"" << block->name() << "\" has unexpected code/data "
2120 : << "layout.";
2121 i : return false;
2122 : }
2123 :
2124 : // Disassemble the block.
2125 : Disassembler disasm(block->data(),
2126 : code_size,
2127 : abs_block_addr,
2128 : starting_points,
2129 E : on_instruction);
2130 E : Disassembler::WalkResult result = disasm.Walk();
2131 :
2132 : // If we're strict (that is, we're confident that the block was produced by
2133 : // cl.exe), then we can use that knowledge to look for calls that appear to be
2134 : // to non-returning functions that we may not have symbol info for.
2135 E : if (be_strict_with_current_block_)
2136 i : LookForNonReturningFunctions(references_, *image_, current_block_, disasm);
2137 :
2138 E : DCHECK_EQ(block, current_block_);
2139 E : current_block_ = NULL;
2140 E : be_strict_with_current_block_ = true;
2141 :
2142 E : switch (result) {
2143 : case Disassembler::kWalkIncomplete:
2144 : // There were computed branches that couldn't be chased down.
2145 E : block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2146 E : return true;
2147 :
2148 : case Disassembler::kWalkTerminated:
2149 : // This exit condition should only ever occur for non-strict disassembly.
2150 : // If strict, we should always get kWalkError.
2151 i : DCHECK(!strict);
2152 : // This means that they code was malformed, or broke some expected
2153 : // conventions. This code is not safe for basic block disassembly.
2154 i : block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
2155 i : return true;
2156 :
2157 : case Disassembler::kWalkSuccess:
2158 : // Were any bytes in the block not accounted for? This generally means
2159 : // unreachable code, which we see quite often, especially in debug builds.
2160 E : if (disasm.code_size() != disasm.disassembled_bytes())
2161 E : block->set_attribute(BlockGraph::INCOMPLETE_DISASSEMBLY);
2162 E : return true;
2163 :
2164 : case Disassembler::kWalkError:
2165 i : return false;
2166 :
2167 : default:
2168 i : NOTREACHED() << "Unhandled Disassembler WalkResult.";
2169 i : return false;
2170 : }
2171 E : }
2172 :
2173 : BlockGraph::Block* Decomposer::CreateBlock(BlockGraph::BlockType type,
2174 : RelativeAddress address,
2175 : BlockGraph::Size size,
2176 E : const base::StringPiece& name) {
2177 E : BlockGraph::Block* block = image_->AddBlock(type, address, size, name);
2178 E : if (block == NULL) {
2179 i : LOG(ERROR) << "Unable to add block at " << address << " with size "
2180 : << size << ".";
2181 i : return NULL;
2182 : }
2183 :
2184 : // Mark the source range from whence this block originates.
2185 : bool pushed = block->source_ranges().Push(
2186 : BlockGraph::Block::DataRange(0, size),
2187 E : BlockGraph::Block::SourceRange(address, size));
2188 E : DCHECK(pushed);
2189 :
2190 E : BlockGraph::SectionId section = image_file_.GetSectionIndex(address, size);
2191 E : if (section == BlockGraph::kInvalidSectionId) {
2192 i : LOG(ERROR) << "Block at " << address << " with size " << size
2193 : << " lies outside of all sections.";
2194 i : return NULL;
2195 : }
2196 E : block->set_section(section);
2197 :
2198 E : const uint8* data = image_file_.GetImageData(address, size);
2199 E : if (data != NULL)
2200 E : block->SetData(data, size);
2201 :
2202 E : return block;
2203 E : }
2204 :
2205 : BlockGraph::Block* Decomposer::FindOrCreateBlock(
2206 : BlockGraph::BlockType type,
2207 : RelativeAddress addr,
2208 : BlockGraph::Size size,
2209 : const base::StringPiece& name,
2210 E : FindOrCreateBlockDirective directive) {
2211 E : BlockGraph::Block* block = image_->GetBlockByAddress(addr);
2212 E : if (block != NULL) {
2213 : // If we got a block we're guaranteed that it at least partially covers
2214 : // the query range, so we can immediately return it in that case.
2215 E : if (directive == kAllowPartialCoveringBlock)
2216 E : return block;
2217 :
2218 E : if (block->attributes() & BlockGraph::PE_PARSED) {
2219 : // Always allow collisions where the new block is a proper subset of
2220 : // an existing PE parsed block. The PE parser often knows more than we do
2221 : // about blocks that need to stick together.
2222 E : directive = kAllowCoveringBlock;
2223 :
2224 : // Allow PE-parsed blocks to be grown to reflect reality. For example,
2225 : // in VS2013 the linker makes space for 2 debug directories rather than
2226 : // just one, and the symbols reflect this. We parse the debug directory
2227 : // with the size indicated in the PE header, which conflicts with that
2228 : // indicated by the section contributions.
2229 E : if (name == "* Linker *" && size > block->size()) {
2230 E : if (!image_->ResizeBlock(block, size)) {
2231 i : LOG(ERROR) << "Failed to extend PE parsed block with linker "
2232 : << "section contribution.";
2233 i : return false;
2234 : }
2235 E : const uint8* data = image_file_.GetImageData(addr, size);
2236 E : block->SetData(data, size);
2237 : }
2238 : }
2239 :
2240 E : bool collision = false;
2241 E : switch (directive) {
2242 : case kExpectNoBlock: {
2243 i : collision = true;
2244 i : break;
2245 : }
2246 : case kAllowIdenticalBlock: {
2247 i : collision = (block->addr() != addr || block->size() != size);
2248 i : break;
2249 : }
2250 : default: {
2251 E : DCHECK(directive == kAllowCoveringBlock);
2252 : collision = block->addr() > addr ||
2253 E : (block->addr() + block->size()) < addr + size;
2254 : break;
2255 : }
2256 : }
2257 :
2258 E : if (collision) {
2259 i : LOG(ERROR) << "Block collision for \"" << name.as_string() << "\" at "
2260 : << addr << "(" << size << ") with existing block \""
2261 : << block->name() << "\" at " << block->addr() << " ("
2262 : << block->size() << ").";
2263 i : return NULL;
2264 : }
2265 :
2266 E : return block;
2267 : }
2268 E : DCHECK(block == NULL);
2269 :
2270 E : return CreateBlock(type, addr, size, name);
2271 E : }
2272 :
2273 : CallbackDirective Decomposer::LookPastInstructionForData(
2274 E : RelativeAddress instr_end) {
2275 : // If this instruction terminates at a data boundary (ie: the *next*
2276 : // instruction will be data or a reloc), we can be certain that a new
2277 : // lookup table is starting at this address.
2278 E : if (reloc_set_.find(instr_end) == reloc_set_.end())
2279 E : return Disassembler::kDirectiveContinue;
2280 :
2281 : // Find the block housing the reloc. We expect the reloc to be contained
2282 : // completely within this block.
2283 E : BlockGraph::Block* block = image_->GetContainingBlock(instr_end, 4);
2284 E : if (block != current_block_) {
2285 i : CHECK(block != NULL);
2286 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2287 : << "Found an instruction/data boundary between blocks: "
2288 : << current_block_->name() << " and " << block->name();
2289 i : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2290 : }
2291 :
2292 E : BlockGraph::Offset offset = instr_end - block->addr();
2293 :
2294 : // We expect there to be a jump-table data label already.
2295 E : BlockGraph::Label label;
2296 E : bool have_label = block->GetLabel(offset, &label);
2297 : if (!have_label || !label.has_attributes(
2298 E : BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)) {
2299 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2300 : << "Expected there to be a data label marking the jump "
2301 : << "table at " << block->name() << " + " << offset << ".";
2302 :
2303 : // If we're in strict mode, we're a block that obeys standard conventions.
2304 : // Which means we should already be aware of any jump tables in this block.
2305 i : if (be_strict_with_current_block_)
2306 i : return Disassembler::kDirectiveAbort;
2307 :
2308 : // If we're not in strict mode, add the jump-table label.
2309 i : if (have_label) {
2310 i : CHECK(block->RemoveLabel(offset));
2311 : }
2312 :
2313 : CHECK(block->SetLabel(offset, BlockGraph::Label(
2314 : base::StringPrintf("<JUMP-TABLE-%d>", offset),
2315 i : BlockGraph::DATA_LABEL | BlockGraph::JUMP_TABLE_LABEL)));
2316 : }
2317 :
2318 E : return Disassembler::kDirectiveTerminatePath;
2319 E : }
2320 :
2321 E : void Decomposer::MarkDisassembledPastEnd() {
2322 E : DCHECK(current_block_ != NULL);
2323 E : current_block_->set_attribute(BlockGraph::DISASSEMBLED_PAST_END);
2324 : // TODO(chrisha): The entire "disassembled past end" and non-returning
2325 : // function infrastructure can be ripped out once we rework the BB
2326 : // disassembler to be straight path, and remove the disassembly phase
2327 : // from the decomposer (where it's no longer needed). In the meantime
2328 : // we simply crank down this log verbosity due to all of the false
2329 : // positives.
2330 E : VLOG(1) << "Disassembled past end of block or into known data for block \""
2331 : << current_block_->name() << "\" at " << current_block_->addr()
2332 : << ".";
2333 E : }
2334 :
2335 : CallbackDirective Decomposer::VisitNonFlowControlInstruction(
2336 E : RelativeAddress instr_start, RelativeAddress instr_end) {
2337 : // TODO(chrisha): We could walk the operands and follow references
2338 : // explicitly. If any of them are of reference type and there's no
2339 : // matching reference, this would be cause to blow up and die (we
2340 : // should get all of these as relocs and/or fixups).
2341 :
2342 : IntermediateReferenceMap::const_iterator ref_it =
2343 E : references_.upper_bound(instr_start);
2344 : IntermediateReferenceMap::const_iterator ref_end =
2345 E : references_.lower_bound(instr_end);
2346 :
2347 E : for (; ref_it != ref_end; ++ref_it) {
2348 : BlockGraph::Block* ref_block = image_->GetContainingBlock(
2349 E : ref_it->second.base, 1);
2350 E : DCHECK(ref_block != NULL);
2351 :
2352 : // This is an inter-block reference.
2353 E : if (ref_block != current_block_) {
2354 : // There should be no cross-block references to the middle of other
2355 : // code blocks (to the top is fine, as we could be passing around a
2356 : // function pointer). The exception is if the remote block is not
2357 : // generated by cl.exe. In this case, there could be arbitrary labels
2358 : // that act like functions within the body of that block, and referring
2359 : // to them is perfectly fine.
2360 E : bool ref_attr_safe = true;
2361 E : if (ref_block->type() == BlockGraph::CODE_BLOCK) {
2362 : ref_attr_safe =
2363 : PETransformPolicy::CodeBlockAttributesAreBasicBlockSafe(ref_block,
2364 E : false);
2365 : }
2366 : if (ref_block->type() == BlockGraph::CODE_BLOCK &&
2367 : ref_it->second.base != ref_block->addr() &&
2368 E : ref_attr_safe) {
2369 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2370 : << "Found a non-control-flow code-block to middle-of-code-block "
2371 : << "reference from block \"" << current_block_->name()
2372 : << "\" to block \"" << ref_block->name() << "\".";
2373 i : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2374 : }
2375 E : } else {
2376 : // This is an intra-block reference.
2377 : BlockGraph::Offset ref_offset =
2378 E : ref_it->second.base - current_block_->addr();
2379 :
2380 : // If this is to offset zero, we assume we are taking a pointer to
2381 : // ourself, which is safe.
2382 E : if (ref_offset != 0) {
2383 : // If this is 'clean' code it should be to data, and there should be a
2384 : // label.
2385 E : BlockGraph::Label label;
2386 E : if (!current_block_->GetLabel(ref_offset, &label)) {
2387 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2388 : << "Found an intra-block data-reference with no label.";
2389 i : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2390 i : } else {
2391 : if (!label.has_attributes(BlockGraph::DATA_LABEL) ||
2392 E : label.has_attributes(BlockGraph::CODE_LABEL)) {
2393 i : LOG_ERROR_OR_VLOG1(be_strict_with_current_block_)
2394 : << "Found an intra-block data-like reference to a non-data "
2395 : << "or code label in block \"" << current_block_->name()
2396 : << "\".";
2397 i : return AbortOrTerminateDisassembly(be_strict_with_current_block_);
2398 : }
2399 : }
2400 E : }
2401 : }
2402 E : }
2403 :
2404 E : return Disassembler::kDirectiveContinue;
2405 E : }
2406 :
2407 : CallbackDirective Decomposer::VisitPcRelativeFlowControlInstruction(
2408 : AbsoluteAddress instr_abs,
2409 : RelativeAddress instr_rel,
2410 : const _DInst& instruction,
2411 E : bool end_of_code) {
2412 E : int fc = META_GET_FC(instruction.meta);
2413 E : DCHECK(fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH);
2414 E : DCHECK_EQ(O_PC, instruction.ops[0].type);
2415 E : DCHECK_EQ(O_NONE, instruction.ops[1].type);
2416 E : DCHECK_EQ(O_NONE, instruction.ops[2].type);
2417 E : DCHECK_EQ(O_NONE, instruction.ops[3].type);
2418 : DCHECK(instruction.ops[0].size == 8 ||
2419 : instruction.ops[0].size == 16 ||
2420 E : instruction.ops[0].size == 32);
2421 : // Distorm gives us size in bits, we want bytes.
2422 E : BlockGraph::Size size = instruction.ops[0].size / 8;
2423 :
2424 : // Get the reference's address. Note we assume it's in the instruction's
2425 : // tail end - I don't know of a case where a PC-relative offset in a branch
2426 : // or call is not the very last thing in an x86 instruction.
2427 E : AbsoluteAddress abs_src = instr_abs + instruction.size - size;
2428 : AbsoluteAddress abs_dst = instr_abs + instruction.size +
2429 E : static_cast<size_t>(instruction.imm.addr);
2430 :
2431 E : RelativeAddress src, dst;
2432 : if (!image_file_.Translate(abs_src, &src) ||
2433 E : !image_file_.Translate(abs_dst, &dst)) {
2434 i : LOG(ERROR) << "Unable to translate absolute to relative addresses.";
2435 i : return Disassembler::kDirectiveAbort;
2436 : }
2437 :
2438 : // Get the block associated with the destination address. It must exist
2439 : // and be a code block.
2440 E : BlockGraph::Block* block = image_->GetContainingBlock(dst, 1);
2441 E : DCHECK(block != NULL);
2442 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
2443 :
2444 : // For short references, we should not see a fixup.
2445 E : ValidateOrAddReferenceMode mode = FIXUP_MUST_NOT_EXIST;
2446 E : if (size == kPointerSize) {
2447 : // Long PC_RELATIVE reference within a single block? FIXUPs aren't
2448 : // strictly necessary.
2449 E : if (block->Contains(src, kPointerSize))
2450 E : mode = FIXUP_MAY_EXIST;
2451 E : else
2452 : // But if they're between blocks (section contributions), we expect to
2453 : // find them.
2454 E : mode = FIXUP_MUST_EXIST;
2455 E : } else {
2456 : // Since we slice by section contributions we no longer see short
2457 : // references across blocks. If we do, bail!
2458 E : if (block != current_block_) {
2459 i : LOG(ERROR) << "Found a short PC-relative reference out of block \""
2460 : << current_block_->name() << "\".";
2461 i : return Disassembler::kDirectiveAbort;
2462 : }
2463 : }
2464 :
2465 : // Validate or create the reference, as necessary.
2466 : if (!ValidateOrAddReference(mode, src, BlockGraph::PC_RELATIVE_REF, size,
2467 E : dst, 0, &fixup_map_, &references_)) {
2468 i : LOG(ERROR) << "Failed to validate/create reference originating from "
2469 : << "block \"" << current_block_->name() << "\".";
2470 i : return Disassembler::kDirectiveAbort;
2471 : }
2472 :
2473 : // If this is a call and the destination is a non-returning function,
2474 : // then indicate that we should terminate this disassembly path.
2475 : if (fc == FC_CALL &&
2476 E : (block->attributes() & BlockGraph::NON_RETURN_FUNCTION)) {
2477 : // TODO(chrisha): For now, we enforce that the call be to the beginning
2478 : // of the function. This may not be necessary, but better safe than
2479 : // sorry for now.
2480 E : if (block->addr() != dst) {
2481 i : LOG(ERROR) << "Calling inside the body of a non-returning function: "
2482 : << block->name();
2483 i : return Disassembler::kDirectiveAbort;
2484 : }
2485 :
2486 E : return Disassembler::kDirectiveTerminatePath;
2487 : }
2488 :
2489 : // If we get here, then we don't think it's a non-returning call. If it's
2490 : // not an unconditional jump and we're at the end of the code for this block
2491 : // then we consider this as disassembling past the end.
2492 E : if (fc != FC_UNC_BRANCH && end_of_code)
2493 i : MarkDisassembledPastEnd();
2494 :
2495 E : return Disassembler::kDirectiveContinue;
2496 E : }
2497 :
2498 : CallbackDirective Decomposer::VisitIndirectMemoryCallInstruction(
2499 E : const _DInst& instruction, bool end_of_code) {
2500 E : DCHECK_EQ(FC_CALL, META_GET_FC(instruction.meta));
2501 E : DCHECK_EQ(O_DISP, instruction.ops[0].type);
2502 :
2503 : // TODO(rogerm): Consider changing to image_file_.AbsToRelDisplacement()
2504 : // instead of translate. In theory, the indexing into a function-table
2505 : // could be statically offset such that the displacement falls outside
2506 : // of the image's address space. But, we have never seen the compiler
2507 : // generate code like that. This is left to use Translate, which will
2508 : // trigger an error in such a case.
2509 E : AbsoluteAddress disp_addr_abs(static_cast<uint32>(instruction.disp));
2510 E : RelativeAddress disp_addr_rel;
2511 E : if (!image_file_.Translate(disp_addr_abs, &disp_addr_rel)) {
2512 i : LOG(ERROR) << "Unable to translate call address.";
2513 i : return Disassembler::kDirectiveAbort;
2514 : }
2515 :
2516 : // Try to dereference the address of the call instruction. This can fail
2517 : // for blocks that are only initialized at runtime, so we don't fail if
2518 : // we don't find a reference.
2519 : IntermediateReferenceMap::const_iterator ref_it =
2520 E : references_.find(disp_addr_rel);
2521 E : if (ref_it == references_.end())
2522 E : return Disassembler::kDirectiveContinue;
2523 :
2524 : // NOTE: This process derails for bound import tables. In this case the
2525 : // attempted dereference above will fail, but we could still actually
2526 : // find the import name thunk by inspecting the offset of the memory
2527 : // location.
2528 :
2529 : // The reference must be direct and 32-bit.
2530 E : const IntermediateReference& ref = ref_it->second;
2531 E : DCHECK_EQ(BlockGraph::Reference::kMaximumSize, ref.size);
2532 E : DCHECK_EQ(0, ref.offset);
2533 :
2534 : // Look up the thunk this refers to.
2535 E : BlockGraph::Block* thunk = image_->GetBlockByAddress(ref.base);
2536 E : if (thunk == NULL) {
2537 i : LOG(ERROR) << "Unable to dereference intermediate reference at "
2538 : << disp_addr_rel << " to " << ref.base << ".";
2539 i : return Disassembler::kDirectiveAbort;
2540 : }
2541 :
2542 E : if (ref.type == BlockGraph::RELATIVE_REF) {
2543 : // If this is a relative reference it must be part of an import address
2544 : // table (during runtime this address would be patched up with an absolute
2545 : // reference). Thus we expect the referenced block to be data, an import
2546 : // name thunk.
2547 E : DCHECK_EQ(BlockGraph::DATA_BLOCK, thunk->type());
2548 E : } else {
2549 : // If this is an absolute address it should actually point directly to
2550 : // code.
2551 E : DCHECK_EQ(BlockGraph::ABSOLUTE_REF, ref.type);
2552 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, thunk->type());
2553 : }
2554 :
2555 : // Either way, if the block is non-returning we terminate this path of
2556 : // disassembly.
2557 E : if ((thunk->attributes() & BlockGraph::NON_RETURN_FUNCTION) != 0)
2558 E : return Disassembler::kDirectiveTerminatePath;
2559 :
2560 E : if (end_of_code)
2561 i : MarkDisassembledPastEnd();
2562 :
2563 E : return Disassembler::kDirectiveContinue;
2564 E : }
2565 :
2566 : CallbackDirective Decomposer::OnInstruction(const Disassembler& walker,
2567 E : const _DInst& instruction) {
2568 : // Get the relative address of this instruction.
2569 E : AbsoluteAddress instr_abs(static_cast<uint32>(instruction.addr));
2570 E : RelativeAddress instr_rel;
2571 E : if (!image_file_.Translate(instr_abs, &instr_rel)) {
2572 i : LOG(ERROR) << "Unable to translate instruction address.";
2573 i : return Disassembler::kDirectiveAbort;
2574 : }
2575 E : RelativeAddress after_instr_rel = instr_rel + instruction.size;
2576 :
2577 : #ifndef NDEBUG
2578 : // If we're in debug mode, it's helpful to have a pointer directly to the
2579 : // beginning of this instruction in memory.
2580 E : BlockGraph::Offset instr_offset = instr_rel - current_block_->addr();
2581 E : const uint8* instr_data = current_block_->data() + instr_offset;
2582 : #endif
2583 :
2584 : // TODO(chrisha): Certain instructions require aligned data (ie: MMX/SSE
2585 : // instructions). We need to follow the data that these instructions
2586 : // refer to, and set their alignment appropriately. For now, alignment
2587 : // is simply preserved from the original image.
2588 :
2589 E : CallbackDirective directive = LookPastInstructionForData(after_instr_rel);
2590 E : if (IsFatalCallbackDirective(directive))
2591 i : return directive;
2592 :
2593 : // We're at the end of code in this block if we encountered data, or this is
2594 : // the last instruction to be processed.
2595 E : RelativeAddress block_end(current_block_->addr() + current_block_->size());
2596 : bool end_of_code = (directive == Disassembler::kDirectiveTerminatePath) ||
2597 E : (after_instr_rel >= block_end);
2598 :
2599 E : int fc = META_GET_FC(instruction.meta);
2600 :
2601 E : if (fc == FC_NONE) {
2602 : // There's no control flow and we're at the end of the block. Mark the
2603 : // block as dirty.
2604 E : if (end_of_code)
2605 i : MarkDisassembledPastEnd();
2606 :
2607 : return CombineCallbackDirectives(directive,
2608 E : VisitNonFlowControlInstruction(instr_rel, after_instr_rel));
2609 : }
2610 :
2611 : if ((fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH) &&
2612 E : instruction.ops[0].type == O_PC) {
2613 : // For all branches, calls and conditional branches to PC-relative
2614 : // addresses, record a PC-relative reference.
2615 : return CombineCallbackDirectives(directive,
2616 : VisitPcRelativeFlowControlInstruction(instr_abs,
2617 : instr_rel,
2618 : instruction,
2619 E : end_of_code));
2620 : }
2621 :
2622 : // We explicitly handle indirect memory call instructions. These can often
2623 : // be tracked down as pointing to a block in this image, or to an import
2624 : // name thunk from another module.
2625 E : if (fc == FC_CALL && instruction.ops[0].type == O_DISP) {
2626 : return CombineCallbackDirectives(directive,
2627 E : VisitIndirectMemoryCallInstruction(instruction, end_of_code));
2628 : }
2629 :
2630 : // Look out for blocks where disassembly seems to run off the end of the
2631 : // block. We do not treat interrupts as flow control as execution can
2632 : // continue past the interrupt.
2633 E : if (fc != FC_RET && fc != FC_UNC_BRANCH && end_of_code)
2634 E : MarkDisassembledPastEnd();
2635 :
2636 E : return directive;
2637 E : }
2638 :
2639 : bool Decomposer::CreatePEImageBlocksAndReferences(
2640 E : PEFileParser::PEHeader* header) {
2641 : PEFileParser::AddReferenceCallback add_reference(
2642 E : base::Bind(&Decomposer::AddReferenceCallback, base::Unretained(this)));
2643 E : PEFileParser parser(image_file_, image_, add_reference);
2644 : parser.set_on_import_thunk(
2645 E : base::Bind(&Decomposer::OnImportThunkCallback, base::Unretained(this)));
2646 :
2647 E : if (!parser.ParseImage(header)) {
2648 i : LOG(ERROR) << "Unable to parse PE image.";
2649 i : return false;
2650 : }
2651 :
2652 E : return true;
2653 E : }
2654 :
2655 E : bool Decomposer::FinalizeIntermediateReferences() {
2656 E : IntermediateReferenceMap::const_iterator it(references_.begin());
2657 E : IntermediateReferenceMap::const_iterator end(references_.end());
2658 :
2659 E : for (; it != end; ++it) {
2660 E : RelativeAddress src_addr(it->first);
2661 E : BlockGraph::Block* src = image_->GetBlockByAddress(src_addr);
2662 E : RelativeAddress dst_base_addr(it->second.base);
2663 E : RelativeAddress dst_addr(dst_base_addr + it->second.offset);
2664 E : BlockGraph::Block* dst = image_->GetBlockByAddress(dst_base_addr);
2665 :
2666 E : if (src == NULL || dst == NULL) {
2667 i : LOG(ERROR) << "Reference source or base destination address is out of "
2668 : << "range, src: " << src << ", dst: " << dst;
2669 i : return false;
2670 : }
2671 :
2672 E : RelativeAddress src_start = src->addr();
2673 E : RelativeAddress dst_start = dst->addr();
2674 :
2675 : // Get the offset of the ultimate destination relative to the start of the
2676 : // destination block.
2677 E : BlockGraph::Offset dst_offset = dst_addr - dst_start;
2678 :
2679 : // Get the offset of the actual referenced object relative to the start of
2680 : // the destination block.
2681 E : BlockGraph::Offset dst_base = dst_base_addr - dst_start;
2682 :
2683 : BlockGraph::Reference ref(it->second.type,
2684 : it->second.size,
2685 : dst,
2686 : dst_offset,
2687 E : dst_base);
2688 E : src->SetReference(src_addr - src_start, ref);
2689 E : }
2690 :
2691 E : references_.clear();
2692 :
2693 E : return true;
2694 E : }
2695 :
2696 E : bool Decomposer::ConfirmFixupsVisited() const {
2697 E : bool success = true;
2698 :
2699 : // Ideally, all fixups should have been visited during decomposition.
2700 : // TODO(chrisha): Address the root problems underlying the following
2701 : // temporary fix.
2702 E : FixupMap::const_iterator fixup_it = fixup_map_.begin();
2703 E : for (; fixup_it != fixup_map_.end(); ++fixup_it) {
2704 E : if (fixup_it->second.visited)
2705 E : continue;
2706 :
2707 : const BlockGraph::Block* block =
2708 E : image_->GetContainingBlock(fixup_it->first, kPointerSize);
2709 E : DCHECK(block != NULL);
2710 :
2711 : // We know that we currently do not have full disassembly coverage as there
2712 : // are several orphaned pieces of apparently unreachable code in the CRT
2713 : // that we do not disassemble, but which may contain jmp or call commands.
2714 : // Thus, we expect that missed fixups are all PC-relative and lie within
2715 : // code blocks.
2716 : if (block->type() == BlockGraph::CODE_BLOCK &&
2717 E : fixup_it->second.type == BlockGraph::PC_RELATIVE_REF)
2718 E : continue;
2719 :
2720 i : success = false;
2721 i : LOG(ERROR) << "Unexpected unseen fixup at " << fixup_it->second.location;
2722 i : }
2723 :
2724 E : return success;
2725 E : }
2726 :
2727 E : bool Decomposer::FindPaddingBlocks() {
2728 E : DCHECK(image_ != NULL);
2729 E : DCHECK(image_->graph() != NULL);
2730 :
2731 : BlockGraph::BlockMap::iterator block_it =
2732 E : image_->graph()->blocks_mutable().begin();
2733 E : for (; block_it != image_->graph()->blocks_mutable().end(); ++block_it) {
2734 E : BlockGraph::Block& block = block_it->second;
2735 :
2736 : // Padding blocks must not have any symbol information: no labels,
2737 : // no references, no referrers, and they must be a gap block.
2738 : if (block.labels().size() != 0 ||
2739 : block.references().size() != 0 ||
2740 : block.referrers().size() != 0 ||
2741 E : (block.attributes() & BlockGraph::GAP_BLOCK) == 0)
2742 E : continue;
2743 :
2744 E : switch (block.type()) {
2745 : // Code blocks should be fully defined and consist of only int3s.
2746 : case BlockGraph::CODE_BLOCK: {
2747 : if (block.data_size() != block.size() ||
2748 E : RepeatedValue(block.data(), block.data_size()) != kInt3)
2749 i : continue;
2750 E : break;
2751 : }
2752 :
2753 : // Data blocks should be uninitialized or have fully defined data
2754 : // consisting only of zeros.
2755 : default: {
2756 E : DCHECK_EQ(BlockGraph::DATA_BLOCK, block.type());
2757 E : if (block.data_size() == 0) // Uninitialized data blocks are padding.
2758 E : break;
2759 : if (block.data_size() != block.size() ||
2760 E : RepeatedValue(block.data(), block.data_size()) != 0)
2761 i : continue;
2762 : }
2763 : }
2764 :
2765 : // If we fall through to this point, then the block is a padding block.
2766 E : block.set_attribute(BlockGraph::PADDING_BLOCK);
2767 E : }
2768 :
2769 E : return true;
2770 E : }
2771 :
2772 E : bool Decomposer::CreateSections() {
2773 : // Iterate through the image sections, and create sections in the BlockGraph.
2774 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2775 E : for (size_t i = 0; i < num_sections; ++i) {
2776 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2777 E : std::string name = pe::PEFile::GetSectionName(*header);
2778 : BlockGraph::Section* section = image_->graph()->AddSection(
2779 E : name, header->Characteristics);
2780 E : DCHECK(section != NULL);
2781 :
2782 : // For now, we expect them to have been created with the same IDs as those
2783 : // in the original image.
2784 E : if (section->id() != i) {
2785 i : LOG(ERROR) << "Unexpected section ID.";
2786 i : return false;
2787 : }
2788 E : }
2789 :
2790 E : return true;
2791 E : }
2792 :
2793 E : bool Decomposer::LoadDebugStreams(IDiaSession* dia_session) {
2794 E : DCHECK(dia_session != NULL);
2795 :
2796 : // Load the fixups. These must exist.
2797 E : PdbFixups pdb_fixups;
2798 : SearchResult search_result = FindAndLoadDiaDebugStreamByName(
2799 E : kFixupDiaDebugStreamName, dia_session, &pdb_fixups);
2800 E : if (search_result != kSearchSucceeded) {
2801 i : if (search_result == kSearchFailed) {
2802 i : LOG(ERROR) << "PDB file does not contain a FIXUP stream. Module must be "
2803 : "linked with '/PROFILE' or '/DEBUGINFO:FIXUP' flag.";
2804 : }
2805 i : return false;
2806 : }
2807 :
2808 : // Load the omap_from table. It is not necessary that one exist.
2809 E : std::vector<OMAP> omap_from;
2810 : search_result = FindAndLoadDiaDebugStreamByName(
2811 E : kOmapFromDiaDebugStreamName, dia_session, &omap_from);
2812 E : if (search_result == kSearchErrored)
2813 i : return false;
2814 :
2815 : // Translate and validate fixups.
2816 E : if (!OmapAndValidateFixups(omap_from, pdb_fixups))
2817 i : return false;
2818 :
2819 E : return true;
2820 E : }
2821 :
2822 : bool Decomposer::OmapAndValidateFixups(const std::vector<OMAP>& omap_from,
2823 E : const PdbFixups& pdb_fixups) {
2824 E : bool have_omap = omap_from.size() != 0;
2825 :
2826 : // The resource section in Chrome is modified post-link by a tool that adds a
2827 : // manifest to it. This causes all of the fixups in the resource section (and
2828 : // anything beyond it) to be invalid. As long as the resource section is the
2829 : // last section in the image, this is not a problem (we can safely ignore the
2830 : // .rsrc fixups, which we know how to parse without them). However, if there
2831 : // is a section after the resource section, things will have been shifted
2832 : // and potentially crucial fixups will be invalid.
2833 E : RelativeAddress rsrc_start(0xffffffff), max_start;
2834 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
2835 E : for (size_t i = 0; i < num_sections; ++i) {
2836 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
2837 E : RelativeAddress start(header->VirtualAddress);
2838 E : if (start > max_start)
2839 E : max_start = start;
2840 : if (strncmp(kResourceSectionName,
2841 : reinterpret_cast<const char*>(header->Name),
2842 E : IMAGE_SIZEOF_SHORT_NAME) == 0) {
2843 E : rsrc_start = start;
2844 E : break;
2845 : }
2846 E : }
2847 :
2848 : // Ensure there are no sections after the resource section.
2849 E : if (max_start > rsrc_start) {
2850 i : LOG(ERROR) << kResourceSectionName << " section is not the last section.";
2851 i : return false;
2852 : }
2853 :
2854 : // Ensure the fixups are all valid, and populate the fixup map.
2855 E : for (size_t i = 0; i < pdb_fixups.size(); ++i) {
2856 E : if (!pdb_fixups[i].ValidHeader()) {
2857 i : LOG(ERROR) << "Unknown fixup header: "
2858 : << base::StringPrintf("0x%08X.", pdb_fixups[i].header);
2859 i : return false;
2860 : }
2861 :
2862 : // For now, we skip any offset fixups. We've only seen this in the context
2863 : // of TLS data access, and we don't mess with TLS structures.
2864 E : if (pdb_fixups[i].is_offset())
2865 E : continue;
2866 :
2867 : // All fixups we handle should be full size pointers.
2868 E : DCHECK_EQ(kPointerSize, pdb_fixups[i].size());
2869 :
2870 : // Get the original addresses, and map them through OMAP information.
2871 : // Normally DIA takes care of this for us, but there is no API for
2872 : // getting DIA to give us FIXUP information, so we have to do it manually.
2873 E : RelativeAddress rva_location(pdb_fixups[i].rva_location);
2874 E : RelativeAddress rva_base(pdb_fixups[i].rva_base);
2875 E : if (have_omap) {
2876 i : rva_location = pdb::TranslateAddressViaOmap(omap_from, rva_location);
2877 i : rva_base = pdb::TranslateAddressViaOmap(omap_from, rva_base);
2878 : }
2879 :
2880 : // If these are part of the .rsrc section, ignore them.
2881 E : if (rva_location >= rsrc_start)
2882 E : continue;
2883 :
2884 : // Ensure they live within the image, and refer to things within the
2885 : // image.
2886 : if (!image_file_.Contains(rva_location, kPointerSize) ||
2887 E : !image_file_.Contains(rva_base, 1)) {
2888 i : LOG(ERROR) << "Fixup refers to addresses outside of image.";
2889 i : return false;
2890 : }
2891 :
2892 : // Add the fix up, and ensure the source address is unique.
2893 E : Fixup fixup = { PdbFixupTypeToReferenceType(pdb_fixups[i].type),
2894 E : pdb_fixups[i].refers_to_code(),
2895 E : pdb_fixups[i].is_data(),
2896 E : false,
2897 E : rva_location,
2898 E : rva_base };
2899 E : bool added = fixup_map_.insert(std::make_pair(rva_location, fixup)).second;
2900 E : if (!added) {
2901 i : LOG(ERROR) << "Colliding fixups at " << rva_location;
2902 i : return false;
2903 : }
2904 E : }
2905 :
2906 E : return true;
2907 E : }
2908 :
2909 : bool Decomposer::RegisterStaticInitializerPatterns(
2910 E : const base::StringPiece& begin, const base::StringPiece& end) {
2911 : // Ensuring the patterns each have exactly one capturing group.
2912 : REPair re_pair = std::make_pair(RE(begin.as_string()),
2913 E : RE(end.as_string()));
2914 : if (re_pair.first.NumberOfCapturingGroups() != 1 ||
2915 E : re_pair.second.NumberOfCapturingGroups() != 1)
2916 i : return false;
2917 :
2918 E : static_initializer_patterns_.push_back(re_pair);
2919 :
2920 E : return true;
2921 E : }
2922 :
2923 : bool Decomposer::RegisterNonReturningFunction(
2924 E : const base::StringPiece& function_name) {
2925 E : return non_returning_functions_.insert(function_name.as_string()).second;
2926 E : }
2927 :
2928 : bool Decomposer::RegisterNonReturningImport(
2929 : const base::StringPiece& module_name,
2930 E : const base::StringPiece& function_name) {
2931 E : StringSet& module_set = non_returning_imports_[module_name.as_string()];
2932 E : return module_set.insert(function_name.as_string()).second;
2933 E : }
2934 :
2935 : bool Decomposer::LoadBlockGraphFromPdbStream(const PEFile& image_file,
2936 : pdb::PdbStream* block_graph_stream,
2937 E : ImageLayout* image_layout) {
2938 E : DCHECK(block_graph_stream != NULL);
2939 E : DCHECK(image_layout != NULL);
2940 E : LOG(INFO) << "Reading block-graph and image layout from the PDB.";
2941 :
2942 : // Initialize an input archive pointing to the stream.
2943 E : scoped_refptr<pdb::PdbByteStream> byte_stream = new pdb::PdbByteStream();
2944 E : if (!byte_stream->Init(block_graph_stream))
2945 i : return false;
2946 E : DCHECK(byte_stream.get() != NULL);
2947 :
2948 E : core::ScopedInStreamPtr pdb_in_stream;
2949 : pdb_in_stream.reset(core::CreateByteInStream(
2950 E : byte_stream->data(), byte_stream->data() + byte_stream->length()));
2951 :
2952 : // Read the header.
2953 E : uint32 stream_version = 0;
2954 E : unsigned char compressed = 0;
2955 : if (!pdb_in_stream->Read(sizeof(stream_version),
2956 : reinterpret_cast<core::Byte*>(&stream_version)) ||
2957 : !pdb_in_stream->Read(sizeof(compressed),
2958 E : reinterpret_cast<core::Byte*>(&compressed))) {
2959 i : LOG(ERROR) << "Failed to read existing Syzygy block-graph stream header.";
2960 i : return false;
2961 : }
2962 :
2963 : // Check the stream version.
2964 E : if (stream_version != pdb::kSyzygyBlockGraphStreamVersion) {
2965 E : LOG(ERROR) << "PDB contains an unsupported Syzygy block-graph stream"
2966 : << " version (got " << stream_version << ", expected "
2967 : << pdb::kSyzygyBlockGraphStreamVersion << ").";
2968 E : return false;
2969 : }
2970 :
2971 : // If the stream is compressed insert the decompression filter.
2972 E : core::InStream* in_stream = pdb_in_stream.get();
2973 E : scoped_ptr<core::ZInStream> zip_in_stream;
2974 E : if (compressed != 0) {
2975 E : zip_in_stream.reset(new core::ZInStream(in_stream));
2976 E : if (!zip_in_stream->Init()) {
2977 i : LOG(ERROR) << "Unable to initialize ZInStream.";
2978 i : return false;
2979 : }
2980 E : in_stream = zip_in_stream.get();
2981 : }
2982 :
2983 : // Deserialize the image-layout.
2984 E : core::NativeBinaryInArchive in_archive(in_stream);
2985 E : block_graph::BlockGraphSerializer::Attributes attributes = 0;
2986 : if (!LoadBlockGraphAndImageLayout(
2987 E : image_file, &attributes, image_layout, &in_archive)) {
2988 i : LOG(ERROR) << "Failed to deserialize block-graph and image layout.";
2989 i : return false;
2990 : }
2991 :
2992 E : return true;
2993 E : }
2994 :
2995 : bool Decomposer::LoadBlockGraphFromPdb(const base::FilePath& pdb_path,
2996 : const PEFile& image_file,
2997 : ImageLayout* image_layout,
2998 E : bool* stream_exists) {
2999 E : DCHECK(image_layout != NULL);
3000 E : DCHECK(stream_exists != NULL);
3001 :
3002 E : pdb::PdbFile pdb_file;
3003 E : pdb::PdbReader pdb_reader;
3004 E : if (!pdb_reader.Read(pdb_path, &pdb_file)) {
3005 i : LOG(ERROR) << "Unable to read the PDB named \"" << pdb_path.value()
3006 : << "\".";
3007 i : return NULL;
3008 : }
3009 :
3010 : // Try to get the block-graph stream from the PDB.
3011 : scoped_refptr<pdb::PdbStream> block_graph_stream =
3012 E : GetBlockGraphStreamFromPdb(&pdb_file);
3013 E : if (block_graph_stream.get() == NULL) {
3014 E : *stream_exists = false;
3015 E : return false;
3016 : }
3017 :
3018 : // The PDB contains a block-graph stream, the block-graph and the image layout
3019 : // will be read from this stream.
3020 E : *stream_exists = true;
3021 : if (!LoadBlockGraphFromPdbStream(image_file, block_graph_stream.get(),
3022 E : image_layout)) {
3023 i : return false;
3024 : }
3025 :
3026 E : return true;
3027 E : }
3028 :
3029 : scoped_refptr<pdb::PdbStream> Decomposer::GetBlockGraphStreamFromPdb(
3030 E : pdb::PdbFile* pdb_file) {
3031 E : scoped_refptr<pdb::PdbStream> block_graph_stream;
3032 : // Get the PDB header and try to get the block-graph ID stream from it.
3033 E : pdb::PdbInfoHeader70 pdb_header = {0};
3034 E : pdb::NameStreamMap name_stream_map;
3035 : if (!ReadHeaderInfoStream(pdb_file->GetStream(pdb::kPdbHeaderInfoStream),
3036 : &pdb_header,
3037 E : &name_stream_map)) {
3038 i : LOG(ERROR) << "Failed to read header info stream.";
3039 i : return block_graph_stream;
3040 : }
3041 : pdb::NameStreamMap::const_iterator name_it = name_stream_map.find(
3042 E : pdb::kSyzygyBlockGraphStreamName);
3043 E : if (name_it == name_stream_map.end()) {
3044 E : return block_graph_stream;
3045 : }
3046 :
3047 : // Get the block-graph stream and ensure that it's not empty.
3048 E : block_graph_stream = pdb_file->GetStream(name_it->second);
3049 E : if (block_graph_stream.get() == NULL) {
3050 i : LOG(ERROR) << "Failed to read the block-graph stream from the PDB.";
3051 i : return block_graph_stream;
3052 : }
3053 E : if (block_graph_stream->length() == 0) {
3054 i : LOG(ERROR) << "The block-graph stream is empty.";
3055 i : return block_graph_stream;
3056 : }
3057 :
3058 E : return block_graph_stream;
3059 E : }
3060 :
3061 : bool Decomposer::OnImportThunkCallback(const char* module_name,
3062 : const char* symbol_name,
3063 E : BlockGraph::Block* thunk) {
3064 E : DCHECK(module_name != NULL);
3065 E : DCHECK(symbol_name != NULL);
3066 E : DCHECK(thunk != NULL);
3067 :
3068 : // Look for the module first.
3069 : StringSetMap::const_iterator module_it =
3070 E : non_returning_imports_.find(std::string(module_name));
3071 E : if (module_it == non_returning_imports_.end())
3072 E : return true;
3073 :
3074 : // Look for the symbol within the module.
3075 E : if (module_it->second.count(std::string(symbol_name)) == 0)
3076 E : return true;
3077 :
3078 : // If we get here then the imported symbol is found. Decorate the thunk.
3079 E : thunk->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
3080 E : VLOG(1) << "Forcing non-returning attribute on imported symbol \""
3081 : << symbol_name << "\" from module \"" << module_name << "\".";
3082 :
3083 E : return true;
3084 E : }
3085 :
3086 : } // namespace pe
|