1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/new_decomposer.h"
16 :
17 : #include "pcrecpp.h" // NOLINT
18 : #include "base/bind.h"
19 : #include "base/string_split.h"
20 : #include "base/stringprintf.h"
21 : #include "base/utf_string_conversions.h"
22 : #include "base/win/scoped_bstr.h"
23 : #include "base/win/scoped_comptr.h"
24 : #include "syzygy/core/disassembler_util.h"
25 : #include "syzygy/core/zstream.h"
26 : #include "syzygy/pdb/omap.h"
27 : #include "syzygy/pdb/pdb_byte_stream.h"
28 : #include "syzygy/pdb/pdb_constants.h"
29 : #include "syzygy/pdb/pdb_dbi_stream.h"
30 : #include "syzygy/pdb/pdb_file.h"
31 : #include "syzygy/pdb/pdb_reader.h"
32 : #include "syzygy/pdb/pdb_symbol_record.h"
33 : #include "syzygy/pdb/pdb_util.h"
34 : #include "syzygy/pe/dia_util.h"
35 : #include "syzygy/pe/find.h"
36 : #include "syzygy/pe/pe_file_parser.h"
37 : #include "syzygy/pe/pe_utils.h"
38 : #include "syzygy/pe/serialization.h"
39 : #include "third_party/cci/Files/CvInfo.h"
40 :
41 : namespace cci = Microsoft_Cci_Pdb;
42 :
43 : namespace {
44 :
45 : // A small helper struct for dumping block information to log messages.
46 : // TODO(chrisha): Move this to block_graph and reuse it everywhere!
47 : struct BlockInfo {
48 : enum AddressType {
49 : kNoAddress,
50 : kAbsoluteAddress,
51 : kFileOffsetAddress,
52 : kRelativeAddress,
53 : };
54 :
55 i : explicit BlockInfo(const block_graph::BlockGraph::Block* block)
56 : : block(block), type(kNoAddress) {
57 i : DCHECK(block != NULL);
58 i : }
59 :
60 i : BlockInfo(const block_graph::BlockGraph::Block* block,
61 : core::AbsoluteAddress address)
62 : : block(block), type(kAbsoluteAddress), abs_addr(address) {
63 i : DCHECK(block != NULL);
64 i : }
65 : BlockInfo(const block_graph::BlockGraph::Block* block,
66 : core::FileOffsetAddress address)
67 : : block(block), type(kFileOffsetAddress), file_addr(address) {
68 : DCHECK(block != NULL);
69 : }
70 : BlockInfo(const block_graph::BlockGraph::Block* block,
71 : core::RelativeAddress address)
72 : : block(block), type(kRelativeAddress), rel_addr(address) {
73 : DCHECK(block != NULL);
74 : }
75 :
76 : const block_graph::BlockGraph::Block* block;
77 : AddressType type;
78 :
79 : // Ideally these would be a in a union but because they have non-trivial
80 : // constructors they are not allowed.
81 : core::AbsoluteAddress abs_addr;
82 : core::FileOffsetAddress file_addr;
83 : core::RelativeAddress rel_addr;
84 :
85 : private:
86 : DISALLOW_COPY_AND_ASSIGN(BlockInfo);
87 : };
88 :
89 : } // anonymous namespace
90 :
91 : // Pretty prints a BlockInfo to an ostream. This has to be outside of any
92 : // namespaces so that operator<< is found properly.
93 i : std::ostream& operator<<(std::ostream& os, const BlockInfo& bi) {
94 : os << "Block(id=" << bi.block->id() << ", name=\"" << bi.block->name()
95 i : << "\", size=" << bi.block->size();
96 i : if (bi.type != BlockInfo::kNoAddress) {
97 i : os << ", address=";
98 i : switch (bi.type) {
99 : case BlockInfo::kAbsoluteAddress: {
100 i : os << bi.abs_addr;
101 i : break;
102 : }
103 : case BlockInfo::kFileOffsetAddress: {
104 i : os << bi.file_addr;
105 i : break;
106 : }
107 : case BlockInfo::kRelativeAddress: {
108 i : os << bi.rel_addr;
109 : break;
110 : }
111 : default: break;
112 : }
113 : }
114 i : os << ")";
115 i : return os;
116 i : }
117 :
118 : namespace pe {
119 :
120 : // An intermediate reference representation used while parsing PE blocks.
121 : // This is necessary because at that point we haven't yet chunked the whole
122 : // image into blocks thus some references cannot be resolved.
123 : struct NewDecomposer::IntermediateReference {
124 : RelativeAddress src_addr;
125 : BlockGraph::ReferenceType type;
126 : BlockGraph::Size size;
127 : RelativeAddress dst_addr;
128 : };
129 :
130 : namespace {
131 :
132 : using base::win::ScopedBstr;
133 : using base::win::ScopedComPtr;
134 : using block_graph::BlockGraph;
135 : using builder::Callback;
136 : using builder::Opt;
137 : using builder::Or;
138 : using builder::Seq;
139 : using builder::Star;
140 : using core::AbsoluteAddress;
141 : using core::RelativeAddress;
142 :
143 : typedef BlockGraph::Block Block;
144 : typedef BlockGraph::BlockType BlockType;
145 : typedef BlockGraph::Offset Offset;
146 : typedef BlockGraph::Reference Reference;
147 : typedef BlockGraph::ReferenceType ReferenceType;
148 : typedef core::AddressRange<RelativeAddress, size_t> RelativeRange;
149 : typedef NewDecomposer::IntermediateReference IntermediateReference;
150 : typedef NewDecomposer::IntermediateReferences IntermediateReferences;
151 : typedef pcrecpp::RE RE;
152 : typedef std::vector<OMAP> OMAPs;
153 : typedef std::vector<pdb::PdbFixup> PdbFixups;
154 :
// Fixed marker strings for jump tables and case tables.
// NOTE(review): presumably used as label names for tables found inside code
// blocks; the users are not visible in this part of the file -- confirm.
const char kJumpTable[] = "<jump-table>";
const char kCaseTable[] = "<case-table>";
157 :
// Range-testing helper.
//
// Returns true iff @p value lies in the half-open range
// [lower_bound_incl, lower_bound_incl + length_excl).
//
// Both bounds are converted to the type of @p value (T1) and all comparisons
// are performed in that type. In particular the exclusive upper bound is NOT
// converted back to T2: doing so would truncate it whenever T2 is narrower
// than T1 and produce false negatives.
template<typename T1, typename T2, typename T3>
bool InRange(T1 value, T2 lower_bound_incl, T3 length_excl) {
  const T1 lower_bound = static_cast<T1>(lower_bound_incl);
  const T1 upper_bound_excl = lower_bound + length_excl;
  return lower_bound <= value && value < upper_bound_excl;
}
// Returns true iff @p value lies in the closed range
// [lower_bound_incl, lower_bound_incl + length_incl]. Both bounds are
// converted to the type of @p value (T1) before comparing.
template<typename T1, typename T2, typename T3>
bool InRangeIncl(T1 value, T2 lower_bound_incl, T3 length_incl) {
  const T1 first = static_cast<T1>(lower_bound_incl);
  const T1 last = static_cast<T1>(lower_bound_incl) + length_incl;
  if (!(first <= value))
    return false;
  return value <= last;
}
171 :
172 : bool InitializeDia(const PEFile& image_file,
173 : const FilePath& pdb_path,
174 : IDiaDataSource** dia_source,
175 : IDiaSession** dia_session,
176 E : IDiaSymbol** global) {
177 E : DCHECK(*dia_source == NULL);
178 E : DCHECK(*dia_session == NULL);
179 E : DCHECK(*global == NULL);
180 :
181 E : if (!CreateDiaSource(dia_source))
182 i : return false;
183 E : DCHECK(*dia_source != NULL);
184 :
185 : // We create the session using the PDB file directly, as we've already
186 : // validated that it matches the module.
187 E : if (!CreateDiaSession(pdb_path, *dia_source, dia_session))
188 i : return false;
189 E : DCHECK(*dia_session != NULL);
190 :
191 E : HRESULT hr = (*dia_session)->get_globalScope(global);
192 E : if (hr != S_OK) {
193 i : LOG(ERROR) << "Failed to get the DIA global scope: "
194 : << com::LogHr(hr) << ".";
195 i : return false;
196 : }
197 :
198 E : return true;
199 E : }
200 :
// Coarse classification of a PE section, as computed by GetSectionType.
enum SectionType {
  // The section contains code.
  kSectionCode,
  // The section contains data.
  kSectionData,
  // The section is neither of the above.
  kSectionUnknown
};
206 :
207 : // Determines the type of a section based on its attributes. This is used to
208 : // tag blocks with an appropriate type.
209 E : SectionType GetSectionType(const IMAGE_SECTION_HEADER* header) {
210 E : DCHECK(header != NULL);
211 E : if ((header->Characteristics & IMAGE_SCN_CNT_CODE) != 0)
212 E : return kSectionCode;
213 E : if ((header->Characteristics & kReadOnlyDataCharacteristics) != 0)
214 E : return kSectionData;
215 i : return kSectionUnknown;
216 E : }
217 :
218 : // Given a compiland, returns its compiland details.
219 : bool GetCompilandDetailsForCompiland(IDiaSymbol* compiland,
220 E : IDiaSymbol** compiland_details) {
221 E : DCHECK(compiland != NULL);
222 E : DCHECK(compiland_details != NULL);
223 E : DCHECK(IsSymTag(compiland, SymTagCompiland));
224 E : DCHECK(*compiland_details == NULL);
225 :
226 : // Get the enumeration of compiland details.
227 E : ScopedComPtr<IDiaEnumSymbols> enum_symbols;
228 : HRESULT hr = compiland->findChildren(SymTagCompilandDetails, NULL, 0,
229 E : enum_symbols.Receive());
230 E : DCHECK_EQ(S_OK, hr);
231 :
232 : // We expect there to be compiland details. For compilands built by
233 : // non-standard toolchains, there usually aren't any.
234 E : LONG count = 0;
235 E : hr = enum_symbols->get_Count(&count);
236 E : DCHECK_EQ(S_OK, hr);
237 E : if (count == 0) {
238 : // We don't log here because we see this quite often.
239 i : return false;
240 : }
241 :
242 : // We do sometimes encounter more than one compiland detail. In fact, for
243 : // import and export tables we get one compiland detail per table entry.
244 : // They are all marked as having been generated by the linker, so using the
245 : // first one is sufficient.
246 :
247 : // Get the compiland details.
248 E : ULONG fetched = 0;
249 E : hr = enum_symbols->Next(1, compiland_details, &fetched);
250 E : DCHECK_EQ(S_OK, hr);
251 E : DCHECK_EQ(1u, fetched);
252 :
253 E : return true;
254 E : }
255 :
// Stores information regarding known compilers.
struct KnownCompilerInfo {
  // The compiler name to match. Points at a string literal, hence const.
  const wchar_t* compiler_name;
  // Whether or not code produced by this compiler is supported.
  bool supported;
};

// A list of known compilers, and their status as being supported or not. This
// is a read-only lookup table.
const KnownCompilerInfo kKnownCompilerInfos[] = {
  { L"Microsoft (R) Macro Assembler", false },
  { L"Microsoft (R) Optimizing Compiler", true },
  { L"Microsoft (R) LINK", false }
};
268 :
269 : // Given a compiland, determines whether the compiler used is one of those that
270 : // we whitelist.
271 E : bool IsBuiltBySupportedCompiler(IDiaSymbol* compiland) {
272 E : DCHECK(compiland != NULL);
273 E : DCHECK(IsSymTag(compiland, SymTagCompiland));
274 :
275 E : ScopedComPtr<IDiaSymbol> compiland_details;
276 : if (!GetCompilandDetailsForCompiland(compiland,
277 E : compiland_details.Receive())) {
278 : // If the compiland has no compiland details we assume the compiler is not
279 : // supported.
280 i : ScopedBstr compiland_name;
281 i : if (compiland->get_name(compiland_name.Receive()) == S_OK) {
282 i : VLOG(1) << "Compiland has no compiland details: "
283 : << com::ToString(compiland_name);
284 : }
285 i : return false;
286 : }
287 E : DCHECK(compiland_details.get() != NULL);
288 :
289 : // Get the compiler name.
290 E : ScopedBstr compiler_name;
291 E : HRESULT hr = compiland_details->get_compilerName(compiler_name.Receive());
292 E : DCHECK_EQ(S_OK, hr);
293 :
294 : // Check the compiler name against the list of known compilers.
295 E : for (size_t i = 0; i < arraysize(kKnownCompilerInfos); ++i) {
296 E : if (::wcscmp(kKnownCompilerInfos[i].compiler_name, compiler_name) == 0) {
297 E : return kKnownCompilerInfos[i].supported;
298 : }
299 E : }
300 :
301 : // Anything we don't explicitly know about is not supported.
302 E : VLOG(1) << "Encountered unknown compiler: " << compiler_name;
303 E : return false;
304 E : }
305 :
306 : // Adds an intermediate reference to the provided vector. The vector is
307 : // specified as the first parameter (in slight violation of our coding
308 : // standards) because this function is intended to be used by Bind.
309 : bool AddIntermediateReference(IntermediateReferences* references,
310 : RelativeAddress src_addr,
311 : ReferenceType type,
312 : BlockGraph::Size size,
313 E : RelativeAddress dst_addr) {
314 E : DCHECK(references != NULL);
315 E : IntermediateReference ref = { src_addr, type, size, dst_addr };
316 E : references->push_back(ref);
317 E : return true;
318 E : }
319 :
320 : // Create a reference as specified. Ignores existing references if they are of
321 : // the exact same type.
322 : bool CreateReference(RelativeAddress src_addr,
323 : BlockGraph::Size ref_size,
324 : ReferenceType ref_type,
325 : RelativeAddress base_addr,
326 : RelativeAddress dst_addr,
327 E : BlockGraph::AddressSpace* image) {
328 E : DCHECK(image != NULL);
329 :
330 : // Get the source block and offset, and ensure that the reference fits
331 : // within it.
332 E : Block* src_block = image->GetBlockByAddress(src_addr);
333 E : if (src_block == NULL) {
334 i : LOG(ERROR) << "Unable to find block for reference originating at "
335 : << src_addr << ".";
336 i : return false;
337 : }
338 E : RelativeAddress src_block_addr;
339 E : CHECK(image->GetAddressOf(src_block, &src_block_addr));
340 E : Offset src_block_offset = src_addr - src_block_addr;
341 E : if (src_block_offset + ref_size > src_block->size()) {
342 i : LOG(ERROR) << "Reference originating at " << src_addr
343 : << " extends beyond block \"" << src_block->name() << "\".";
344 i : return false;
345 : }
346 :
347 : // Get the destination block and offset.
348 E : Block* dst_block = image->GetBlockByAddress(base_addr);
349 E : if (dst_block == NULL) {
350 i : LOG(ERROR) << "Unable to find block for reference pointing at "
351 : << base_addr << ".";
352 i : return false;
353 : }
354 E : RelativeAddress dst_block_addr;
355 E : CHECK(image->GetAddressOf(dst_block, &dst_block_addr));
356 E : Offset base = base_addr - dst_block_addr;
357 E : Offset offset = dst_addr - dst_block_addr;
358 :
359 E : Reference ref(ref_type, ref_size, dst_block, offset, base);
360 :
361 : // Check if a reference already exists at this offset.
362 : Block::ReferenceMap::const_iterator ref_it =
363 E : src_block->references().find(src_block_offset);
364 E : if (ref_it != src_block->references().end()) {
365 : // If an identical reference already exists then we're done.
366 E : if (ref == ref_it->second)
367 E : return true;
368 i : LOG(ERROR) << "Block \"" << src_block->name() << "\" has a conflicting "
369 : << "reference at offset " << src_block_offset << ".";
370 i : return false;
371 : }
372 :
373 E : CHECK(src_block->SetReference(src_block_offset, ref));
374 :
375 E : return true;
376 E : }
377 :
378 : // Loads FIXUP and OMAP_FROM debug streams.
379 : bool LoadDebugStreams(IDiaSession* dia_session,
380 : PdbFixups* pdb_fixups,
381 E : OMAPs* omap_from) {
382 E : DCHECK(dia_session != NULL);
383 E : DCHECK(pdb_fixups != NULL);
384 E : DCHECK(omap_from != NULL);
385 :
386 : // Load the fixups. These must exist.
387 : SearchResult search_result = FindAndLoadDiaDebugStreamByName(
388 E : kFixupDiaDebugStreamName, dia_session, pdb_fixups);
389 E : if (search_result != kSearchSucceeded) {
390 i : if (search_result == kSearchFailed) {
391 i : LOG(ERROR) << "PDB file does not contain a FIXUP stream. Module must be "
392 : "linked with '/PROFILE' or '/DEBUGINFO:FIXUP' flag.";
393 : }
394 i : return false;
395 : }
396 :
397 : // Load the omap_from table. It is not necessary that one exist.
398 : search_result = FindAndLoadDiaDebugStreamByName(
399 E : kOmapFromDiaDebugStreamName, dia_session, omap_from);
400 E : if (search_result == kSearchErrored) {
401 i : LOG(ERROR) << "Error trying to read " << kOmapFromDiaDebugStreamName
402 : << " stream.";
403 i : return false;
404 : }
405 :
406 E : return true;
407 E : }
408 :
409 : bool GetFixupDestinationAndType(const PEFile& image_file,
410 : const pdb::PdbFixup& fixup,
411 : RelativeAddress* dst_addr,
412 E : ReferenceType* ref_type) {
413 E : DCHECK(dst_addr != NULL);
414 E : DCHECK(ref_type != NULL);
415 :
416 E : RelativeAddress src_addr(fixup.rva_location);
417 :
418 : // Get the destination address from the actual image itself. We only see
419 : // fixups for 32-bit references.
420 E : uint32 data = 0;
421 E : if (!image_file.ReadImage(src_addr, &data, sizeof(data))) {
422 i : LOG(ERROR) << "Unable to read image data for fixup with source address "
423 : << "at" << src_addr << ".";
424 i : return false;
425 : }
426 :
427 : // Translate this to a relative address.
428 E : switch (fixup.type) {
429 : case pdb::PdbFixup::TYPE_ABSOLUTE: {
430 E : *ref_type = BlockGraph::ABSOLUTE_REF;
431 E : AbsoluteAddress dst_addr_abs(data);
432 E : if (!image_file.Translate(dst_addr_abs, dst_addr)) {
433 i : LOG(ERROR) << "Unable to translate " << dst_addr_abs << ".";
434 i : return false;
435 : }
436 E : break;
437 : }
438 :
439 : case pdb::PdbFixup::TYPE_PC_RELATIVE: {
440 E : *ref_type = BlockGraph::PC_RELATIVE_REF;
441 E : *dst_addr = RelativeAddress(fixup.rva_location) + sizeof(data) + data;
442 E : break;
443 : }
444 :
445 : case pdb::PdbFixup::TYPE_RELATIVE: {
446 E : *ref_type = BlockGraph::RELATIVE_REF;
447 E : *dst_addr = RelativeAddress(data);
448 E : break;
449 : }
450 :
451 : default: {
452 i : LOG(ERROR) << "Unexpected fixup type (" << fixup.type << ").";
453 i : return false;
454 : }
455 : }
456 :
457 E : return true;
458 E : }
459 :
460 : // Creates references from the @p pdb_fixups (translating them via the
461 : // provided @p omap_from information if it is not empty), all while removing the
462 : // corresponding entries from @p reloc_set. If @p reloc_set is not empty after
463 : // this then the PDB fixups are out of sync with the image and we are unable to
464 : // safely decompose.
465 : //
466 : // @note This function deliberately ignores fixup information for the resource
467 : // section. This is because chrome.dll gets modified by a manifest tool
468 : // which doesn't update the FIXUPs in the corresponding PDB. They are thus
469 : // out of sync. Even if they were in sync this doesn't harm us as we have no
470 : // need to reach in and modify resource data.
471 : bool CreateReferencesFromFixupsImpl(
472 : const PEFile& image_file,
473 : const PdbFixups& pdb_fixups,
474 : const OMAPs& omap_from,
475 : PEFile::RelocSet* reloc_set,
476 E : BlockGraph::AddressSpace* image) {
477 E : DCHECK(reloc_set != NULL);
478 E : DCHECK(image != NULL);
479 :
480 E : bool have_omap = omap_from.size() != 0;
481 E : size_t fixups_used = 0;
482 :
483 : // The resource section in Chrome is modified post-link by a tool that adds a
484 : // manifest to it. This causes all of the fixups in the resource section (and
485 : // anything beyond it) to be invalid. As long as the resource section is the
486 : // last section in the image, this is not a problem (we can safely ignore the
487 : // .rsrc fixups, which we know how to parse without them). However, if there
488 : // is a section after the resource section, things will have been shifted
489 : // and potentially crucial fixups will be invalid.
490 : const IMAGE_SECTION_HEADER* rsrc_header = image_file.GetSectionHeader(
491 E : kResourceSectionName);
492 E : RelativeAddress rsrc_start(0xffffffff);
493 E : RelativeAddress rsrc_end(0xffffffff);
494 E : if (rsrc_header != NULL) {
495 E : rsrc_start = RelativeAddress(rsrc_header->VirtualAddress);
496 E : rsrc_end = rsrc_start + rsrc_header->Misc.VirtualSize;
497 : }
498 :
499 : // Ensure the fixups are all valid.
500 E : size_t skipped = 0;
501 E : for (size_t i = 0; i < pdb_fixups.size(); ++i) {
502 E : if (!pdb_fixups[i].ValidHeader()) {
503 i : LOG(ERROR) << "Unknown fixup header: "
504 : << StringPrintf("0x%08X.", pdb_fixups[i].header);
505 i : return false;
506 : }
507 :
508 : // For now, we skip any offset fixups. We've only seen this in the context
509 : // of TLS data access, and we don't mess with TLS structures.
510 E : if (pdb_fixups[i].is_offset())
511 E : continue;
512 :
513 : // All fixups we handle should be full size pointers.
514 E : DCHECK_EQ(Reference::kMaximumSize, pdb_fixups[i].size());
515 :
516 : // Get the original addresses, and map them through OMAP information.
517 : // Normally DIA takes care of this for us, but there is no API for
518 : // getting DIA to give us FIXUP information, so we have to do it manually.
519 E : RelativeAddress src_addr(pdb_fixups[i].rva_location);
520 E : RelativeAddress base_addr(pdb_fixups[i].rva_base);
521 E : if (have_omap) {
522 i : src_addr = pdb::TranslateAddressViaOmap(omap_from, src_addr);
523 i : base_addr = pdb::TranslateAddressViaOmap(omap_from, base_addr);
524 : }
525 :
526 : // If the reference originates beyond the .rsrc section then we can't
527 : // trust it.
528 E : if (src_addr >= rsrc_end) {
529 i : LOG(ERROR) << "Found fixup originating beyond .rsrc section.";
530 i : return false;
531 : }
532 :
533 : // If the reference originates from a part of the .rsrc section, ignore it.
534 E : if (src_addr >= rsrc_start)
535 E : continue;
536 :
537 : // Get the destination address of the fixup. This logs verbosely for us.
538 E : RelativeAddress dst_addr;
539 E : ReferenceType type = BlockGraph::RELATIVE_REF;
540 : if (!GetFixupDestinationAndType(image_file, pdb_fixups[i], &dst_addr,
541 E : &type)) {
542 i : return false;
543 : }
544 :
545 : // Finally, create the reference. This logs verbosely for us on failure.
546 : if (!CreateReference(src_addr, Reference::kMaximumSize, type, base_addr,
547 E : dst_addr, image)) {
548 i : return false;
549 : }
550 :
551 : // Remove this reference from the relocs.
552 E : PEFile::RelocSet::iterator reloc_it = reloc_set->find(src_addr);
553 E : if (reloc_it != reloc_set->end()) {
554 : // We should only find a reloc if the fixup was of absolute type.
555 E : if (type != BlockGraph::ABSOLUTE_REF) {
556 i : LOG(ERROR) << "Found a reloc corresponding to a non-absolute fixup.";
557 i : return false;
558 : }
559 :
560 E : reloc_set->erase(reloc_it);
561 : }
562 :
563 E : ++fixups_used;
564 E : }
565 :
566 E : LOG(INFO) << "Used " << fixups_used << " of " << pdb_fixups.size() << ".";
567 :
568 E : return true;
569 E : }
570 :
571 E : bool GetDataSymbolSize(IDiaSymbol* symbol, size_t* length) {
572 E : DCHECK(symbol != NULL);
573 E : DCHECK(length != NULL);
574 :
575 E : *length = 0;
576 E : ScopedComPtr<IDiaSymbol> type;
577 E : HRESULT hr = symbol->get_type(type.Receive());
578 : // This happens if the symbol has no type information.
579 E : if (hr == S_FALSE)
580 E : return true;
581 E : if (hr != S_OK) {
582 i : LOG(ERROR) << "Failed to get type symbol: " << com::LogHr(hr) << ".";
583 i : return false;
584 : }
585 :
586 E : ULONGLONG ull_length = 0;
587 E : hr = type->get_length(&ull_length);
588 E : if (hr != S_OK) {
589 i : LOG(ERROR) << "Failed to retrieve type length properties: "
590 : << com::LogHr(hr) << ".";
591 i : return false;
592 : }
593 E : DCHECK_LE(ull_length, 0xFFFFFFFF);
594 E : *length = static_cast<size_t>(ull_length);
595 :
596 E : return true;
597 E : }
598 :
599 : bool ScopeSymTagToLabelProperties(enum SymTagEnum sym_tag,
600 : size_t scope_count,
601 : BlockGraph::LabelAttributes* attr,
602 E : std::string* name) {
603 E : DCHECK(attr != NULL);
604 E : DCHECK(name != NULL);
605 :
606 E : switch (sym_tag) {
607 : case SymTagFuncDebugStart: {
608 E : *attr = BlockGraph::DEBUG_START_LABEL;
609 E : *name = "<debug-start>";
610 E : return true;
611 : }
612 : case SymTagFuncDebugEnd: {
613 E : *attr = BlockGraph::DEBUG_END_LABEL;
614 E : *name = "<debug-end>";
615 E : return true;
616 : }
617 : case SymTagBlock: {
618 E : *attr = BlockGraph::SCOPE_START_LABEL;
619 E : *name = base::StringPrintf("<scope-start-%d>", scope_count);
620 E : return true;
621 : }
622 : default:
623 i : return false;
624 : }
625 i : return false;
626 E : }
627 :
628 : bool AddLabelToBlock(Offset offset,
629 : const base::StringPiece& name,
630 : BlockGraph::LabelAttributes label_attributes,
631 E : Block* block) {
632 E : DCHECK(block != NULL);
633 :
634 : // It is possible for labels to be attached to the first byte past a block
635 : // (things like debug end, scope end, etc). It is up to the caller to be more
636 : // strict about the offset if need be.
637 E : DCHECK_LE(0, offset);
638 E : DCHECK_LE(offset, static_cast<Offset>(block->size()));
639 :
640 : // Try to create the label.
641 E : if (block->SetLabel(offset, name, label_attributes))
642 E : return true;
643 :
644 : // If we get here there's an already existing label. Update it.
645 E : BlockGraph::Label label;
646 E : CHECK(block->GetLabel(offset, &label));
647 :
648 : // Merge the names if this isn't a repeated name.
649 E : std::string name_str = name.as_string();
650 E : std::string new_name = label.name();
651 E : std::vector<std::string> names;
652 : base::SplitStringUsingSubstr(label.name(), NewDecomposer::kLabelNameSep,
653 E : &names);
654 E : if (std::find(names.begin(), names.end(), name_str) == names.end()) {
655 E : names.push_back(name_str);
656 E : new_name.append(NewDecomposer::kLabelNameSep);
657 E : new_name.append(name_str);
658 : }
659 :
660 : // Merge the attributes.
661 : BlockGraph::LabelAttributes new_label_attr = label.attributes() |
662 E : label_attributes;
663 :
664 : // Update the label.
665 E : label = BlockGraph::Label(new_name, new_label_attr);
666 E : CHECK(block->RemoveLabel(offset));
667 E : CHECK(block->SetLabel(offset, label));
668 :
669 E : return true;
670 E : }
671 :
672 : // Reads the linker module symbol stream from the given PDB file. This should
673 : // always exist as the last module.
674 : scoped_refptr<pdb::PdbStream> GetLinkerSymbolStream(
675 E : const pdb::PdbFile& pdb_file) {
676 : static const char kLinkerModuleName[] = "* Linker *";
677 :
678 : scoped_refptr<pdb::PdbStream> dbi_stream =
679 E : pdb_file.GetStream(pdb::kDbiStream);
680 E : if (dbi_stream.get() == NULL) {
681 i : LOG(ERROR) << "PDB does not contain a DBI stream.";
682 i : return false;
683 : }
684 :
685 E : pdb::DbiStream dbi;
686 E : if (!dbi.Read(dbi_stream.get())) {
687 i : LOG(ERROR) << "Unable to parse DBI stream.";
688 i : return false;
689 : }
690 :
691 E : if (dbi.modules().empty()) {
692 i : LOG(ERROR) << "DBI stream contains no modules.";
693 i : return false;
694 : }
695 :
696 : // The last module has always been observed to be the linker module.
697 E : const pdb::DbiModuleInfo& linker = dbi.modules().back();
698 E : if (linker.module_name() != kLinkerModuleName) {
699 i : LOG(ERROR) << "Last module is not the linker module.";
700 i : return false;
701 : }
702 :
703 : scoped_refptr<pdb::PdbStream> symbols = pdb_file.GetStream(
704 E : linker.module_info_base().stream);
705 E : if (symbols.get() == NULL) {
706 i : LOG(ERROR) << "Unable to open linker symbol stream.";
707 i : return false;
708 : }
709 :
710 E : return symbols;
711 E : }
712 :
713 : // Parses a symbol from a PDB symbol stream. The @p buffer is populated with the
714 : // data and upon success this returns the symbol directly cast onto the
715 : // @p buffer data. On failure this returns NULL.
716 : template<typename SymbolType>
717 : const SymbolType* ParseSymbol(uint16 symbol_length,
718 : pdb::PdbStream* stream,
719 E : std::vector<uint8>* buffer) {
720 E : DCHECK(stream != NULL);
721 E : DCHECK(buffer != NULL);
722 :
723 E : buffer->clear();
724 :
725 E : if (symbol_length < sizeof(SymbolType)) {
726 i : LOG(ERROR) << "Symbol too small for casting.";
727 i : return NULL;
728 : }
729 :
730 E : if (!stream->Read(buffer, symbol_length)) {
731 i : LOG(ERROR) << "Failed to read symbol.";
732 i : return NULL;
733 : }
734 :
735 E : return reinterpret_cast<const SymbolType*>(buffer->data());
736 E : }
737 :
738 : bool VisitNonControlFlowInstruction(const _DInst& instr,
739 : AbsoluteAddress block_addr,
740 : AbsoluteAddress instr_addr,
741 E : Block* block) {
742 E : DCHECK_NE(0u, block_addr.value());
743 E : DCHECK_NE(0u, instr_addr.value());
744 E : DCHECK_LE(block_addr, instr_addr);
745 E : DCHECK(block != NULL);
746 :
747 : // TODO(chrisha): We could walk the operands and follow references
748 : // explicitly. If any of them are of reference type and there's no
749 : // matching reference, this would be cause to blow up and die (we
750 : // should get all of these as relocs and/or fixups).
751 :
752 E : Offset instr_offset = instr_addr - block_addr;
753 : Block::ReferenceMap::const_iterator ref_it =
754 E : block->references().upper_bound(instr_offset);
755 : Block::ReferenceMap::const_iterator ref_end =
756 E : block->references().lower_bound(instr_offset + instr.size);
757 :
758 E : for (; ref_it != ref_end; ++ref_it) {
759 E : const Block* ref_block = ref_it->second.referenced();
760 :
761 : // We only care about inter-block references.
762 E : if (ref_block == block)
763 E : continue;
764 :
765 : // There should be no cross-block references to the middle of other
766 : // code blocks (to the top is fine, as we could be passing around a
767 : // function pointer). The exception is if the remote block is not
768 : // generated by cl.exe. In this case, there could be arbitrary labels
769 : // that act like functions within the body of that block, and referring
770 : // to them is perfectly fine.
771 : if (ref_block->type() == BlockGraph::CODE_BLOCK &&
772 : ref_it->second.base() != 0 &&
773 E : (block->attributes() & BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER)) {
774 i : block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
775 i : LOG(WARNING) << "Found a non-control-flow code-block to "
776 : << "middle-of-code-block reference from "
777 : << BlockInfo(block, block_addr) << " to "
778 : << BlockInfo(ref_block) << ".";
779 i : return true;
780 : }
781 E : }
782 :
783 E : return true;
784 E : }
785 :
786 : bool VisitPcRelativeControlFlowInstruction(bool create_missing_refs,
787 : const _DInst& instr,
788 : AbsoluteAddress image_addr,
789 : AbsoluteAddress block_addr,
790 : AbsoluteAddress instr_addr,
791 : BlockGraph::AddressSpace* image,
792 E : Block* block) {
793 E : DCHECK_NE(0u, image_addr.value());
794 E : DCHECK_NE(0u, block_addr.value());
795 E : DCHECK_NE(0u, instr_addr.value());
796 E : DCHECK_LT(image_addr, block_addr);
797 E : DCHECK_LE(block_addr, instr_addr);
798 E : DCHECK(image != NULL);
799 E : DCHECK(block != NULL);
800 :
801 E : int fc = META_GET_FC(instr.meta);
802 E : DCHECK(fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH);
803 E : DCHECK_EQ(O_PC, instr.ops[0].type);
804 E : DCHECK_EQ(O_NONE, instr.ops[1].type);
805 E : DCHECK_EQ(O_NONE, instr.ops[2].type);
806 E : DCHECK_EQ(O_NONE, instr.ops[3].type);
807 : DCHECK(instr.ops[0].size == 8 ||
808 : instr.ops[0].size == 16 ||
809 E : instr.ops[0].size == 32);
810 :
811 : // Distorm gives us size in bits, we want bytes.
812 E : BlockGraph::Size size = instr.ops[0].size / 8;
813 :
814 : // Get the reference's address. Note we assume it's in the instruction's
815 : // tail end - I don't know of a case where a PC-relative offset in a branch
816 : // or call is not the very last thing in an x86 instruction.
817 E : AbsoluteAddress abs_src = instr_addr + instr.size - size;
818 : AbsoluteAddress abs_dst = instr_addr + instr.size +
819 E : static_cast<size_t>(instr.imm.addr);
820 E : RelativeAddress rel_dst(abs_dst.value() - image_addr.value());
821 E : Offset offset_src = abs_src - block_addr;
822 :
823 E : Block* dst_block = block;
824 E : RelativeAddress dst_block_addr(block_addr.value() - image_addr.value());
825 :
826 : // Is the reference to something outside this block?
827 E : if (abs_dst < block_addr || abs_dst >= block_addr + block->size()) {
828 : // Short PC-relative references should be to this block, otherwise this
829 : // block is not MSVC-like.
830 E : if (size < Reference::kMaximumSize) {
831 i : block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
832 i : Offset offset_instr = instr_addr - block_addr;
833 i : LOG(WARNING) << "Found a " << size << "-byte PC-relative instruction to "
834 : << "an external " << abs_dst << " at offset "
835 : << offset_instr << " of " << BlockInfo(block, block_addr)
836 : << ".";
837 i : return true;
838 i : } else {
839 : // Long PC-relative references to other blocks should have been given to
840 : // us via FIXUPs, otherwise we risk breaking the world when moving blocks
841 : // around!
842 E : if (block->references().find(offset_src) == block->references().end()) {
843 i : LOG(ERROR) << "Missing fixup for a " << size << "-byte PC-relative "
844 : << "reference to " << abs_dst << " at offset "
845 : << offset_src << " of " << BlockInfo(block, block_addr)
846 : << ".";
847 i : return false;
848 : }
849 : }
850 :
851 : // Find the destination block and its address.
852 E : dst_block = image->GetContainingBlock(rel_dst, 1);
853 E : CHECK(image->GetAddressOf(dst_block, &dst_block_addr));
854 E : if (dst_block == NULL) {
855 i : LOG(ERROR) << "Found a " << size << "-byte PC-relative reference to a "
856 : << abs_dst << " outside of the image at offset "
857 : << offset_src << " of " << BlockInfo(block, block_addr) << ".";
858 i : return false;
859 : }
860 : }
861 :
862 : // Create the missing reference if need be. These are found by basic-block
863 : // disassembly so aren't strictly needed, but are useful debug information.
864 E : if (!create_missing_refs)
865 E : return true;
866 :
867 E : Offset offset_dst = rel_dst - dst_block_addr;
868 : Reference ref(BlockGraph::PC_RELATIVE_REF, size, dst_block, offset_dst,
869 E : offset_dst);
870 E : block->SetReference(offset_src, ref);
871 :
872 E : return true;
873 E : }
874 :
875 : bool VisitInstruction(bool create_missing_refs,
876 : const _DInst& instr,
877 : AbsoluteAddress image_addr,
878 : AbsoluteAddress block_addr,
879 : AbsoluteAddress instr_addr,
880 : BlockGraph::AddressSpace* image,
881 E : Block* block) {
882 E : DCHECK_NE(0u, image_addr.value());
883 E : DCHECK_NE(0u, block_addr.value());
884 E : DCHECK_NE(0u, instr_addr.value());
885 E : DCHECK_LT(image_addr, block_addr);
886 E : DCHECK_LE(block_addr, instr_addr);
887 E : DCHECK(image != NULL);
888 E : DCHECK(block != NULL);
889 :
890 E : int fc = META_GET_FC(instr.meta);
891 :
892 E : if (fc == FC_NONE) {
893 : return VisitNonControlFlowInstruction(
894 E : instr, block_addr, instr_addr, block);
895 : }
896 :
897 : if ((fc == FC_UNC_BRANCH || fc == FC_CALL || fc == FC_CND_BRANCH) &&
898 E : instr.ops[0].type == O_PC) {
899 : return VisitPcRelativeControlFlowInstruction(create_missing_refs,
900 E : instr, image_addr, block_addr, instr_addr, image, block);
901 : }
902 :
903 E : return true;
904 E : }
905 :
906 : bool DisassembleCodeBlockAndLabelData(bool create_missing_refs,
907 : AbsoluteAddress image_addr,
908 : AbsoluteAddress block_addr,
909 : BlockGraph::AddressSpace* image,
910 E : Block* block) {
911 E : DCHECK(image != NULL);
912 E : DCHECK(block != NULL);
913 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
914 :
915 : // We simultaneously walk through the block's references while disassembling
916 : // instructions. This is used to determine when (if) data starts. MSVC
917 : // always places jump tables first, which consist of absolute references.
918 E : const Block::ReferenceMap& ref_map(block->references());
919 E : Block::ReferenceMap::const_iterator ref_it = ref_map.begin();
920 :
921 : // We keep track of any self-references. If the block contains data these
922 : // are used as beginning points of tables. We rely on the sorted nature of
923 : // std::set when using these later on.
924 E : std::set<Offset> self_refs;
925 :
926 E : const uint8* data = block->data();
927 E : const uint8* data_end = block->data() + block->data_size();
928 :
929 : // If some of the data in this block is implicit then make it explicit for
930 : // ease of decoding.
931 E : std::vector<uint8> data_copy;
932 E : if (block->data_size() < block->size()) {
933 i : data_copy.resize(block->size(), 0);
934 i : ::memcpy(data_copy.data(), block->data(), block->data_size());
935 i : data = data_copy.data();
936 i : data_end = data + data_copy.size();
937 : }
938 :
939 : // Decode instructions one by one.
940 E : AbsoluteAddress addr(block_addr);
941 E : Offset offset = 0;
942 E : while (true) {
943 : // Stop the disassembly if we're at the end of the data.
944 E : if (data == data_end)
945 E : return true;
946 :
947 E : if (ref_it != ref_map.end()) {
948 : // Step past any references.
949 E : while (ref_it != ref_map.end() && ref_it->first < offset)
950 E : ++ref_it;
951 :
952 : // Stop the disassembly if the next byte is data. Namely, it coincides
953 : // with a reference.
954 E : if (ref_it->first == offset)
955 E : break;
956 : }
957 :
958 : // If we can't decode an instruction then we mark the block as not safe
959 : // for disassembly.
960 E : _DInst inst = { 0 };
961 : if (!core::DecodeOneInstruction(addr.value(), data, data_end - data,
962 E : &inst)) {
963 i : block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
964 i : VLOG(1) << "Unable to decode instruction at offset " << offset
965 : << " of " << BlockInfo(block, block_addr) << ".";
966 i : return true;
967 : }
968 :
969 : // Visit the instruction itself. This validates that the instruction is of
970 : // a type we expect to encounter, and may also cause internal references to
971 : // be created.
972 : if (!VisitInstruction(create_missing_refs, inst, image_addr, block_addr,
973 E : addr, image, block)) {
974 i : return false;
975 : }
976 :
977 : // Step past the instruction.
978 E : addr += inst.size;
979 E : data += inst.size;
980 E : offset += inst.size;
981 :
982 : // References to data are by absolute pointer, for which we always receive
983 : // a reloc/fixup, thus no need to parse the instruction. Moreover, ref_it
984 : // points to the first reference after the beginning of the instruction at
985 : // this point.
986 E : if (ref_it != ref_map.end() && ref_it->first < offset) {
987 : // The reference should be wholly contained in the instruction.
988 E : if (static_cast<Offset>(ref_it->first + ref_it->second.size()) > offset) {
989 i : block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
990 i : VLOG(1) << "Unexpected reference in instruction at offset "
991 : << ref_it->first << " of " << BlockInfo(block, block_addr)
992 : << ".";
993 i : return true;
994 : }
995 :
996 : // Store self-references to locations beyond our current cursor.
997 : if (ref_it->second.referenced() == block &&
998 E : ref_it->second.offset() > offset) {
999 E : self_refs.insert(ref_it->second.offset());
1000 : }
1001 :
1002 E : ++ref_it;
1003 : }
1004 E : }
1005 :
1006 : // If we get here then we've encountered data. We need to label data
1007 : // sections as appropriate.
1008 :
1009 E : bool data_label_added = false;
1010 E : Offset end_of_code_offset = offset;
1011 :
1012 E : std::set<Offset>::const_iterator off_it = self_refs.begin();
1013 E : for (; off_it != self_refs.end(); ++off_it) {
1014 E : Offset referred_offset = *off_it;
1015 :
1016 : // References to data must be beyond the decoded instructions.
1017 E : if (referred_offset < end_of_code_offset)
1018 E : continue;
1019 :
1020 : // Determine if this offset points at another reference.
1021 E : bool ref_at_offset = false;
1022 E : if (ref_it != ref_map.end()) {
1023 : // Step past any references.
1024 E : while (ref_it != ref_map.end() && ref_it->first < referred_offset)
1025 E : ++ref_it;
1026 :
1027 : // Stop the disassembly if the next byte is data. Namely, it coincides
1028 : // with a reference.
1029 E : if (ref_it->first == referred_offset)
1030 E : ref_at_offset = true;
1031 : }
1032 :
1033 : // Build and set the data label.
1034 E : BlockGraph::LabelAttributes attr = BlockGraph::DATA_LABEL;
1035 E : const char* name = NULL;
1036 E : if (ref_at_offset) {
1037 E : name = kJumpTable;
1038 E : attr |= BlockGraph::JUMP_TABLE_LABEL;
1039 E : } else {
1040 E : name = kCaseTable;
1041 E : attr |= BlockGraph::CASE_TABLE_LABEL;
1042 : }
1043 E : if (!AddLabelToBlock(referred_offset, name, attr, block))
1044 i : return false;
1045 E : data_label_added = true;
1046 E : }
1047 :
1048 E : if (!data_label_added) {
1049 i : block->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
1050 i : VLOG(1) << "Disassembled into data but found no references to it for "
1051 : << BlockInfo(block, block_addr) << ".";
1052 i : return true;
1053 : }
1054 :
1055 E : return true;
1056 E : }
1057 :
1058 : bool JumpAndCaseTableAlreadyLabelled(const Block* block,
1059 : Offset offset,
1060 E : BlockGraph::LabelAttributes attr) {
1061 E : DCHECK(block != NULL);
1062 :
1063 : // We can't say anything about blocks that we were not able to disassemble.
1064 E : if (block->attributes() & BlockGraph::ERRORED_DISASSEMBLY)
1065 i : return true;
1066 :
1067 E : BlockGraph::Label label;
1068 E : if (!block->GetLabel(offset, &label)) {
1069 i : LOG(ERROR) << "Expected data label at offset " << offset << " of "
1070 : << BlockInfo(block) << ".";
1071 i : return false;
1072 : }
1073 :
1074 E : if ((label.attributes() & attr) == attr)
1075 E : return true;
1076 :
1077 i : LOG(ERROR) << "Label at offset " << offset << " of " << BlockInfo(block)
1078 : << " has attributes "
1079 : << BlockGraph::BlockAttributesToString(block->attributes())
1080 : << " but expected at least "
1081 : << BlockGraph::BlockAttributesToString(attr) << ".";
1082 :
1083 i : return false;
1084 E : }
1085 :
1086 : } // namespace
1087 :
1088 : // We use ", " as a separator between symbol names. We sometimes see commas
1089 : // in symbol names but do not see whitespace. Thus, this provides a useful
1090 : // separator that is also human friendly to read.
1091 : const char NewDecomposer::kLabelNameSep[] = ", ";
1092 :
1093 : // This is by CreateBlocksFromCoffGroups to communicate shared state to
1094 : // VisitLinkerSymbol via the VisitSymbols helper function.
1095 : struct NewDecomposer::VisitLinkerSymbolContext {
1096 : int current_group_index;
1097 : std::string current_group_prefix;
1098 : RelativeAddress current_group_start;
1099 :
1100 : // These are the set of patterns that indicate bracketing groups. They
1101 : // should match both the opening and the closing symbol, and have at least
1102 : // one match group returning the common prefix.
1103 : std::vector<RE> bracketing_groups;
1104 :
1105 E : VisitLinkerSymbolContext() : current_group_index(-1) {
1106 : // Matches groups like: .CRT$XCA -> .CRT$XCZ
1107 E : bracketing_groups.push_back(RE("(\\.CRT\\$X.)[AZ]"));
1108 : // Matches groups like: .rtc$IAA -> .rtc$IZZ
1109 E : bracketing_groups.push_back(RE("(\\.rtc\\$.*)(AA|ZZ)"));
1110 : // Matches exactly: ATL$__a -> ATL$__z
1111 E : bracketing_groups.push_back(RE("(ATL\\$__)[az]"));
1112 : // Matches exactly: .tls -> .tls$ZZZ
1113 E : bracketing_groups.push_back(RE("(\\.tls)(\\$ZZZ)?"));
1114 E : }
1115 :
1116 : private:
1117 : DISALLOW_COPY_AND_ASSIGN(VisitLinkerSymbolContext);
1118 : };
1119 :
1120 : NewDecomposer::NewDecomposer(const PEFile& image_file)
1121 : : image_file_(image_file), parse_debug_info_(true), image_layout_(NULL),
1122 E : image_(NULL), current_block_(NULL), current_scope_count_(0) {
1123 E : }
1124 :
1125 E : bool NewDecomposer::Decompose(ImageLayout* image_layout) {
1126 E : DCHECK(image_layout != NULL);
1127 :
1128 : // The temporaries should be NULL.
1129 E : DCHECK(image_layout_ == NULL);
1130 E : DCHECK(image_ == NULL);
1131 :
1132 : // We start by finding the PDB path.
1133 E : if (!FindAndValidatePdbPath())
1134 E : return false;
1135 E : DCHECK(!pdb_path_.empty());
1136 :
1137 : // Load the serialized block-graph from the PDB if it exists. This allows
1138 : // round-trip decomposition.
1139 E : bool stream_exists = false;
1140 : if (LoadBlockGraphFromPdb(
1141 E : pdb_path_, image_file_, image_layout, &stream_exists)) {
1142 E : return true;
1143 E : } else if (stream_exists) {
1144 : // If the stream exists but hasn't been loaded we return an error. At this
1145 : // point an error message has already been logged if there was one.
1146 i : return false;
1147 : }
1148 :
1149 : // At this point a full decomposition needs to be performed.
1150 E : image_layout_ = image_layout;
1151 E : image_ = &(image_layout->blocks);
1152 E : bool success = DecomposeImpl();
1153 E : image_layout_ = NULL;
1154 E : image_ = NULL;
1155 :
1156 E : return success;
1157 E : }
1158 :
1159 E : bool NewDecomposer::FindAndValidatePdbPath() {
1160 : // Manually find the PDB path if it is not specified.
1161 E : if (pdb_path_.empty()) {
1162 : if (!FindPdbForModule(image_file_.path(), &pdb_path_) ||
1163 E : pdb_path_.empty()) {
1164 i : LOG(ERROR) << "Unable to find PDB file for module: "
1165 : << image_file_.path().value();
1166 i : return false;
1167 : }
1168 : }
1169 E : DCHECK(!pdb_path_.empty());
1170 :
1171 E : if (!file_util::PathExists(pdb_path_)) {
1172 E : LOG(ERROR) << "Path not found: " << pdb_path_.value();
1173 E : return false;
1174 : }
1175 :
1176 E : if (!pe::PeAndPdbAreMatched(image_file_.path(), pdb_path_)) {
1177 i : LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
1178 : << "module \"" << image_file_.path().value() << "\".";
1179 i : return false;
1180 : }
1181 :
1182 E : return true;
1183 E : }
1184 :
1185 : bool NewDecomposer::LoadBlockGraphFromPdbStream(
1186 : const PEFile& image_file,
1187 : pdb::PdbStream* block_graph_stream,
1188 E : ImageLayout* image_layout) {
1189 E : DCHECK(block_graph_stream != NULL);
1190 E : DCHECK(image_layout != NULL);
1191 E : LOG(INFO) << "Reading block-graph and image layout from the PDB.";
1192 :
1193 : // Initialize an input archive pointing to the stream.
1194 E : scoped_refptr<pdb::PdbByteStream> byte_stream = new pdb::PdbByteStream();
1195 E : if (!byte_stream->Init(block_graph_stream))
1196 i : return false;
1197 E : DCHECK(byte_stream.get() != NULL);
1198 :
1199 E : core::ScopedInStreamPtr pdb_in_stream;
1200 : pdb_in_stream.reset(core::CreateByteInStream(
1201 E : byte_stream->data(), byte_stream->data() + byte_stream->length()));
1202 :
1203 : // Read the header.
1204 E : uint32 stream_version = 0;
1205 E : unsigned char compressed = 0;
1206 : if (!pdb_in_stream->Read(sizeof(stream_version),
1207 : reinterpret_cast<core::Byte*>(&stream_version)) ||
1208 : !pdb_in_stream->Read(sizeof(compressed),
1209 E : reinterpret_cast<core::Byte*>(&compressed))) {
1210 i : LOG(ERROR) << "Failed to read existing Syzygy block-graph stream header.";
1211 i : return false;
1212 : }
1213 :
1214 : // Check the stream version.
1215 E : if (stream_version != pdb::kSyzygyBlockGraphStreamVersion) {
1216 E : LOG(ERROR) << "PDB contains an unsupported Syzygy block-graph stream"
1217 : << " version (got " << stream_version << ", expected "
1218 : << pdb::kSyzygyBlockGraphStreamVersion << ").";
1219 E : return false;
1220 : }
1221 :
1222 : // If the stream is compressed insert the decompression filter.
1223 E : core::InStream* in_stream = pdb_in_stream.get();
1224 E : scoped_ptr<core::ZInStream> zip_in_stream;
1225 E : if (compressed != 0) {
1226 E : zip_in_stream.reset(new core::ZInStream(in_stream));
1227 E : if (!zip_in_stream->Init()) {
1228 i : LOG(ERROR) << "Unable to initialize ZInStream.";
1229 i : return false;
1230 : }
1231 E : in_stream = zip_in_stream.get();
1232 : }
1233 :
1234 : // Deserialize the image-layout.
1235 E : core::NativeBinaryInArchive in_archive(in_stream);
1236 E : block_graph::BlockGraphSerializer::Attributes attributes = 0;
1237 : if (!LoadBlockGraphAndImageLayout(
1238 E : image_file, &attributes, image_layout, &in_archive)) {
1239 i : LOG(ERROR) << "Failed to deserialize block-graph and image layout.";
1240 i : return false;
1241 : }
1242 :
1243 E : return true;
1244 E : }
1245 :
1246 : bool NewDecomposer::LoadBlockGraphFromPdb(const FilePath& pdb_path,
1247 : const PEFile& image_file,
1248 : ImageLayout* image_layout,
1249 E : bool* stream_exists) {
1250 E : DCHECK(image_layout != NULL);
1251 E : DCHECK(stream_exists != NULL);
1252 :
1253 E : pdb::PdbFile pdb_file;
1254 E : pdb::PdbReader pdb_reader;
1255 E : if (!pdb_reader.Read(pdb_path, &pdb_file)) {
1256 i : LOG(ERROR) << "Unable to read the PDB named \"" << pdb_path.value()
1257 : << "\".";
1258 i : return NULL;
1259 : }
1260 :
1261 : // Try to get the block-graph stream from the PDB.
1262 E : scoped_refptr<pdb::PdbStream> block_graph_stream;
1263 : if (!pdb::LoadNamedStreamFromPdbFile(pdb::kSyzygyBlockGraphStreamName,
1264 : &pdb_file,
1265 : &block_graph_stream) ||
1266 E : block_graph_stream.get() == NULL) {
1267 E : *stream_exists = false;
1268 E : return false;
1269 : }
1270 E : if (block_graph_stream->length() == 0) {
1271 i : *stream_exists = false;
1272 i : LOG(WARNING) << "The block-graph stream is empty, ignoring it.";
1273 i : return false;
1274 : }
1275 :
1276 : // The PDB contains a block-graph stream, the block-graph and the image layout
1277 : // will be read from this stream.
1278 E : *stream_exists = true;
1279 : if (!LoadBlockGraphFromPdbStream(image_file, block_graph_stream.get(),
1280 E : image_layout)) {
1281 i : return false;
1282 : }
1283 :
1284 E : return true;
1285 E : }
1286 :
1287 E : bool NewDecomposer::DecomposeImpl() {
1288 : // Instantiate and initialize our Debug Interface Access session. This logs
1289 : // verbosely for us.
1290 E : ScopedComPtr<IDiaDataSource> dia_source;
1291 E : ScopedComPtr<IDiaSession> dia_session;
1292 E : ScopedComPtr<IDiaSymbol> global;
1293 : if (!InitializeDia(image_file_, pdb_path_, dia_source.Receive(),
1294 E : dia_session.Receive(), global.Receive())) {
1295 i : return false;
1296 : }
1297 :
1298 : // Copy the image headers to the layout.
1299 : CopySectionHeadersToImageLayout(
1300 : image_file_.nt_headers()->FileHeader.NumberOfSections,
1301 : image_file_.section_headers(),
1302 E : &(image_layout_->sections));
1303 :
1304 : // Create the sections in the underlying block-graph.
1305 E : if (!CreateBlockGraphSections())
1306 i : return false;
1307 :
1308 : // We scope the first few operations so that we don't keep the intermediate
1309 : // references around any longer than we have to.
1310 : {
1311 E : IntermediateReferences references;
1312 :
1313 : // First we parse out the PE blocks.
1314 E : if (!CreatePEImageBlocksAndReferences(&references))
1315 i : return false;
1316 :
1317 : // Now we parse the COFF group symbols from the linker's symbol stream.
1318 : // These indicate things like static initializers, which must stay together
1319 : // in a single block.
1320 E : if (!CreateBlocksFromCoffGroups())
1321 i : return false;
1322 :
1323 : // Next we parse out section contributions. Some of these may coincide with
1324 : // existing PE parsed blocks, but when they do we expect them to be exact
1325 : // collisions.
1326 E : if (!CreateBlocksFromSectionContribs(dia_session.get()))
1327 i : return false;
1328 :
1329 : // Flesh out the rest of the image with gap blocks.
1330 E : if (!CreateGapBlocks())
1331 i : return false;
1332 :
1333 : // Finalize the PE-parsed intermediate references.
1334 E : if (!FinalizeIntermediateReferences(references))
1335 i : return false;
1336 E : }
1337 :
1338 : // Parse the fixups and use them to create references.
1339 E : if (!CreateReferencesFromFixups(dia_session.get()))
1340 i : return false;
1341 :
1342 : // Disassemble code blocks and use the results to infer case and jump tables.
1343 E : if (!DisassembleCodeBlocksAndLabelData())
1344 i : return false;
1345 :
1346 : // Annotate the block-graph with symbol information.
1347 E : if (parse_debug_info_ && !ProcessSymbols(global.get()))
1348 i : return false;
1349 :
1350 E : return true;
1351 E : }
1352 :
1353 E : bool NewDecomposer::CreateBlockGraphSections() {
1354 : // Iterate through the image sections, and create sections in the BlockGraph.
1355 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1356 E : for (size_t i = 0; i < num_sections; ++i) {
1357 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1358 E : std::string name = pe::PEFile::GetSectionName(*header);
1359 : BlockGraph::Section* section = image_->graph()->AddSection(
1360 E : name, header->Characteristics);
1361 E : DCHECK(section != NULL);
1362 :
1363 : // For now, we expect them to have been created with the same IDs as those
1364 : // in the original image.
1365 E : if (section->id() != i) {
1366 i : LOG(ERROR) << "Unexpected section ID.";
1367 i : return false;
1368 : }
1369 E : }
1370 :
1371 E : return true;
1372 E : }
1373 :
1374 : bool NewDecomposer::CreatePEImageBlocksAndReferences(
1375 E : IntermediateReferences* references) {
1376 E : DCHECK(references != NULL);
1377 :
1378 : PEFileParser::AddReferenceCallback add_reference(
1379 E : base::Bind(&AddIntermediateReference, base::Unretained(references)));
1380 E : PEFileParser parser(image_file_, image_, add_reference);
1381 E : PEFileParser::PEHeader header;
1382 E : if (!parser.ParseImage(&header)) {
1383 i : LOG(ERROR) << "Unable to parse PE image.";
1384 i : return false;
1385 : }
1386 :
1387 E : return true;
1388 E : }
1389 :
1390 E : bool NewDecomposer::CreateBlocksFromCoffGroups() {
1391 E : pdb::PdbFile pdb_file;
1392 E : pdb::PdbReader pdb_reader;
1393 E : if (!pdb_reader.Read(pdb_path_, &pdb_file)) {
1394 i : LOG(ERROR) << "Failed to load PDB: " << pdb_path_.value();
1395 i : return false;
1396 : }
1397 :
1398 E : scoped_refptr<pdb::PdbStream> symbols = GetLinkerSymbolStream(pdb_file);
1399 :
1400 : // Process the symbols in the linker module symbol stream.
1401 E : VisitLinkerSymbolContext context;
1402 : pdb::VisitSymbolsCallback callback = base::Bind(
1403 : &NewDecomposer::VisitLinkerSymbol,
1404 : base::Unretained(this),
1405 E : base::Unretained(&context));
1406 E : if (!pdb::VisitSymbols(callback, symbols->length(), true, symbols.get()))
1407 i : return false;
1408 :
1409 : // Bail if we did not encounter a closing bracketing symbol where one was
1410 : // expected.
1411 E : if (context.current_group_index != -1) {
1412 i : LOG(ERROR) << "Unable to close bracketed COFF group \""
1413 : << context.current_group_prefix << "\".";
1414 i : return false;
1415 : }
1416 :
1417 E : return true;
1418 E : }
1419 :
1420 E : bool NewDecomposer::CreateBlocksFromSectionContribs(IDiaSession* session) {
1421 E : ScopedComPtr<IDiaEnumSectionContribs> section_contribs;
1422 : SearchResult search_result = FindDiaTable(session,
1423 E : section_contribs.Receive());
1424 E : if (search_result != kSearchSucceeded) {
1425 i : if (search_result == kSearchFailed)
1426 i : LOG(ERROR) << "No section contribution table found.";
1427 i : return false;
1428 : }
1429 :
1430 E : size_t rsrc_id = image_file_.GetSectionIndex(kResourceSectionName);
1431 :
1432 E : LONG count = 0;
1433 E : if (section_contribs->get_Count(&count) != S_OK) {
1434 i : LOG(ERROR) << "Failed to get section contributions enumeration length.";
1435 i : return false;
1436 : }
1437 :
1438 E : for (LONG visited = 0; visited < count; ++visited) {
1439 E : ScopedComPtr<IDiaSectionContrib> section_contrib;
1440 E : ULONG fetched = 0;
1441 E : HRESULT hr = section_contribs->Next(1, section_contrib.Receive(), &fetched);
1442 : // The standard way to end an enumeration (according to the docs) is by
1443 : // returning S_FALSE and setting fetched to 0. We don't actually see this,
1444 : // but it wouldn't be an error if we did.
1445 E : if (hr == S_FALSE && fetched == 0)
1446 i : break;
1447 E : if (hr != S_OK) {
1448 i : LOG(ERROR) << "Failed to get DIA section contribution: "
1449 : << com::LogHr(hr) << ".";
1450 i : return false;
1451 : }
1452 : // We actually end up seeing S_OK and fetched == 0 when the enumeration
1453 : // terminates, which goes against the publishes documentations.
1454 E : if (fetched == 0)
1455 i : break;
1456 :
1457 E : DWORD rva = 0;
1458 E : DWORD length = 0;
1459 E : DWORD section_id = 0;
1460 E : BOOL code = FALSE;
1461 E : ScopedComPtr<IDiaSymbol> compiland;
1462 E : ScopedBstr bstr_name;
1463 : if ((hr = section_contrib->get_relativeVirtualAddress(&rva)) != S_OK ||
1464 : (hr = section_contrib->get_length(&length)) != S_OK ||
1465 : (hr = section_contrib->get_addressSection(§ion_id)) != S_OK ||
1466 : (hr = section_contrib->get_code(&code)) != S_OK ||
1467 : (hr = section_contrib->get_compiland(compiland.Receive())) != S_OK ||
1468 E : (hr = compiland->get_name(bstr_name.Receive())) != S_OK) {
1469 i : LOG(ERROR) << "Failed to get section contribution properties: "
1470 : << com::LogHr(hr) << ".";
1471 i : return false;
1472 : }
1473 :
1474 : // Determine if this function was built by a supported compiler.
1475 : bool is_built_by_supported_compiler =
1476 E : IsBuiltBySupportedCompiler(compiland.get());
1477 :
1478 : // DIA numbers sections from 1 to n, while we do 0 to n - 1.
1479 E : DCHECK_LT(0u, section_id);
1480 E : --section_id;
1481 :
1482 : // We don't parse the resource section, as it is parsed by the PEFileParser.
1483 E : if (section_id == rsrc_id)
1484 E : continue;
1485 :
1486 E : std::string name;
1487 E : if (!WideToUTF8(bstr_name, bstr_name.Length(), &name)) {
1488 i : LOG(ERROR) << "Failed to convert compiland name to UTF8.";
1489 i : return false;
1490 : }
1491 :
1492 : // TODO(chrisha): We see special section contributions with the name
1493 : // "* CIL *". These are concatenations of data symbols and can very
1494 : // likely be chunked using symbols directly. A cursory visual inspection
1495 : // of symbol names hints that these might be related to WPO.
1496 :
1497 : // Create the block.
1498 : BlockType block_type =
1499 E : code ? BlockGraph::CODE_BLOCK : BlockGraph::DATA_BLOCK;
1500 : Block* block = CreateBlockOrFindCoveringPeBlock(
1501 E : block_type, RelativeAddress(rva), length, name);
1502 E : if (block == NULL) {
1503 i : LOG(ERROR) << "Unable to create block for compiland \"" << name << "\".";
1504 i : return false;
1505 : }
1506 :
1507 : // Set the block attributes.
1508 E : block->set_attribute(BlockGraph::SECTION_CONTRIB);
1509 E : if (!is_built_by_supported_compiler)
1510 E : block->set_attribute(BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER);
1511 E : }
1512 :
1513 E : return true;
1514 E : }
1515 :
1516 E : bool NewDecomposer::CreateGapBlocks() {
1517 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1518 :
1519 : // Iterate through all the image sections.
1520 E : for (size_t i = 0; i < num_sections; ++i) {
1521 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1522 E : DCHECK(header != NULL);
1523 :
1524 E : BlockType type = BlockGraph::CODE_BLOCK;
1525 E : const char* section_type = NULL;
1526 E : switch (GetSectionType(header)) {
1527 : case kSectionCode:
1528 E : type = BlockGraph::CODE_BLOCK;
1529 E : section_type = "code";
1530 E : break;
1531 :
1532 : case kSectionData:
1533 E : type = BlockGraph::DATA_BLOCK;
1534 E : section_type = "data";
1535 E : break;
1536 :
1537 : default:
1538 i : continue;
1539 : }
1540 :
1541 E : if (!CreateSectionGapBlocks(header, type)) {
1542 i : LOG(ERROR) << "Unable to create gap blocks for " << section_type
1543 : << " section \"" << header->Name << "\".";
1544 i : return false;
1545 : }
1546 E : }
1547 :
1548 E : return true;
1549 E : }
1550 :
1551 : bool NewDecomposer::FinalizeIntermediateReferences(
1552 E : const IntermediateReferences& references) {
1553 E : for (size_t i = 0; i < references.size(); ++i) {
1554 : // This logs verbosely for us.
1555 : if (!CreateReference(references[i].src_addr,
1556 : references[i].size,
1557 : references[i].type,
1558 : references[i].dst_addr,
1559 : references[i].dst_addr,
1560 E : image_)) {
1561 i : return false;
1562 : }
1563 E : }
1564 E : return true;
1565 E : }
1566 :
1567 E : bool NewDecomposer::DisassembleCodeBlocksAndLabelData() {
1568 E : DCHECK(image_ != NULL);
1569 :
1570 : const BlockGraph::Block* dos_header_block =
1571 E : image_->GetBlockByAddress(RelativeAddress(0));
1572 E : DCHECK(dos_header_block != NULL);
1573 :
1574 : const BlockGraph::Block* nt_headers_block =
1575 E : GetNtHeadersBlockFromDosHeaderBlock(dos_header_block);
1576 E : if (nt_headers_block == NULL) {
1577 i : LOG(ERROR) << "Unable to get NT headers block for image.";
1578 i : return false;
1579 : }
1580 :
1581 : // GetNtHeadersBlockFromDosHeaderBlock sanity checks things so we can cast
1582 : // with impunity.
1583 : const IMAGE_NT_HEADERS* nt_headers =
1584 E : reinterpret_cast<const IMAGE_NT_HEADERS*>(nt_headers_block->data());
1585 E : core::AbsoluteAddress image_base(nt_headers->OptionalHeader.ImageBase);
1586 :
1587 : // Walk through the blocks and disassemble each one of them.
1588 E : BlockGraph::AddressSpace::RangeMapConstIter it = image_->begin();
1589 E : for (; it != image_->end(); ++it) {
1590 E : BlockGraph::Block* block = it->second;
1591 :
1592 E : if (block->type() != BlockGraph::CODE_BLOCK)
1593 E : continue;
1594 :
1595 E : core::AbsoluteAddress abs_addr(image_base + it->first.start().value());
1596 : if (!DisassembleCodeBlockAndLabelData(
1597 E : parse_debug_info_, image_base, abs_addr, image_, block)) {
1598 i : return false;
1599 : }
1600 E : }
1601 :
1602 E : return true;
1603 E : }
1604 :
1605 E : bool NewDecomposer::CreateReferencesFromFixups(IDiaSession* session) {
1606 E : DCHECK(session != NULL);
1607 :
1608 E : PEFile::RelocSet reloc_set;
1609 E : if (!image_file_.DecodeRelocs(&reloc_set))
1610 i : return false;
1611 :
1612 E : OMAPs omap_from;
1613 E : PdbFixups fixups;
1614 E : if (!LoadDebugStreams(session, &fixups, &omap_from))
1615 i : return false;
1616 :
1617 : // While creating references from the fixups this removes the
1618 : // corresponding reference data from the relocs. We use this as a kind of
1619 : // double-entry bookkeeping to ensure all is well and right in the world.
1620 : if (!CreateReferencesFromFixupsImpl(image_file_, fixups, omap_from,
1621 E : &reloc_set, image_)) {
1622 i : return false;
1623 : }
1624 :
1625 E : if (!reloc_set.empty()) {
1626 i : LOG(ERROR) << "Found reloc entries without matching FIXUP entries.";
1627 i : return false;
1628 : }
1629 :
1630 E : return true;
1631 E : }
1632 :
1633 E : bool NewDecomposer::ProcessSymbols(IDiaSymbol* root) {
1634 E : DCHECK(root != NULL);
1635 :
1636 : DiaBrowser::MatchCallback on_push_function_or_thunk_symbol(
1637 : base::Bind(&NewDecomposer::OnPushFunctionOrThunkSymbol,
1638 E : base::Unretained(this)));
1639 : DiaBrowser::MatchCallback on_pop_function_or_thunk_symbol(
1640 : base::Bind(&NewDecomposer::OnPopFunctionOrThunkSymbol,
1641 E : base::Unretained(this)));
1642 : DiaBrowser::MatchCallback on_function_child_symbol(
1643 : base::Bind(&NewDecomposer::OnFunctionChildSymbol,
1644 E : base::Unretained(this)));
1645 : DiaBrowser::MatchCallback on_data_symbol(
1646 E : base::Bind(&NewDecomposer::OnDataSymbol, base::Unretained(this)));
1647 : DiaBrowser::MatchCallback on_public_symbol(
1648 E : base::Bind(&NewDecomposer::OnPublicSymbol, base::Unretained(this)));
1649 : DiaBrowser::MatchCallback on_label_symbol(
1650 E : base::Bind(&NewDecomposer::OnLabelSymbol, base::Unretained(this)));
1651 :
1652 E : DiaBrowser dia_browser;
1653 :
1654 : // Find thunks.
1655 : dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagThunk),
1656 : on_push_function_or_thunk_symbol,
1657 E : on_pop_function_or_thunk_symbol);
1658 :
1659 : // Find functions and all data, labels, callsites, debug start/end and block
1660 : // symbols below them. This is done in one single pattern so that the
1661 : // function pushes/pops happen in the right order.
1662 : dia_browser.AddPattern(
1663 : Seq(Opt(SymTagCompiland),
1664 : Callback(Or(SymTagFunction, SymTagThunk),
1665 : on_push_function_or_thunk_symbol,
1666 : on_pop_function_or_thunk_symbol),
1667 : Star(SymTagBlock),
1668 : Or(SymTagData,
1669 : SymTagLabel,
1670 : SymTagBlock,
1671 : SymTagFuncDebugStart,
1672 : SymTagFuncDebugEnd,
1673 : SymTagCallSite)),
1674 E : on_function_child_symbol);
1675 :
1676 : // Global data and code label symbols.
1677 : dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagLabel),
1678 E : on_label_symbol);
1679 : dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagData),
1680 E : on_data_symbol);
1681 :
1682 : // Public symbols. These provide decorated names without any type info, but
1683 : // are useful for debugging.
1684 E : dia_browser.AddPattern(SymTagPublicSymbol, on_public_symbol);
1685 :
1686 E : return dia_browser.Browse(root);
1687 E : }
1688 :
1689 : bool NewDecomposer::VisitLinkerSymbol(VisitLinkerSymbolContext* context,
1690 : uint16 symbol_length,
1691 : uint16 symbol_type,
1692 E : pdb::PdbStream* stream) {
1693 E : DCHECK(context != NULL);
1694 E : DCHECK(stream != NULL);
1695 :
1696 E : if (symbol_type != cci::S_COFFGROUP)
1697 E : return true;
1698 :
1699 E : std::vector<uint8> buffer;
1700 : const cci::CoffGroupSym* coffgroup =
1701 E : ParseSymbol<cci::CoffGroupSym>(symbol_length, stream, &buffer);
1702 E : if (coffgroup == NULL)
1703 i : return false;
1704 :
1705 : // The PDB numbers sections starting at index 1 but we use index 0.
1706 : RelativeAddress rva(image_layout_->sections[coffgroup->seg - 1].addr +
1707 E : coffgroup->off);
1708 :
1709 : // We are looking for an opening symbol.
1710 E : if (context->current_group_index == -1) {
1711 E : for (size_t i = 0; i < context->bracketing_groups.size(); ++i) {
1712 E : std::string prefix;
1713 E : if (context->bracketing_groups[i].FullMatch(coffgroup->name, &prefix)) {
1714 E : context->current_group_index = i;
1715 E : context->current_group_prefix = prefix;
1716 E : context->current_group_start = rva;
1717 E : return true;
1718 : }
1719 E : }
1720 :
1721 : // No opening symbol was encountered. We can safely ignore this
1722 : // COFF group symbol.
1723 E : return true;
1724 : }
1725 :
1726 : // If we get here we've found an opening symbol and we're looking for the
1727 : // matching closing symbol.
1728 E : std::string prefix;
1729 : if (!context->bracketing_groups[context->current_group_index].FullMatch(
1730 E : coffgroup->name, &prefix)) {
1731 E : return true;
1732 : }
1733 :
1734 E : if (prefix != context->current_group_prefix) {
1735 : // We see another symbol open/close while already in an opened symbol.
1736 : // This indicates nested bracketing information, which we've never seen
1737 : // before.
1738 i : LOG(ERROR) << "Encountered nested bracket symbol \"" << prefix
1739 : << "\" while in \"" << context->current_group_prefix << "\".";
1740 i : return false;
1741 : }
1742 :
1743 E : RelativeAddress end = rva + coffgroup->cb;
1744 E : DCHECK_LT(context->current_group_start, end);
1745 :
1746 : // Create a block for this bracketed COFF group.
1747 : BlockGraph::Block* block = CreateBlock(
1748 : BlockGraph::DATA_BLOCK,
1749 : context->current_group_start,
1750 : end - context->current_group_start,
1751 E : base::StringPrintf("Bracketed COFF group: %s", prefix.c_str()));
1752 E : if (block == NULL) {
1753 i : LOG(ERROR) << "Failed to create bracketed COFF group \""
1754 : << prefix << "\".";
1755 i : return false;
1756 : }
1757 E : block->set_attribute(BlockGraph::COFF_GROUP);
1758 :
1759 : // Indicate that this block is closed and we're looking for another opening
1760 : // bracket symbol.
1761 E : context->current_group_index = -1;
1762 E : context->current_group_prefix.clear();
1763 E : context->current_group_start = RelativeAddress(0);
1764 :
1765 E : return true;
1766 E : }
1767 :
1768 : DiaBrowser::BrowserDirective NewDecomposer::OnPushFunctionOrThunkSymbol(
1769 : const DiaBrowser& dia_browser,
1770 : const DiaBrowser::SymTagVector& sym_tags,
1771 E : const DiaBrowser::SymbolPtrVector& symbols) {
1772 E : DCHECK(!symbols.empty());
1773 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1774 E : DiaBrowser::SymbolPtr symbol = symbols.back();
1775 :
1776 E : DCHECK(current_block_ == NULL);
1777 E : DCHECK_EQ(current_address_, RelativeAddress(0));
1778 E : DCHECK_EQ(0u, current_scope_count_);
1779 :
1780 E : HRESULT hr = E_FAIL;
1781 E : DWORD location_type = LocIsNull;
1782 E : DWORD rva = 0;
1783 E : ULONGLONG length = 0;
1784 E : ScopedBstr name_bstr;
1785 : if (FAILED(hr = symbol->get_locationType(&location_type)) ||
1786 : FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
1787 : FAILED(hr = symbol->get_length(&length)) ||
1788 E : FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
1789 i : LOG(ERROR) << "Failed to get function/thunk properties: " << com::LogHr(hr)
1790 : << ".";
1791 i : return DiaBrowser::kBrowserAbort;
1792 : }
1793 :
1794 : // We only care about functions with static storage. We can stop looking at
1795 : // things below this node, as we won't be able to resolve them either.
1796 E : if (location_type != LocIsStatic)
1797 i : return DiaBrowser::kBrowserTerminatePath;
1798 :
1799 E : RelativeAddress addr(rva);
1800 E : Block* block = image_->GetBlockByAddress(addr);
1801 E : CHECK(block != NULL);
1802 E : RelativeAddress block_addr;
1803 E : CHECK(image_->GetAddressOf(block, &block_addr));
1804 E : DCHECK(InRange(addr, block_addr, block->size()));
1805 :
1806 E : std::string name;
1807 E : if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1808 i : LOG(ERROR) << "Failed to convert function/thunk name to UTF8.";
1809 i : return DiaBrowser::kBrowserAbort;
1810 : }
1811 :
1812 : // We know the function starts in this block but we need to make sure its
1813 : // end does not extend past the end of the block.
1814 E : if (addr + length > block_addr + block->size()) {
1815 i : LOG(ERROR) << "Got function/thunk \"" << name << "\" that is not contained "
1816 : << "by section contribution \"" << block->name() << "\".";
1817 i : return DiaBrowser::kBrowserAbort;
1818 : }
1819 :
1820 E : Offset offset = addr - block_addr;
1821 E : if (!AddLabelToBlock(offset, name, BlockGraph::CODE_LABEL, block))
1822 i : return DiaBrowser::kBrowserAbort;
1823 :
1824 : // Keep track of the generated block. We will use this when parsing symbols
1825 : // that belong to this function. This prevents us from having to do repeated
1826 : // lookups and also allows us to associate labels outside of the block to the
1827 : // correct block.
1828 E : current_block_ = block;
1829 E : current_address_ = block_addr;
1830 :
1831 : // Certain properties are not defined on all blocks, so the following calls
1832 : // may return S_FALSE.
1833 E : BOOL no_return = FALSE;
1834 E : if (symbol->get_noReturn(&no_return) != S_OK)
1835 E : no_return = FALSE;
1836 :
1837 E : BOOL has_inl_asm = FALSE;
1838 E : if (symbol->get_hasInlAsm(&has_inl_asm) != S_OK)
1839 E : has_inl_asm = FALSE;
1840 :
1841 E : BOOL has_eh = FALSE;
1842 E : if (symbol->get_hasEH(&has_eh) != S_OK)
1843 E : has_eh = FALSE;
1844 :
1845 E : BOOL has_seh = FALSE;
1846 E : if (symbol->get_hasSEH(&has_seh) != S_OK)
1847 E : has_seh = FALSE;
1848 :
1849 : // Set the block attributes.
1850 E : if (no_return == TRUE)
1851 E : block->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
1852 E : if (has_inl_asm == TRUE)
1853 E : block->set_attribute(BlockGraph::HAS_INLINE_ASSEMBLY);
1854 E : if (has_eh || has_seh)
1855 E : block->set_attribute(BlockGraph::HAS_EXCEPTION_HANDLING);
1856 E : if (IsSymTag(symbol, SymTagThunk))
1857 E : block->set_attribute(BlockGraph::THUNK);
1858 :
1859 E : return DiaBrowser::kBrowserContinue;
1860 E : }
1861 :
1862 : DiaBrowser::BrowserDirective NewDecomposer::OnPopFunctionOrThunkSymbol(
1863 : const DiaBrowser& dia_browser,
1864 : const DiaBrowser::SymTagVector& sym_tags,
1865 E : const DiaBrowser::SymbolPtrVector& symbols) {
1866 : // Simply clean up the current function block and address.
1867 E : current_block_ = NULL;
1868 E : current_address_ = RelativeAddress(0);
1869 E : current_scope_count_ = 0;
1870 E : return DiaBrowser::kBrowserContinue;
1871 E : }
1872 :
1873 : DiaBrowser::BrowserDirective NewDecomposer::OnFunctionChildSymbol(
1874 : const DiaBrowser& dia_browser,
1875 : const DiaBrowser::SymTagVector& sym_tags,
1876 E : const DiaBrowser::SymbolPtrVector& symbols) {
1877 E : DCHECK(!symbols.empty());
1878 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1879 :
1880 : // This can only be called from the context of a function, so we expect the
1881 : // parent function block to be set and remembered.
1882 E : DCHECK(current_block_ != NULL);
1883 :
1884 : // The set of sym tags here should match the pattern used in the DiaBrowser
1885 : // instance set up in ProcessSymbols.
1886 E : switch (sym_tags.back()) {
1887 : case SymTagData:
1888 E : return OnDataSymbol(dia_browser, sym_tags, symbols);
1889 :
1890 : case SymTagLabel:
1891 E : return OnLabelSymbol(dia_browser, sym_tags, symbols);
1892 :
1893 : case SymTagBlock:
1894 : case SymTagFuncDebugStart:
1895 : case SymTagFuncDebugEnd:
1896 E : return OnScopeSymbol(sym_tags.back(), symbols.back());
1897 :
1898 : case SymTagCallSite:
1899 E : return OnCallSiteSymbol(symbols.back());
1900 :
1901 : default:
1902 : break;
1903 : }
1904 :
1905 i : LOG(ERROR) << "Unhandled function child symbol: " << sym_tags.back() << ".";
1906 i : return DiaBrowser::kBrowserAbort;
1907 E : }
1908 :
1909 : DiaBrowser::BrowserDirective NewDecomposer::OnDataSymbol(
1910 : const DiaBrowser& dia_browser,
1911 : const DiaBrowser::SymTagVector& sym_tags,
1912 E : const DiaBrowser::SymbolPtrVector& symbols) {
1913 E : DCHECK(!symbols.empty());
1914 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1915 E : DiaBrowser::SymbolPtr symbol = symbols.back();
1916 :
1917 E : HRESULT hr = E_FAIL;
1918 E : DWORD location_type = LocIsNull;
1919 E : DWORD rva = 0;
1920 E : ScopedBstr name_bstr;
1921 : if (FAILED(hr = symbol->get_locationType(&location_type)) ||
1922 : FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
1923 E : FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
1924 i : LOG(ERROR) << "Failed to get data properties: " << com::LogHr(hr) << ".";
1925 i : return DiaBrowser::kBrowserAbort;
1926 : }
1927 :
1928 : // Symbols with an address of zero are essentially invalid. They appear to
1929 : // have been optimized away by the compiler, but they are still reported.
1930 E : if (rva == 0)
1931 E : return DiaBrowser::kBrowserTerminatePath;
1932 :
1933 : // We only care about functions with static storage. We can stop looking at
1934 : // things below this node, as we won't be able to resolve them either.
1935 E : if (location_type != LocIsStatic)
1936 i : return DiaBrowser::kBrowserTerminatePath;
1937 :
1938 : // Get the size of this datum from its type info.
1939 E : size_t length = 0;
1940 E : if (!GetDataSymbolSize(symbol, &length))
1941 i : return DiaBrowser::kBrowserAbort;
1942 :
1943 : // Reuse the parent function block if we can. This acts as small lookup
1944 : // cache.
1945 E : RelativeAddress addr(rva);
1946 E : Block* block = current_block_;
1947 E : RelativeAddress block_addr(current_address_);
1948 E : if (block == NULL || !InRange(addr, block_addr, block->size())) {
1949 E : block = image_->GetBlockByAddress(addr);
1950 E : CHECK(block != NULL);
1951 E : CHECK(image_->GetAddressOf(block, &block_addr));
1952 E : DCHECK(InRange(addr, block_addr, block->size()));
1953 : }
1954 :
1955 E : std::string name;
1956 E : if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1957 i : LOG(ERROR) << "Failed to convert label name to UTF8.";
1958 i : return DiaBrowser::kBrowserAbort;
1959 : }
1960 :
1961 : // Zero-length data symbols mark case/jump tables, or are forward declares.
1962 E : BlockGraph::LabelAttributes attr = BlockGraph::DATA_LABEL;
1963 E : Offset offset = addr - block_addr;
1964 E : if (length == 0) {
1965 : // Jump and case tables come in as data symbols with no name. Jump tables
1966 : // are always an array of pointers, thus they coincide exactly with a
1967 : // reference. Case tables are simple arrays of integer values (themselves
1968 : // indices into a jump table), thus do not coincide with a reference.
1969 E : if (name.empty() && block->type() == BlockGraph::CODE_BLOCK) {
1970 E : if (block->references().find(offset) != block->references().end()) {
1971 E : name = kJumpTable;
1972 E : attr |= BlockGraph::JUMP_TABLE_LABEL;
1973 E : } else {
1974 E : name = kCaseTable;
1975 E : attr |= BlockGraph::CASE_TABLE_LABEL;
1976 : }
1977 :
1978 : // We expect jump and case tables to already have been discovered by
1979 : // the disassembly operation. If this is not the case then our decoding
1980 : // step is in error and its results can't be trusted.
1981 E : if (!JumpAndCaseTableAlreadyLabelled(block, offset, attr))
1982 i : return DiaBrowser::kBrowserAbort;
1983 E : } else {
1984 : // Zero-length data symbols act as 'forward declares' in some sense. They
1985 : // are always followed by a non-zero length data symbol with the same name
1986 : // and location.
1987 E : return DiaBrowser::kBrowserTerminatePath;
1988 : }
1989 : }
1990 :
1991 : // Verify that the data symbol does not exceed the size of the block.
1992 E : if (addr + length > block_addr + block->size()) {
1993 : // The data symbol can exceed the size of the block in the case of data
1994 : // imports. For some reason the toolchain emits a global data symbol with
1995 : // type information equal to the type of the data *pointed* to by the import
1996 : // entry rather than the type of the entry itself. Thus, if the data type
1997 : // is bigger than the entire IAT this symbol will exceed it. To complicate
1998 : // matters even more, a poorly written module can import its own export in
1999 : // which case a linker generated pseudo-import-entry block will be
2000 : // generated. This won't be part of the IAT, so we can't even filter based
2001 : // on that. Instead, we simply ignore global data symbols that exceed the
2002 : // block size.
2003 E : base::StringPiece spname(name);
2004 E : if (sym_tags.size() == 1 && spname.starts_with("_imp_")) {
2005 E : VLOG(1) << "Encountered an imported data symbol \"" << name << "\" that "
2006 : << "extends past its parent block \"" << block->name() << "\".";
2007 E : } else {
2008 i : LOG(ERROR) << "Received data symbol \"" << name << "\" that extends past "
2009 : << "its parent block \"" << block->name() << "\".";
2010 i : return DiaBrowser::kBrowserAbort;
2011 : }
2012 : }
2013 :
2014 E : if (!AddLabelToBlock(offset, name, attr, block))
2015 i : return DiaBrowser::kBrowserAbort;
2016 :
2017 E : return DiaBrowser::kBrowserContinue;
2018 E : }
2019 :
2020 : DiaBrowser::BrowserDirective NewDecomposer::OnPublicSymbol(
2021 : const DiaBrowser& dia_browser,
2022 : const DiaBrowser::SymTagVector& sym_tags,
2023 E : const DiaBrowser::SymbolPtrVector& symbols) {
2024 E : DCHECK(!symbols.empty());
2025 E : DCHECK_EQ(sym_tags.size(), symbols.size());
2026 E : DCHECK(current_block_ == NULL);
2027 E : DiaBrowser::SymbolPtr symbol = symbols.back();
2028 :
2029 E : HRESULT hr = E_FAIL;
2030 E : DWORD rva = 0;
2031 E : ScopedBstr name_bstr;
2032 : if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
2033 E : FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
2034 i : LOG(ERROR) << "Failed to get public symbol properties: " << com::LogHr(hr)
2035 : << ".";
2036 i : return DiaBrowser::kBrowserAbort;
2037 : }
2038 :
2039 E : RelativeAddress addr(rva);
2040 E : Block* block = image_->GetBlockByAddress(addr);
2041 E : CHECK(block != NULL);
2042 E : RelativeAddress block_addr;
2043 E : CHECK(image_->GetAddressOf(block, &block_addr));
2044 E : DCHECK(InRange(addr, block_addr, block->size()));
2045 :
2046 E : std::string name;
2047 E : WideToUTF8(name_bstr, name_bstr.Length(), &name);
2048 :
2049 : // Public symbol names are mangled. Remove leading '_' as per
2050 : // http://msdn.microsoft.com/en-us/library/00kh39zz(v=vs.80).aspx
2051 E : if (name[0] == '_')
2052 E : name = name.substr(1);
2053 :
2054 E : Offset offset = addr - block_addr;
2055 E : if (!AddLabelToBlock(offset, name, BlockGraph::PUBLIC_SYMBOL_LABEL, block))
2056 i : return DiaBrowser::kBrowserAbort;
2057 :
2058 E : return DiaBrowser::kBrowserContinue;
2059 E : }
2060 :
2061 : DiaBrowser::BrowserDirective NewDecomposer::OnLabelSymbol(
2062 : const DiaBrowser& dia_browser,
2063 : const DiaBrowser::SymTagVector& sym_tags,
2064 E : const DiaBrowser::SymbolPtrVector& symbols) {
2065 E : DCHECK(!symbols.empty());
2066 E : DCHECK_EQ(sym_tags.size(), symbols.size());
2067 E : DiaBrowser::SymbolPtr symbol = symbols.back();
2068 :
2069 E : HRESULT hr = E_FAIL;
2070 E : DWORD rva = 0;
2071 E : ScopedBstr name_bstr;
2072 : if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
2073 E : FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
2074 i : LOG(ERROR) << "Failed to get label symbol properties: " << com::LogHr(hr)
2075 : << ".";
2076 i : return DiaBrowser::kBrowserAbort;
2077 : }
2078 :
2079 : // If we have a current_block_ the label should lie within its scope.
2080 E : RelativeAddress addr(rva);
2081 E : Block* block = current_block_;
2082 E : RelativeAddress block_addr(current_address_);
2083 E : if (block != NULL) {
2084 E : if (!InRangeIncl(addr, current_address_, current_block_->size())) {
2085 i : LOG(ERROR) << "Label falls outside of current block \""
2086 : << current_block_->name() << "\".";
2087 i : return DiaBrowser::kBrowserAbort;
2088 : }
2089 E : } else {
2090 : // If there is no current block this is a compiland scope label.
2091 E : block = image_->GetBlockByAddress(addr);
2092 E : CHECK(block != NULL);
2093 E : CHECK(image_->GetAddressOf(block, &block_addr));
2094 E : DCHECK(InRange(addr, block_addr, block->size()));
2095 :
2096 : // TODO(chrisha): This label is in compiland scope, so we should be
2097 : // finding the block whose section contribution shares the same
2098 : // compiland.
2099 : }
2100 :
2101 E : std::string name;
2102 E : WideToUTF8(name_bstr, name_bstr.Length(), &name);
2103 :
2104 E : Offset offset = addr - block_addr;
2105 E : if (!AddLabelToBlock(offset, name, BlockGraph::CODE_LABEL, block))
2106 i : return DiaBrowser::kBrowserAbort;
2107 :
2108 E : return DiaBrowser::kBrowserContinue;
2109 E : }
2110 :
2111 : DiaBrowser::BrowserDirective NewDecomposer::OnScopeSymbol(
2112 E : enum SymTagEnum type, DiaBrowser::SymbolPtr symbol) {
2113 : // We should only get here via the successful exploration of a SymTagFunction,
2114 : // so current_block_ should be set.
2115 E : DCHECK(current_block_ != NULL);
2116 :
2117 E : HRESULT hr = E_FAIL;
2118 E : DWORD rva = 0;
2119 E : if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva))) {
2120 i : LOG(ERROR) << "Failed to get scope symbol properties: " << com::LogHr(hr)
2121 : << ".";
2122 i : return DiaBrowser::kBrowserAbort;
2123 : }
2124 :
2125 : // The label may potentially lay at the first byte past the function.
2126 E : RelativeAddress addr(rva);
2127 E : DCHECK_LE(current_address_, addr);
2128 E : DCHECK_LE(addr, current_address_ + current_block_->size());
2129 :
2130 : // Get the attributes for this label.
2131 E : BlockGraph::LabelAttributes attr = 0;
2132 E : std::string name;
2133 E : CHECK(ScopeSymTagToLabelProperties(type, current_scope_count_, &attr, &name));
2134 :
2135 : // Add the label.
2136 E : Offset offset = addr - current_address_;
2137 E : if (!AddLabelToBlock(offset, name, attr, current_block_))
2138 i : return DiaBrowser::kBrowserAbort;
2139 :
2140 : // If this is a scope we extract the length and explicitly add a corresponding
2141 : // end label.
2142 E : if (type == SymTagBlock) {
2143 E : ULONGLONG length = 0;
2144 E : if (symbol->get_length(&length) != S_OK) {
2145 i : LOG(ERROR) << "Failed to extract code scope length for block \""
2146 : << current_block_->name() << "\".";
2147 i : return DiaBrowser::kBrowserAbort;
2148 : }
2149 E : DCHECK_LE(static_cast<size_t>(offset + length), current_block_->size());
2150 E : name = base::StringPrintf("<scope-end-%d>", current_scope_count_);
2151 E : ++current_scope_count_;
2152 : if (!AddLabelToBlock(offset + length, name,
2153 E : BlockGraph::SCOPE_END_LABEL, current_block_)) {
2154 i : return DiaBrowser::kBrowserAbort;
2155 : }
2156 : }
2157 :
2158 E : return DiaBrowser::kBrowserContinue;
2159 E : }
2160 :
2161 : DiaBrowser::BrowserDirective NewDecomposer::OnCallSiteSymbol(
2162 E : DiaBrowser::SymbolPtr symbol) {
2163 : // We should only get here via the successful exploration of a SymTagFunction,
2164 : // so current_block_ should be set.
2165 E : DCHECK(current_block_ != NULL);
2166 :
2167 E : HRESULT hr = E_FAIL;
2168 E : DWORD rva = 0;
2169 E : if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva))) {
2170 i : LOG(ERROR) << "Failed to get call site symbol properties: "
2171 : << com::LogHr(hr) << ".";
2172 i : return DiaBrowser::kBrowserAbort;
2173 : }
2174 :
2175 E : RelativeAddress addr(rva);
2176 E : if (!InRange(addr, current_address_, current_block_->size())) {
2177 i : LOG(ERROR) << "Call site falls outside of current block \""
2178 : << current_block_->name() << "\".";
2179 i : return DiaBrowser::kBrowserAbort;
2180 : }
2181 :
2182 E : Offset offset = addr - current_address_;
2183 : if (!AddLabelToBlock(offset, "<call-site>", BlockGraph::CALL_SITE_LABEL,
2184 E : current_block_)) {
2185 i : return DiaBrowser::kBrowserAbort;
2186 : }
2187 :
2188 E : return DiaBrowser::kBrowserContinue;
2189 E : }
2190 :
2191 : Block* NewDecomposer::CreateBlock(BlockType type,
2192 : RelativeAddress address,
2193 : BlockGraph::Size size,
2194 E : const base::StringPiece& name) {
2195 E : Block* block = image_->AddBlock(type, address, size, name);
2196 E : if (block == NULL) {
2197 i : LOG(ERROR) << "Unable to add block \"" << name.as_string() << "\" at "
2198 : << address << " with size " << size << ".";
2199 i : return NULL;
2200 : }
2201 :
2202 : // Mark the source range from whence this block originates. This is assuming
2203 : // an untransformed image. To handle transformed images we'd have to use the
2204 : // OMAP information to do this properly.
2205 : bool pushed = block->source_ranges().Push(
2206 : Block::DataRange(0, size),
2207 E : Block::SourceRange(address, size));
2208 E : DCHECK(pushed);
2209 :
2210 E : BlockGraph::SectionId section = image_file_.GetSectionIndex(address, size);
2211 E : if (section == BlockGraph::kInvalidSectionId) {
2212 i : LOG(ERROR) << "Block \"" << name.as_string() << "\" at " << address
2213 : << " with size " << size << " lies outside of all sections.";
2214 i : return NULL;
2215 : }
2216 E : block->set_section(section);
2217 :
2218 E : const uint8* data = image_file_.GetImageData(address, size);
2219 E : if (data != NULL)
2220 E : block->SetData(data, size);
2221 :
2222 E : return block;
2223 E : }
2224 :
2225 : Block* NewDecomposer::CreateBlockOrFindCoveringPeBlock(
2226 : BlockType type,
2227 : RelativeAddress addr,
2228 : BlockGraph::Size size,
2229 E : const base::StringPiece& name) {
2230 E : Block* block = image_->GetBlockByAddress(addr);
2231 E : if (block != NULL) {
2232 E : RelativeAddress block_addr;
2233 E : CHECK(image_->GetAddressOf(block, &block_addr));
2234 :
2235 E : RelativeRange existing_block(block_addr, block->size());
2236 :
2237 : // If this is not a PE parsed or COFF group block that covers us entirely,
2238 : // then this is an error.
2239 : static const BlockGraph::BlockAttributes kCoveringAttributes =
2240 : BlockGraph::PE_PARSED | BlockGraph::COFF_GROUP;
2241 : if ((block->attributes() & kCoveringAttributes) == 0 ||
2242 E : !existing_block.Contains(addr, size)) {
2243 i : LOG(ERROR) << "Trying to create block \"" << name.as_string() << "\" at "
2244 : << addr.value() << " with size " << size << " that conflicts "
2245 : << "with existing block \"" << block->name() << " at "
2246 : << block_addr << " with size " << block->size() << ".";
2247 i : return NULL;
2248 : }
2249 :
2250 E : return block;
2251 : }
2252 E : DCHECK(block == NULL);
2253 :
2254 E : return CreateBlock(type, addr, size, name);
2255 E : }
2256 :
2257 : bool NewDecomposer::CreateGapBlock(BlockType block_type,
2258 : RelativeAddress address,
2259 E : BlockGraph::Size size) {
2260 : Block* block = CreateBlock(block_type, address, size,
2261 E : StringPrintf("Gap Block 0x%08X", address.value()).c_str());
2262 E : if (block == NULL) {
2263 i : LOG(ERROR) << "Unable to create gap block.";
2264 i : return false;
2265 : }
2266 E : block->set_attribute(BlockGraph::GAP_BLOCK);
2267 :
2268 E : return true;
2269 E : }
2270 :
2271 : bool NewDecomposer::CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
2272 E : BlockType block_type) {
2273 E : RelativeAddress section_begin(header->VirtualAddress);
2274 E : RelativeAddress section_end(section_begin + header->Misc.VirtualSize);
2275 : RelativeAddress image_end(
2276 E : image_file_.nt_headers()->OptionalHeader.SizeOfImage);
2277 :
2278 : // Search for the first and last blocks interesting from the start and end
2279 : // of the section to the end of the image.
2280 : BlockGraph::AddressSpace::RangeMap::const_iterator it(
2281 : image_->address_space_impl().FindFirstIntersection(
2282 : BlockGraph::AddressSpace::Range(section_begin,
2283 E : image_end - section_begin)));
2284 : BlockGraph::AddressSpace::RangeMap::const_iterator end(
2285 : image_->address_space_impl().FindFirstIntersection(
2286 : BlockGraph::AddressSpace::Range(section_end,
2287 E : image_end - section_end)));
2288 :
2289 : // The whole section is missing. Cover it with one gap block.
2290 E : if (it == end)
2291 : return CreateGapBlock(
2292 i : block_type, section_begin, section_end - section_begin);
2293 :
2294 : // Create the head gap block if need be.
2295 E : if (section_begin < it->first.start()) {
2296 : if (!CreateGapBlock(
2297 i : block_type, section_begin, it->first.start() - section_begin)) {
2298 i : return false;
2299 : }
2300 : }
2301 :
2302 : // Now iterate the blocks and fill in gaps.
2303 E : for (; it != end; ++it) {
2304 E : const Block* block = it->second;
2305 E : DCHECK(block != NULL);
2306 E : RelativeAddress block_end = it->first.start() + block->size();
2307 E : if (block_end >= section_end)
2308 E : break;
2309 :
2310 : // Walk to the next address in turn.
2311 E : BlockGraph::AddressSpace::RangeMap::const_iterator next = it;
2312 E : ++next;
2313 E : if (next == end) {
2314 : // We're at the end of the list. Create the tail gap block.
2315 E : DCHECK_GT(section_end, block_end);
2316 E : if (!CreateGapBlock(block_type, block_end, section_end - block_end))
2317 i : return false;
2318 E : break;
2319 : }
2320 :
2321 : // Create the interstitial gap block.
2322 E : if (block_end < next->first.start())
2323 : if (!CreateGapBlock(
2324 E : block_type, block_end, next->first.start() - block_end)) {
2325 i : return false;
2326 : }
2327 E : }
2328 :
2329 E : return true;
2330 E : }
2331 :
2332 : } // namespace pe
|