1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/new_decomposer.h"
16 :
17 : #include "pcrecpp.h" // NOLINT
18 : #include "base/bind.h"
19 : #include "base/stringprintf.h"
20 : #include "base/utf_string_conversions.h"
21 : #include "base/strings/string_split.h"
22 : #include "base/win/scoped_bstr.h"
23 : #include "base/win/scoped_comptr.h"
24 : #include "syzygy/core/zstream.h"
25 : #include "syzygy/pdb/omap.h"
26 : #include "syzygy/pdb/pdb_byte_stream.h"
27 : #include "syzygy/pdb/pdb_constants.h"
28 : #include "syzygy/pdb/pdb_dbi_stream.h"
29 : #include "syzygy/pdb/pdb_file.h"
30 : #include "syzygy/pdb/pdb_reader.h"
31 : #include "syzygy/pdb/pdb_symbol_record.h"
32 : #include "syzygy/pdb/pdb_util.h"
33 : #include "syzygy/pe/dia_util.h"
34 : #include "syzygy/pe/find.h"
35 : #include "syzygy/pe/pe_file_parser.h"
36 : #include "syzygy/pe/pe_utils.h"
37 : #include "syzygy/pe/serialization.h"
38 : #include "third_party/cci/Files/CvInfo.h"
39 :
40 : namespace cci = Microsoft_Cci_Pdb;
41 :
42 : namespace {
43 :
44 : using block_graph::BlockGraph;
45 : using core::AbsoluteAddress;
46 : using core::FileOffsetAddress;
47 : using core::RelativeAddress;
48 :
49 : typedef BlockGraph::Block Block;
50 :
51 : // A small helper struct for dumping block information to log messages.
52 : // TODO(chrisha): Move this to block_graph and reuse it everywhere!
53 : struct BlockInfo {
54 : enum AddressType {
55 : kNoAddress,
56 : kAbsoluteAddress,
57 : kFileOffsetAddress,
58 : kRelativeAddress,
59 : };
60 :
61 : explicit BlockInfo(const Block* block)
62 : : block(block), type(kNoAddress) {
63 : DCHECK_NE(reinterpret_cast<Block*>(NULL), block);
64 : }
65 :
66 : BlockInfo(const Block* block,
67 : AbsoluteAddress address)
68 : : block(block), type(kAbsoluteAddress), abs_addr(address) {
69 : DCHECK_NE(reinterpret_cast<Block*>(NULL), block);
70 : }
71 : BlockInfo(const Block* block,
72 : FileOffsetAddress address)
73 : : block(block), type(kFileOffsetAddress), file_addr(address) {
74 : DCHECK_NE(reinterpret_cast<Block*>(NULL), block);
75 : }
76 i : BlockInfo(const Block* block,
77 : RelativeAddress address)
78 : : block(block), type(kRelativeAddress), rel_addr(address) {
79 i : DCHECK_NE(reinterpret_cast<Block*>(NULL), block);
80 i : }
81 :
82 : const Block* block;
83 : AddressType type;
84 :
85 : // Ideally these would be in a union but because they have non-trivial
86 : // constructors they are not allowed.
87 : AbsoluteAddress abs_addr;
88 : FileOffsetAddress file_addr;
89 : RelativeAddress rel_addr;
90 :
91 : private:
92 : DISALLOW_COPY_AND_ASSIGN(BlockInfo);
93 : };
94 :
95 : } // namespace
96 :
97 : // Pretty prints a BlockInfo to an ostream. This has to be outside of any
98 : // namespaces so that operator<< is found properly.
99 i : std::ostream& operator<<(std::ostream& os, const BlockInfo& bi) {
100 : os << "Block(id=" << bi.block->id() << ", name=\"" << bi.block->name()
101 i : << "\", size=" << bi.block->size();
102 i : if (bi.type != BlockInfo::kNoAddress) {
103 i : os << ", address=";
104 i : switch (bi.type) {
105 : case BlockInfo::kAbsoluteAddress: {
106 i : os << bi.abs_addr;
107 i : break;
108 : }
109 : case BlockInfo::kFileOffsetAddress: {
110 i : os << bi.file_addr;
111 i : break;
112 : }
113 : case BlockInfo::kRelativeAddress: {
114 i : os << bi.rel_addr;
115 : break;
116 : }
117 : default: break;
118 : }
119 : }
120 i : os << ")";
121 i : return os;
122 i : }
123 :
124 : namespace pe {
125 :
126 : // An intermediate reference representation used while parsing PE blocks.
127 : // This is necessary because at that point we haven't yet chunked the whole
128 : // image into blocks thus some references cannot be resolved.
129 : struct NewDecomposer::IntermediateReference {
130 : RelativeAddress src_addr;
131 : BlockGraph::ReferenceType type;
132 : BlockGraph::Size size;
133 : RelativeAddress dst_addr;
134 : };
135 :
136 : namespace {
137 :
138 : using base::win::ScopedBstr;
139 : using base::win::ScopedComPtr;
140 : using builder::Callback;
141 : using builder::Opt;
142 : using builder::Or;
143 : using builder::Seq;
144 : using builder::Star;
145 :
146 : typedef BlockGraph::BlockType BlockType;
147 : typedef BlockGraph::Offset Offset;
148 : typedef BlockGraph::Reference Reference;
149 : typedef BlockGraph::ReferenceType ReferenceType;
150 : typedef core::AddressRange<RelativeAddress, size_t> RelativeRange;
151 : typedef NewDecomposer::IntermediateReference IntermediateReference;
152 : typedef NewDecomposer::IntermediateReferences IntermediateReferences;
153 : typedef pcrecpp::RE RE;
154 : typedef std::vector<OMAP> OMAPs;
155 : typedef std::vector<pdb::PdbFixup> PdbFixups;
156 :
157 : const char kJumpTable[] = "<jump-table>";
158 : const char kCaseTable[] = "<case-table>";
159 :
160 : // The MS linker pads between code blocks with int3s.
161 : static const uint8 kInt3 = 0xCC;
162 : static const size_t kPointerSize = BlockGraph::Reference::kMaximumSize;
163 :
164 : // Some helper functions for testing ranges.
// Returns true if @p value lies in the half-open range
// [@p lower_bound_incl, @p lower_bound_incl + @p length_excl).
//
// Both bounds are converted to T1 and every comparison is carried out in T1.
// The upper bound must not be converted back to T2: when T2 is narrower than
// T1 that would truncate the bound and produce wrong answers.
template<typename T1, typename T2, typename T3>
bool InRange(T1 value, T2 lower_bound_incl, T3 length_excl) {
  T1 lower_bound = static_cast<T1>(lower_bound_incl);
  T1 upper_bound_excl = lower_bound + length_excl;
  return lower_bound <= value && value < upper_bound_excl;
}
// Returns true if @p value lies in the closed range
// [@p lower_bound_incl, @p lower_bound_incl + @p length_incl]. The bounds are
// converted to T1 before comparing.
template<typename T1, typename T2, typename T3>
bool InRangeIncl(T1 value, T2 lower_bound_incl, T3 length_incl) {
  T1 first = static_cast<T1>(lower_bound_incl);
  T1 last = first + length_incl;
  if (!(first <= value))
    return false;
  return value <= last;
}
177 :
178 : bool InitializeDia(const PEFile& image_file,
179 : const base::FilePath& pdb_path,
180 : IDiaDataSource** dia_source,
181 : IDiaSession** dia_session,
182 E : IDiaSymbol** global) {
183 E : DCHECK_EQ(reinterpret_cast<IDiaDataSource*>(NULL), *dia_source);
184 E : DCHECK_EQ(reinterpret_cast<IDiaSession*>(NULL), *dia_session);
185 E : DCHECK_EQ(reinterpret_cast<IDiaSymbol*>(NULL), *global);
186 :
187 E : if (!CreateDiaSource(dia_source))
188 i : return false;
189 E : DCHECK_NE(reinterpret_cast<IDiaDataSource*>(NULL), *dia_source);
190 :
191 : // We create the session using the PDB file directly, as we've already
192 : // validated that it matches the module.
193 E : if (!CreateDiaSession(pdb_path, *dia_source, dia_session))
194 i : return false;
195 E : DCHECK_NE(reinterpret_cast<IDiaSession*>(NULL), *dia_session);
196 :
197 E : HRESULT hr = (*dia_session)->get_globalScope(global);
198 E : if (hr != S_OK) {
199 i : LOG(ERROR) << "Failed to get the DIA global scope: "
200 : << com::LogHr(hr) << ".";
201 i : return false;
202 : }
203 :
204 E : return true;
205 E : }
206 :
207 : // Given a compiland, returns its compiland details.
208 : bool GetCompilandDetailsForCompiland(IDiaSymbol* compiland,
209 E : IDiaSymbol** compiland_details) {
210 E : DCHECK_NE(reinterpret_cast<IDiaSymbol*>(NULL), compiland);
211 E : DCHECK_NE(reinterpret_cast<IDiaSymbol**>(NULL), compiland_details);
212 E : DCHECK(IsSymTag(compiland, SymTagCompiland));
213 E : DCHECK_EQ(reinterpret_cast<IDiaSymbol*>(NULL), *compiland_details);
214 :
215 : // Get the enumeration of compiland details.
216 E : ScopedComPtr<IDiaEnumSymbols> enum_symbols;
217 : HRESULT hr = compiland->findChildren(SymTagCompilandDetails, NULL, 0,
218 E : enum_symbols.Receive());
219 E : DCHECK_EQ(S_OK, hr);
220 :
221 : // We expect there to be compiland details. For compilands built by
222 : // non-standard toolchains, there usually aren't any.
223 E : LONG count = 0;
224 E : hr = enum_symbols->get_Count(&count);
225 E : DCHECK_EQ(S_OK, hr);
226 E : if (count == 0) {
227 : // We don't log here because we see this quite often.
228 i : return false;
229 : }
230 :
231 : // We do sometimes encounter more than one compiland detail. In fact, for
232 : // import and export tables we get one compiland detail per table entry.
233 : // They are all marked as having been generated by the linker, so using the
234 : // first one is sufficient.
235 :
236 : // Get the compiland details.
237 E : ULONG fetched = 0;
238 E : hr = enum_symbols->Next(1, compiland_details, &fetched);
239 E : DCHECK_EQ(S_OK, hr);
240 E : DCHECK_EQ(1u, fetched);
241 :
242 E : return true;
243 E : }
244 :
// Stores information regarding known compilers.
struct KnownCompilerInfo {
  // The compiler name to match. It points at a string literal, hence it is a
  // pointer to const.
  const wchar_t* compiler_name;
  // Whether compilands built by this compiler are considered supported.
  bool supported;
};

// A list of known compilers, and their status as being supported or not. The
// table is read-only.
const KnownCompilerInfo kKnownCompilerInfos[] = {
  { L"Microsoft (R) Macro Assembler", false },
  { L"Microsoft (R) Optimizing Compiler", true },
  { L"Microsoft (R) LINK", false }
};
257 :
258 : // Given a compiland, determines whether the compiler used is one of those that
259 : // we whitelist.
260 E : bool IsBuiltBySupportedCompiler(IDiaSymbol* compiland) {
261 E : DCHECK_NE(reinterpret_cast<IDiaSymbol*>(NULL), compiland);
262 E : DCHECK(IsSymTag(compiland, SymTagCompiland));
263 :
264 E : ScopedComPtr<IDiaSymbol> compiland_details;
265 : if (!GetCompilandDetailsForCompiland(compiland,
266 E : compiland_details.Receive())) {
267 : // If the compiland has no compiland details we assume the compiler is not
268 : // supported.
269 i : ScopedBstr compiland_name;
270 i : if (compiland->get_name(compiland_name.Receive()) == S_OK) {
271 i : VLOG(1) << "Compiland has no compiland details: "
272 : << com::ToString(compiland_name);
273 : }
274 i : return false;
275 : }
276 E : DCHECK_NE(reinterpret_cast<IDiaSymbol*>(NULL), compiland_details.get());
277 :
278 : // Get the compiler name.
279 E : ScopedBstr compiler_name;
280 E : HRESULT hr = compiland_details->get_compilerName(compiler_name.Receive());
281 E : DCHECK_EQ(S_OK, hr);
282 :
283 : // Check the compiler name against the list of known compilers.
284 E : for (size_t i = 0; i < arraysize(kKnownCompilerInfos); ++i) {
285 E : if (::wcscmp(kKnownCompilerInfos[i].compiler_name, compiler_name) == 0) {
286 E : return kKnownCompilerInfos[i].supported;
287 : }
288 E : }
289 :
290 : // Anything we don't explicitly know about is not supported.
291 E : VLOG(1) << "Encountered unknown compiler: " << compiler_name;
292 E : return false;
293 E : }
294 :
295 : // Adds an intermediate reference to the provided vector. The vector is
296 : // specified as the first parameter (in slight violation of our coding
297 : // standards) because this function is intended to be used by Bind.
298 : bool AddIntermediateReference(IntermediateReferences* references,
299 : RelativeAddress src_addr,
300 : ReferenceType type,
301 : BlockGraph::Size size,
302 E : RelativeAddress dst_addr) {
303 E : DCHECK_NE(reinterpret_cast<IntermediateReferences*>(NULL), references);
304 E : IntermediateReference ref = { src_addr, type, size, dst_addr };
305 E : references->push_back(ref);
306 E : return true;
307 E : }
308 :
309 : // Create a reference as specified. Ignores existing references if they are of
310 : // the exact same type.
311 : bool CreateReference(RelativeAddress src_addr,
312 : BlockGraph::Size ref_size,
313 : ReferenceType ref_type,
314 : RelativeAddress base_addr,
315 : RelativeAddress dst_addr,
316 E : BlockGraph::AddressSpace* image) {
317 E : DCHECK_NE(reinterpret_cast<BlockGraph::AddressSpace*>(NULL), image);
318 :
319 : // Get the source block and offset, and ensure that the reference fits
320 : // within it.
321 E : Block* src_block = image->GetBlockByAddress(src_addr);
322 E : if (src_block == NULL) {
323 i : LOG(ERROR) << "Unable to find block for reference originating at "
324 : << src_addr << ".";
325 i : return false;
326 : }
327 E : RelativeAddress src_block_addr;
328 E : CHECK(image->GetAddressOf(src_block, &src_block_addr));
329 E : Offset src_block_offset = src_addr - src_block_addr;
330 E : if (src_block_offset + ref_size > src_block->size()) {
331 i : LOG(ERROR) << "Reference originating at " << src_addr
332 : << " extends beyond block \"" << src_block->name() << "\".";
333 i : return false;
334 : }
335 :
336 : // Get the destination block and offset.
337 E : Block* dst_block = image->GetBlockByAddress(base_addr);
338 E : if (dst_block == NULL) {
339 i : LOG(ERROR) << "Unable to find block for reference pointing at "
340 : << base_addr << ".";
341 i : return false;
342 : }
343 E : RelativeAddress dst_block_addr;
344 E : CHECK(image->GetAddressOf(dst_block, &dst_block_addr));
345 E : Offset base = base_addr - dst_block_addr;
346 E : Offset offset = dst_addr - dst_block_addr;
347 :
348 E : Reference ref(ref_type, ref_size, dst_block, offset, base);
349 :
350 : // Check if a reference already exists at this offset.
351 : Block::ReferenceMap::const_iterator ref_it =
352 E : src_block->references().find(src_block_offset);
353 E : if (ref_it != src_block->references().end()) {
354 : // If an identical reference already exists then we're done.
355 E : if (ref == ref_it->second)
356 E : return true;
357 i : LOG(ERROR) << "Block \"" << src_block->name() << "\" has a conflicting "
358 : << "reference at offset " << src_block_offset << ".";
359 i : return false;
360 : }
361 :
362 E : CHECK(src_block->SetReference(src_block_offset, ref));
363 :
364 E : return true;
365 E : }
366 :
367 : // Loads FIXUP and OMAP_FROM debug streams.
368 : bool LoadDebugStreams(IDiaSession* dia_session,
369 : PdbFixups* pdb_fixups,
370 E : OMAPs* omap_from) {
371 E : DCHECK_NE(reinterpret_cast<IDiaSession*>(NULL), dia_session);
372 E : DCHECK_NE(reinterpret_cast<PdbFixups*>(NULL), pdb_fixups);
373 E : DCHECK_NE(reinterpret_cast<OMAPs*>(NULL), omap_from);
374 :
375 : // Load the fixups. These must exist.
376 : SearchResult search_result = FindAndLoadDiaDebugStreamByName(
377 E : kFixupDiaDebugStreamName, dia_session, pdb_fixups);
378 E : if (search_result != kSearchSucceeded) {
379 i : if (search_result == kSearchFailed) {
380 i : LOG(ERROR) << "PDB file does not contain a FIXUP stream. Module must be "
381 : "linked with '/PROFILE' or '/DEBUGINFO:FIXUP' flag.";
382 : }
383 i : return false;
384 : }
385 :
386 : // Load the omap_from table. It is not necessary that one exist.
387 : search_result = FindAndLoadDiaDebugStreamByName(
388 E : kOmapFromDiaDebugStreamName, dia_session, omap_from);
389 E : if (search_result == kSearchErrored) {
390 i : LOG(ERROR) << "Error trying to read " << kOmapFromDiaDebugStreamName
391 : << " stream.";
392 i : return false;
393 : }
394 :
395 E : return true;
396 E : }
397 :
398 : bool GetFixupDestinationAndType(const PEFile& image_file,
399 : const pdb::PdbFixup& fixup,
400 : RelativeAddress* dst_addr,
401 E : ReferenceType* ref_type) {
402 E : DCHECK_NE(reinterpret_cast<RelativeAddress*>(NULL), dst_addr);
403 E : DCHECK_NE(reinterpret_cast<ReferenceType*>(NULL), ref_type);
404 :
405 E : RelativeAddress src_addr(fixup.rva_location);
406 :
407 : // Get the destination displacement from the actual image itself. We only see
408 : // fixups for 32-bit references.
409 E : uint32 data = 0;
410 E : if (!image_file.ReadImage(src_addr, &data, sizeof(data))) {
411 i : LOG(ERROR) << "Unable to read image data for fixup with source address "
412 : << "at" << src_addr << ".";
413 i : return false;
414 : }
415 :
416 : // Translate this to a relative displacement value.
417 E : switch (fixup.type) {
418 : case pdb::PdbFixup::TYPE_ABSOLUTE: {
419 E : *ref_type = BlockGraph::ABSOLUTE_REF;
420 E : *dst_addr = RelativeAddress(image_file.AbsToRelDisplacement(data));
421 E : break;
422 : }
423 :
424 : case pdb::PdbFixup::TYPE_PC_RELATIVE: {
425 E : *ref_type = BlockGraph::PC_RELATIVE_REF;
426 E : *dst_addr = RelativeAddress(fixup.rva_location) + sizeof(data) + data;
427 E : break;
428 : }
429 :
430 : case pdb::PdbFixup::TYPE_RELATIVE: {
431 E : *ref_type = BlockGraph::RELATIVE_REF;
432 E : *dst_addr = RelativeAddress(data);
433 E : break;
434 : }
435 :
436 : default: {
437 i : LOG(ERROR) << "Unexpected fixup type (" << fixup.type << ").";
438 i : return false;
439 : }
440 : }
441 :
442 E : return true;
443 E : }
444 :
445 : // Creates references from the @p pdb_fixups (translating them via the
446 : // provided @p omap_from information if it is not empty), all while removing the
447 : // corresponding entries from @p reloc_set. If @p reloc_set is not empty after
448 : // this then the PDB fixups are out of sync with the image and we are unable to
449 : // safely decompose.
450 : //
451 : // @note This function deliberately ignores fixup information for the resource
452 : // section. This is because chrome.dll gets modified by a manifest tool
453 : // which doesn't update the FIXUPs in the corresponding PDB. They are thus
454 : // out of sync. Even if they were in sync this doesn't harm us as we have no
455 : // need to reach in and modify resource data.
456 : bool CreateReferencesFromFixupsImpl(
457 : const PEFile& image_file,
458 : const PdbFixups& pdb_fixups,
459 : const OMAPs& omap_from,
460 : PEFile::RelocSet* reloc_set,
461 E : BlockGraph::AddressSpace* image) {
462 E : DCHECK_NE(reinterpret_cast<PEFile::RelocSet*>(NULL), reloc_set);
463 E : DCHECK_NE(reinterpret_cast<BlockGraph::AddressSpace*>(NULL), image);
464 :
465 E : bool have_omap = !omap_from.empty();
466 E : size_t fixups_used = 0;
467 :
468 : // The resource section in Chrome is modified post-link by a tool that adds a
469 : // manifest to it. This causes all of the fixups in the resource section (and
470 : // anything beyond it) to be invalid. As long as the resource section is the
471 : // last section in the image, this is not a problem (we can safely ignore the
472 : // .rsrc fixups, which we know how to parse without them). However, if there
473 : // is a section after the resource section, things will have been shifted
474 : // and potentially crucial fixups will be invalid.
475 : const IMAGE_SECTION_HEADER* rsrc_header = image_file.GetSectionHeader(
476 E : kResourceSectionName);
477 E : RelativeAddress rsrc_start(0xffffffff);
478 E : RelativeAddress rsrc_end(0xffffffff);
479 E : if (rsrc_header != NULL) {
480 E : rsrc_start = RelativeAddress(rsrc_header->VirtualAddress);
481 E : rsrc_end = rsrc_start + rsrc_header->Misc.VirtualSize;
482 : }
483 :
484 : // Ensure the fixups are all valid.
485 E : for (size_t i = 0; i < pdb_fixups.size(); ++i) {
486 E : if (!pdb_fixups[i].ValidHeader()) {
487 i : LOG(ERROR) << "Unknown fixup header: "
488 : << base::StringPrintf("0x%08X.", pdb_fixups[i].header);
489 i : return false;
490 : }
491 :
492 : // For now, we skip any offset fixups. We've only seen this in the context
493 : // of TLS data access, and we don't mess with TLS structures.
494 E : if (pdb_fixups[i].is_offset())
495 E : continue;
496 :
497 : // All fixups we handle should be full size pointers.
498 E : DCHECK_EQ(Reference::kMaximumSize, pdb_fixups[i].size());
499 :
500 : // Get the original addresses, and map them through OMAP information.
501 : // Normally DIA takes care of this for us, but there is no API for
502 : // getting DIA to give us FIXUP information, so we have to do it manually.
503 E : RelativeAddress src_addr(pdb_fixups[i].rva_location);
504 E : RelativeAddress base_addr(pdb_fixups[i].rva_base);
505 E : if (have_omap) {
506 i : src_addr = pdb::TranslateAddressViaOmap(omap_from, src_addr);
507 i : base_addr = pdb::TranslateAddressViaOmap(omap_from, base_addr);
508 : }
509 :
510 : // If the reference originates beyond the .rsrc section then we can't
511 : // trust it.
512 E : if (src_addr >= rsrc_end) {
513 i : LOG(ERROR) << "Found fixup originating beyond .rsrc section.";
514 i : return false;
515 : }
516 :
517 : // If the reference originates from a part of the .rsrc section, ignore it.
518 E : if (src_addr >= rsrc_start)
519 E : continue;
520 :
521 : // Get the relative address/displacement of the fixup. This logs on failure.
522 E : RelativeAddress dst_addr;
523 E : ReferenceType type = BlockGraph::RELATIVE_REF;
524 : if (!GetFixupDestinationAndType(image_file, pdb_fixups[i], &dst_addr,
525 E : &type)) {
526 i : return false;
527 : }
528 :
529 : // Finally, create the reference. This logs verbosely for us on failure.
530 : if (!CreateReference(src_addr, Reference::kMaximumSize, type, base_addr,
531 E : dst_addr, image)) {
532 i : return false;
533 : }
534 :
535 : // Remove this reference from the relocs.
536 E : PEFile::RelocSet::iterator reloc_it = reloc_set->find(src_addr);
537 E : if (reloc_it != reloc_set->end()) {
538 : // We should only find a reloc if the fixup was of absolute type.
539 E : if (type != BlockGraph::ABSOLUTE_REF) {
540 i : LOG(ERROR) << "Found a reloc corresponding to a non-absolute fixup.";
541 i : return false;
542 : }
543 :
544 E : reloc_set->erase(reloc_it);
545 : }
546 :
547 E : ++fixups_used;
548 E : }
549 :
550 E : return true;
551 E : }
552 :
553 E : bool GetDataSymbolSize(IDiaSymbol* symbol, size_t* length) {
554 E : DCHECK_NE(reinterpret_cast<IDiaSymbol*>(NULL), symbol);
555 E : DCHECK_NE(reinterpret_cast<size_t*>(NULL), length);
556 :
557 E : *length = 0;
558 E : ScopedComPtr<IDiaSymbol> type;
559 E : HRESULT hr = symbol->get_type(type.Receive());
560 : // This happens if the symbol has no type information.
561 E : if (hr == S_FALSE)
562 E : return true;
563 E : if (hr != S_OK) {
564 i : LOG(ERROR) << "Failed to get type symbol: " << com::LogHr(hr) << ".";
565 i : return false;
566 : }
567 :
568 E : ULONGLONG ull_length = 0;
569 E : hr = type->get_length(&ull_length);
570 E : if (hr != S_OK) {
571 i : LOG(ERROR) << "Failed to retrieve type length properties: "
572 : << com::LogHr(hr) << ".";
573 i : return false;
574 : }
575 E : DCHECK_LE(ull_length, 0xFFFFFFFF);
576 E : *length = static_cast<size_t>(ull_length);
577 :
578 E : return true;
579 E : }
580 :
581 : bool ScopeSymTagToLabelProperties(enum SymTagEnum sym_tag,
582 : size_t scope_count,
583 : BlockGraph::LabelAttributes* attr,
584 E : std::string* name) {
585 E : DCHECK_NE(reinterpret_cast<BlockGraph::LabelAttributes*>(NULL), attr);
586 E : DCHECK_NE(reinterpret_cast<std::string*>(NULL), name);
587 :
588 E : switch (sym_tag) {
589 : case SymTagFuncDebugStart: {
590 E : *attr = BlockGraph::DEBUG_START_LABEL;
591 E : *name = "<debug-start>";
592 E : return true;
593 : }
594 : case SymTagFuncDebugEnd: {
595 E : *attr = BlockGraph::DEBUG_END_LABEL;
596 E : *name = "<debug-end>";
597 E : return true;
598 : }
599 : case SymTagBlock: {
600 E : *attr = BlockGraph::SCOPE_START_LABEL;
601 E : *name = base::StringPrintf("<scope-start-%d>", scope_count);
602 E : return true;
603 : }
604 : default:
605 i : return false;
606 : }
607 i : return false;
608 E : }
609 :
610 : // Reads the linker module symbol stream from the given PDB file. This should
611 : // always exist as the last module.
612 : scoped_refptr<pdb::PdbStream> GetLinkerSymbolStream(
613 E : const pdb::PdbFile& pdb_file) {
614 : static const char kLinkerModuleName[] = "* Linker *";
615 :
616 : // Get the DBI stream.
617 : scoped_refptr<pdb::PdbStream> stream =
618 E : pdb_file.GetStream(pdb::kDbiStream);
619 E : if (stream.get() == NULL) {
620 i : LOG(ERROR) << "PDB does not contain a DBI stream.";
621 i : return false;
622 : }
623 :
624 : // Read the entire thing into memory before parsing it. This makes parsing
625 : // much faster.
626 E : scoped_refptr<pdb::PdbByteStream> dbi_stream(new pdb::PdbByteStream());
627 E : if (!dbi_stream->Init(stream)) {
628 i : LOG(ERROR) << "Failed to read DBI stream.";
629 : }
630 :
631 : // Parse the DBI stream.
632 E : pdb::DbiStream dbi;
633 E : if (!dbi.Read(dbi_stream.get())) {
634 i : LOG(ERROR) << "Unable to parse DBI stream.";
635 i : return false;
636 : }
637 :
638 E : if (dbi.modules().empty()) {
639 i : LOG(ERROR) << "DBI stream contains no modules.";
640 i : return false;
641 : }
642 :
643 : // The last module has always been observed to be the linker module.
644 E : const pdb::DbiModuleInfo& linker = dbi.modules().back();
645 E : if (linker.module_name() != kLinkerModuleName) {
646 i : LOG(ERROR) << "Last module is not the linker module.";
647 i : return false;
648 : }
649 :
650 : // Get the symbol stream.
651 E : stream = pdb_file.GetStream(linker.module_info_base().stream);
652 E : if (stream.get() == NULL) {
653 i : LOG(ERROR) << "Unable to open linker symbol stream.";
654 i : return false;
655 : }
656 :
657 : // Also read it entirely into memory for faster parsing.
658 E : scoped_refptr<pdb::PdbByteStream> symbols(new pdb::PdbByteStream());
659 E : if (!symbols->Init(stream)) {
660 i : LOG(ERROR) << "Failed to read linker symbol stream.";
661 : }
662 :
663 E : return symbols;
664 E : }
665 :
666 : // Parses a symbol from a PDB symbol stream. The @p buffer is populated with the
667 : // data and upon success this returns the symbol directly cast onto the
668 : // @p buffer data. On failure this returns NULL.
669 : template<typename SymbolType>
670 : const SymbolType* ParseSymbol(uint16 symbol_length,
671 : pdb::PdbStream* stream,
672 E : std::vector<uint8>* buffer) {
673 E : DCHECK_NE(reinterpret_cast<pdb::PdbStream*>(NULL), stream);
674 E : DCHECK_NE(reinterpret_cast<std::vector<uint8>*>(NULL), buffer);
675 :
676 E : buffer->clear();
677 :
678 E : if (symbol_length < sizeof(SymbolType)) {
679 i : LOG(ERROR) << "Symbol too small for casting.";
680 i : return NULL;
681 : }
682 :
683 E : if (!stream->Read(buffer, symbol_length)) {
684 i : LOG(ERROR) << "Failed to read symbol.";
685 i : return NULL;
686 : }
687 :
688 E : return reinterpret_cast<const SymbolType*>(buffer->data());
689 E : }
690 :
691 : // If the given run of bytes consists of a single value repeated, returns that
692 : // value. Otherwise, returns -1.
693 E : int RepeatedValue(const uint8* data, size_t size) {
694 E : DCHECK_NE(reinterpret_cast<uint8*>(NULL), data);
695 E : const uint8* data_end = data + size;
696 E : uint8 value = *(data++);
697 E : for (; data < data_end; ++data) {
698 E : if (*data != value)
699 i : return -1;
700 E : }
701 E : return value;
702 E : }
703 :
704 : // Searches through the given image layout graph, and labels blocks that are
705 : // simply padding blocks.
706 E : bool FindPaddingBlocks(ImageLayout* image_layout) {
707 E : DCHECK_NE(reinterpret_cast<ImageLayout*>(NULL), image_layout);
708 :
709 E : BlockGraph* block_graph = image_layout->blocks.graph();
710 E : DCHECK_NE(reinterpret_cast<BlockGraph*>(NULL), block_graph);
711 :
712 : BlockGraph::BlockMap::iterator block_it =
713 E : block_graph->blocks_mutable().begin();
714 E : for (; block_it != block_graph->blocks_mutable().end(); ++block_it) {
715 E : Block& block = block_it->second;
716 :
717 : // Padding blocks must not have any symbol information: no labels,
718 : // no references, no referrers, and they must be a gap block.
719 : if (block.labels().size() != 0 ||
720 : block.references().size() != 0 ||
721 : block.referrers().size() != 0 ||
722 E : (block.attributes() & BlockGraph::GAP_BLOCK) == 0) {
723 E : continue;
724 : }
725 :
726 E : switch (block.type()) {
727 : // Code blocks should be fully defined and consist of only int3s.
728 : case BlockGraph::CODE_BLOCK: {
729 : if (block.data_size() != block.size() ||
730 E : RepeatedValue(block.data(), block.data_size()) != kInt3)
731 i : continue;
732 E : break;
733 : }
734 :
735 : // Data blocks should be uninitialized or have fully defined data
736 : // consisting only of zeros.
737 : default: {
738 E : DCHECK_EQ(BlockGraph::DATA_BLOCK, block.type());
739 E : if (block.data_size() == 0) // Uninitialized data blocks are padding.
740 E : break;
741 : if (block.data_size() != block.size() ||
742 E : RepeatedValue(block.data(), block.data_size()) != 0)
743 i : continue;
744 : }
745 : }
746 :
747 : // If we fall through to this point, then the block is a padding block.
748 E : block.set_attribute(BlockGraph::PADDING_BLOCK);
749 E : }
750 :
751 E : return true;
752 E : }
753 :
754 E : bool CodeBlockHasAlignedJumpTables(const Block* block) {
755 E : DCHECK_NE(reinterpret_cast<Block*>(NULL), block);
756 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
757 :
758 : // Iterate over the labels of this block looking for jump tables.
759 E : bool has_jump_tables = false;
760 : Block::LabelMap::const_iterator label_it =
761 E : block->labels().begin();
762 E : for (; label_it != block->labels().end(); ++label_it) {
763 E : if (!label_it->second.has_attributes(BlockGraph::JUMP_TABLE_LABEL))
764 E : continue;
765 :
766 E : has_jump_tables = true;
767 :
768 : // If the jump table is misaligned we can return false immediately.
769 E : if (label_it->first % kPointerSize != 0)
770 i : return false;
771 E : }
772 :
773 E : return has_jump_tables;
774 E : }
775 :
776 E : bool AlignCodeBlocksWithJumpTables(ImageLayout* image_layout) {
777 E : DCHECK_NE(reinterpret_cast<ImageLayout*>(NULL), image_layout);
778 :
779 : BlockGraph::AddressSpace::RangeMapConstIter block_it =
780 E : image_layout->blocks.begin();
781 E : for (; block_it != image_layout->blocks.end(); ++block_it) {
782 E : Block* block = block_it->second;
783 :
784 : // We only care about code blocks that are already aligned 0 mod 4 but
785 : // whose explicit alignment is currently less than that.
786 E : if (block->type() != BlockGraph::CODE_BLOCK)
787 E : continue;
788 E : if (block->alignment() >= kPointerSize)
789 i : continue;
790 E : if (block_it->first.start().value() % kPointerSize != 0)
791 E : continue;
792 :
793 : // Inspect them to see if they have aligned jump tables. If they do,
794 : // set the alignment of the block itself.
795 E : if (CodeBlockHasAlignedJumpTables(block_it->second))
796 E : block->set_alignment(kPointerSize);
797 E : }
798 :
799 E : return true;
800 E : }
801 :
802 : void GuessDataBlockAlignment(uint32 max_alignment,
803 : RelativeAddress block_rva,
804 E : Block* block) {
805 E : DCHECK_NE(static_cast<Block*>(NULL), block);
806 E : DCHECK_EQ(BlockGraph::DATA_BLOCK, block->type());
807 E : uint32 alignment = block_rva.GetAlignment();
808 : // Cap the alignment.
809 E : if (alignment > max_alignment)
810 E : alignment = max_alignment;
811 E : block->set_alignment(alignment);
812 E : }
813 :
814 : void GuessDataBlockAlignments(const PEFile& pe_file,
815 E : ImageLayout* image_layout) {
816 E : DCHECK_NE(static_cast<ImageLayout*>(NULL), image_layout);
817 :
818 E : uint32 max_alignment = pe_file.nt_headers()->OptionalHeader.SectionAlignment;
819 :
820 E : BlockGraph::AddressSpace::RangeMapConstIter it = image_layout->blocks.begin();
821 E : for (; it != image_layout->blocks.end(); ++it) {
822 E : RelativeAddress block_rva = it->first.start();
823 E : BlockGraph::Block* block = it->second;
824 E : if (block->type() != BlockGraph::DATA_BLOCK)
825 E : continue;
826 E : GuessDataBlockAlignment(max_alignment, block_rva, block);
827 E : }
828 E : }
829 :
830 : } // namespace
831 :
832 : // We use ", " as a separator between symbol names. We sometimes see commas
833 : // in symbol names but do not see whitespace. Thus, this provides a useful
834 : // separator that is also human friendly to read.
835 : const char NewDecomposer::kLabelNameSep[] = ", ";
836 :
837 : // This is by CreateBlocksFromCoffGroups to communicate shared state to
838 : // VisitLinkerSymbol via the VisitSymbols helper function.
839 : struct NewDecomposer::VisitLinkerSymbolContext {
840 : int current_group_index;
841 : std::string current_group_prefix;
842 : RelativeAddress current_group_start;
843 :
844 : // These are the set of patterns that indicate bracketing groups. They
845 : // should match both the opening and the closing symbol, and have at least
846 : // one match group returning the common prefix.
847 : std::vector<RE> bracketing_groups;
848 :
849 E : VisitLinkerSymbolContext() : current_group_index(-1) {
850 : // Matches groups like: .CRT$XCA -> .CRT$XCZ
851 E : bracketing_groups.push_back(RE("(\\.CRT\\$X.)[AZ]"));
852 : // Matches groups like: .rtc$IAA -> .rtc$IZZ
853 E : bracketing_groups.push_back(RE("(\\.rtc\\$.*)(AA|ZZ)"));
854 : // Matches exactly: ATL$__a -> ATL$__z
855 E : bracketing_groups.push_back(RE("(ATL\\$__)[az]"));
856 : // Matches exactly: .tls -> .tls$ZZZ
857 E : bracketing_groups.push_back(RE("(\\.tls)(\\$ZZZ)?"));
858 E : }
859 :
860 : private:
861 : DISALLOW_COPY_AND_ASSIGN(VisitLinkerSymbolContext);
862 : };
863 :
864 : NewDecomposer::NewDecomposer(const PEFile& image_file)
865 : : image_file_(image_file), image_layout_(NULL), image_(NULL),
866 E : current_block_(NULL), current_scope_count_(0) {
867 E : }
868 :
869 E : bool NewDecomposer::Decompose(ImageLayout* image_layout) {
870 E : DCHECK_NE(reinterpret_cast<ImageLayout*>(NULL), image_layout);
871 :
872 : // The temporaries should be NULL.
873 E : DCHECK_EQ(reinterpret_cast<ImageLayout*>(NULL), image_layout_);
874 E : DCHECK_EQ(reinterpret_cast<BlockGraph::AddressSpace*>(NULL), image_);
875 :
876 : // We start by finding the PDB path.
877 E : if (!FindAndValidatePdbPath())
878 E : return false;
879 E : DCHECK(!pdb_path_.empty());
880 :
881 : // Load the serialized block-graph from the PDB if it exists. This allows
882 : // round-trip decomposition.
883 E : bool stream_exists = false;
884 : if (LoadBlockGraphFromPdb(
885 E : pdb_path_, image_file_, image_layout, &stream_exists)) {
886 E : return true;
887 E : } else if (stream_exists) {
888 : // If the stream exists but hasn't been loaded we return an error. At this
889 : // point an error message has already been logged if there was one.
890 i : return false;
891 : }
892 :
893 : // At this point a full decomposition needs to be performed.
894 E : image_layout_ = image_layout;
895 E : image_ = &(image_layout->blocks);
896 E : bool success = DecomposeImpl();
897 E : image_layout_ = NULL;
898 E : image_ = NULL;
899 :
900 E : return success;
901 E : }
902 :
903 E : bool NewDecomposer::FindAndValidatePdbPath() {
904 : // Manually find the PDB path if it is not specified.
905 E : if (pdb_path_.empty()) {
906 : if (!FindPdbForModule(image_file_.path(), &pdb_path_) ||
907 E : pdb_path_.empty()) {
908 i : LOG(ERROR) << "Unable to find PDB file for module: "
909 : << image_file_.path().value();
910 i : return false;
911 : }
912 : }
913 E : DCHECK(!pdb_path_.empty());
914 :
915 E : if (!file_util::PathExists(pdb_path_)) {
916 E : LOG(ERROR) << "Path not found: " << pdb_path_.value();
917 E : return false;
918 : }
919 :
920 E : if (!pe::PeAndPdbAreMatched(image_file_.path(), pdb_path_)) {
921 i : LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
922 : << "module \"" << image_file_.path().value() << "\".";
923 i : return false;
924 : }
925 :
926 E : return true;
927 E : }
928 :
929 : bool NewDecomposer::LoadBlockGraphFromPdbStream(
930 : const PEFile& image_file,
931 : pdb::PdbStream* block_graph_stream,
932 E : ImageLayout* image_layout) {
933 E : DCHECK_NE(reinterpret_cast<pdb::PdbStream*>(NULL), block_graph_stream);
934 E : DCHECK_NE(reinterpret_cast<ImageLayout*>(NULL), image_layout);
935 E : LOG(INFO) << "Reading block-graph and image layout from the PDB.";
936 :
937 : // Initialize an input archive pointing to the stream.
938 E : scoped_refptr<pdb::PdbByteStream> byte_stream = new pdb::PdbByteStream();
939 E : if (!byte_stream->Init(block_graph_stream))
940 i : return false;
941 E : DCHECK_NE(reinterpret_cast<pdb::PdbByteStream*>(NULL), byte_stream.get());
942 :
943 E : core::ScopedInStreamPtr pdb_in_stream;
944 : pdb_in_stream.reset(core::CreateByteInStream(
945 E : byte_stream->data(), byte_stream->data() + byte_stream->length()));
946 :
947 : // Read the header.
948 E : uint32 stream_version = 0;
949 E : unsigned char compressed = 0;
950 : if (!pdb_in_stream->Read(sizeof(stream_version),
951 : reinterpret_cast<core::Byte*>(&stream_version)) ||
952 : !pdb_in_stream->Read(sizeof(compressed),
953 E : reinterpret_cast<core::Byte*>(&compressed))) {
954 i : LOG(ERROR) << "Failed to read existing Syzygy block-graph stream header.";
955 i : return false;
956 : }
957 :
958 : // Check the stream version.
959 E : if (stream_version != pdb::kSyzygyBlockGraphStreamVersion) {
960 E : LOG(ERROR) << "PDB contains an unsupported Syzygy block-graph stream"
961 : << " version (got " << stream_version << ", expected "
962 : << pdb::kSyzygyBlockGraphStreamVersion << ").";
963 E : return false;
964 : }
965 :
966 : // If the stream is compressed insert the decompression filter.
967 E : core::InStream* in_stream = pdb_in_stream.get();
968 E : scoped_ptr<core::ZInStream> zip_in_stream;
969 E : if (compressed != 0) {
970 E : zip_in_stream.reset(new core::ZInStream(in_stream));
971 E : if (!zip_in_stream->Init()) {
972 i : LOG(ERROR) << "Unable to initialize ZInStream.";
973 i : return false;
974 : }
975 E : in_stream = zip_in_stream.get();
976 : }
977 :
978 : // Deserialize the image-layout.
979 E : core::NativeBinaryInArchive in_archive(in_stream);
980 E : block_graph::BlockGraphSerializer::Attributes attributes = 0;
981 : if (!LoadBlockGraphAndImageLayout(
982 E : image_file, &attributes, image_layout, &in_archive)) {
983 i : LOG(ERROR) << "Failed to deserialize block-graph and image layout.";
984 i : return false;
985 : }
986 :
987 E : return true;
988 E : }
989 :
990 : bool NewDecomposer::LoadBlockGraphFromPdb(const base::FilePath& pdb_path,
991 : const PEFile& image_file,
992 : ImageLayout* image_layout,
993 E : bool* stream_exists) {
994 E : DCHECK_NE(reinterpret_cast<ImageLayout*>(NULL), image_layout);
995 E : DCHECK_NE(reinterpret_cast<bool*>(NULL), stream_exists);
996 :
997 E : pdb::PdbFile pdb_file;
998 E : pdb::PdbReader pdb_reader;
999 E : if (!pdb_reader.Read(pdb_path, &pdb_file)) {
1000 i : LOG(ERROR) << "Unable to read the PDB named \"" << pdb_path.value()
1001 : << "\".";
1002 i : return NULL;
1003 : }
1004 :
1005 : // Try to get the block-graph stream from the PDB.
1006 E : scoped_refptr<pdb::PdbStream> block_graph_stream;
1007 : if (!pdb::LoadNamedStreamFromPdbFile(pdb::kSyzygyBlockGraphStreamName,
1008 : &pdb_file,
1009 : &block_graph_stream) ||
1010 E : block_graph_stream.get() == NULL) {
1011 E : *stream_exists = false;
1012 E : return false;
1013 : }
1014 E : if (block_graph_stream->length() == 0) {
1015 i : *stream_exists = false;
1016 i : LOG(WARNING) << "The block-graph stream is empty, ignoring it.";
1017 i : return false;
1018 : }
1019 :
1020 : // The PDB contains a block-graph stream, the block-graph and the image layout
1021 : // will be read from this stream.
1022 E : *stream_exists = true;
1023 : if (!LoadBlockGraphFromPdbStream(image_file, block_graph_stream.get(),
1024 E : image_layout)) {
1025 i : return false;
1026 : }
1027 :
1028 E : return true;
1029 E : }
1030 :
1031 E : bool NewDecomposer::DecomposeImpl() {
1032 : // Instantiate and initialize our Debug Interface Access session. This logs
1033 : // verbosely for us.
1034 E : ScopedComPtr<IDiaDataSource> dia_source;
1035 E : ScopedComPtr<IDiaSession> dia_session;
1036 E : ScopedComPtr<IDiaSymbol> global;
1037 : if (!InitializeDia(image_file_, pdb_path_, dia_source.Receive(),
1038 E : dia_session.Receive(), global.Receive())) {
1039 i : return false;
1040 : }
1041 :
1042 : // Copy the image headers to the layout.
1043 : CopySectionHeadersToImageLayout(
1044 : image_file_.nt_headers()->FileHeader.NumberOfSections,
1045 : image_file_.section_headers(),
1046 E : &(image_layout_->sections));
1047 :
1048 : // Create the sections in the underlying block-graph.
1049 E : if (!CopySectionInfoToBlockGraph(image_file_, image_->graph()))
1050 i : return false;
1051 :
1052 : // We scope the first few operations so that we don't keep the intermediate
1053 : // references around any longer than we have to.
1054 : {
1055 E : IntermediateReferences references;
1056 :
1057 : // First we parse out the PE blocks.
1058 E : VLOG(1) << "Parsing PE blocks.";
1059 E : if (!CreatePEImageBlocksAndReferences(&references))
1060 i : return false;
1061 :
1062 : // Now we parse the COFF group symbols from the linker's symbol stream.
1063 : // These indicate things like static initializers, which must stay together
1064 : // in a single block.
1065 E : VLOG(1) << "Parsing COFF groups.";
1066 E : if (!CreateBlocksFromCoffGroups())
1067 i : return false;
1068 :
1069 : // Next we parse out section contributions. Some of these may coincide with
1070 : // existing PE parsed blocks, but when they do we expect them to be exact
1071 : // collisions.
1072 E : VLOG(1) << "Parsing section contributions.";
1073 E : if (!CreateBlocksFromSectionContribs(dia_session.get()))
1074 i : return false;
1075 :
1076 : // Flesh out the rest of the image with gap blocks.
1077 E : VLOG(1) << "Creating gap blocks.";
1078 E : if (!CreateGapBlocks())
1079 i : return false;
1080 :
1081 : // Finalize the PE-parsed intermediate references.
1082 E : VLOG(1) << "Finalizing intermediate references.";
1083 E : if (!FinalizeIntermediateReferences(references))
1084 i : return false;
1085 E : }
1086 :
1087 : // Parse the fixups and use them to create references.
1088 E : VLOG(1) << "Parsing fixups.";
1089 E : if (!CreateReferencesFromFixups(dia_session.get()))
1090 i : return false;
1091 :
1092 : // Annotate the block-graph with symbol information.
1093 E : VLOG(1) << "Parsing symbols.";
1094 E : if (!ProcessSymbols(global.get()))
1095 i : return false;
1096 :
1097 : // Now, find and label any padding blocks.
1098 E : VLOG(1) << "Labeling padding blocks.";
1099 E : if (!FindPaddingBlocks(image_layout_))
1100 i : return false;
1101 :
1102 : // Set the alignment on code blocks with jump tables. This ensures that the
1103 : // jump tables remain aligned post-transform.
1104 E : VLOG(1) << "Calculating code block alignments.";
1105 E : if (!AlignCodeBlocksWithJumpTables(image_layout_))
1106 i : return false;
1107 :
1108 : // Set the alignment of data blocks. This is not precise in that it simply
1109 : // guesses the alignment based on the address of the block. Some instructions
1110 : // have alignment requirements on their data but unfortunately the PDB does
1111 : // not contain explicit alignment information.
1112 E : VLOG(1) << "Guessing data block alignments.";
1113 E : GuessDataBlockAlignments(image_file_, image_layout_);
1114 :
1115 E : return true;
1116 E : }
1117 :
1118 : bool NewDecomposer::CreatePEImageBlocksAndReferences(
1119 E : IntermediateReferences* references) {
1120 E : DCHECK_NE(reinterpret_cast<IntermediateReferences*>(NULL), references);
1121 :
1122 : PEFileParser::AddReferenceCallback add_reference(
1123 E : base::Bind(&AddIntermediateReference, base::Unretained(references)));
1124 E : PEFileParser parser(image_file_, image_, add_reference);
1125 E : PEFileParser::PEHeader header;
1126 E : if (!parser.ParseImage(&header)) {
1127 i : LOG(ERROR) << "Unable to parse PE image.";
1128 i : return false;
1129 : }
1130 :
1131 E : return true;
1132 E : }
1133 :
1134 E : bool NewDecomposer::CreateBlocksFromCoffGroups() {
1135 E : pdb::PdbFile pdb_file;
1136 E : pdb::PdbReader pdb_reader;
1137 E : if (!pdb_reader.Read(pdb_path_, &pdb_file)) {
1138 i : LOG(ERROR) << "Failed to load PDB: " << pdb_path_.value();
1139 i : return false;
1140 : }
1141 :
1142 E : scoped_refptr<pdb::PdbStream> symbols = GetLinkerSymbolStream(pdb_file);
1143 :
1144 : // Process the symbols in the linker module symbol stream.
1145 E : VisitLinkerSymbolContext context;
1146 : pdb::VisitSymbolsCallback callback = base::Bind(
1147 : &NewDecomposer::VisitLinkerSymbol,
1148 : base::Unretained(this),
1149 E : base::Unretained(&context));
1150 E : if (!pdb::VisitSymbols(callback, symbols->length(), true, symbols.get()))
1151 i : return false;
1152 :
1153 : // Bail if we did not encounter a closing bracketing symbol where one was
1154 : // expected.
1155 E : if (context.current_group_index != -1) {
1156 i : LOG(ERROR) << "Unable to close bracketed COFF group \""
1157 : << context.current_group_prefix << "\".";
1158 i : return false;
1159 : }
1160 :
1161 E : return true;
1162 E : }
1163 :
1164 E : bool NewDecomposer::CreateBlocksFromSectionContribs(IDiaSession* session) {
1165 E : ScopedComPtr<IDiaEnumSectionContribs> section_contribs;
1166 : SearchResult search_result = FindDiaTable(session,
1167 E : section_contribs.Receive());
1168 E : if (search_result != kSearchSucceeded) {
1169 i : if (search_result == kSearchFailed)
1170 i : LOG(ERROR) << "No section contribution table found.";
1171 i : return false;
1172 : }
1173 :
1174 E : size_t rsrc_id = image_file_.GetSectionIndex(kResourceSectionName);
1175 :
1176 E : LONG count = 0;
1177 E : if (section_contribs->get_Count(&count) != S_OK) {
1178 i : LOG(ERROR) << "Failed to get section contributions enumeration length.";
1179 i : return false;
1180 : }
1181 :
1182 E : for (LONG visited = 0; visited < count; ++visited) {
1183 E : ScopedComPtr<IDiaSectionContrib> section_contrib;
1184 E : ULONG fetched = 0;
1185 E : HRESULT hr = section_contribs->Next(1, section_contrib.Receive(), &fetched);
1186 : // The standard way to end an enumeration (according to the docs) is by
1187 : // returning S_FALSE and setting fetched to 0. We don't actually see this,
1188 : // but it wouldn't be an error if we did.
1189 E : if (hr == S_FALSE && fetched == 0)
1190 i : break;
1191 E : if (hr != S_OK) {
1192 i : LOG(ERROR) << "Failed to get DIA section contribution: "
1193 : << com::LogHr(hr) << ".";
1194 i : return false;
1195 : }
1196 : // We actually end up seeing S_OK and fetched == 0 when the enumeration
1197 : // terminates, which goes against the publishes documentations.
1198 E : if (fetched == 0)
1199 i : break;
1200 :
1201 E : DWORD rva = 0;
1202 E : DWORD length = 0;
1203 E : DWORD section_id = 0;
1204 E : BOOL code = FALSE;
1205 E : ScopedComPtr<IDiaSymbol> compiland;
1206 E : ScopedBstr bstr_compiland_name;
1207 : if ((hr = section_contrib->get_relativeVirtualAddress(&rva)) != S_OK ||
1208 : (hr = section_contrib->get_length(&length)) != S_OK ||
1209 : (hr = section_contrib->get_addressSection(§ion_id)) != S_OK ||
1210 : (hr = section_contrib->get_code(&code)) != S_OK ||
1211 : (hr = section_contrib->get_compiland(compiland.Receive())) != S_OK ||
1212 E : (hr = compiland->get_name(bstr_compiland_name.Receive())) != S_OK) {
1213 i : LOG(ERROR) << "Failed to get section contribution properties: "
1214 : << com::LogHr(hr) << ".";
1215 i : return false;
1216 : }
1217 :
1218 : // Determine if this function was built by a supported compiler.
1219 : bool is_built_by_supported_compiler =
1220 E : IsBuiltBySupportedCompiler(compiland.get());
1221 :
1222 : // DIA numbers sections from 1 to n, while we do 0 to n - 1.
1223 E : DCHECK_LT(0u, section_id);
1224 E : --section_id;
1225 :
1226 : // We don't parse the resource section, as it is parsed by the PEFileParser.
1227 E : if (section_id == rsrc_id)
1228 E : continue;
1229 :
1230 E : std::string compiland_name;
1231 : if (!WideToUTF8(bstr_compiland_name, bstr_compiland_name.Length(),
1232 E : &compiland_name)) {
1233 i : LOG(ERROR) << "Failed to convert compiland name to UTF8.";
1234 i : return false;
1235 : }
1236 :
1237 : // Give a name to the block based on the basename of the object file. This
1238 : // will eventually be replaced by the full symbol name, if one exists for
1239 : // the block.
1240 E : size_t last_component = compiland_name.find_last_of('\\');
1241 E : size_t extension = compiland_name.find_last_of('.');
1242 E : if (last_component == std::string::npos) {
1243 E : last_component = 0;
1244 E : } else {
1245 : // We don't want to include the last slash.
1246 E : ++last_component;
1247 : }
1248 E : if (extension < last_component)
1249 i : extension = compiland_name.size();
1250 : std::string name = compiland_name.substr(last_component,
1251 E : extension - last_component);
1252 :
1253 : // TODO(chrisha): We see special section contributions with the name
1254 : // "* CIL *". These are concatenations of data symbols and can very
1255 : // likely be chunked using symbols directly. A cursory visual inspection
1256 : // of symbol names hints that these might be related to WPO.
1257 :
1258 : // Create the block.
1259 : BlockType block_type =
1260 E : code ? BlockGraph::CODE_BLOCK : BlockGraph::DATA_BLOCK;
1261 : Block* block = CreateBlockOrFindCoveringPeBlock(
1262 E : block_type, RelativeAddress(rva), length, name);
1263 E : if (block == NULL) {
1264 i : LOG(ERROR) << "Unable to create block for compiland \""
1265 : << compiland_name << "\".";
1266 i : return false;
1267 : }
1268 :
1269 : // Set the block compiland name.
1270 E : block->set_compiland_name(compiland_name);
1271 :
1272 : // Set the block attributes.
1273 E : block->set_attribute(BlockGraph::SECTION_CONTRIB);
1274 E : if (!is_built_by_supported_compiler)
1275 E : block->set_attribute(BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER);
1276 E : }
1277 :
1278 E : return true;
1279 E : }
1280 :
1281 E : bool NewDecomposer::CreateGapBlocks() {
1282 E : size_t num_sections = image_file_.nt_headers()->FileHeader.NumberOfSections;
1283 :
1284 : // Iterate through all the image sections.
1285 E : for (size_t i = 0; i < num_sections; ++i) {
1286 E : const IMAGE_SECTION_HEADER* header = image_file_.section_header(i);
1287 E : DCHECK_NE(reinterpret_cast<IMAGE_SECTION_HEADER*>(NULL), header);
1288 :
1289 E : BlockType type = BlockGraph::CODE_BLOCK;
1290 E : const char* section_type = NULL;
1291 E : switch (GetSectionType(*header)) {
1292 : case kSectionCode:
1293 E : type = BlockGraph::CODE_BLOCK;
1294 E : section_type = "code";
1295 E : break;
1296 :
1297 : case kSectionData:
1298 E : type = BlockGraph::DATA_BLOCK;
1299 E : section_type = "data";
1300 E : break;
1301 :
1302 : default:
1303 i : continue;
1304 : }
1305 :
1306 E : if (!CreateSectionGapBlocks(header, type)) {
1307 i : LOG(ERROR) << "Unable to create gap blocks for " << section_type
1308 : << " section \"" << header->Name << "\".";
1309 i : return false;
1310 : }
1311 E : }
1312 :
1313 E : return true;
1314 E : }
1315 :
1316 : bool NewDecomposer::FinalizeIntermediateReferences(
1317 E : const IntermediateReferences& references) {
1318 E : for (size_t i = 0; i < references.size(); ++i) {
1319 : // This logs verbosely for us.
1320 : if (!CreateReference(references[i].src_addr,
1321 : references[i].size,
1322 : references[i].type,
1323 : references[i].dst_addr,
1324 : references[i].dst_addr,
1325 E : image_)) {
1326 i : return false;
1327 : }
1328 E : }
1329 E : return true;
1330 E : }
1331 :
1332 E : bool NewDecomposer::CreateReferencesFromFixups(IDiaSession* session) {
1333 E : DCHECK_NE(reinterpret_cast<IDiaSession*>(NULL), session);
1334 :
1335 E : PEFile::RelocSet reloc_set;
1336 E : if (!image_file_.DecodeRelocs(&reloc_set))
1337 i : return false;
1338 :
1339 E : OMAPs omap_from;
1340 E : PdbFixups fixups;
1341 E : if (!LoadDebugStreams(session, &fixups, &omap_from))
1342 i : return false;
1343 :
1344 : // While creating references from the fixups this removes the
1345 : // corresponding reference data from the relocs. We use this as a kind of
1346 : // double-entry bookkeeping to ensure all is well and right in the world.
1347 : if (!CreateReferencesFromFixupsImpl(image_file_, fixups, omap_from,
1348 E : &reloc_set, image_)) {
1349 i : return false;
1350 : }
1351 :
1352 E : if (!reloc_set.empty()) {
1353 i : LOG(ERROR) << "Found reloc entries without matching FIXUP entries.";
1354 i : return false;
1355 : }
1356 :
1357 E : return true;
1358 E : }
1359 :
1360 E : bool NewDecomposer::ProcessSymbols(IDiaSymbol* root) {
1361 E : DCHECK_NE(reinterpret_cast<IDiaSymbol*>(NULL), root);
1362 :
1363 : DiaBrowser::MatchCallback on_push_function_or_thunk_symbol(
1364 : base::Bind(&NewDecomposer::OnPushFunctionOrThunkSymbol,
1365 E : base::Unretained(this)));
1366 : DiaBrowser::MatchCallback on_pop_function_or_thunk_symbol(
1367 : base::Bind(&NewDecomposer::OnPopFunctionOrThunkSymbol,
1368 E : base::Unretained(this)));
1369 : DiaBrowser::MatchCallback on_function_child_symbol(
1370 : base::Bind(&NewDecomposer::OnFunctionChildSymbol,
1371 E : base::Unretained(this)));
1372 : DiaBrowser::MatchCallback on_data_symbol(
1373 E : base::Bind(&NewDecomposer::OnDataSymbol, base::Unretained(this)));
1374 : DiaBrowser::MatchCallback on_public_symbol(
1375 E : base::Bind(&NewDecomposer::OnPublicSymbol, base::Unretained(this)));
1376 : DiaBrowser::MatchCallback on_label_symbol(
1377 E : base::Bind(&NewDecomposer::OnLabelSymbol, base::Unretained(this)));
1378 :
1379 E : DiaBrowser dia_browser;
1380 :
1381 : // Find thunks.
1382 : dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagThunk),
1383 : on_push_function_or_thunk_symbol,
1384 E : on_pop_function_or_thunk_symbol);
1385 :
1386 : // Find functions and all data, labels, callsites, debug start/end and block
1387 : // symbols below them. This is done in one single pattern so that the
1388 : // function pushes/pops happen in the right order.
1389 : dia_browser.AddPattern(
1390 : Seq(Opt(SymTagCompiland),
1391 : Callback(Or(SymTagFunction, SymTagThunk),
1392 : on_push_function_or_thunk_symbol,
1393 : on_pop_function_or_thunk_symbol),
1394 : Star(SymTagBlock),
1395 : Or(SymTagData,
1396 : SymTagLabel,
1397 : SymTagBlock,
1398 : SymTagFuncDebugStart,
1399 : SymTagFuncDebugEnd,
1400 : SymTagCallSite)),
1401 E : on_function_child_symbol);
1402 :
1403 : // Global data and code label symbols.
1404 : dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagLabel),
1405 E : on_label_symbol);
1406 : dia_browser.AddPattern(Seq(Opt(SymTagCompiland), SymTagData),
1407 E : on_data_symbol);
1408 :
1409 : // Public symbols. These provide decorated names without any type info, but
1410 : // are useful for debugging.
1411 E : dia_browser.AddPattern(SymTagPublicSymbol, on_public_symbol);
1412 :
1413 E : return dia_browser.Browse(root);
1414 E : }
1415 :
1416 : bool NewDecomposer::VisitLinkerSymbol(VisitLinkerSymbolContext* context,
1417 : uint16 symbol_length,
1418 : uint16 symbol_type,
1419 E : pdb::PdbStream* stream) {
1420 E : DCHECK_NE(reinterpret_cast<VisitLinkerSymbolContext*>(NULL), context);
1421 E : DCHECK_NE(reinterpret_cast<pdb::PdbStream*>(NULL), stream);
1422 :
1423 E : if (symbol_type != cci::S_COFFGROUP)
1424 E : return true;
1425 :
1426 E : std::vector<uint8> buffer;
1427 : const cci::CoffGroupSym* coffgroup =
1428 E : ParseSymbol<cci::CoffGroupSym>(symbol_length, stream, &buffer);
1429 E : if (coffgroup == NULL)
1430 i : return false;
1431 :
1432 : // The PDB numbers sections starting at index 1 but we use index 0.
1433 : RelativeAddress rva(image_layout_->sections[coffgroup->seg - 1].addr +
1434 E : coffgroup->off);
1435 :
1436 : // We are looking for an opening symbol.
1437 E : if (context->current_group_index == -1) {
1438 E : for (size_t i = 0; i < context->bracketing_groups.size(); ++i) {
1439 E : std::string prefix;
1440 E : if (context->bracketing_groups[i].FullMatch(coffgroup->name, &prefix)) {
1441 E : context->current_group_index = i;
1442 E : context->current_group_prefix = prefix;
1443 E : context->current_group_start = rva;
1444 E : return true;
1445 : }
1446 E : }
1447 :
1448 : // No opening symbol was encountered. We can safely ignore this
1449 : // COFF group symbol.
1450 E : return true;
1451 : }
1452 :
1453 : // If we get here we've found an opening symbol and we're looking for the
1454 : // matching closing symbol.
1455 E : std::string prefix;
1456 : if (!context->bracketing_groups[context->current_group_index].FullMatch(
1457 E : coffgroup->name, &prefix)) {
1458 E : return true;
1459 : }
1460 :
1461 E : if (prefix != context->current_group_prefix) {
1462 : // We see another symbol open/close while already in an opened symbol.
1463 : // This indicates nested bracketing information, which we've never seen
1464 : // before.
1465 i : LOG(ERROR) << "Encountered nested bracket symbol \"" << prefix
1466 : << "\" while in \"" << context->current_group_prefix << "\".";
1467 i : return false;
1468 : }
1469 :
1470 E : RelativeAddress end = rva + coffgroup->cb;
1471 E : DCHECK_LE(context->current_group_start, end);
1472 :
1473 : // If the COFF group is not empty, then create a block corresponding to it.
1474 E : if (context->current_group_start != end) {
1475 : // Create a block for this bracketed COFF group.
1476 : Block* block = CreateBlock(
1477 : BlockGraph::DATA_BLOCK,
1478 : context->current_group_start,
1479 : end - context->current_group_start,
1480 E : base::StringPrintf("Bracketed COFF group: %s", prefix.c_str()));
1481 E : if (block == NULL) {
1482 i : LOG(ERROR) << "Failed to create bracketed COFF group \""
1483 : << prefix << "\".";
1484 i : return false;
1485 : }
1486 E : block->set_attribute(BlockGraph::COFF_GROUP);
1487 : }
1488 :
1489 : // Indicate that this block is closed and we're looking for another opening
1490 : // bracket symbol.
1491 E : context->current_group_index = -1;
1492 E : context->current_group_prefix.clear();
1493 E : context->current_group_start = RelativeAddress(0);
1494 :
1495 E : return true;
1496 E : }
1497 :
1498 : DiaBrowser::BrowserDirective NewDecomposer::OnPushFunctionOrThunkSymbol(
1499 : const DiaBrowser& dia_browser,
1500 : const DiaBrowser::SymTagVector& sym_tags,
1501 E : const DiaBrowser::SymbolPtrVector& symbols) {
1502 E : DCHECK(!symbols.empty());
1503 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1504 E : DiaBrowser::SymbolPtr symbol = symbols.back();
1505 :
1506 E : DCHECK_EQ(reinterpret_cast<Block*>(NULL), current_block_);
1507 E : DCHECK_EQ(current_address_, RelativeAddress(0));
1508 E : DCHECK_EQ(0u, current_scope_count_);
1509 :
1510 E : HRESULT hr = E_FAIL;
1511 E : DWORD location_type = LocIsNull;
1512 E : DWORD rva = 0;
1513 E : ULONGLONG length = 0;
1514 E : ScopedBstr name_bstr;
1515 : if (FAILED(hr = symbol->get_locationType(&location_type)) ||
1516 : FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
1517 : FAILED(hr = symbol->get_length(&length)) ||
1518 E : FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
1519 i : LOG(ERROR) << "Failed to get function/thunk properties: " << com::LogHr(hr)
1520 : << ".";
1521 i : return DiaBrowser::kBrowserAbort;
1522 : }
1523 :
1524 : // We only care about functions with static storage. We can stop looking at
1525 : // things below this node, as we won't be able to resolve them either.
1526 E : if (location_type != LocIsStatic)
1527 i : return DiaBrowser::kBrowserTerminatePath;
1528 :
1529 E : RelativeAddress addr(rva);
1530 E : Block* block = image_->GetBlockByAddress(addr);
1531 E : CHECK(block != NULL);
1532 E : RelativeAddress block_addr;
1533 E : CHECK(image_->GetAddressOf(block, &block_addr));
1534 E : DCHECK(InRange(addr, block_addr, block->size()));
1535 :
1536 E : std::string name;
1537 E : if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1538 i : LOG(ERROR) << "Failed to convert function/thunk name to UTF8.";
1539 i : return DiaBrowser::kBrowserAbort;
1540 : }
1541 :
1542 : // We know the function starts in this block but we need to make sure its
1543 : // end does not extend past the end of the block.
1544 E : if (addr + length > block_addr + block->size()) {
1545 i : LOG(ERROR) << "Got function/thunk \"" << name << "\" that is not contained "
1546 : << "by section contribution \"" << block->name() << "\".";
1547 i : return DiaBrowser::kBrowserAbort;
1548 : }
1549 :
1550 E : Offset offset = addr - block_addr;
1551 E : if (!AddLabelToBlock(offset, name, BlockGraph::CODE_LABEL, block))
1552 i : return DiaBrowser::kBrowserAbort;
1553 :
1554 : // Keep track of the generated block. We will use this when parsing symbols
1555 : // that belong to this function. This prevents us from having to do repeated
1556 : // lookups and also allows us to associate labels outside of the block to the
1557 : // correct block.
1558 E : current_block_ = block;
1559 E : current_address_ = block_addr;
1560 :
1561 : // Certain properties are not defined on all blocks, so the following calls
1562 : // may return S_FALSE.
1563 E : BOOL no_return = FALSE;
1564 E : if (symbol->get_noReturn(&no_return) != S_OK)
1565 E : no_return = FALSE;
1566 :
1567 E : BOOL has_inl_asm = FALSE;
1568 E : if (symbol->get_hasInlAsm(&has_inl_asm) != S_OK)
1569 E : has_inl_asm = FALSE;
1570 :
1571 E : BOOL has_eh = FALSE;
1572 E : if (symbol->get_hasEH(&has_eh) != S_OK)
1573 E : has_eh = FALSE;
1574 :
1575 E : BOOL has_seh = FALSE;
1576 E : if (symbol->get_hasSEH(&has_seh) != S_OK)
1577 E : has_seh = FALSE;
1578 :
1579 : // Set the block attributes.
1580 E : if (no_return == TRUE)
1581 E : block->set_attribute(BlockGraph::NON_RETURN_FUNCTION);
1582 E : if (has_inl_asm == TRUE)
1583 E : block->set_attribute(BlockGraph::HAS_INLINE_ASSEMBLY);
1584 E : if (has_eh || has_seh)
1585 E : block->set_attribute(BlockGraph::HAS_EXCEPTION_HANDLING);
1586 E : if (IsSymTag(symbol, SymTagThunk))
1587 E : block->set_attribute(BlockGraph::THUNK);
1588 :
1589 E : return DiaBrowser::kBrowserContinue;
1590 E : }
1591 :
1592 : DiaBrowser::BrowserDirective NewDecomposer::OnPopFunctionOrThunkSymbol(
1593 : const DiaBrowser& dia_browser,
1594 : const DiaBrowser::SymTagVector& sym_tags,
1595 E : const DiaBrowser::SymbolPtrVector& symbols) {
1596 : // Simply clean up the current function block and address.
1597 E : current_block_ = NULL;
1598 E : current_address_ = RelativeAddress(0);
1599 E : current_scope_count_ = 0;
1600 E : return DiaBrowser::kBrowserContinue;
1601 E : }
1602 :
1603 : DiaBrowser::BrowserDirective NewDecomposer::OnFunctionChildSymbol(
1604 : const DiaBrowser& dia_browser,
1605 : const DiaBrowser::SymTagVector& sym_tags,
1606 E : const DiaBrowser::SymbolPtrVector& symbols) {
1607 E : DCHECK(!symbols.empty());
1608 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1609 :
1610 : // This can only be called from the context of a function, so we expect the
1611 : // parent function block to be set and remembered.
1612 E : DCHECK_NE(reinterpret_cast<Block*>(NULL), current_block_);
1613 :
1614 : // The set of sym tags here should match the pattern used in the DiaBrowser
1615 : // instance set up in ProcessSymbols.
1616 E : switch (sym_tags.back()) {
1617 : case SymTagData:
1618 E : return OnDataSymbol(dia_browser, sym_tags, symbols);
1619 :
1620 : case SymTagLabel:
1621 E : return OnLabelSymbol(dia_browser, sym_tags, symbols);
1622 :
1623 : case SymTagBlock:
1624 : case SymTagFuncDebugStart:
1625 : case SymTagFuncDebugEnd:
1626 E : return OnScopeSymbol(sym_tags.back(), symbols.back());
1627 :
1628 : case SymTagCallSite:
1629 E : return OnCallSiteSymbol(symbols.back());
1630 :
1631 : default:
1632 : break;
1633 : }
1634 :
1635 i : LOG(ERROR) << "Unhandled function child symbol: " << sym_tags.back() << ".";
1636 i : return DiaBrowser::kBrowserAbort;
1637 E : }
1638 :
1639 : DiaBrowser::BrowserDirective NewDecomposer::OnDataSymbol(
1640 : const DiaBrowser& dia_browser,
1641 : const DiaBrowser::SymTagVector& sym_tags,
1642 E : const DiaBrowser::SymbolPtrVector& symbols) {
1643 E : DCHECK(!symbols.empty());
1644 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1645 E : DiaBrowser::SymbolPtr symbol = symbols.back();
1646 :
1647 E : HRESULT hr = E_FAIL;
1648 E : DWORD location_type = LocIsNull;
1649 E : DWORD rva = 0;
1650 E : ScopedBstr name_bstr;
1651 : if (FAILED(hr = symbol->get_locationType(&location_type)) ||
1652 : FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
1653 E : FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
1654 i : LOG(ERROR) << "Failed to get data properties: " << com::LogHr(hr) << ".";
1655 i : return DiaBrowser::kBrowserAbort;
1656 : }
1657 :
1658 : // Symbols with an address of zero are essentially invalid. They appear to
1659 : // have been optimized away by the compiler, but they are still reported.
1660 E : if (rva == 0)
1661 E : return DiaBrowser::kBrowserTerminatePath;
1662 :
1663 : // We only care about functions with static storage. We can stop looking at
1664 : // things below this node, as we won't be able to resolve them either.
1665 E : if (location_type != LocIsStatic)
1666 i : return DiaBrowser::kBrowserTerminatePath;
1667 :
1668 : // Get the size of this datum from its type info.
1669 E : size_t length = 0;
1670 E : if (!GetDataSymbolSize(symbol, &length))
1671 i : return DiaBrowser::kBrowserAbort;
1672 :
1673 : // Reuse the parent function block if we can. This acts as small lookup
1674 : // cache.
1675 E : RelativeAddress addr(rva);
1676 E : Block* block = current_block_;
1677 E : RelativeAddress block_addr(current_address_);
1678 E : if (block == NULL || !InRange(addr, block_addr, block->size())) {
1679 E : block = image_->GetBlockByAddress(addr);
1680 E : CHECK(block != NULL);
1681 E : CHECK(image_->GetAddressOf(block, &block_addr));
1682 E : DCHECK(InRange(addr, block_addr, block->size()));
1683 : }
1684 :
1685 E : std::string name;
1686 E : if (!WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
1687 i : LOG(ERROR) << "Failed to convert label name to UTF8.";
1688 i : return DiaBrowser::kBrowserAbort;
1689 : }
1690 :
1691 : // Zero-length data symbols mark case/jump tables, or are forward declares.
1692 E : BlockGraph::LabelAttributes attr = BlockGraph::DATA_LABEL;
1693 E : Offset offset = addr - block_addr;
1694 E : if (length == 0) {
1695 : // Jump and case tables come in as data symbols with no name. Jump tables
1696 : // are always an array of pointers, thus they coincide exactly with a
1697 : // reference. Case tables are simple arrays of integer values (themselves
1698 : // indices into a jump table), thus do not coincide with a reference.
1699 E : if (name.empty() && block->type() == BlockGraph::CODE_BLOCK) {
1700 E : if (block->references().find(offset) != block->references().end()) {
1701 E : name = kJumpTable;
1702 E : attr |= BlockGraph::JUMP_TABLE_LABEL;
1703 E : } else {
1704 E : name = kCaseTable;
1705 E : attr |= BlockGraph::CASE_TABLE_LABEL;
1706 : }
1707 E : } else {
1708 : // Zero-length data symbols act as 'forward declares' in some sense. They
1709 : // are always followed by a non-zero length data symbol with the same name
1710 : // and location.
1711 E : return DiaBrowser::kBrowserTerminatePath;
1712 : }
1713 : }
1714 :
1715 : // Verify that the data symbol does not exceed the size of the block.
1716 E : if (addr + length > block_addr + block->size()) {
1717 : // The data symbol can exceed the size of the block in the case of data
1718 : // imports. For some reason the toolchain emits a global data symbol with
1719 : // type information equal to the type of the data *pointed* to by the import
1720 : // entry rather than the type of the entry itself. Thus, if the data type
1721 : // is bigger than the entire IAT this symbol will exceed it. To complicate
1722 : // matters even more, a poorly written module can import its own export in
1723 : // which case a linker generated pseudo-import-entry block will be
1724 : // generated. This won't be part of the IAT, so we can't even filter based
1725 : // on that. Instead, we simply ignore global data symbols that exceed the
1726 : // block size.
1727 E : base::StringPiece spname(name);
1728 E : if (sym_tags.size() == 1 && spname.starts_with("_imp_")) {
1729 E : VLOG(1) << "Encountered an imported data symbol \"" << name << "\" that "
1730 : << "extends past its parent block \"" << block->name() << "\".";
1731 E : } else {
1732 i : LOG(ERROR) << "Received data symbol \"" << name << "\" that extends past "
1733 : << "its parent block \"" << block->name() << "\".";
1734 i : return DiaBrowser::kBrowserAbort;
1735 : }
1736 : }
1737 :
1738 E : if (!AddLabelToBlock(offset, name, attr, block))
1739 i : return DiaBrowser::kBrowserAbort;
1740 :
1741 E : return DiaBrowser::kBrowserContinue;
1742 E : }
1743 :
1744 : DiaBrowser::BrowserDirective NewDecomposer::OnPublicSymbol(
1745 : const DiaBrowser& dia_browser,
1746 : const DiaBrowser::SymTagVector& sym_tags,
1747 E : const DiaBrowser::SymbolPtrVector& symbols) {
1748 E : DCHECK(!symbols.empty());
1749 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1750 E : DCHECK_EQ(reinterpret_cast<Block*>(NULL), current_block_);
1751 E : DiaBrowser::SymbolPtr symbol = symbols.back();
1752 :
1753 E : HRESULT hr = E_FAIL;
1754 E : DWORD rva = 0;
1755 E : ScopedBstr name_bstr;
1756 : if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
1757 E : FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
1758 i : LOG(ERROR) << "Failed to get public symbol properties: " << com::LogHr(hr)
1759 : << ".";
1760 i : return DiaBrowser::kBrowserAbort;
1761 : }
1762 :
1763 E : RelativeAddress addr(rva);
1764 E : Block* block = image_->GetBlockByAddress(addr);
1765 E : CHECK(block != NULL);
1766 E : RelativeAddress block_addr;
1767 E : CHECK(image_->GetAddressOf(block, &block_addr));
1768 E : DCHECK(InRange(addr, block_addr, block->size()));
1769 :
1770 E : std::string name;
1771 E : WideToUTF8(name_bstr, name_bstr.Length(), &name);
1772 :
1773 : // Public symbol names are mangled. Remove leading '_' as per
1774 : // http://msdn.microsoft.com/en-us/library/00kh39zz(v=vs.80).aspx
1775 E : if (name[0] == '_')
1776 E : name = name.substr(1);
1777 :
1778 E : Offset offset = addr - block_addr;
1779 E : if (!AddLabelToBlock(offset, name, BlockGraph::PUBLIC_SYMBOL_LABEL, block))
1780 i : return DiaBrowser::kBrowserAbort;
1781 :
1782 E : return DiaBrowser::kBrowserContinue;
1783 E : }
1784 :
1785 : DiaBrowser::BrowserDirective NewDecomposer::OnLabelSymbol(
1786 : const DiaBrowser& dia_browser,
1787 : const DiaBrowser::SymTagVector& sym_tags,
1788 E : const DiaBrowser::SymbolPtrVector& symbols) {
1789 E : DCHECK(!symbols.empty());
1790 E : DCHECK_EQ(sym_tags.size(), symbols.size());
1791 E : DiaBrowser::SymbolPtr symbol = symbols.back();
1792 :
1793 E : HRESULT hr = E_FAIL;
1794 E : DWORD rva = 0;
1795 E : ScopedBstr name_bstr;
1796 : if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva)) ||
1797 E : FAILED(hr = symbol->get_name(name_bstr.Receive()))) {
1798 i : LOG(ERROR) << "Failed to get label symbol properties: " << com::LogHr(hr)
1799 : << ".";
1800 i : return DiaBrowser::kBrowserAbort;
1801 : }
1802 :
1803 : // If we have a current_block_ the label should lie within its scope.
1804 E : RelativeAddress addr(rva);
1805 E : Block* block = current_block_;
1806 E : RelativeAddress block_addr(current_address_);
1807 E : if (block != NULL) {
1808 E : if (!InRangeIncl(addr, current_address_, current_block_->size())) {
1809 i : LOG(ERROR) << "Label falls outside of current block \""
1810 : << current_block_->name() << "\".";
1811 i : return DiaBrowser::kBrowserAbort;
1812 : }
1813 E : } else {
1814 : // If there is no current block this is a compiland scope label.
1815 E : block = image_->GetBlockByAddress(addr);
1816 E : CHECK(block != NULL);
1817 E : CHECK(image_->GetAddressOf(block, &block_addr));
1818 E : DCHECK(InRange(addr, block_addr, block->size()));
1819 :
1820 : // TODO(chrisha): This label is in compiland scope, so we should be
1821 : // finding the block whose section contribution shares the same
1822 : // compiland.
1823 : }
1824 :
1825 E : std::string name;
1826 E : WideToUTF8(name_bstr, name_bstr.Length(), &name);
1827 :
1828 E : Offset offset = addr - block_addr;
1829 E : if (!AddLabelToBlock(offset, name, BlockGraph::CODE_LABEL, block))
1830 i : return DiaBrowser::kBrowserAbort;
1831 :
1832 E : return DiaBrowser::kBrowserContinue;
1833 E : }
1834 :
1835 : DiaBrowser::BrowserDirective NewDecomposer::OnScopeSymbol(
1836 E : enum SymTagEnum type, DiaBrowser::SymbolPtr symbol) {
1837 : // We should only get here via the successful exploration of a SymTagFunction,
1838 : // so current_block_ should be set.
1839 E : DCHECK_NE(reinterpret_cast<Block*>(NULL), current_block_);
1840 :
1841 E : HRESULT hr = E_FAIL;
1842 E : DWORD rva = 0;
1843 E : if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva))) {
1844 i : LOG(ERROR) << "Failed to get scope symbol properties: " << com::LogHr(hr)
1845 : << ".";
1846 i : return DiaBrowser::kBrowserAbort;
1847 : }
1848 :
1849 : // The label may potentially lay at the first byte past the function.
1850 E : RelativeAddress addr(rva);
1851 E : DCHECK_LE(current_address_, addr);
1852 E : DCHECK_LE(addr, current_address_ + current_block_->size());
1853 :
1854 : // Get the attributes for this label.
1855 E : BlockGraph::LabelAttributes attr = 0;
1856 E : std::string name;
1857 E : CHECK(ScopeSymTagToLabelProperties(type, current_scope_count_, &attr, &name));
1858 :
1859 : // Add the label.
1860 E : Offset offset = addr - current_address_;
1861 E : if (!AddLabelToBlock(offset, name, attr, current_block_))
1862 i : return DiaBrowser::kBrowserAbort;
1863 :
1864 : // If this is a scope we extract the length and explicitly add a corresponding
1865 : // end label.
1866 E : if (type == SymTagBlock) {
1867 E : ULONGLONG length = 0;
1868 E : if (symbol->get_length(&length) != S_OK) {
1869 i : LOG(ERROR) << "Failed to extract code scope length for block \""
1870 : << current_block_->name() << "\".";
1871 i : return DiaBrowser::kBrowserAbort;
1872 : }
1873 E : DCHECK_LE(static_cast<size_t>(offset + length), current_block_->size());
1874 E : name = base::StringPrintf("<scope-end-%d>", current_scope_count_);
1875 E : ++current_scope_count_;
1876 : if (!AddLabelToBlock(offset + length, name,
1877 E : BlockGraph::SCOPE_END_LABEL, current_block_)) {
1878 i : return DiaBrowser::kBrowserAbort;
1879 : }
1880 : }
1881 :
1882 E : return DiaBrowser::kBrowserContinue;
1883 E : }
1884 :
1885 : DiaBrowser::BrowserDirective NewDecomposer::OnCallSiteSymbol(
1886 E : DiaBrowser::SymbolPtr symbol) {
1887 : // We should only get here via the successful exploration of a SymTagFunction,
1888 : // so current_block_ should be set.
1889 E : DCHECK_NE(reinterpret_cast<Block*>(NULL), current_block_);
1890 :
1891 E : HRESULT hr = E_FAIL;
1892 E : DWORD rva = 0;
1893 E : if (FAILED(hr = symbol->get_relativeVirtualAddress(&rva))) {
1894 i : LOG(ERROR) << "Failed to get call site symbol properties: "
1895 : << com::LogHr(hr) << ".";
1896 i : return DiaBrowser::kBrowserAbort;
1897 : }
1898 :
1899 E : RelativeAddress addr(rva);
1900 E : if (!InRange(addr, current_address_, current_block_->size())) {
1901 i : LOG(ERROR) << "Call site falls outside of current block \""
1902 : << current_block_->name() << "\".";
1903 i : return DiaBrowser::kBrowserAbort;
1904 : }
1905 :
1906 E : Offset offset = addr - current_address_;
1907 : if (!AddLabelToBlock(offset, "<call-site>", BlockGraph::CALL_SITE_LABEL,
1908 E : current_block_)) {
1909 i : return DiaBrowser::kBrowserAbort;
1910 : }
1911 :
1912 E : return DiaBrowser::kBrowserContinue;
1913 E : }
1914 :
1915 : Block* NewDecomposer::CreateBlock(BlockType type,
1916 : RelativeAddress address,
1917 : BlockGraph::Size size,
1918 E : const base::StringPiece& name) {
1919 E : Block* block = image_->AddBlock(type, address, size, name);
1920 E : if (block == NULL) {
1921 i : LOG(ERROR) << "Unable to add block \"" << name.as_string() << "\" at "
1922 : << address << " with size " << size << ".";
1923 i : return NULL;
1924 : }
1925 :
1926 : // Mark the source range from whence this block originates. This is assuming
1927 : // an untransformed image. To handle transformed images we'd have to use the
1928 : // OMAP information to do this properly.
1929 : bool pushed = block->source_ranges().Push(
1930 : Block::DataRange(0, size),
1931 E : Block::SourceRange(address, size));
1932 E : DCHECK(pushed);
1933 :
1934 E : BlockGraph::SectionId section = image_file_.GetSectionIndex(address, size);
1935 E : if (section == BlockGraph::kInvalidSectionId) {
1936 i : LOG(ERROR) << "Block \"" << name.as_string() << "\" at " << address
1937 : << " with size " << size << " lies outside of all sections.";
1938 i : return NULL;
1939 : }
1940 E : block->set_section(section);
1941 :
1942 E : const uint8* data = image_file_.GetImageData(address, size);
1943 E : if (data != NULL)
1944 E : block->SetData(data, size);
1945 :
1946 E : return block;
1947 E : }
1948 :
1949 : Block* NewDecomposer::CreateBlockOrFindCoveringPeBlock(
1950 : BlockType type,
1951 : RelativeAddress addr,
1952 : BlockGraph::Size size,
1953 E : const base::StringPiece& name) {
1954 E : Block* block = image_->GetBlockByAddress(addr);
1955 E : if (block != NULL) {
1956 E : RelativeAddress block_addr;
1957 E : CHECK(image_->GetAddressOf(block, &block_addr));
1958 :
1959 : // Allow PE-parsed blocks to be grown to reflect reality. For example,
1960 : // in VS2013 the linker makes space for 2 debug directories rather than
1961 : // just one, and the symbols reflect this. We parse the debug directory
1962 : // with the size indicated in the PE header, which conflicts with that
1963 : // indicated by the section contributions.
1964 E : if (name == "* Linker *" && block_addr == addr && size > block->size()) {
1965 E : if (!image_->ResizeBlock(block, size)) {
1966 i : LOG(ERROR) << "Failed to extend PE-parsed "
1967 : << BlockInfo(block, block_addr) << " with linker "
1968 : << "section contribution of size " << size << ".";
1969 :
1970 : // Get the conflicting block and output additional information about
1971 : // it.
1972 : Block* conflict = image_->GetFirstIntersectingBlock(
1973 i : block_addr + block->size(), size - block->size());
1974 i : if (conflict) {
1975 i : RelativeAddress conflict_addr;
1976 i : CHECK(image_->GetAddressOf(conflict, &conflict_addr));
1977 i : LOG(ERROR) << "Conflicts with existing "
1978 : << BlockInfo(conflict, conflict_addr) << ".";
1979 : }
1980 :
1981 i : return NULL;
1982 : }
1983 :
1984 : // Update the data in the extended block.
1985 E : const uint8* data = image_file_.GetImageData(addr, size);
1986 E : block->SetData(data, size);
1987 E : return block;
1988 : }
1989 :
1990 : // If this is not a PE parsed or COFF group block that covers us entirely,
1991 : // then this is an error.
1992 : static const BlockGraph::BlockAttributes kCoveringAttributes =
1993 : BlockGraph::PE_PARSED | BlockGraph::COFF_GROUP;
1994 E : RelativeRange existing_block(block_addr, block->size());
1995 : if ((block->attributes() & kCoveringAttributes) == 0 ||
1996 E : !existing_block.Contains(addr, size)) {
1997 i : LOG(ERROR) << "Trying to create block \"" << name.as_string() << "\" at "
1998 : << addr.value() << " with size " << size << " that conflicts "
1999 : << "with existing " << BlockInfo(block, block_addr) << ".";
2000 i : return NULL;
2001 : }
2002 :
2003 E : return block;
2004 : }
2005 E : DCHECK_EQ(reinterpret_cast<Block*>(NULL), block);
2006 :
2007 E : return CreateBlock(type, addr, size, name);
2008 E : }
2009 :
2010 : bool NewDecomposer::CreateGapBlock(BlockType block_type,
2011 : RelativeAddress address,
2012 E : BlockGraph::Size size) {
2013 : Block* block = CreateBlock(block_type, address, size,
2014 E : base::StringPrintf("Gap Block 0x%08X", address.value()).c_str());
2015 E : if (block == NULL) {
2016 i : LOG(ERROR) << "Unable to create gap block.";
2017 i : return false;
2018 : }
2019 E : block->set_attribute(BlockGraph::GAP_BLOCK);
2020 :
2021 E : return true;
2022 E : }
2023 :
2024 : bool NewDecomposer::CreateSectionGapBlocks(const IMAGE_SECTION_HEADER* header,
2025 E : BlockType block_type) {
2026 E : RelativeAddress section_begin(header->VirtualAddress);
2027 E : RelativeAddress section_end(section_begin + header->Misc.VirtualSize);
2028 : RelativeAddress image_end(
2029 E : image_file_.nt_headers()->OptionalHeader.SizeOfImage);
2030 :
2031 : // Search for the first and last blocks interesting from the start and end
2032 : // of the section to the end of the image.
2033 : BlockGraph::AddressSpace::RangeMap::const_iterator it(
2034 : image_->address_space_impl().FindFirstIntersection(
2035 : BlockGraph::AddressSpace::Range(section_begin,
2036 E : image_end - section_begin)));
2037 :
2038 : BlockGraph::AddressSpace::RangeMap::const_iterator end =
2039 E : image_->address_space_impl().end();
2040 E : if (section_end < image_end) {
2041 : end = image_->address_space_impl().FindFirstIntersection(
2042 : BlockGraph::AddressSpace::Range(section_end,
2043 E : image_end - section_end));
2044 : }
2045 :
2046 : // The whole section is missing. Cover it with one gap block.
2047 E : if (it == end)
2048 : return CreateGapBlock(
2049 i : block_type, section_begin, section_end - section_begin);
2050 :
2051 : // Create the head gap block if need be.
2052 E : if (section_begin < it->first.start()) {
2053 : if (!CreateGapBlock(
2054 i : block_type, section_begin, it->first.start() - section_begin)) {
2055 i : return false;
2056 : }
2057 : }
2058 :
2059 : // Now iterate the blocks and fill in gaps.
2060 E : for (; it != end; ++it) {
2061 E : const Block* block = it->second;
2062 E : DCHECK_NE(reinterpret_cast<Block*>(NULL), block);
2063 E : RelativeAddress block_end = it->first.start() + block->size();
2064 E : if (block_end >= section_end)
2065 E : break;
2066 :
2067 : // Walk to the next address in turn.
2068 E : BlockGraph::AddressSpace::RangeMap::const_iterator next = it;
2069 E : ++next;
2070 E : if (next == end) {
2071 : // We're at the end of the list. Create the tail gap block.
2072 E : DCHECK_GT(section_end, block_end);
2073 E : if (!CreateGapBlock(block_type, block_end, section_end - block_end))
2074 i : return false;
2075 E : break;
2076 : }
2077 :
2078 : // Create the interstitial gap block.
2079 E : if (block_end < next->first.start())
2080 : if (!CreateGapBlock(
2081 E : block_type, block_end, next->first.start() - block_end)) {
2082 i : return false;
2083 : }
2084 E : }
2085 :
2086 E : return true;
2087 E : }
2088 :
2089 : } // namespace pe
|