1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of basic block decomposer.
16 :
17 : #include "syzygy/block_graph/basic_block_decomposer.h"
18 :
19 : #include <algorithm>
20 : #include <vector>
21 :
22 : #include "base/logging.h"
23 : #include "base/stringprintf.h"
24 : #include "syzygy/block_graph/basic_block.h"
25 : #include "syzygy/block_graph/basic_block_subgraph.h"
26 : #include "syzygy/block_graph/block_graph.h"
27 : #include "syzygy/block_graph/block_util.h"
28 :
29 : #include "mnemonics.h" // NOLINT
30 :
31 : namespace block_graph {
32 :
33 : namespace {
34 :
35 : using block_graph::BasicBlock;
36 : using block_graph::BasicBlockReference;
37 : using block_graph::BasicBlockReferrer;
38 : using block_graph::BasicBlockSubGraph;
39 : using block_graph::BlockGraph;
40 : using block_graph::Instruction;
41 : using block_graph::Successor;
42 : using core::Disassembler;
43 :
44 : typedef BlockGraph::Block Block;
45 : typedef BlockGraph::Offset Offset;
46 : typedef BlockGraph::Size Size;
47 : typedef core::AddressSpace<Offset, size_t, BasicBlock*> BBAddressSpace;
48 : typedef BBAddressSpace::Range Range;
49 : typedef BBAddressSpace::RangeMap RangeMap;
50 : typedef BBAddressSpace::RangeMapConstIter RangeMapConstIter;
51 : typedef BBAddressSpace::RangeMapIter RangeMapIter;
52 :
53 : const size_t kPointerSize = BlockGraph::Reference::kMaximumSize;
54 :
55 : // We use a (somewhat) arbitrary value as the disassembly address for a block
56 : // so we can tell the difference between a reference to the beginning of the
57 : // block (offset=0) and a null address.
58 : const size_t kDisassemblyAddress = 65536;
59 :
60 : // Look up the reference made from an instruction's byte range within the
61 : // given block. The reference should start AFTER the instruction starts
62 : // and there should be exactly 1 reference in the byte range.
63 : // Returns true if the reference was found, false otherwise.
64 : bool GetReferenceOfInstructionAt(const Block* block,
65 : Offset instr_offset,
66 : Size instr_size,
67 E : BlockGraph::Reference* ref) {
68 E : DCHECK(block != NULL);
69 E : DCHECK_LE(0, instr_offset);
70 E : DCHECK_LT(0U, instr_size);
71 E : DCHECK(ref != NULL);
72 :
73 : // Find the first reference following the instruction offset.
74 : Block::ReferenceMap::const_iterator ref_iter =
75 E : block->references().upper_bound(instr_offset);
76 :
77 : // If no reference is found then we're done.
78 E : if (ref_iter == block->references().end())
79 E : return false;
80 :
81 : // If the reference occurs outside the instruction then we're done.
82 E : Offset next_instr_offset = instr_offset + instr_size;
83 E : if (ref_iter->first >= next_instr_offset)
84 E : return false;
85 :
86 : // Otherwise, the reference should fit into the instruction.
87 : CHECK_LE(static_cast<size_t>(next_instr_offset),
88 E : ref_iter->first + ref_iter->second.size());
89 :
90 : // And it should be the only reference in the instruction.
91 E : if (ref_iter != block->references().begin()) {
92 E : Block::ReferenceMap::const_iterator prev_iter = ref_iter;
93 E : --prev_iter;
94 : CHECK_GE(static_cast<size_t>(instr_offset),
95 E : prev_iter->first + prev_iter->second.size());
96 : }
97 E : Block::ReferenceMap::const_iterator next_iter = ref_iter;
98 E : ++next_iter;
99 : CHECK(next_iter == block->references().end() ||
100 E : next_iter->first >= next_instr_offset);
101 :
102 E : *ref = ref_iter->second;
103 E : return true;
104 E : }
105 :
106 : // Transfer instructions from original to tail, starting with the instruction
107 : // starting at offset.
108 : bool SplitInstructionListAt(Offset offset,
109 : BasicBlock::Instructions* original,
110 E : BasicBlock::Instructions* tail) {
111 E : DCHECK(original != NULL);
112 E : DCHECK(tail != NULL && tail->empty());
113 :
114 E : BasicBlock::Instructions::iterator it(original->begin());
115 E : while (offset > 0 && it != original->end()) {
116 E : offset -= it->size();
117 E : ++it;
118 E : }
119 :
120 : // Did we terminate at an instruction boundary?
121 E : if (offset != 0)
122 E : return false;
123 :
124 E : tail->splice(tail->end(), *original, it, original->end());
125 E : return true;
126 E : }
127 :
128 : } // namespace
129 :
130 : BasicBlockDecomposer::BasicBlockDecomposer(const BlockGraph::Block* block,
131 : BasicBlockSubGraph* subgraph)
132 : : block_(block),
133 : subgraph_(subgraph),
134 : current_block_start_(0),
135 E : check_decomposition_results_(true) {
136 : // TODO(rogerm): Once we're certain this is stable for all input binaries
137 : // turn on check_decomposition_results_ by default only ifndef NDEBUG.
138 E : DCHECK(block != NULL);
139 E : DCHECK(block->type() == BlockGraph::CODE_BLOCK);
140 :
141 : // If no subgraph was provided then use a scratch one.
142 E : if (subgraph == NULL) {
143 E : scratch_subgraph_.reset(new BasicBlockSubGraph());
144 E : subgraph_ = scratch_subgraph_.get();
145 : }
146 E : }
147 :
148 E : bool BasicBlockDecomposer::Decompose() {
149 E : DCHECK(subgraph_->basic_blocks().empty());
150 E : DCHECK(subgraph_->block_descriptions().empty());
151 E : DCHECK(original_address_space_.empty());
152 E : subgraph_->set_original_block(block_);
153 :
154 : // We cache the fact that disassembly failed, and don't do it again.
155 : // TODO(chrisha): Once policy is in place, cache policy results. Then make
156 : // this decomposer fail hard (CHECK) rather than returning false. Finally,
157 : // remove this caching.
158 E : if (block_->attributes() & BlockGraph::ERRORED_DISASSEMBLY)
159 i : return false;
160 :
161 E : if (!Disassemble()) {
162 : // We are knowingly casting away const status here. This uglyness shall go
163 : // away post policy-refactor, but I don't want to needlessly change the
164 : // BB decomposer API in the meantime.
165 : // TODO(chrisha): Get rid of this heinous breach of const correctness!
166 E : const_cast<Block*>(block_)->set_attribute(BlockGraph::ERRORED_DISASSEMBLY);
167 E : return false;
168 : }
169 :
170 : // Don't bother with the following bookkeeping work if the results aren't
171 : // being looked at.
172 E : if (scratch_subgraph_.get() != NULL)
173 E : return true;
174 :
175 : typedef BasicBlockSubGraph::BlockDescription BlockDescription;
176 E : subgraph_->block_descriptions().push_back(BlockDescription());
177 E : BlockDescription& desc = subgraph_->block_descriptions().back();
178 E : desc.name = block_->name();
179 E : desc.compiland_name = block_->compiland_name();
180 E : desc.type = block_->type();
181 E : desc.alignment = block_->alignment();
182 E : desc.attributes = block_->attributes();
183 E : desc.section = block_->section();
184 :
185 : // Add the basic blocks to the block descriptor.
186 E : Offset offset = 0;
187 E : RangeMapConstIter it = original_address_space_.begin();
188 E : for (; it != original_address_space_.end(); ++it) {
189 E : DCHECK_EQ(it->first.start(), offset);
190 E : desc.basic_block_order.push_back(it->second);
191 :
192 : // Any data basic blocks (jump and case tables) with 0 mod 4 alignment
193 : // are marked so that the alignment is preserved by the block builder.
194 : if (desc.alignment >= kPointerSize &&
195 : it->second->type() == BasicBlock::BASIC_DATA_BLOCK &&
196 E : (offset % kPointerSize) == 0) {
197 E : it->second->set_alignment(kPointerSize);
198 : }
199 :
200 E : offset += it->first.size();
201 E : }
202 :
203 E : return true;
204 E : }
205 :
206 : bool BasicBlockDecomposer::DecodeInstruction(Offset offset,
207 : Offset code_end_offset,
208 E : Instruction* instruction) const {
209 : // The entire offset range should fall within the extent of block_ and the
210 : // output instruction pointer must not be NULL.
211 E : DCHECK_LE(0, offset);
212 E : DCHECK_LT(offset, code_end_offset);
213 E : DCHECK_LE(static_cast<Size>(code_end_offset), block_->size());
214 E : DCHECK(instruction != NULL);
215 :
216 : // Decode the instruction.
217 E : const uint8* buffer = block_->data() + offset;
218 E : size_t max_length = code_end_offset - offset;
219 E : if (!Instruction::FromBuffer(buffer, max_length, instruction)) {
220 i : VLOG(1) << "Failed to decode instruction at offset " << offset
221 : << " of block '" << block_->name() << "'.";
222 :
223 : // Dump the bytes to aid in debugging.
224 i : std::string dump;
225 i : size_t dump_length = std::min(max_length, Instruction::kMaxSize);
226 i : for (size_t i = 0; i < dump_length; ++i)
227 i : base::StringAppendF(&dump, " %02X", buffer[i]);
228 i : VLOG(2) << ".text =" << dump << (dump_length < max_length ? "..." : ".");
229 :
230 : // Return false to indicate an error.
231 i : return false;
232 : }
233 :
234 E : VLOG(3) << "Disassembled " << instruction->GetName()
235 : << " instruction (" << instruction->size()
236 : << " bytes) at offset " << offset << ".";
237 :
238 : // Track the source range.
239 : instruction->set_source_range(
240 E : GetSourceRange(offset, instruction->size()));
241 :
242 : // If the block is labeled, preserve the label.
243 E : BlockGraph::Label label;
244 E : if (block_->GetLabel(offset, &label)) {
245 : // If this instruction has run into known data, then we have a problem!
246 E : CHECK(!label.has_attributes(BlockGraph::DATA_LABEL))
247 : << "Disassembling into data at offset " << offset << " of "
248 : << block_->name() << ".";
249 E : instruction->set_label(label);
250 : }
251 :
252 E : return true;
253 E : }
254 :
255 : BasicBlockDecomposer::SourceRange BasicBlockDecomposer::GetSourceRange(
256 E : Offset offset, Size size) const {
257 : // Find the source range for the original bytes. We may not have a data
258 : // range for bytes that were synthesized in other transformations. As a
259 : // rule, however, there should be a covered data range for each instruction,
260 : // successor, that relates back to the original image.
261 : const Block::SourceRanges::RangePair* range_pair =
262 E : block_->source_ranges().FindRangePair(offset, size);
263 : // Return an empty range if we found nothing.
264 E : if (range_pair == NULL)
265 E : return SourceRange();
266 :
267 E : const Block::DataRange& data_range = range_pair->first;
268 E : const Block::SourceRange& source_range = range_pair->second;
269 E : if (offset == data_range.start() && size == data_range.size()) {
270 : // We match a data range exactly, so let's use the entire
271 : // matching source range.
272 E : return source_range;
273 : }
274 :
275 : // The data range doesn't match exactly, so let's slice the corresponding
276 : // source range. The assumption here is that no transformation will ever
277 : // slice the data or source ranges for an instruction, so we should always
278 : // have a covering data and source ranges.
279 E : DCHECK_GE(offset, data_range.start());
280 E : DCHECK_LE(offset + size, data_range.start() + data_range.size());
281 :
282 E : Offset start_offs = offset - data_range.start();
283 E : return SourceRange(source_range.start() + start_offs, size);
284 E : }
285 :
286 : bool BasicBlockDecomposer::FindBasicBlock(Offset offset,
287 : BasicBlock** basic_block,
288 E : Range* range) const {
289 E : DCHECK_LE(0, offset);
290 E : DCHECK(basic_block != NULL);
291 E : DCHECK(range != NULL);
292 E : DCHECK(subgraph_->original_block() != NULL);
293 E : DCHECK_GT(subgraph_->original_block()->size(), static_cast<size_t>(offset));
294 :
295 : RangeMapConstIter bb_iter =
296 E : original_address_space_.FindFirstIntersection(Range(offset, 1));
297 :
298 E : if (bb_iter == original_address_space_.end())
299 i : return false;
300 :
301 E : *basic_block = bb_iter->second;
302 E : *range = bb_iter->first;
303 E : return true;
304 E : }
305 :
306 E : BasicBlock* BasicBlockDecomposer::GetBasicBlockAt(Offset offset) const {
307 E : DCHECK_LE(0, offset);
308 E : DCHECK(subgraph_->original_block() != NULL);
309 E : DCHECK_GT(subgraph_->original_block()->size(), static_cast<size_t>(offset));
310 :
311 E : BasicBlock* bb = NULL;
312 E : Range range;
313 E : CHECK(FindBasicBlock(offset, &bb, &range));
314 E : DCHECK(bb != NULL);
315 E : DCHECK_EQ(offset, range.start());
316 E : return bb;
317 E : }
318 :
319 E : void BasicBlockDecomposer::InitJumpTargets(Offset code_end_offset) {
320 E : DCHECK_LE(static_cast<Size>(code_end_offset), block_->size());
321 :
322 : // Make sure the jump target set is empty.
323 E : jump_targets_.clear();
324 :
325 : // For each referrer, check if it references code. If so, it's a jump target.
326 : BlockGraph::Block::ReferrerSet::const_iterator ref_iter =
327 E : block_->referrers().begin();
328 E : for (; ref_iter != block_->referrers().end(); ++ref_iter) {
329 E : BlockGraph::Reference ref;
330 E : bool found = ref_iter->first->GetReference(ref_iter->second, &ref);
331 E : DCHECK(found);
332 E : DCHECK_EQ(block_, ref.referenced());
333 E : DCHECK_LE(0, ref.base());
334 E : DCHECK_LT(static_cast<size_t>(ref.base()), block_->size());
335 :
336 : // Ignore references to the data portion of the block.
337 E : if (ref.base() >= code_end_offset)
338 E : continue;
339 :
340 E : jump_targets_.insert(ref.base());
341 E : }
342 E : }
343 :
344 : bool BasicBlockDecomposer::HandleInstruction(const Instruction& instruction,
345 E : Offset offset) {
346 : // We do not handle the SYS* instructions. These should ONLY occur inside
347 : // the OS system libraries, mediated by an OS system call. We expect that
348 : // they NEVER occur in application code.
349 E : if (instruction.IsSystemCall()) {
350 i : VLOG(1) << "Encountered an unexpected " << instruction.GetName()
351 : << " instruction at offset " << offset << " of block '"
352 : << block_->name() << "'.";
353 i : return false;
354 : }
355 :
356 : // Calculate the offset of the next instruction. We'll need this if this
357 : // instruction marks the end of a basic block.
358 E : Offset next_instruction_offset = offset + instruction.size();
359 :
360 : // If the instruction is not a branch then it needs to be appended to the
361 : // current basic block... which we close if the instruction is a return or
362 : // a call to a non-returning function.
363 E : if (!instruction.IsBranch()) {
364 E : current_instructions_.push_back(instruction);
365 E : if (instruction.IsReturn()) {
366 E : EndCurrentBasicBlock(next_instruction_offset);
367 E : } else if (instruction.IsCall()) {
368 E : BlockGraph::Reference ref;
369 : bool found = GetReferenceOfInstructionAt(
370 E : block_, offset, instruction.size(), &ref);
371 : if (found && Instruction::IsCallToNonReturningFunction(
372 E : instruction.representation(), ref.referenced(), ref.offset())) {
373 E : EndCurrentBasicBlock(next_instruction_offset);
374 : }
375 : }
376 E : return true;
377 : }
378 :
379 : // If the branch is not PC-Relative then it also needs to be appended to
380 : // the current basic block... which we then close.
381 E : if (!instruction.HasPcRelativeOperand(0)) {
382 E : current_instructions_.push_back(instruction);
383 E : EndCurrentBasicBlock(next_instruction_offset);
384 E : return true;
385 : }
386 :
387 : // Otherwise, we're dealing with a branch whose destination is explicit.
388 E : DCHECK(instruction.IsBranch());
389 E : DCHECK(instruction.HasPcRelativeOperand(0));
390 :
391 : // Make sure we understand the branching condition. If we don't, then
392 : // there's an instruction we have failed to consider.
393 : Successor::Condition condition = Successor::OpCodeToCondition(
394 E : instruction.opcode());
395 E : CHECK_NE(Successor::kInvalidCondition, condition)
396 : << "Received unknown condition for branch instruction: "
397 : << instruction.GetName() << ".";
398 :
399 : // If this is a conditional branch add the inverse conditional successor
400 : // to represent the fall-through. If we don't understand the inverse, then
401 : // there's an instruction we have failed to consider.
402 E : if (instruction.IsConditionalBranch()) {
403 : Successor::Condition inverse_condition =
404 E : Successor::InvertCondition(condition);
405 E : CHECK_NE(Successor::kInvalidCondition, inverse_condition)
406 : << "Non-invertible condition seen for branch instruction: "
407 : << instruction.GetName() << ".";
408 :
409 : // Create an (unresolved) successor pointing to the next instruction.
410 : BasicBlockReference ref(BlockGraph::PC_RELATIVE_REF,
411 : 1, // The size is irrelevant in successors.
412 : const_cast<Block*>(block_),
413 : next_instruction_offset,
414 E : next_instruction_offset);
415 E : current_successors_.push_front(Successor(inverse_condition, ref, 0));
416 E : jump_targets_.insert(next_instruction_offset);
417 E : }
418 :
419 : // Attempt to figure out where the branch is going by finding a
420 : // reference inside the instruction's byte range.
421 E : BlockGraph::Reference ref;
422 : bool found = GetReferenceOfInstructionAt(
423 E : block_, offset, instruction.size(), &ref);
424 :
425 : // If a reference was found, prefer its destination information to the
426 : // information conveyed by the bytes in the instruction. This should
427 : // handle all inter-block jumps (thunks, tail-call elimination, etc).
428 : // Otherwise, create a reference into the current block.
429 E : if (found) {
430 : // This is an explicit branching instruction so we expect the reference to
431 : // be direct.
432 E : if (!ref.IsDirect()) {
433 i : VLOG(1) << "Encountered an explicit control flow instruction containing "
434 : << "an indirect reference.";
435 i : return false;
436 : }
437 E : } else {
438 : Offset target_offset =
439 E : next_instruction_offset + instruction.representation().imm.addr;
440 :
441 : // If we don't have a reference (coming from a fixup) for a PC-relative jump
442 : // then we expect its destination to be in the block. We only see otherwise
443 : // in assembly generated code where section contributions don't correspond
444 : // to entire function bodies.
445 : if (target_offset < 0 ||
446 E : static_cast<Size>(target_offset) >= block_->size()) {
447 i : VLOG(1) << "Unexpected PC-relative target offset is external to block.";
448 i : return false;
449 : }
450 :
451 : ref = BlockGraph::Reference(BlockGraph::PC_RELATIVE_REF,
452 : 1, // Size is irrelevant in successors.
453 : const_cast<Block*>(block_),
454 : target_offset,
455 E : target_offset);
456 : }
457 :
458 : // If the reference points to the current block, track the target offset.
459 E : if (ref.referenced() == block_)
460 E : jump_targets_.insert(ref.offset());
461 :
462 : // Create the successor, preserving the source range and label.
463 : BasicBlockReference bb_ref(
464 E : ref.type(), ref.size(), ref.referenced(), ref.offset(), ref.base());
465 E : Successor succ(condition, bb_ref, instruction.size());
466 E : succ.set_source_range(instruction.source_range());
467 E : succ.set_label(instruction.label());
468 E : current_successors_.push_front(succ);
469 :
470 : // Having just branched, we need to end the current basic block.
471 E : EndCurrentBasicBlock(next_instruction_offset);
472 E : return true;
473 E : }
474 :
475 E : bool BasicBlockDecomposer::EndCurrentBasicBlock(Offset end_offset) {
476 : // We have reached the end of the current walk or we handled a conditional
477 : // branch. Let's mark this as the end of a basic block.
478 E : int basic_block_size = end_offset - current_block_start_;
479 E : DCHECK_LT(0, basic_block_size);
480 : if (!InsertBasicBlockRange(current_block_start_,
481 : basic_block_size,
482 E : BasicBlock::BASIC_CODE_BLOCK)) {
483 i : return false;
484 : }
485 :
486 : // Remember the end offset as the start of the next basic block.
487 E : current_block_start_ = end_offset;
488 E : return true;
489 E : }
490 :
491 E : bool BasicBlockDecomposer::GetCodeRangeAndCreateDataBasicBlocks(Offset* end) {
492 E : DCHECK_NE(reinterpret_cast<Offset*>(NULL), end);
493 :
494 E : *end = 0;
495 :
496 : // By default, we assume the entire block is code.
497 E : Offset code_end = block_->size();
498 :
499 : // Iterate over all labels, looking for data labels.
500 : BlockGraph::Block::LabelMap::const_reverse_iterator it =
501 E : block_->labels().rbegin();
502 E : bool saw_non_data_label = false;
503 E : for (; it != block_->labels().rend(); ++it) {
504 E : const BlockGraph::Label& label = it->second;
505 E : if (label.has_attributes(BlockGraph::DATA_LABEL)) {
506 : // There should never be data labels beyond the end of the block.
507 E : if (it->first >= static_cast<Offset>(block_->size())) {
508 i : VLOG(1) << "Encountered a data label at offset " << it->first
509 : << "of block \"" << block_->name() << "\" of size "
510 : << block_->size() << ".";
511 i : return false;
512 : }
513 :
514 : // If a non-data label was already encountered, and now there's another
515 : // data label then bail: the block does not respect the 'code first,
516 : // data second' supported layout requirement.
517 E : if (saw_non_data_label) {
518 E : VLOG(1) << "Block \"" << block_->name() << "\" has an unsupported "
519 : << "code-data layout.";
520 E : VLOG(1) << "Unexpected data label at offset " << it->first << ".";
521 E : return false;
522 : }
523 :
524 : // Create a data block and update the end-of-code offset. This should
525 : // never fail because this is the first time blocks are being created and
526 : // they are strictly non-overlapping by the iteration logic of this
527 : // function.
528 E : size_t size = code_end - it->first;
529 : CHECK(InsertBasicBlockRange(it->first, size,
530 E : BasicBlock::BASIC_DATA_BLOCK));
531 E : code_end = it->first;
532 E : } else {
533 : // We ignore the debug-end label, as it can come after block data.
534 E : if (label.attributes() == BlockGraph::DEBUG_END_LABEL)
535 E : continue;
536 :
537 : // Remember that a non-data label was seen. No further data labels should
538 : // be encountered.
539 E : saw_non_data_label = true;
540 : }
541 E : }
542 :
543 E : *end = code_end;
544 :
545 E : return true;
546 E : }
547 :
548 E : bool BasicBlockDecomposer::ParseInstructions() {
549 : // Find the beginning and ending offsets of code bytes within the block.
550 E : Offset code_end_offset = 0;
551 E : if (!GetCodeRangeAndCreateDataBasicBlocks(&code_end_offset))
552 E : return false;
553 :
554 : // Initialize jump_targets_ to include un-discoverable targets.
555 E : InitJumpTargets(code_end_offset);
556 :
557 : // Disassemble the instruction stream into rudimentary basic blocks.
558 E : Offset offset = 0;
559 E : current_block_start_ = offset;
560 E : while (offset < code_end_offset) {
561 : // Decode the next instruction.
562 E : Instruction instruction;
563 E : if (!DecodeInstruction(offset, code_end_offset, &instruction))
564 i : return false;
565 :
566 : // Handle the decoded instruction.
567 E : if (!HandleInstruction(instruction, offset))
568 i : return false;
569 :
570 : // Advance the instruction offset.
571 E : offset += instruction.size();
572 E : }
573 :
574 : // If we get here then we must have successfully consumed the entire code
575 : // range; otherwise, we should have failed to decode a partial instruction.
576 E : CHECK_EQ(offset, code_end_offset);
577 :
578 : // If the last bb we were working on didn't end with a RET or branch then
579 : // we need to close it now. We can detect this if the current_block_start_
580 : // does not match the current (end) offset.
581 E : if (current_block_start_ != code_end_offset)
582 E : EndCurrentBasicBlock(code_end_offset);
583 :
584 E : return true;
585 E : }
586 :
587 E : bool BasicBlockDecomposer::Disassemble() {
588 : // Parse the code bytes into instructions and rudimentary basic blocks.
589 E : if (!ParseInstructions())
590 E : return false;
591 :
592 : // Everything below this point is simply book-keeping that can't fail. These
593 : // can safely be skipped in a dry-run.
594 E : if (scratch_subgraph_.get() != NULL)
595 E : return true;
596 :
597 : // Split the basic blocks at branch targets.
598 E : SplitCodeBlocksAtBranchTargets();
599 :
600 : // By this point, we should have basic blocks for all visited code.
601 E : CheckAllJumpTargetsStartABasicCodeBlock();
602 :
603 : // We should now have contiguous block ranges that cover every byte in the
604 : // macro block. Verify that this is so.
605 E : CheckHasCompleteBasicBlockCoverage();
606 :
607 : // We should have propagated all of the labels in the original block into
608 : // the basic-block subgraph.
609 E : CheckAllLabelsArePreserved();
610 :
611 : // Populate the referrers in the basic block data structures by copying
612 : // them from the original source block.
613 E : CopyExternalReferrers();
614 :
615 : // Populate the references in the basic block data structures by copying
616 : // them from the original source block. This does not handle the successor
617 : // references.
618 E : CopyReferences();
619 :
620 : // Wire up the basic-block successors. These are not handled by
621 : // CopyReferences(), above.
622 E : ResolveSuccessors();
623 :
624 : // All the control flow we have derived should be valid.
625 E : CheckAllControlFlowIsValid();
626 :
627 : // Mark all unreachable code blocks as padding.
628 E : MarkUnreachableCodeAsPadding();
629 :
630 : // ... and we're done.
631 E : return true;
632 E : }
633 :
634 E : void BasicBlockDecomposer::CheckAllJumpTargetsStartABasicCodeBlock() const {
635 E : if (!check_decomposition_results_)
636 i : return;
637 :
638 E : JumpTargets::const_iterator offset_iter(jump_targets_.begin());
639 E : for (; offset_iter != jump_targets_.end(); ++offset_iter) {
640 : // The target basic-block should be a code basic-block.
641 E : BasicBlock* target_bb = GetBasicBlockAt(*offset_iter);
642 E : CHECK(target_bb != NULL);
643 E : CHECK_EQ(BasicBlock::BASIC_CODE_BLOCK, target_bb->type());
644 E : }
645 E : }
646 :
647 E : void BasicBlockDecomposer::CheckHasCompleteBasicBlockCoverage() const {
648 E : if (!check_decomposition_results_)
649 i : return;
650 :
651 : // Walk through the basic-block address space.
652 E : Offset next_start = 0;
653 E : RangeMapConstIter it(original_address_space_.begin());
654 E : for (; it != original_address_space_.end(); ++it) {
655 E : CHECK_EQ(it->first.start(), next_start);
656 E : CHECK_EQ(it->first.start(), it->second->offset());
657 :
658 E : BasicDataBlock* data_block = BasicDataBlock::Cast(it->second);
659 E : if (data_block != NULL) {
660 : // Data block's size should match the address segment exactly.
661 E : CHECK_EQ(it->first.size(), data_block->size());
662 : }
663 E : BasicCodeBlock* code_block = BasicCodeBlock::Cast(it->second);
664 E : if (code_block != NULL) {
665 : // Code blocks may be short the trailing successor instruction.
666 : BasicCodeBlock::Successors::const_iterator succ_it(
667 E : code_block->successors().begin());
668 E : Size block_size = code_block->GetInstructionSize();
669 E : for (; succ_it != code_block->successors().end(); ++succ_it)
670 E : block_size += succ_it->instruction_size();
671 :
672 E : CHECK_GE(it->first.size(), block_size);
673 : }
674 E : next_start += it->first.size();
675 E : }
676 :
677 : // At this point, if there were no gaps, next start will be the same as the
678 : // full size of the block we're decomposing.
679 E : CHECK_EQ(block_->size(), static_cast<size_t>(next_start));
680 E : }
681 :
682 E : void BasicBlockDecomposer::CheckAllControlFlowIsValid() const {
683 E : if (!check_decomposition_results_)
684 i : return;
685 :
686 : // Check that the subgraph is valid. This will make sure that the
687 : // instructions and successors generally make sense.
688 E : CHECK(subgraph_->IsValid());
689 :
690 : // The only thing left to check is that synthesized flow-through
691 : // successors refer to the adjacent basic-blocks.
692 E : RangeMapConstIter it(original_address_space_.begin());
693 E : for (; it != original_address_space_.end(); ++it) {
694 E : const BasicCodeBlock* bb = BasicCodeBlock::Cast(it->second);
695 E : if (bb == NULL)
696 E : continue;
697 :
698 E : const BasicBlock::Successors& successors = bb->successors();
699 :
700 : // There may be at most 2 successors.
701 E : switch (successors.size()) {
702 : case 0:
703 E : break;
704 :
705 : case 1:
706 : // If the successor is synthesized, then flow is from this basic-block
707 : // to the next adjacent one.
708 E : if (successors.back().instruction_size() == 0) {
709 E : RangeMapConstIter next(it);
710 E : ++next;
711 E : CHECK(next != original_address_space_.end());
712 E : CHECK_EQ(successors.back().reference().basic_block(), next->second);
713 : }
714 E : break;
715 :
716 : case 2: {
717 : // Exactly one of the successors should have been synthesized.
718 E : bool front_synthesized = successors.front().instruction_size() == 0;
719 E : bool back_synthesized = successors.back().instruction_size() == 0;
720 E : CHECK_NE(front_synthesized, back_synthesized);
721 :
722 : // The synthesized successor flows from this basic-block to the next
723 : // adjacent one.
724 : const Successor& synthesized =
725 E : front_synthesized ? successors.front() : successors.back();
726 E : RangeMapConstIter next(it);
727 E : ++next;
728 E : CHECK(next != original_address_space_.end());
729 E : CHECK_EQ(synthesized.reference().basic_block(), next->second);
730 E : break;
731 : }
732 :
733 : default:
734 i : NOTREACHED();
735 : }
736 E : }
737 E : }
738 :
739 E : void BasicBlockDecomposer::CheckAllLabelsArePreserved() const {
740 E : if (!check_decomposition_results_)
741 i : return;
742 :
743 E : const Block* original_block = subgraph_->original_block();
744 E : if (original_block == NULL)
745 i : return;
746 :
747 : // Remove any labels that fall *after* the given block. This can happen for
748 : // scope and debug-end labels when the function has no epilog. It is rare, but
749 : // has been observed in the wild.
750 : // TODO(chrisha): Find a way to preserve these. We may need the notion of an
751 : // empty basic-block which gets assigned the label, or we may need to
752 : // augment BBs/instructions with the ability to have two labels: one tied
753 : // to the beginning of the object, and one to the end.
754 : Block::LabelMap::const_iterator it_past_block_end =
755 E : original_block->labels().lower_bound(original_block->size());
756 :
757 : // Grab a copy of the original labels (except any that are beyond the end of
758 : // the block data). We will be matching against these to ensure that they are
759 : // preserved in the BB decomposition.
760 : const Block::LabelMap original_labels(original_block->labels().begin(),
761 E : it_past_block_end);
762 E : if (original_labels.empty())
763 E : return;
764 :
765 : // A map to track which labels (by offset) have been found in the subgraph.
766 E : std::map<Offset, bool> labels_found;
767 :
768 : // Initialize the map of labels found in the subgraph.
769 E : Block::LabelMap::const_iterator label_iter = original_labels.begin();
770 E : for (; label_iter != original_labels.end(); ++label_iter)
771 E : labels_found.insert(std::make_pair(label_iter->first, false));
772 :
773 : // Walk through the subgraph and mark all of the labels found.
774 : BasicBlockSubGraph::BBCollection::const_iterator bb_iter =
775 E : subgraph_->basic_blocks().begin();
776 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
777 E : const BasicDataBlock* data_block = BasicDataBlock::Cast(*bb_iter);
778 E : if (data_block != NULL) {
779 : // Account for labels attached to basic-blocks.
780 E : if (data_block->has_label()) {
781 E : BlockGraph::Label label;
782 E : CHECK(original_block->GetLabel(data_block->offset(), &label));
783 E : CHECK(data_block->label() == label);
784 E : labels_found[data_block->offset()] = true;
785 E : }
786 : }
787 :
788 E : const BasicCodeBlock* code_block = BasicCodeBlock::Cast(*bb_iter);
789 E : if (code_block != NULL) {
790 : // Account for labels attached to instructions.
791 : BasicBlock::Instructions::const_iterator inst_iter =
792 E : code_block->instructions().begin();
793 E : Offset inst_offset = code_block->offset();
794 E : for (; inst_iter != code_block->instructions().end(); ++inst_iter) {
795 E : const Instruction& inst = *inst_iter;
796 E : if (inst.has_label()) {
797 E : BlockGraph::Label label;
798 E : CHECK(original_block->GetLabel(inst_offset, &label));
799 E : CHECK(inst.label() == label);
800 E : labels_found[inst_offset] = true;
801 E : }
802 E : inst_offset += inst.size();
803 E : }
804 :
805 : // Account for labels attached to successors.
806 : BasicBlock::Successors::const_iterator succ_iter =
807 E : code_block->successors().begin();
808 E : for (; succ_iter != code_block->successors().end(); ++succ_iter) {
809 E : const Successor& succ = *succ_iter;
810 E : if (succ.has_label()) {
811 E : BlockGraph::Label label;
812 E : CHECK_NE(0U, succ.instruction_size());
813 E : CHECK(original_block->GetLabel(inst_offset, &label));
814 E : CHECK(succ.label() == label);
815 E : labels_found[inst_offset] = true;
816 E : }
817 E : inst_offset += succ.instruction_size();
818 E : }
819 : }
820 E : }
821 :
822 : // We should have the right number of labels_found (check if we added
823 : // something to the wrong place).
824 E : CHECK_EQ(original_labels.size(), labels_found.size());
825 :
826 : // Make sure all of the items in labels_found have been set to true.
827 E : std::map<Offset, bool>::const_iterator found_iter = labels_found.begin();
828 E : for (; found_iter != labels_found.end(); ++found_iter) {
829 E : CHECK(found_iter->second);
830 E : }
831 E : }
832 :
833 : bool BasicBlockDecomposer::InsertBasicBlockRange(Offset offset,
834 : size_t size,
835 E : BasicBlockType type) {
836 E : DCHECK_LE(0, offset);
837 E : DCHECK_LT(0U, size);
838 E : DCHECK_LE(offset + size, block_->size());
839 E : DCHECK(type == BasicBlock::BASIC_CODE_BLOCK || current_instructions_.empty());
840 E : DCHECK(type == BasicBlock::BASIC_CODE_BLOCK || current_successors_.empty());
841 :
842 : // Find or create a name for this basic block. Reserve the label, if any,
843 : // to propagate to the basic block if there are no instructions in the
844 : // block to carry the label(s).
845 E : BlockGraph::Label label;
846 E : std::string basic_block_name;
847 E : if (block_->GetLabel(offset, &label)) {
848 E : basic_block_name = label.ToString();
849 E : } else {
850 : basic_block_name =
851 : base::StringPrintf("<%s+%04X-%s>",
852 : block_->name().c_str(),
853 : offset,
854 E : BasicBlock::BasicBlockTypeToString(type));
855 : }
856 :
857 : // Pre-flight address space insertion to make sure there's no
858 : // pre-existing conflicting range.
859 E : Range byte_range(offset, size);
860 : if (original_address_space_.FindFirstIntersection(byte_range) !=
861 E : original_address_space_.end()) {
862 i : LOG(ERROR) << "Attempted to insert overlapping basic block.";
863 i : return false;
864 : }
865 :
866 E : if (type == BasicBlock::BASIC_CODE_BLOCK) {
867 : // Create the code block.
868 E : BasicCodeBlock* code_block = subgraph_->AddBasicCodeBlock(basic_block_name);
869 E : if (code_block == NULL)
870 i : return false;
871 E : CHECK(original_address_space_.Insert(byte_range, code_block));
872 :
873 : // Populate code basic-block with instructions and successors.
874 E : code_block->set_offset(offset);
875 E : code_block->instructions().swap(current_instructions_);
876 E : code_block->successors().swap(current_successors_);
877 E : } else {
878 E : DCHECK(type == BasicBlock::BASIC_DATA_BLOCK);
879 :
880 : // Create the data block.
881 : BasicDataBlock* data_block = subgraph_->AddBasicDataBlock(
882 E : basic_block_name, size, block_->data() + offset);
883 E : if (data_block == NULL)
884 i : return false;
885 E : CHECK(original_address_space_.Insert(byte_range, data_block));
886 :
887 : // Capture the source range (if any) for the data block.
888 E : data_block->set_source_range(GetSourceRange(offset, size));
889 :
890 : // Data basic-blocks carry their labels at the head of the basic blocks.
891 : // A padding basic-block might also be labeled if the block contains
892 : // unreachable code (for example, INT3 or NOP instructions following a call
893 : // to a non-returning function).
894 E : data_block->set_offset(offset);
895 E : data_block->set_label(label);
896 : }
897 :
898 E : return true;
899 E : }
900 :
901 E : void BasicBlockDecomposer::SplitCodeBlocksAtBranchTargets() {
902 E : JumpTargets::const_iterator jump_target_iter(jump_targets_.begin());
903 E : for (; jump_target_iter != jump_targets_.end(); ++jump_target_iter) {
904 : // Resolve the target basic-block.
905 E : Offset target_offset = *jump_target_iter;
906 E : BasicBlock* target_bb = NULL;
907 E : Range target_bb_range;
908 E : CHECK(FindBasicBlock(target_offset, &target_bb, &target_bb_range));
909 :
910 : // If we're jumping to the start of a basic block, there isn't any work
911 : // to do.
912 E : if (target_offset == target_bb_range.start())
913 E : continue;
914 :
915 : // The target must be a code block.
916 E : BasicCodeBlock* target_code_block = BasicCodeBlock::Cast(target_bb);
917 E : CHECK(target_code_block != NULL);
918 :
919 : // Otherwise, we have found a basic-block that we need to split.
920 : // Let's contract the range the original occupies in the basic-block
921 : // address space, then add a second block at the target offset.
922 E : size_t left_split_size = target_offset - target_bb_range.start();
923 E : bool removed = original_address_space_.Remove(target_bb_range);
924 E : DCHECK(removed);
925 :
926 E : Range left_split_range(target_bb_range.start(), left_split_size);
927 : bool inserted =
928 E : original_address_space_.Insert(left_split_range, target_code_block);
929 E : DCHECK(inserted);
930 :
931 : // Now we split up containing_range into two new ranges and replace
932 : // containing_range with the two new entries.
933 :
934 : // Slice the trailing half of the instructions and the successors
935 : // off the block.
936 E : DCHECK(current_instructions_.empty());
937 E : DCHECK(current_successors_.empty());
938 : bool split = SplitInstructionListAt(left_split_size,
939 : &target_code_block->instructions(),
940 E : ¤t_instructions_);
941 E : DCHECK(split);
942 E : target_code_block->successors().swap(current_successors_);
943 :
944 : // Set-up the flow-through successor for the first "half".
945 : BasicBlockReference ref(BlockGraph::PC_RELATIVE_REF,
946 : 1, // Size is immaterial in successors.
947 : const_cast<Block*>(block_),
948 : target_offset,
949 E : target_offset);
950 : target_code_block->successors().push_back(
951 E : Successor(Successor::kConditionTrue, ref, 0));
952 :
953 : // This shouldn't fail because the range used to exist, and we just resized
954 : // it.
955 : CHECK(InsertBasicBlockRange(target_offset,
956 : target_bb_range.size() - left_split_size,
957 E : target_code_block->type()));
958 E : }
959 E : }
960 :
961 E : void BasicBlockDecomposer::CopyExternalReferrers() {
962 E : const BlockGraph::Block::ReferrerSet& referrers = block_->referrers();
963 E : BlockGraph::Block::ReferrerSet::const_iterator iter = referrers.begin();
964 E : for (; iter != referrers.end(); ++iter) {
965 : // Find the reference this referrer record describes.
966 E : const BlockGraph::Block* referrer = iter->first;
967 E : DCHECK(referrer != NULL);
968 :
969 : // We only care about external referrers.
970 E : if (referrer == block_)
971 E : continue;
972 :
973 : // This is an external referrer. Find the reference in the referring block.
974 E : Offset source_offset = iter->second;
975 E : BlockGraph::Reference reference;
976 E : bool found = referrer->GetReference(source_offset, &reference);
977 E : DCHECK(found);
978 :
979 : // Find the basic block the reference refers to.
980 E : BasicBlock* target_bb = GetBasicBlockAt(reference.base());
981 E : DCHECK(target_bb != NULL);
982 :
983 : // Insert the referrer into the target bb's referrer set. Note that there
984 : // is no corresponding reference update to the referring block. The
985 : // target bb will track these so a BlockBuilder can properly update
986 : // the referrers when merging a subgraph back into the block-graph.
987 : bool inserted = target_bb->referrers().insert(
988 E : BasicBlockReferrer(referrer, source_offset)).second;
989 E : DCHECK(inserted);
990 E : }
991 E : }
992 :
993 : void BasicBlockDecomposer::CopyReferences(
994 E : Offset item_offset, Size item_size, BasicBlockReferenceMap* refs) {
995 E : DCHECK_LE(0, item_offset);
996 E : DCHECK_LT(0U, item_size);
997 E : DCHECK(refs != NULL);
998 :
999 : // Figure out the bounds of item.
1000 E : BlockGraph::Offset end_offset = item_offset + item_size;
1001 :
1002 : // Get iterators encompassing all references within the bounds of item.
1003 : BlockGraph::Block::ReferenceMap::const_iterator ref_iter =
1004 E : block_->references().lower_bound(item_offset);
1005 : BlockGraph::Block::ReferenceMap::const_iterator end_iter =
1006 E : block_->references().lower_bound(end_offset);
1007 :
1008 E : for (; ref_iter != end_iter; ++ref_iter) {
1009 : // Calculate the local offset of this reference within item.
1010 E : BlockGraph::Offset local_offset = ref_iter->first - item_offset;
1011 E : const BlockGraph::Reference& reference = ref_iter->second;
1012 :
1013 : // We expect long references for everything except flow control.
1014 E : CHECK_EQ(4U, reference.size());
1015 E : DCHECK_LE(local_offset + reference.size(), static_cast<Size>(end_offset));
1016 :
1017 E : if (reference.referenced() != block_) {
1018 : // For external references, we can directly reference the other block.
1019 : bool inserted = refs->insert(std::make_pair(
1020 : local_offset,
1021 : BasicBlockReference(reference.type(), reference.size(),
1022 : reference.referenced(), reference.offset(),
1023 E : reference.base()))).second;
1024 E : DCHECK(inserted);
1025 E : } else {
1026 : // For intra block_ references, find the corresponding basic block in
1027 : // the basic block address space.
1028 E : BasicBlock* target_bb = GetBasicBlockAt(reference.base());
1029 E : DCHECK(target_bb != NULL);
1030 :
1031 : // Create target basic-block relative values for the base and offset.
1032 : // TODO(chrisha): Make BasicBlockReferences handle indirect references.
1033 E : CHECK_EQ(reference.offset(), reference.base());
1034 :
1035 : // Insert a reference to the target basic block.
1036 : bool inserted = refs->insert(std::make_pair(
1037 : local_offset,
1038 : BasicBlockReference(reference.type(),
1039 : reference.size(),
1040 E : target_bb))).second;
1041 E : DCHECK(inserted);
1042 : }
1043 E : }
1044 E : }
1045 :
1046 E : void BasicBlockDecomposer::CopyReferences() {
1047 : // Copy the references for the source range of each basic-block (by
1048 : // instruction for code basic-blocks). External referrers and successors are
1049 : // handled in separate passes.
1050 : BasicBlockSubGraph::BBCollection::iterator bb_iter =
1051 E : subgraph_->basic_blocks().begin();
1052 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
1053 E : BasicCodeBlock* code_block = BasicCodeBlock::Cast(*bb_iter);
1054 E : if (code_block != NULL) {
1055 E : DCHECK_EQ(BasicBlock::BASIC_CODE_BLOCK, code_block->type());
1056 :
1057 E : Offset inst_offset = code_block->offset();
1058 : BasicBlock::Instructions::iterator inst_iter =
1059 E : code_block->instructions().begin();
1060 E : for (; inst_iter != code_block->instructions().end(); ++inst_iter) {
1061 : CopyReferences(inst_offset,
1062 : inst_iter->size(),
1063 E : &inst_iter->references());
1064 E : inst_offset += inst_iter->size();
1065 E : }
1066 : }
1067 :
1068 E : BasicDataBlock* data_block = BasicDataBlock::Cast(*bb_iter);
1069 E : if (data_block != NULL) {
1070 E : DCHECK_NE(BasicBlock::BASIC_CODE_BLOCK, data_block->type());
1071 : CopyReferences(data_block->offset(),
1072 : data_block->size(),
1073 E : &data_block->references());
1074 : }
1075 E : }
1076 E : }
1077 :
1078 E : void BasicBlockDecomposer::ResolveSuccessors() {
1079 : BasicBlockSubGraph::BBCollection::iterator bb_iter =
1080 E : subgraph_->basic_blocks().begin();
1081 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
1082 : // Only code basic-blocks have successors and instructions.
1083 E : BasicCodeBlock* code_block = BasicCodeBlock::Cast(*bb_iter);
1084 E : if (code_block == NULL)
1085 E : continue;
1086 :
1087 : BasicBlock::Successors::iterator succ_iter =
1088 E : code_block->successors().begin();
1089 : BasicBlock::Successors::iterator succ_iter_end =
1090 E : code_block->successors().end();
1091 E : for (; succ_iter != succ_iter_end; ++succ_iter) {
1092 E : if (succ_iter->reference().block() != block_)
1093 E : continue;
1094 :
1095 : // Find the basic block the successor references.
1096 : BasicBlock* target_code_block =
1097 E : GetBasicBlockAt(succ_iter->reference().offset());
1098 E : DCHECK(target_code_block != NULL);
1099 :
1100 : // We transform all successor branches into 4-byte pc-relative targets.
1101 : succ_iter->set_reference(
1102 : BasicBlockReference(
1103 E : BlockGraph::PC_RELATIVE_REF, 4, target_code_block));
1104 E : DCHECK(succ_iter->reference().IsValid());
1105 E : }
1106 E : }
1107 E : }
1108 :
1109 E : void BasicBlockDecomposer::MarkUnreachableCodeAsPadding() {
1110 E : BasicBlockSubGraph::ReachabilityMap rm;
1111 E : subgraph_->GetReachabilityMap(&rm);
1112 E : DCHECK_EQ(rm.size(), subgraph_->basic_blocks().size());
1113 : BasicBlockSubGraph::BBCollection::iterator bb_iter =
1114 E : subgraph_->basic_blocks().begin();
1115 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
1116 E : BasicCodeBlock* code_bb = BasicCodeBlock::Cast(*bb_iter);
1117 E : if (code_bb != NULL) {
1118 E : if (!subgraph_->IsReachable(rm, code_bb))
1119 E : code_bb->MarkAsPadding();
1120 : }
1121 E : }
1122 E : }
1123 :
1124 : } // namespace block_graph
|