1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of basic block decomposer.
16 :
17 : #include "syzygy/block_graph/basic_block_decomposer.h"
18 :
19 : #include <algorithm>
20 : #include <vector>
21 :
22 : #include "base/logging.h"
23 : #include "base/stringprintf.h"
24 : #include "syzygy/block_graph/basic_block.h"
25 : #include "syzygy/block_graph/basic_block_subgraph.h"
26 : #include "syzygy/block_graph/block_graph.h"
27 : #include "syzygy/block_graph/block_util.h"
28 :
29 : #include "mnemonics.h" // NOLINT
30 :
31 : namespace block_graph {
32 :
33 : namespace {
34 :
35 : using block_graph::BasicBlock;
36 : using block_graph::BasicBlockReference;
37 : using block_graph::BasicBlockReferrer;
38 : using block_graph::BasicBlockSubGraph;
39 : using block_graph::BlockGraph;
40 : using block_graph::Instruction;
41 : using block_graph::Successor;
42 : using core::Disassembler;
43 :
44 : typedef BlockGraph::Block Block;
45 : typedef BlockGraph::Offset Offset;
46 : typedef BlockGraph::Size Size;
47 : typedef core::AddressSpace<Offset, size_t, BasicBlock*> BBAddressSpace;
48 : typedef BBAddressSpace::Range Range;
49 : typedef BBAddressSpace::RangeMap RangeMap;
50 : typedef BBAddressSpace::RangeMapConstIter RangeMapConstIter;
51 : typedef BBAddressSpace::RangeMapIter RangeMapIter;
52 :
53 : const size_t kPointerSize = BlockGraph::Reference::kMaximumSize;
54 :
55 : // We use a (somewhat) arbitrary value as the disassembly address for a block
56 : // so we can tell the difference between a reference to the beginning of the
57 : // block (offset=0) and a null address.
58 : const size_t kDisassemblyAddress = 65536;
59 :
60 : // Look up the reference made from an instruction's byte range within the
61 : // given block. The reference should start AFTER the instruction starts
62 : // and there should be exactly 1 reference in the byte range.
63 : // Returns true if the reference was found, false otherwise.
64 : bool GetReferenceOfInstructionAt(const Block* block,
65 : Offset instr_offset,
66 : Size instr_size,
67 E : BlockGraph::Reference* ref) {
68 E : DCHECK(block != NULL);
69 E : DCHECK_LE(0, instr_offset);
70 E : DCHECK_LT(0U, instr_size);
71 E : DCHECK(ref != NULL);
72 :
73 : // Find the first reference following the instruction offset.
74 : Block::ReferenceMap::const_iterator ref_iter =
75 E : block->references().upper_bound(instr_offset);
76 :
77 : // If no reference is found then we're done.
78 E : if (ref_iter == block->references().end())
79 E : return false;
80 :
81 : // If the reference occurs outside the instruction then we're done.
82 E : Offset next_instr_offset = instr_offset + instr_size;
83 E : if (ref_iter->first >= next_instr_offset)
84 E : return false;
85 :
86 : // Otherwise, the reference should fit into the instruction.
87 : CHECK_LE(static_cast<size_t>(next_instr_offset),
88 E : ref_iter->first + ref_iter->second.size());
89 :
90 : // And it should be the only reference in the instruction.
91 E : if (ref_iter != block->references().begin()) {
92 E : Block::ReferenceMap::const_iterator prev_iter = ref_iter;
93 E : --prev_iter;
94 : CHECK_GE(static_cast<size_t>(instr_offset),
95 E : prev_iter->first + prev_iter->second.size());
96 : }
97 E : Block::ReferenceMap::const_iterator next_iter = ref_iter;
98 E : ++next_iter;
99 : CHECK(next_iter == block->references().end() ||
100 E : next_iter->first >= next_instr_offset);
101 :
102 E : *ref = ref_iter->second;
103 E : return true;
104 E : }
105 :
106 : // Transfer instructions from original to tail, starting with the instruction
107 : // starting at offset.
108 : bool SplitInstructionListAt(Offset offset,
109 : BasicBlock::Instructions* original,
110 E : BasicBlock::Instructions* tail) {
111 E : DCHECK(original != NULL);
112 E : DCHECK(tail != NULL && tail->empty());
113 :
114 E : BasicBlock::Instructions::iterator it(original->begin());
115 E : while (offset > 0 && it != original->end()) {
116 E : offset -= it->size();
117 E : ++it;
118 E : }
119 :
120 : // Did we terminate at an instruction boundary?
121 E : if (offset != 0)
122 i : return false;
123 :
124 E : tail->splice(tail->end(), *original, it, original->end());
125 E : return true;
126 E : }
127 :
128 E : bool GetCodeRange(const BlockGraph::Block* block, Offset* begin, Offset* end) {
129 E : DCHECK(block != NULL);
130 E : DCHECK(begin != NULL);
131 E : DCHECK(end != NULL);
132 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
133 :
134 : // By default, we assume the entire block is code.
135 E : *begin = 0;
136 E : *end = block->size();
137 :
138 : // We then move up the end offset if the block ends in data.
139 : BlockGraph::Block::LabelMap::const_reverse_iterator it =
140 E : block->labels().rbegin();
141 E : for (; it != block->labels().rend(); ++it) {
142 E : const BlockGraph::Label& label = it->second;
143 E : if (!label.has_attributes(BlockGraph::DATA_LABEL))
144 E : break;
145 E : *end = it->first;
146 E : }
147 :
148 E : return TRUE;
149 E : }
150 :
151 : } // namespace
152 :
153 : BasicBlockDecomposer::BasicBlockDecomposer(const BlockGraph::Block* block,
154 : BasicBlockSubGraph* subgraph)
155 : : block_(block),
156 : subgraph_(subgraph),
157 : current_block_start_(0),
158 E : check_decomposition_results_(true) {
159 : // TODO(rogerm): Once we're certain this is stable for all input binaries
160 : // turn on check_decomposition_results_ by default only ifndef NDEBUG.
161 E : DCHECK(block != NULL);
162 E : DCHECK(block->type() == BlockGraph::CODE_BLOCK);
163 E : DCHECK(CodeBlockAttributesAreBasicBlockSafe(block));
164 E : DCHECK(subgraph != NULL);
165 E : }
166 :
167 E : bool BasicBlockDecomposer::Decompose() {
168 E : DCHECK(subgraph_->basic_blocks().empty());
169 E : DCHECK(subgraph_->block_descriptions().empty());
170 E : DCHECK(original_address_space_.empty());
171 E : subgraph_->set_original_block(block_);
172 :
173 E : if (!Disassemble())
174 i : return false;
175 :
176 : typedef BasicBlockSubGraph::BlockDescription BlockDescription;
177 E : subgraph_->block_descriptions().push_back(BlockDescription());
178 E : BlockDescription& desc = subgraph_->block_descriptions().back();
179 E : desc.name = block_->name();
180 E : desc.type = block_->type();
181 E : desc.alignment = block_->alignment();
182 E : desc.attributes = block_->attributes();
183 E : desc.section = block_->section();
184 :
185 : // Add the basic blocks to the block descriptor.
186 E : Offset offset = 0;
187 E : RangeMapConstIter it = original_address_space_.begin();
188 E : for (; it != original_address_space_.end(); ++it) {
189 E : DCHECK_EQ(it->first.start(), offset);
190 E : desc.basic_block_order.push_back(it->second);
191 :
192 : // Any data basic blocks (jump and case tables) with 0 mod 4 alignment
193 : // are marked so that the alignment is preserved by the block builder.
194 : if (desc.alignment >= kPointerSize &&
195 : it->second->type() == BasicBlock::BASIC_DATA_BLOCK &&
196 E : (offset % kPointerSize) == 0) {
197 E : it->second->set_alignment(kPointerSize);
198 : }
199 :
200 E : offset += it->first.size();
201 E : }
202 :
203 E : return true;
204 E : }
205 :
206 : bool BasicBlockDecomposer::DecodeInstruction(Offset offset,
207 : Offset code_end_offset,
208 E : Instruction* instruction) const {
209 : // The entire offset range should fall within the extent of block_ and the
210 : // output instruction pointer must not be NULL.
211 E : DCHECK_LE(0, offset);
212 E : DCHECK_LT(offset, code_end_offset);
213 E : DCHECK_LE(static_cast<Size>(code_end_offset), block_->size());
214 E : DCHECK(instruction != NULL);
215 :
216 : // Decode the instruction.
217 E : const uint8* buffer = block_->data() + offset;
218 E : size_t max_length = code_end_offset - offset;
219 E : if (!Instruction::FromBuffer(buffer, max_length, instruction)) {
220 i : LOG(ERROR) << "Failed to decode instruction at offset " << offset
221 : << " of block '" << block_->name() << "'.";
222 :
223 : // Dump the bytes to aid in debugging.
224 i : std::string dump;
225 i : size_t dump_length = std::min(max_length, Instruction::kMaxSize);
226 i : for (size_t i = 0; i < dump_length; ++i)
227 i : base::StringAppendF(&dump, " %02X", buffer[i]);
228 i : LOG(ERROR) << ".text =" << dump << (dump_length < max_length ? "..." : ".");
229 :
230 : // Return false to indicate an error.
231 i : return false;
232 : }
233 :
234 E : VLOG(3) << "Disassembled " << instruction->GetName()
235 : << " instruction (" << instruction->size()
236 : << " bytes) at offset " << offset << ".";
237 :
238 : // Track the source range.
239 : instruction->set_source_range(
240 E : GetSourceRange(offset, instruction->size()));
241 :
242 : // If the block is labeled, preserve the label.
243 E : BlockGraph::Label label;
244 E : if (block_->GetLabel(offset, &label)) {
245 : // If this instruction has run into known data, then we have a problem!
246 E : CHECK(!label.has_attributes(BlockGraph::DATA_LABEL))
247 : << "Disassembling into data at offset " << offset << " of "
248 : << block_->name() << ".";
249 E : instruction->set_label(label);
250 : }
251 :
252 E : return true;
253 E : }
254 :
255 : BasicBlockDecomposer::SourceRange BasicBlockDecomposer::GetSourceRange(
256 E : Offset offset, Size size) const {
257 : // Find the source range for the original bytes. We may not have a data
258 : // range for bytes that were synthesized in other transformations. As a
259 : // rule, however, there should be a covered data range for each instruction,
260 : // successor, that relates back to the original image.
261 : const Block::SourceRanges::RangePair* range_pair =
262 E : block_->source_ranges().FindRangePair(offset, size);
263 : // Return an empty range if we found nothing.
264 E : if (range_pair == NULL)
265 E : return SourceRange();
266 :
267 E : const Block::DataRange& data_range = range_pair->first;
268 E : const Block::SourceRange& source_range = range_pair->second;
269 E : if (offset == data_range.start() && size == data_range.size()) {
270 : // We match a data range exactly, so let's use the entire
271 : // matching source range.
272 E : return source_range;
273 : }
274 :
275 : // The data range doesn't match exactly, so let's slice the corresponding
276 : // source range. The assumption here is that no transformation will ever
277 : // slice the data or source ranges for an instruction, so we should always
278 : // have a covering data and source ranges.
279 E : DCHECK_GE(offset, data_range.start());
280 E : DCHECK_LE(offset + size, data_range.start() + data_range.size());
281 :
282 E : Offset start_offs = offset - data_range.start();
283 E : return SourceRange(source_range.start() + start_offs, size);
284 E : }
285 :
286 : bool BasicBlockDecomposer::FindBasicBlock(Offset offset,
287 : BasicBlock** basic_block,
288 E : Range* range) const {
289 E : DCHECK_LE(0, offset);
290 E : DCHECK(basic_block != NULL);
291 E : DCHECK(range != NULL);
292 E : DCHECK(subgraph_->original_block() != NULL);
293 E : DCHECK_GT(subgraph_->original_block()->size(), static_cast<size_t>(offset));
294 :
295 : RangeMapConstIter bb_iter =
296 E : original_address_space_.FindFirstIntersection(Range(offset, 1));
297 :
298 E : if (bb_iter == original_address_space_.end())
299 i : return false;
300 :
301 E : *basic_block = bb_iter->second;
302 E : *range = bb_iter->first;
303 E : return true;
304 E : }
305 :
306 E : BasicBlock* BasicBlockDecomposer::GetBasicBlockAt(Offset offset) const {
307 E : DCHECK_LE(0, offset);
308 E : DCHECK(subgraph_->original_block() != NULL);
309 E : DCHECK_GT(subgraph_->original_block()->size(), static_cast<size_t>(offset));
310 :
311 E : BasicBlock* bb = NULL;
312 E : Range range;
313 E : CHECK(FindBasicBlock(offset, &bb, &range));
314 E : DCHECK(bb != NULL);
315 E : DCHECK_EQ(offset, range.start());
316 E : return bb;
317 E : }
318 :
319 : void BasicBlockDecomposer::InitJumpTargets(Offset code_begin_offset,
320 E : Offset code_end_offset) {
321 E : DCHECK_LE(0, code_begin_offset);
322 E : DCHECK_LE(static_cast<Size>(code_end_offset), block_->size());
323 :
324 : // Make sure the jump target set is empty.
325 E : jump_targets_.clear();
326 :
327 : // For each referrer, check if it references code. If so, it's a jump target.
328 : BlockGraph::Block::ReferrerSet::const_iterator ref_iter =
329 E : block_->referrers().begin();
330 E : for (; ref_iter != block_->referrers().end(); ++ref_iter) {
331 E : BlockGraph::Reference ref;
332 E : bool found = ref_iter->first->GetReference(ref_iter->second, &ref);
333 E : DCHECK(found);
334 E : DCHECK_EQ(block_, ref.referenced());
335 E : DCHECK_LE(0, ref.base());
336 E : DCHECK_LT(static_cast<size_t>(ref.base()), block_->size());
337 E : DCHECK_EQ(ref.base(), ref.offset());
338 :
339 : // If the referred offset is within the code bounds, then it is a jump
340 : // target.
341 E : if (code_begin_offset <= ref.base() && ref.base() < code_end_offset)
342 E : jump_targets_.insert(ref.base());
343 E : }
344 E : }
345 :
346 : bool BasicBlockDecomposer::HandleInstruction(const Instruction& instruction,
347 E : Offset offset) {
348 : // We do not handle the SYS* instructions. These should ONLY occur inside
349 : // the OS system libraries, mediated by an OS system call. We expect that
350 : // they NEVER occur in application code.
351 E : if (instruction.IsSystemCall()) {
352 i : LOG(ERROR) << "Encountered an unexpected " << instruction.GetName()
353 : << " instruction at offset " << offset << " of block '"
354 : << block_->name() << "'.";
355 i : return false;
356 : }
357 :
358 : // Calculate the offset of the next instruction. We'll need this if this
359 : // instruction marks the end of a basic block.
360 E : Offset next_instruction_offset = offset + instruction.size();
361 :
362 : // If the instruction is not a branch then it needs to be appended to the
363 : // current basic block... which we close if the instruction is a return or
364 : // a call to a non-returning function.
365 E : if (!instruction.IsBranch()) {
366 E : current_instructions_.push_back(instruction);
367 E : if (instruction.IsReturn()) {
368 E : EndCurrentBasicBlock(next_instruction_offset);
369 E : } else if (instruction.IsCall()) {
370 E : BlockGraph::Reference ref;
371 : bool found = GetReferenceOfInstructionAt(
372 E : block_, offset, instruction.size(), &ref);
373 : if (found && Instruction::IsCallToNonReturningFunction(
374 E : instruction.representation(), ref.referenced(), ref.offset())) {
375 E : EndCurrentBasicBlock(next_instruction_offset);
376 : }
377 : }
378 E : return true;
379 : }
380 :
381 : // If the branch is not PC-Relative then it also needs to be appended to
382 : // the current basic block... which we then close.
383 E : if (!instruction.HasPcRelativeOperand(0)) {
384 E : current_instructions_.push_back(instruction);
385 E : EndCurrentBasicBlock(next_instruction_offset);
386 E : return true;
387 : }
388 :
389 : // Otherwise, we're dealing with a branch whose destination is explicit.
390 E : DCHECK(instruction.IsBranch());
391 E : DCHECK(instruction.HasPcRelativeOperand(0));
392 :
393 : // Make sure we understand the branching condition. If we don't, then
394 : // there's an instruction we have failed to consider.
395 : Successor::Condition condition = Successor::OpCodeToCondition(
396 E : instruction.opcode());
397 E : CHECK_NE(Successor::kInvalidCondition, condition)
398 : << "Received unknown condition for branch instruction: "
399 : << instruction.GetName() << ".";
400 :
401 : // If this is a conditional branch add the inverse conditional successor
402 : // to represent the fall-through. If we don't understand the inverse, then
403 : // there's an instruction we have failed to consider.
404 E : if (instruction.IsConditionalBranch()) {
405 : Successor::Condition inverse_condition =
406 E : Successor::InvertCondition(condition);
407 E : CHECK_NE(Successor::kInvalidCondition, inverse_condition)
408 : << "Non-invertible condition seen for branch instruction: "
409 : << instruction.GetName() << ".";
410 :
411 : // Create an (unresolved) successor pointing to the next instruction.
412 : BasicBlockReference ref(BlockGraph::PC_RELATIVE_REF,
413 : 1, // The size is irrelevant in successors.
414 : const_cast<Block*>(block_),
415 : next_instruction_offset,
416 E : next_instruction_offset);
417 E : current_successors_.push_front(Successor(inverse_condition, ref, 0));
418 E : jump_targets_.insert(next_instruction_offset);
419 : }
420 :
421 : // Attempt to figure out where the branch is going by finding a
422 : // reference inside the instruction's byte range.
423 E : BlockGraph::Reference ref;
424 : bool found = GetReferenceOfInstructionAt(
425 E : block_, offset, instruction.size(), &ref);
426 :
427 : // If a reference was found, prefer its destination information to the
428 : // information conveyed by the bytes in the instruction. This should
429 : // handle all inter-block jumps (thunks, tail-call elimination, etc).
430 : // Otherwise, create a reference into the current block.
431 E : if (!found) {
432 : Offset target_offset =
433 E : next_instruction_offset + instruction.representation().imm.addr;
434 E : DCHECK_LE(0, target_offset);
435 E : DCHECK_LT(static_cast<Size>(target_offset), block_->size());
436 : ref = BlockGraph::Reference(BlockGraph::PC_RELATIVE_REF,
437 : 1, // Size is irrelevant in successors.
438 : const_cast<Block*>(block_),
439 : target_offset,
440 E : target_offset);
441 : }
442 :
443 : // If the reference points to the current block, track the target offset.
444 E : if (ref.referenced() == block_)
445 E : jump_targets_.insert(ref.offset());
446 :
447 : // Create the successor, preserving the source range and label.
448 : BasicBlockReference bb_ref(
449 E : ref.type(), ref.size(), ref.referenced(), ref.offset(), ref.base());
450 E : Successor succ(condition, bb_ref, instruction.size());
451 E : succ.set_source_range(instruction.source_range());
452 E : succ.set_label(instruction.label());
453 E : current_successors_.push_front(succ);
454 :
455 : // Having just branched, we need to end the current basic block.
456 E : EndCurrentBasicBlock(next_instruction_offset);
457 E : return true;
458 E : }
459 :
460 E : bool BasicBlockDecomposer::EndCurrentBasicBlock(Offset end_offset) {
461 : // We have reached the end of the current walk or we handled a conditional
462 : // branch. Let's mark this as the end of a basic block.
463 E : int basic_block_size = end_offset - current_block_start_;
464 E : DCHECK_LT(0, basic_block_size);
465 : if (!InsertBasicBlockRange(current_block_start_,
466 : basic_block_size,
467 E : BasicBlock::BASIC_CODE_BLOCK)) {
468 i : return false;
469 : }
470 :
471 : // Remember the end offset as the start of the next basic block.
472 E : current_block_start_ = end_offset;
473 E : return true;
474 E : }
475 :
476 E : bool BasicBlockDecomposer::ParseInstructions() {
477 : // Find the beginning and ending offsets of code bytes within the block.
478 E : Offset code_begin_offset = 0;
479 E : Offset code_end_offset = 0;
480 E : if (!GetCodeRange(block_, &code_begin_offset, &code_end_offset)) {
481 i : LOG(ERROR) << "Failed to determine code byte range.";
482 i : return false;
483 : }
484 :
485 : // Initialize jump_targets_ to include un-discoverable targets.
486 E : InitJumpTargets(code_begin_offset, code_end_offset);
487 :
488 : // Disassemble the instruction stream into rudimentary basic blocks.
489 E : Offset offset = code_begin_offset;
490 E : current_block_start_ = offset;
491 E : while (offset < code_end_offset) {
492 : // Decode the next instruction.
493 E : Instruction instruction;
494 E : if (!DecodeInstruction(offset, code_end_offset, &instruction))
495 i : return false;
496 :
497 : // Handle the decoded instruction.
498 E : if (!HandleInstruction(instruction, offset))
499 i : return false;
500 :
501 : // Advance the instruction offset.
502 E : offset += instruction.size();
503 E : }
504 :
505 : // If we get here then we must have successfully consumed the entire code
506 : // range; otherwise, we should have failed to decode a partial instruction.
507 E : CHECK_EQ(offset, code_end_offset);
508 :
509 : // If the last bb we were working on didn't end with a RET or branch then
510 : // we need to close it now. We can detect this if the current_block_start_
511 : // does not match the current (end) offset.
512 E : if (current_block_start_ != code_end_offset)
513 E : EndCurrentBasicBlock(code_end_offset);
514 :
515 E : return true;
516 E : }
517 :
518 E : bool BasicBlockDecomposer::Disassemble() {
519 : // Parse the code bytes into instructions and rudimentary basic blocks.
520 E : if (!ParseInstructions()) {
521 i : LOG(ERROR) << "Failed to parse instruction bytes.";
522 i : return false;
523 : }
524 :
525 : // Split the basic blocks at branch targets.
526 E : if (!SplitCodeBlocksAtBranchTargets()) {
527 i : LOG(ERROR) << "Failed to split code blocks at branch targets.";
528 i : return false;
529 : }
530 :
531 : // By this point, we should have basic blocks for all visited code.
532 E : CheckAllJumpTargetsStartABasicCodeBlock();
533 :
534 : // Demarcate the data basic blocks. There should be no overlap with code.
535 E : if (!FillInDataBlocks()) {
536 i : LOG(ERROR) << "Failed to fill in data basic-block ranges.";
537 i : return false;
538 : }
539 :
540 : // We should now have contiguous block ranges that cover every byte in the
541 : // macro block. Verify that this is so.
542 E : CheckHasCompleteBasicBlockCoverage();
543 :
544 : // We should have propagated all of the labels in the original block into
545 : // the basic-block subgraph.
546 E : CheckAllLabelsArePreserved();
547 :
548 : // Populate the referrers in the basic block data structures by copying
549 : // them from the original source block.
550 E : if (!CopyExternalReferrers()) {
551 i : LOG(ERROR) << "Failed to populate basic-block referrers.";
552 i : return false;
553 : }
554 :
555 : // Populate the references in the basic block data structures by copying
556 : // them from the original source block. This does not handle the successor
557 : // references.
558 E : if (!CopyReferences()) {
559 i : LOG(ERROR) << "Failed to populate basic-block references.";
560 i : return false;
561 : }
562 :
563 : // Wire up the basic-block successors. These are not handled by
564 : // CopyReferences(), above.
565 E : if (!ResolveSuccessors()) {
566 i : LOG(ERROR) << "Failed to resolve basic-block successors.";
567 i : return false;
568 : }
569 :
570 : // All the control flow we have derived should be valid.
571 E : CheckAllControlFlowIsValid();
572 :
573 : // Mark all unreachable code blocks as padding.
574 E : MarkUnreachableCodeAsPadding();
575 :
576 : // ... and we're done.
577 E : return true;
578 E : }
579 :
580 E : void BasicBlockDecomposer::CheckAllJumpTargetsStartABasicCodeBlock() const {
581 E : if (!check_decomposition_results_)
582 i : return;
583 :
584 E : JumpTargets::const_iterator offset_iter(jump_targets_.begin());
585 E : for (; offset_iter != jump_targets_.end(); ++offset_iter) {
586 : // The target basic-block should be a code basic-block.
587 E : BasicBlock* target_bb = GetBasicBlockAt(*offset_iter);
588 E : CHECK(target_bb != NULL);
589 E : CHECK_EQ(BasicBlock::BASIC_CODE_BLOCK, target_bb->type());
590 E : }
591 E : }
592 :
593 E : void BasicBlockDecomposer::CheckHasCompleteBasicBlockCoverage() const {
594 E : if (!check_decomposition_results_)
595 i : return;
596 :
597 : // Walk through the basic-block address space.
598 E : Offset next_start = 0;
599 E : RangeMapConstIter it(original_address_space_.begin());
600 E : for (; it != original_address_space_.end(); ++it) {
601 E : CHECK_EQ(it->first.start(), next_start);
602 E : CHECK_EQ(it->first.start(), it->second->offset());
603 :
604 E : BasicDataBlock* data_block = BasicDataBlock::Cast(it->second);
605 E : if (data_block != NULL) {
606 : // Data block's size should match the address segment exactly.
607 E : CHECK_EQ(it->first.size(), data_block->size());
608 : }
609 E : BasicCodeBlock* code_block = BasicCodeBlock::Cast(it->second);
610 E : if (code_block != NULL) {
611 : // Code blocks may be short the trailing successor instruction.
612 : BasicCodeBlock::Successors::const_iterator succ_it(
613 E : code_block->successors().begin());
614 E : Size block_size = code_block->GetInstructionSize();
615 E : for (; succ_it != code_block->successors().end(); ++succ_it)
616 E : block_size += succ_it->instruction_size();
617 :
618 E : CHECK_GE(it->first.size(), block_size);
619 : }
620 E : next_start += it->first.size();
621 E : }
622 :
623 : // At this point, if there were no gaps, next start will be the same as the
624 : // full size of the block we're decomposing.
625 E : CHECK_EQ(block_->size(), static_cast<size_t>(next_start));
626 E : }
627 :
628 E : void BasicBlockDecomposer::CheckAllControlFlowIsValid() const {
629 E : if (!check_decomposition_results_)
630 i : return;
631 :
632 : // Check that the subgraph is valid. This will make sure that the
633 : // instructions and successors generally make sense.
634 E : CHECK(subgraph_->IsValid());
635 :
636 : // The only thing left to check is that synthesized flow-through
637 : // successors refer to the adjacent basic-blocks.
638 E : RangeMapConstIter it(original_address_space_.begin());
639 E : for (; it != original_address_space_.end(); ++it) {
640 E : const BasicCodeBlock* bb = BasicCodeBlock::Cast(it->second);
641 E : if (bb == NULL)
642 E : continue;
643 :
644 E : const BasicBlock::Instructions& instructions = bb->instructions();
645 E : const BasicBlock::Successors& successors = bb->successors();
646 :
647 : // There may be at most 2 successors.
648 E : switch (successors.size()) {
649 : case 0:
650 E : break;
651 :
652 : case 1:
653 : // If the successor is synthesized, then flow is from this basic-block
654 : // to the next adjacent one.
655 E : if (successors.back().instruction_size() == 0) {
656 E : RangeMapConstIter next(it);
657 E : ++next;
658 E : CHECK(next != original_address_space_.end());
659 E : CHECK_EQ(successors.back().reference().basic_block(), next->second);
660 : }
661 E : break;
662 :
663 : case 2: {
664 : // Exactly one of the successors should have been synthesized.
665 E : bool front_synthesized = successors.front().instruction_size() == 0;
666 E : bool back_synthesized = successors.back().instruction_size() == 0;
667 E : CHECK_NE(front_synthesized, back_synthesized);
668 :
669 : // The synthesized successor flows from this basic-block to the next
670 : // adjacent one.
671 : const Successor& synthesized =
672 E : front_synthesized ? successors.front() : successors.back();
673 E : RangeMapConstIter next(it);
674 E : ++next;
675 E : CHECK(next != original_address_space_.end());
676 E : CHECK_EQ(synthesized.reference().basic_block(), next->second);
677 E : break;
678 : }
679 :
680 : default:
681 i : NOTREACHED();
682 : }
683 E : }
684 E : }
685 :
686 E : void BasicBlockDecomposer::CheckAllLabelsArePreserved() const {
687 E : if (!check_decomposition_results_)
688 i : return;
689 :
690 E : const Block* original_block = subgraph_->original_block();
691 E : if (original_block == NULL)
692 i : return;
693 :
694 : // Remove any labels that fall *after* the given block. This can happen for
695 : // scope and debug-end labels when the function has no epilog. It is rare, but
696 : // has been observed in the wild.
697 : // TODO(chrisha): Find a way to preserve these. We may need the notion of an
698 : // empty basic-block which gets assigned the label, or we may need to
699 : // augment BBs/instructions with the ability to have two labels: one tied
700 : // to the beginning of the object, and one to the end.
701 : Block::LabelMap::const_iterator it_past_block_end =
702 E : original_block->labels().lower_bound(original_block->size());
703 :
704 : // Grab a copy of the original labels (except any that are beyond the end of
705 : // the block data). We will be matching against these to ensure that they are
706 : // preserved in the BB decomposition.
707 : const Block::LabelMap original_labels(original_block->labels().begin(),
708 E : it_past_block_end);
709 E : if (original_labels.empty())
710 E : return;
711 :
712 : // A map to track which labels (by offset) have been found in the subgraph.
713 E : std::map<Offset, bool> labels_found;
714 :
715 : // Initialize the map of labels found in the subgraph.
716 E : Block::LabelMap::const_iterator label_iter = original_labels.begin();
717 E : for (; label_iter != original_labels.end(); ++label_iter)
718 E : labels_found.insert(std::make_pair(label_iter->first, false));
719 :
720 : // Walk through the subgraph and mark all of the labels found.
721 : BasicBlockSubGraph::BBCollection::const_iterator bb_iter =
722 E : subgraph_->basic_blocks().begin();
723 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
724 E : const BasicDataBlock* data_block = BasicDataBlock::Cast(*bb_iter);
725 E : if (data_block != NULL) {
726 : // Account for labels attached to basic-blocks.
727 E : if (data_block->has_label()) {
728 E : BlockGraph::Label label;
729 E : CHECK(original_block->GetLabel(data_block->offset(), &label));
730 E : CHECK(data_block->label() == label);
731 E : labels_found[data_block->offset()] = true;
732 E : }
733 : }
734 :
735 E : const BasicCodeBlock* code_block = BasicCodeBlock::Cast(*bb_iter);
736 E : if (code_block != NULL) {
737 : // Account for labels attached to instructions.
738 : BasicBlock::Instructions::const_iterator inst_iter =
739 E : code_block->instructions().begin();
740 E : Offset inst_offset = code_block->offset();
741 E : for (; inst_iter != code_block->instructions().end(); ++inst_iter) {
742 E : const Instruction& inst = *inst_iter;
743 E : if (inst.has_label()) {
744 E : BlockGraph::Label label;
745 E : CHECK(original_block->GetLabel(inst_offset, &label));
746 E : CHECK(inst.label() == label);
747 E : labels_found[inst_offset] = true;
748 E : }
749 E : inst_offset += inst.size();
750 E : }
751 :
752 : // Account for labels attached to successors.
753 : BasicBlock::Successors::const_iterator succ_iter =
754 E : code_block->successors().begin();
755 E : for (; succ_iter != code_block->successors().end(); ++succ_iter) {
756 E : const Successor& succ = *succ_iter;
757 E : if (succ.has_label()) {
758 E : BlockGraph::Label label;
759 E : CHECK_NE(0U, succ.instruction_size());
760 E : CHECK(original_block->GetLabel(inst_offset, &label));
761 E : CHECK(succ.label() == label);
762 E : labels_found[inst_offset] = true;
763 E : }
764 E : inst_offset += succ.instruction_size();
765 E : }
766 : }
767 E : }
768 :
769 : // We should have the right number of labels_found (check if we added
770 : // something to the wrong place).
771 E : CHECK_EQ(original_labels.size(), labels_found.size());
772 :
773 : // Make sure all of the items in labels_found have been set to true.
774 E : std::map<Offset, bool>::const_iterator found_iter = labels_found.begin();
775 E : for (; found_iter != labels_found.end(); ++found_iter) {
776 E : CHECK(found_iter->second);
777 E : }
778 E : }
779 :
780 : bool BasicBlockDecomposer::InsertBasicBlockRange(Offset offset,
781 : size_t size,
782 E : BasicBlockType type) {
783 E : DCHECK_LE(0, offset);
784 E : DCHECK_LT(0U, size);
785 E : DCHECK_LE(offset + size, block_->size());
786 E : DCHECK(type == BasicBlock::BASIC_CODE_BLOCK || current_instructions_.empty());
787 E : DCHECK(type == BasicBlock::BASIC_CODE_BLOCK || current_successors_.empty());
788 :
789 : // Find or create a name for this basic block. Reserve the label, if any,
790 : // to propagate to the basic block if there are no instructions in the
791 : // block to carry the label(s).
792 E : BlockGraph::Label label;
793 E : std::string basic_block_name;
794 E : if (block_->GetLabel(offset, &label)) {
795 E : basic_block_name = label.ToString();
796 E : } else {
797 : basic_block_name =
798 : base::StringPrintf("<%s+%04X-%s>",
799 : block_->name().c_str(),
800 : offset,
801 E : BasicBlock::BasicBlockTypeToString(type));
802 : }
803 :
804 : // Pre-flight address space insertion to make sure there's no
805 : // pre-existing conflicting range.
806 E : Range byte_range(offset, size);
807 : if (original_address_space_.FindFirstIntersection(byte_range) !=
808 E : original_address_space_.end()) {
809 i : LOG(ERROR) << "Attempted to insert overlapping basic block.";
810 i : return false;
811 : }
812 :
813 E : if (type == BasicBlock::BASIC_CODE_BLOCK) {
814 : // Create the code block.
815 E : BasicCodeBlock* code_block = subgraph_->AddBasicCodeBlock(basic_block_name);
816 E : if (code_block == NULL)
817 i : return false;
818 E : CHECK(original_address_space_.Insert(byte_range, code_block));
819 :
820 : // Populate code basic-block with instructions and successors.
821 E : code_block->set_offset(offset);
822 E : code_block->instructions().swap(current_instructions_);
823 E : code_block->successors().swap(current_successors_);
824 E : } else {
825 E : DCHECK(type == BasicBlock::BASIC_DATA_BLOCK);
826 :
827 : // Create the data block.
828 : BasicDataBlock* data_block = subgraph_->AddBasicDataBlock(
829 E : basic_block_name, size, block_->data() + offset);
830 E : if (data_block == NULL)
831 i : return false;
832 E : CHECK(original_address_space_.Insert(byte_range, data_block));
833 :
834 : // Capture the source range (if any) for the data block.
835 E : data_block->set_source_range(GetSourceRange(offset, size));
836 :
837 : // Data basic-blocks carry their labels at the head of the basic blocks.
838 : // A padding basic-block might also be labeled if the block contains
839 : // unreachable code (for example, INT3 or NOP instructions following a call
840 : // to a non-returning function).
841 E : data_block->set_offset(offset);
842 E : data_block->set_label(label);
843 : }
844 :
845 E : return true;
846 E : }
847 :
848 E : bool BasicBlockDecomposer::SplitCodeBlocksAtBranchTargets() {
849 E : JumpTargets::const_iterator jump_target_iter(jump_targets_.begin());
850 E : for (; jump_target_iter != jump_targets_.end(); ++jump_target_iter) {
851 : // Resolve the target basic-block.
852 E : Offset target_offset = *jump_target_iter;
853 E : BasicBlock* target_bb = NULL;
854 E : Range target_bb_range;
855 E : CHECK(FindBasicBlock(target_offset, &target_bb, &target_bb_range));
856 :
857 : // If we're jumping to the start of a basic block, there isn't any work
858 : // to do.
859 E : if (target_offset == target_bb_range.start())
860 E : continue;
861 :
862 : // The target must be a code block.
863 E : BasicCodeBlock* target_code_block = BasicCodeBlock::Cast(target_bb);
864 E : CHECK(target_code_block != NULL);
865 :
866 : // Otherwise, we have found a basic-block that we need to split.
867 : // Let's contract the range the original occupies in the basic-block
868 : // address space, then add a second block at the target offset.
869 E : size_t left_split_size = target_offset - target_bb_range.start();
870 E : bool removed = original_address_space_.Remove(target_bb_range);
871 E : DCHECK(removed);
872 :
873 E : Range left_split_range(target_bb_range.start(), left_split_size);
874 : bool inserted =
875 E : original_address_space_.Insert(left_split_range, target_code_block);
876 E : DCHECK(inserted);
877 :
878 : // Now we split up containing_range into two new ranges and replace
879 : // containing_range with the two new entries.
880 :
881 : // Slice the trailing half of the instructions and the successors
882 : // off the block.
883 E : DCHECK(current_instructions_.empty());
884 E : DCHECK(current_successors_.empty());
885 : bool split = SplitInstructionListAt(left_split_size,
886 : &target_code_block->instructions(),
887 E : ¤t_instructions_);
888 E : DCHECK(split);
889 E : target_code_block->successors().swap(current_successors_);
890 :
891 : // Set-up the flow-through successor for the first "half".
892 : BasicBlockReference ref(BlockGraph::PC_RELATIVE_REF,
893 : 1, // Size is immaterial in successors.
894 : const_cast<Block*>(block_),
895 : target_offset,
896 E : target_offset);
897 : target_code_block->successors().push_back(
898 E : Successor(Successor::kConditionTrue, ref, 0));
899 :
900 : // Create the basic-block representing the second "half".
901 : if (!InsertBasicBlockRange(target_offset,
902 : target_bb_range.size() - left_split_size,
903 E : target_code_block->type())) {
904 i : LOG(ERROR) << "Failed to insert second half of split block.";
905 i : return false;
906 : }
907 E : }
908 :
909 E : return true;
910 E : }
911 :
912 E : bool BasicBlockDecomposer::FillInDataBlocks() {
913 E : BlockGraph::Block::LabelMap::const_iterator iter = block_->labels().begin();
914 E : BlockGraph::Block::LabelMap::const_iterator end = block_->labels().end();
915 E : for (; iter != end; ++iter) {
916 E : if (!iter->second.has_attributes(BlockGraph::DATA_LABEL))
917 E : continue;
918 :
919 E : BlockGraph::Block::LabelMap::const_iterator next = iter;
920 E : ++next;
921 :
922 E : BlockGraph::Offset bb_start = iter->first;
923 E : BlockGraph::Offset bb_end = (next == end) ? block_->size() : next->first;
924 E : size_t bb_size = bb_end - bb_start;
925 E : if (!InsertBasicBlockRange(bb_start, bb_size, BasicBlock::BASIC_DATA_BLOCK))
926 i : return false;
927 E : }
928 E : return true;
929 E : }
930 :
931 E : bool BasicBlockDecomposer::CopyExternalReferrers() {
932 E : const BlockGraph::Block::ReferrerSet& referrers = block_->referrers();
933 E : BlockGraph::Block::ReferrerSet::const_iterator iter = referrers.begin();
934 E : for (; iter != referrers.end(); ++iter) {
935 : // Find the reference this referrer record describes.
936 E : const BlockGraph::Block* referrer = iter->first;
937 E : DCHECK(referrer != NULL);
938 :
939 : // We only care about external referrers.
940 E : if (referrer == block_)
941 E : continue;
942 :
943 : // This is an external referrer. Find the reference in the referring block.
944 E : Offset source_offset = iter->second;
945 E : BlockGraph::Reference reference;
946 E : bool found = referrer->GetReference(source_offset, &reference);
947 E : DCHECK(found);
948 :
949 : // Find the basic block the reference refers to. It can only have an
950 : // offset that's different from the base if it's not a code block.
951 E : BasicBlock* target_bb = GetBasicBlockAt(reference.base());
952 E : DCHECK(target_bb != NULL);
953 : DCHECK(reference.base() == reference.offset() ||
954 E : target_bb->type() != BasicBlock::BASIC_CODE_BLOCK);
955 :
956 : // Insert the referrer into the target bb's referrer set. Note that there
957 : // is no corresponding reference update to the referring block. The
958 : // target bb will track these so a BlockBuilder can properly update
959 : // the referrers when merging a subgraph back into the block-graph.
960 : bool inserted = target_bb->referrers().insert(
961 E : BasicBlockReferrer(referrer, source_offset)).second;
962 E : DCHECK(inserted);
963 E : }
964 :
965 E : return true;
966 E : }
967 :
968 : bool BasicBlockDecomposer::CopyReferences(
969 E : Offset item_offset, Size item_size, BasicBlockReferenceMap* refs) {
970 E : DCHECK_LE(0, item_offset);
971 E : DCHECK_LT(0U, item_size);
972 E : DCHECK(refs != NULL);
973 :
974 : // Figure out the bounds of item.
975 E : BlockGraph::Offset end_offset = item_offset + item_size;
976 :
977 : // Get iterators encompassing all references within the bounds of item.
978 : BlockGraph::Block::ReferenceMap::const_iterator ref_iter =
979 E : block_->references().lower_bound(item_offset);
980 : BlockGraph::Block::ReferenceMap::const_iterator end_iter =
981 E : block_->references().lower_bound(end_offset);
982 :
983 E : for (; ref_iter != end_iter; ++ref_iter) {
984 : // Calculate the local offset of this reference within item.
985 E : BlockGraph::Offset local_offset = ref_iter->first - item_offset;
986 E : const BlockGraph::Reference& reference = ref_iter->second;
987 :
988 : // We expect long references for everything except flow control.
989 E : CHECK_EQ(4U, reference.size());
990 E : DCHECK_LE(local_offset + reference.size(), static_cast<Size>(end_offset));
991 :
992 E : if (reference.referenced() != block_) {
993 : // For external references, we can directly reference the other block.
994 : bool inserted = refs->insert(std::make_pair(
995 : local_offset,
996 : BasicBlockReference(reference.type(), reference.size(),
997 : reference.referenced(), reference.offset(),
998 E : reference.base()))).second;
999 E : DCHECK(inserted);
1000 E : } else {
1001 : // For intra block_ references, find the corresponding basic block in
1002 : // the basic block address space.
1003 E : BasicBlock* target_bb = GetBasicBlockAt(reference.base());
1004 E : DCHECK(target_bb != NULL);
1005 :
1006 : // Create target basic-block relative values for the base and offset.
1007 E : CHECK_EQ(reference.offset(), reference.base());
1008 :
1009 : // Insert a reference to the target basic block.
1010 : bool inserted = refs->insert(std::make_pair(
1011 : local_offset,
1012 : BasicBlockReference(reference.type(),
1013 : reference.size(),
1014 E : target_bb))).second;
1015 E : DCHECK(inserted);
1016 : }
1017 E : }
1018 E : return true;
1019 E : }
1020 :
1021 E : bool BasicBlockDecomposer::CopyReferences() {
1022 : // Copy the references for the source range of each basic-block (by
1023 : // instruction for code basic-blocks). External referrers and successors are
1024 : // handled in separate passes.
1025 : BasicBlockSubGraph::BBCollection::iterator bb_iter =
1026 E : subgraph_->basic_blocks().begin();
1027 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
1028 E : BasicCodeBlock* code_block = BasicCodeBlock::Cast(*bb_iter);
1029 E : if (code_block != NULL) {
1030 E : DCHECK_EQ(BasicBlock::BASIC_CODE_BLOCK, code_block->type());
1031 :
1032 E : Offset inst_offset = code_block->offset();
1033 : BasicBlock::Instructions::iterator inst_iter =
1034 E : code_block->instructions().begin();
1035 E : for (; inst_iter != code_block->instructions().end(); ++inst_iter) {
1036 : if (!CopyReferences(inst_offset,
1037 : inst_iter->size(),
1038 E : &inst_iter->references())) {
1039 i : return false;
1040 : }
1041 :
1042 E : inst_offset += inst_iter->size();
1043 E : }
1044 : }
1045 :
1046 E : BasicDataBlock* data_block = BasicDataBlock::Cast(*bb_iter);
1047 E : if (data_block != NULL) {
1048 E : DCHECK_NE(BasicBlock::BASIC_CODE_BLOCK, data_block->type());
1049 :
1050 : if (!CopyReferences(data_block->offset(),
1051 : data_block->size(),
1052 E : &data_block->references())) {
1053 i : return false;
1054 : }
1055 : }
1056 E : }
1057 :
1058 E : return true;
1059 E : }
1060 :
1061 E : bool BasicBlockDecomposer::ResolveSuccessors() {
1062 : BasicBlockSubGraph::BBCollection::iterator bb_iter =
1063 E : subgraph_->basic_blocks().begin();
1064 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
1065 : // Only code basic-blocks have successors and instructions.
1066 E : BasicCodeBlock* code_block = BasicCodeBlock::Cast(*bb_iter);
1067 E : if (code_block == NULL)
1068 E : continue;
1069 :
1070 : BasicBlock::Successors::iterator succ_iter =
1071 E : code_block->successors().begin();
1072 : BasicBlock::Successors::iterator succ_iter_end =
1073 E : code_block->successors().end();
1074 E : for (; succ_iter != succ_iter_end; ++succ_iter) {
1075 E : if (succ_iter->reference().block() != block_)
1076 E : continue;
1077 :
1078 : // Find the basic block the successor references.
1079 : BasicBlock* target_code_block =
1080 E : GetBasicBlockAt(succ_iter->reference().offset());
1081 E : DCHECK(target_code_block != NULL);
1082 :
1083 : // We transform all successor branches into 4-byte pc-relative targets.
1084 : succ_iter->set_reference(
1085 : BasicBlockReference(
1086 E : BlockGraph::PC_RELATIVE_REF, 4, target_code_block));
1087 E : DCHECK(succ_iter->reference().IsValid());
1088 E : }
1089 E : }
1090 :
1091 E : return true;
1092 E : }
1093 :
1094 E : void BasicBlockDecomposer::MarkUnreachableCodeAsPadding() {
1095 E : BasicBlockSubGraph::ReachabilityMap rm;
1096 E : subgraph_->GetReachabilityMap(&rm);
1097 E : DCHECK_EQ(rm.size(), subgraph_->basic_blocks().size());
1098 : BasicBlockSubGraph::BBCollection::iterator bb_iter =
1099 E : subgraph_->basic_blocks().begin();
1100 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
1101 E : BasicCodeBlock* code_bb = BasicCodeBlock::Cast(*bb_iter);
1102 E : if (code_bb != NULL) {
1103 E : if (!subgraph_->IsReachable(rm, code_bb))
1104 E : code_bb->MarkAsPadding();
1105 : }
1106 E : }
1107 E : }
1108 :
1109 : } // namespace block_graph
|