1 : // Copyright 2012 Google Inc.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of basic block decomposer.
16 : //
17 : // TODO(rogerm): Refactor this to just do a straight disassembly of all bytes
18 : // (up to the start of embedded data: jump and case tables) to a list of
19 : // instructions, then chop up the instruction list into basic blocks in
20 : // a second pass, splicing the instructions into the basic-block instruction
21 : // lists and generating successors.
22 :
23 : #include "syzygy/block_graph/basic_block_decomposer.h"
24 :
25 : #include <algorithm>
26 : #include <vector>
27 :
28 : #include "base/logging.h"
29 : #include "base/stringprintf.h"
30 : #include "syzygy/block_graph/basic_block.h"
31 : #include "syzygy/block_graph/basic_block_subgraph.h"
32 : #include "syzygy/block_graph/block_graph.h"
33 : #include "syzygy/block_graph/block_util.h"
34 :
35 : #include "mnemonics.h" // NOLINT
36 :
37 : namespace block_graph {
38 :
39 : namespace {
40 :
41 : using block_graph::BasicBlock;
42 : using block_graph::BasicBlockReference;
43 : using block_graph::BasicBlockReferrer;
44 : using block_graph::BasicBlockSubGraph;
45 : using block_graph::BlockGraph;
46 : using block_graph::Instruction;
47 : using block_graph::Successor;
48 : using core::Disassembler;
49 :
50 : typedef BlockGraph::Block Block;
51 : typedef BlockGraph::Offset Offset;
52 : typedef BlockGraph::Size Size;
53 : typedef BasicBlockSubGraph::BBAddressSpace BBAddressSpace;
54 : typedef BBAddressSpace::Range Range;
55 : typedef BBAddressSpace::RangeMap RangeMap;
56 : typedef BBAddressSpace::RangeMapConstIter RangeMapConstIter;
57 : typedef BBAddressSpace::RangeMapIter RangeMapIter;
58 :
59 : // We use a (somewhat) arbitrary value as the disassembly address for a block
60 : // so we can tell the difference between a reference to the beginning of the
61 : // block (offset=0) and a null address.
62 : const size_t kDisassemblyAddress = 65536;
63 :
64 : // Look up the reference made from an instruction's byte range within the
65 : // given block. The reference should start AFTER the instruction starts
66 : // and there should be exactly 1 reference in the byte range.
67 : // Returns true if the reference was found, false otherwise.
68 : bool GetReferenceOfInstructionAt(const Block* block,
69 : Offset instr_offset,
70 : Size instr_size,
71 E : BlockGraph::Reference* ref) {
72 E : DCHECK(block != NULL);
73 E : DCHECK_LE(0, instr_offset);
74 E : DCHECK_LT(0U, instr_size);
75 E : DCHECK(ref != NULL);
76 :
77 : // Find the first reference following the instruction offset.
78 : Block::ReferenceMap::const_iterator ref_iter =
79 E : block->references().upper_bound(instr_offset);
80 :
81 : // If no reference is found then we're done.
82 E : if (ref_iter == block->references().end())
83 i : return false;
84 :
85 : // If the reference occurs outside the instruction then we're done.
86 E : Offset next_instr_offset = instr_offset + instr_size;
87 E : if (ref_iter->first >= next_instr_offset)
88 E : return false;
89 :
90 : // Otherwise, the reference should fit into the instruction.
91 : CHECK_LE(static_cast<size_t>(next_instr_offset),
92 E : ref_iter->first + ref_iter->second.size());
93 :
94 : // And it should be the only reference in the instruction.
95 E : if (ref_iter != block->references().begin()) {
96 E : Block::ReferenceMap::const_iterator prev_iter = ref_iter;
97 E : --prev_iter;
98 : CHECK_GE(static_cast<size_t>(instr_offset),
99 E : prev_iter->first + prev_iter->second.size());
100 E : }
101 E : Block::ReferenceMap::const_iterator next_iter = ref_iter;
102 E : ++next_iter;
103 : CHECK(next_iter == block->references().end() ||
104 E : next_iter->first >= next_instr_offset);
105 :
106 E : *ref = ref_iter->second;
107 E : return true;
108 E : }
109 :
110 : } // namespace
111 :
112 : BasicBlockDecomposer::BasicBlockDecomposer(const BlockGraph::Block* block,
113 : BasicBlockSubGraph* subgraph)
114 : : Disassembler(block->data(),
115 : block->size(),
116 : AbsoluteAddress(kDisassemblyAddress),
117 : Disassembler::InstructionCallback()),
118 : block_(block),
119 : subgraph_(subgraph),
120 : current_block_start_(0),
121 E : check_decomposition_results_(true) {
122 : // TODO(rogerm): Once we're certain this is stable for all input binaries
123 : // turn on check_decomposition_results_ by default only ifndef NDEBUG.
124 E : DCHECK(block != NULL);
125 E : DCHECK(block->type() == BlockGraph::CODE_BLOCK);
126 E : DCHECK(CodeBlockAttributesAreBasicBlockSafe(block));
127 E : DCHECK(subgraph != NULL);
128 E : }
129 :
130 E : bool BasicBlockDecomposer::Decompose() {
131 E : DCHECK(subgraph_->basic_blocks().empty());
132 E : DCHECK(subgraph_->block_descriptions().empty());
133 E : DCHECK(subgraph_->original_address_space().empty());
134 E : subgraph_->set_original_block(block_);
135 :
136 E : InitUnvisitedAndJumpTargets();
137 :
138 E : WalkResult result = Walk();
139 : if (result != Disassembler::kWalkSuccess &&
140 E : result != Disassembler::kWalkIncomplete) {
141 i : return false;
142 : }
143 :
144 : typedef BasicBlockSubGraph::BlockDescription BlockDescription;
145 E : subgraph_->block_descriptions().push_back(BlockDescription());
146 E : BlockDescription& desc = subgraph_->block_descriptions().back();
147 E : desc.name = block_->name();
148 E : desc.type = block_->type();
149 E : desc.alignment = block_->alignment();
150 E : desc.attributes = block_->attributes();
151 E : desc.section = block_->section();
152 :
153 E : Offset offset = 0;
154 E : RangeMapConstIter it = subgraph_->original_address_space().begin();
155 E : for (; it != subgraph_->original_address_space().end(); ++it) {
156 E : DCHECK_EQ(it->first.start(), offset);
157 E : desc.basic_block_order.push_back(it->second);
158 E : offset += it->first.size();
159 E : }
160 :
161 E : return true;
162 E : }
163 :
164 E : void BasicBlockDecomposer::InitUnvisitedAndJumpTargets() {
165 E : jump_targets_.clear();
166 : // We initialize our jump_targets_ and unvisited sets to the set of
167 : // referenced code locations. This covers all locations which are
168 : // externally referenced, as well as those that are internally referenced
169 : // via a branching instruction or jump table.
170 : BlockGraph::Block::ReferrerSet::const_iterator ref_iter =
171 E : block_->referrers().begin();
172 E : for (; ref_iter != block_->referrers().end(); ++ref_iter) {
173 E : BlockGraph::Reference ref;
174 E : bool found = ref_iter->first->GetReference(ref_iter->second, &ref);
175 E : DCHECK(found);
176 E : DCHECK_EQ(block_, ref.referenced());
177 E : DCHECK_LE(0, ref.base());
178 E : DCHECK_LT(static_cast<size_t>(ref.base()), block_->size());
179 E : DCHECK_EQ(ref.base(), ref.offset());
180 :
181 : // Look for the first label past the reference. Back up if we can to the
182 : // previous label.
183 : BlockGraph::Block::LabelMap::const_iterator label_iter =
184 E : block_->labels().upper_bound(ref.base());
185 E : if (label_iter != block_->labels().begin())
186 E : --label_iter;
187 :
188 : // If there is no previous label, or it is not a data label, then this is
189 : // a safe jump target.
190 : if (label_iter == block_->labels().end() ||
191 : label_iter->first > ref.offset() ||
192 E : !label_iter->second.has_attributes(BlockGraph::DATA_LABEL)) {
193 E : AbsoluteAddress addr(code_addr_ + ref.base());
194 E : Unvisited(addr);
195 E : jump_targets_.insert(addr);
196 : }
197 E : }
198 E : }
199 :
200 : Disassembler::CallbackDirective BasicBlockDecomposer::OnInstruction(
201 E : AbsoluteAddress addr, const _DInst& inst) {
202 E : Offset offset = addr - code_addr_;
203 :
204 : // If this instruction has run into known data, then we have a problem in
205 : // the decomposer.
206 E : BlockGraph::Label label;
207 : CHECK(!block_->GetLabel(offset, &label) ||
208 E : !label.has_attributes(BlockGraph::DATA_LABEL))
209 : << "Disassembling into data at offset " << offset << " of "
210 : << block_->name() << ".";
211 :
212 E : VLOG(3) << "Disassembled " << GET_MNEMONIC_NAME(inst.opcode)
213 : << " instruction (" << static_cast<int>(inst.size)
214 : << " bytes) at offset " << offset << ".";
215 :
216 : current_instructions_.push_back(
217 E : Instruction(inst, offset, inst.size, code_ + offset));
218 E : if (label.IsValid())
219 E : current_instructions_.back().set_label(label);
220 :
221 : // If continuing this basic-block would disassemble into known data then
222 : // end the current basic-block.
223 : if (block_->GetLabel(offset + inst.size, &label) &&
224 E : label.has_attributes(BlockGraph::DATA_LABEL)) {
225 E : return kDirectiveTerminatePath;
226 : }
227 :
228 : // If this instruction is a call to a non-returning function, then this is
229 : // essentially a control flow operation, and we need to end this basic block.
230 : // We'll schedule the disassembly of any instructions which follow it as
231 : // a separate basic block, and mark that basic block as unreachable in a
232 : // post pass.
233 : if (META_GET_FC(inst.meta) == FC_CALL &&
234 E : (inst.ops[0].type == O_PC || inst.ops[0].type == O_DISP)) {
235 E : BlockGraph::Reference ref;
236 E : bool found = GetReferenceOfInstructionAt(block_, offset, inst.size, &ref);
237 E : CHECK(found);
238 : if (Instruction::CallsNonReturningFunction(inst, ref.referenced(),
239 E : ref.offset())) {
240 E : Unvisited(addr + inst.size);
241 E : return kDirectiveTerminatePath;
242 : }
243 : }
244 :
245 E : return kDirectiveContinue;
246 E : }
247 :
248 : Disassembler::CallbackDirective BasicBlockDecomposer::OnBranchInstruction(
249 E : AbsoluteAddress addr, const _DInst& inst, AbsoluteAddress dest) {
250 : // Note: Both addr and dest are fabricated addresses. The code_addr_ has
251 : // been selected such that addr will never be 0; similarly, dest should
252 : // only be 0 for control flow instructions having no explicit destination.
253 : // Do not use dest to resolve the destination, instead find the
254 : // corresponding reference in the byte range of the original instruction.
255 :
256 : // The branch instruction should have already been appended to the
257 : // instruction list.
258 : DCHECK_EQ(0, ::memcmp(¤t_instructions_.back().representation(),
259 : &inst,
260 E : sizeof(inst)));
261 :
262 : // Make sure we understand the branching condition. If we don't, then there's
263 : // an instruction we have failed to consider.
264 E : Successor::Condition condition = Successor::OpCodeToCondition(inst.opcode);
265 E : CHECK_NE(Successor::kInvalidCondition, condition)
266 : << "Received unknown condition for branch instruction: "
267 : << GET_MNEMONIC_NAME(inst.opcode) << ".";
268 :
269 : // If this is a conditional branch add the inverse conditional successor to
270 : // represent the fall-through. If we don't understand the inverse, then
271 : // there's an instruction we have failed to consider.
272 E : if (META_GET_FC(inst.meta) == FC_CND_BRANCH) {
273 : Successor::Condition inverse_condition =
274 E : Successor::InvertCondition(condition);
275 E : CHECK_NE(Successor::kInvalidCondition, inverse_condition)
276 : << "Non-invertible condition seen for branch instruction: "
277 : << GET_MNEMONIC_NAME(inst.opcode) << ".";
278 :
279 : // Create an (unresolved) successor pointing to the next instruction.
280 : current_successors_.push_front(
281 : Successor(inverse_condition,
282 : (addr + inst.size) - code_addr_,
283 : BasicBlock::kNoOffset,
284 E : 0));
285 E : jump_targets_.insert(addr + inst.size);
286 : }
287 :
288 : // Note that some control flow instructions have no explicit target (for
289 : // example, RET, SYS* and computed branches); for these dest will be 0.
290 : // We do not explicitly model these with successor relationships. Instead,
291 : // we leave the instruction (and its corresponding references, in the case
292 : // of computed jumps) intact and move on.
293 E : if (dest.value() != 0) {
294 : // Take the last instruction out of the instruction list, we'll represent
295 : // it as a successor instead.
296 E : Successor::Offset instr_offset = current_instructions_.back().offset();
297 E : Successor::Size instr_size = current_instructions_.back().size();
298 E : BlockGraph::Label instr_label = current_instructions_.back().label();
299 E : current_instructions_.pop_back();
300 E : DCHECK_EQ(addr - code_addr_, instr_offset);
301 E : DCHECK_EQ(inst.size, instr_size);
302 :
303 : // Figure out where the branch is going by finding the reference that's
304 : // inside the instruction's byte range.
305 E : BlockGraph::Reference ref;
306 : bool found = GetReferenceOfInstructionAt(
307 E : block_, instr_offset, instr_size, &ref);
308 :
309 : // Create the appropriate successor depending on whether or not the target
310 : // is intra- or inter-block.
311 E : if (!found || ref.referenced() == block_) {
312 : // This is an intra-block reference. The target basic block may not
313 : // exist yet, so we'll defer patching up this reference until later.
314 E : Offset target_offset = dest - code_addr_;
315 E : CHECK_LE(0, target_offset);
316 E : CHECK_LT(static_cast<size_t>(target_offset), code_size_);
317 : current_successors_.push_front(
318 E : Successor(condition, target_offset, instr_offset, instr_size));
319 E : jump_targets_.insert(dest);
320 E : } else {
321 : // This is an inter-block jump. We can create a fully resolved reference.
322 : BasicBlockReference bb_ref(
323 E : ref.type(), ref.size(), ref.referenced(), ref.offset(), ref.base());
324 : current_successors_.push_front(
325 E : Successor(condition, bb_ref, instr_offset, instr_size));
326 : }
327 :
328 E : if (instr_label.IsValid())
329 E : current_successors_.front().set_label(instr_label);
330 E : }
331 :
332 : // This marks the end of a basic block. Note that the disassembler will
333 : // handle ending the instruction run and beginning a new one for the next
334 : // basic block (including the branch-not-taken arc).
335 E : return kDirectiveContinue;
336 E : }
337 :
338 : // Called every time disassembly is started from a new address. Will be
339 : // called for at least every address in unvisited_.
340 : Disassembler::CallbackDirective BasicBlockDecomposer::OnStartInstructionRun(
341 E : AbsoluteAddress start_address) {
342 : // The address of the beginning of the current basic block.
343 E : current_block_start_ = start_address;
344 E : DCHECK(current_instructions_.empty());
345 E : DCHECK(current_successors_.empty());
346 E : return kDirectiveContinue;
347 E : }
348 :
349 : // Called when a walk from a given entry point has terminated.
350 : Disassembler::CallbackDirective BasicBlockDecomposer::OnEndInstructionRun(
351 E : AbsoluteAddress addr, const _DInst& inst, ControlFlowFlag control_flow) {
352 : // If an otherwise straight run of instructions is split because it crosses
353 : // a basic block boundary we need to set up the implicit control flow arc
354 : // here.
355 E : if (control_flow == kControlFlowContinues) {
356 E : DCHECK(current_successors_.empty());
357 E : DCHECK(!current_instructions_.empty());
358 E : DCHECK(!current_instructions_.back().IsImplicitControlFlow());
359 :
360 : current_successors_.push_front(
361 : Successor(Successor::kConditionTrue,
362 : (addr + inst.size) - code_addr_, // To be resolved later.
363 : BasicBlock::kNoOffset,
364 E : 0));
365 : }
366 :
367 : // We have reached the end of the current walk or we handled a conditional
368 : // branch. Let's mark this as the end of a basic block.
369 E : size_t basic_block_size = addr - current_block_start_ + inst.size;
370 E : DCHECK_LT(0U, basic_block_size);
371 : if (!InsertBasicBlockRange(current_block_start_,
372 : basic_block_size,
373 E : BasicBlock::BASIC_CODE_BLOCK)) {
374 i : return kDirectiveAbort;
375 : }
376 :
377 E : return kDirectiveContinue;
378 E : }
379 :
380 E : Disassembler::CallbackDirective BasicBlockDecomposer::OnDisassemblyComplete() {
381 : // Split code blocks at branch targets.
382 E : if (!SplitCodeBlocksAtBranchTargets()) {
383 i : LOG(ERROR) << "Failed to split code blocks at branch targets.";
384 i : return kDirectiveAbort;
385 : }
386 :
387 : // By this point, we should have basic blocks for all visited code.
388 E : CheckAllJumpTargetsStartABasicCodeBlock();
389 :
390 : // Demarcate the data basic blocks. There should be no overlap with code.
391 E : if (!FillInDataBlocks()) {
392 i : LOG(ERROR) << "Failed to fill in data basic-block ranges.";
393 i : return kDirectiveAbort;
394 : }
395 :
396 : // We may not have covered some ranges of the macro block. For all such
397 : // ranges, build basic blocks and mark them as padding. This might
398 : // include unreachable code in unoptimized input binaries.
399 E : if (!FillInPaddingBlocks()) {
400 i : LOG(ERROR) << "Failed to fill in padding basic-block ranges.";
401 i : return kDirectiveAbort;
402 : }
403 :
404 : // We should now have contiguous block ranges that cover every byte in the
405 : // macro block. Verify that this is so.
406 E : CheckHasCompleteBasicBlockCoverage();
407 :
408 : // We should have propagated all of the labels in the original block into
409 : // the basic-block subgraph.
410 E : CheckAllLabelsArePreserved();
411 :
412 : // Populate the referrers in the basic block data structures by copying
413 : // them from the original source block.
414 E : if (!CopyExternalReferrers()) {
415 i : LOG(ERROR) << "Failed to populate basic-block referrers.";
416 i : return kDirectiveAbort;
417 : }
418 :
419 : // Populate the references in the basic block data structures by copying
420 : // them from the original source block. This does not handle the successor
421 : // references.
422 E : if (!CopyReferences()) {
423 i : LOG(ERROR) << "Failed to populate basic-block references.";
424 i : return kDirectiveAbort;
425 : }
426 :
427 : // Wire up the the basic-block successors. These are not handled by
428 : // CopyReferences(), above.
429 E : if (!ResolveSuccessors()) {
430 i : LOG(ERROR) << "Failed to resolve basic-block successors.";
431 i : return kDirectiveAbort;
432 : }
433 :
434 : // All the control flow we have derived should be valid.
435 E : CheckAllControlFlowIsValid();
436 :
437 : // ... and we're done.
438 E : return kDirectiveContinue;
439 E : }
440 :
441 E : void BasicBlockDecomposer::CheckAllJumpTargetsStartABasicCodeBlock() const {
442 E : if (!check_decomposition_results_)
443 i : return;
444 :
445 E : AddressSet::const_iterator addr_iter(jump_targets_.begin());
446 E : for (; addr_iter != jump_targets_.end(); ++addr_iter) {
447 : // The target basic-block should be a code basic-block.
448 E : BasicBlock* target_bb = subgraph_->GetBasicBlockAt(*addr_iter - code_addr_);
449 E : CHECK(target_bb != NULL);
450 E : CHECK_EQ(BasicBlock::BASIC_CODE_BLOCK, target_bb->type());
451 E : }
452 E : }
453 :
454 E : void BasicBlockDecomposer::CheckHasCompleteBasicBlockCoverage() const {
455 E : if (!check_decomposition_results_)
456 i : return;
457 :
458 : const BBAddressSpace& basic_block_address_space =
459 E : subgraph_->original_address_space();
460 :
461 : // Walk through the basic-block address space.
462 E : Offset next_start = 0;
463 E : RangeMapConstIter it(basic_block_address_space.begin());
464 E : for (; it != basic_block_address_space.end(); ++it) {
465 E : CHECK_EQ(it->first.start(), next_start);
466 E : CHECK_EQ(it->first.size(), it->second->size());
467 E : next_start += it->first.size();
468 E : }
469 :
470 : // At this point, if there were no gaps, next start will be the same as the
471 : // full size of the block we're decomposing.
472 E : CHECK_EQ(code_size_, static_cast<size_t>(next_start));
473 E : }
474 :
475 E : void BasicBlockDecomposer::CheckAllControlFlowIsValid() const {
476 E : if (!check_decomposition_results_)
477 i : return;
478 :
479 : // Check that the subgraph is valid. This will make sure that the
480 : // instructions and successors generally make sense.
481 E : CHECK(subgraph_->IsValid());
482 :
483 : // The only thing left to check is that synthesized flow-through
484 : // successors refer to the adjacent basic-blocks.
485 E : RangeMapConstIter it(subgraph_->original_address_space().begin());
486 E : for (; it != subgraph_->original_address_space().end(); ++it) {
487 E : const BasicBlock* bb = it->second;
488 E : if (bb->type() != BasicBlock::BASIC_CODE_BLOCK)
489 E : continue;
490 :
491 E : const BasicBlock::Instructions& instructions = bb->instructions();
492 E : const BasicBlock::Successors& successors = bb->successors();
493 :
494 : // There may be at most 2 successors.
495 E : switch (successors.size()) {
496 : case 0:
497 E : break;
498 :
499 : case 1:
500 : // If the successor is synthesized, then flow is from this basic-block
501 : // to the next adjacent one.
502 E : if (successors.back().instruction_offset() == -1) {
503 E : RangeMapConstIter next(it);
504 E : ++next;
505 E : CHECK(next != subgraph_->original_address_space().end());
506 E : CHECK_EQ(successors.back().reference().basic_block(), next->second);
507 E : }
508 E : break;
509 :
510 : case 2: {
511 : // Exactly one of the successors should have been synthesized.
512 E : bool front_synthesized = successors.front().instruction_offset() == -1;
513 E : bool back_synthesized = successors.back().instruction_offset() == -1;
514 E : CHECK_NE(front_synthesized, back_synthesized);
515 :
516 : // The synthesized successor flows from this basic-block to the next
517 : // adjacent one.
518 : const Successor& synthesized =
519 E : front_synthesized ? successors.front() : successors.back();
520 E : RangeMapConstIter next(it);
521 E : ++next;
522 E : CHECK(next != subgraph_->original_address_space().end());
523 E : CHECK_EQ(synthesized.reference().basic_block(), next->second);
524 E : break;
525 : }
526 :
527 : default:
528 i : NOTREACHED();
529 : }
530 E : }
531 E : }
532 :
533 E : void BasicBlockDecomposer::CheckAllLabelsArePreserved() const {
534 E : if (!check_decomposition_results_)
535 i : return;
536 :
537 E : const Block* original_block = subgraph_->original_block();
538 E : if (original_block == NULL)
539 i : return;
540 :
541 E : const Block::LabelMap original_labels = original_block->labels();
542 E : if (original_labels.empty())
543 i : return;
544 :
545 : // A map to track which labels (by offset) have been found in the subgraph.
546 E : std::map<Offset, bool> labels_found;
547 :
548 : // Initialize the map of labels found in the subgraph.
549 E : Block::LabelMap::const_iterator label_iter = original_labels.begin();
550 E : for (; label_iter != original_labels.end(); ++label_iter)
551 E : labels_found.insert(std::make_pair(label_iter->first, false));
552 :
553 : // Walk through the subgraph and mark all of the labels found.
554 : BasicBlockSubGraph::BBCollection::const_iterator bb_iter =
555 E : subgraph_->basic_blocks().begin();
556 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
557 : // Account for labels attached to basic-blocks.
558 E : const BasicBlock& bb = bb_iter->second;
559 E : if (bb.has_label()) {
560 E : BlockGraph::Label label;
561 E : CHECK(original_block->GetLabel(bb.offset(), &label));
562 E : CHECK(bb.label() == label);
563 E : labels_found[bb.offset()] = true;
564 E : }
565 :
566 : // Account for labels attached to instructions.
567 : BasicBlock::Instructions::const_iterator inst_iter =
568 E : bb.instructions().begin();
569 E : for (; inst_iter != bb.instructions().end(); ++inst_iter) {
570 E : const Instruction& inst = *inst_iter;
571 E : if (inst.has_label()) {
572 E : BlockGraph::Label label;
573 E : CHECK(original_block->GetLabel(inst.offset(), &label));
574 E : CHECK(inst.label() == label);
575 E : labels_found[inst.offset()] = true;
576 E : }
577 E : }
578 :
579 : // Account for labels attached to successors.
580 : BasicBlock::Successors::const_iterator succ_iter =
581 E : bb.successors().begin();
582 E : for (; succ_iter != bb.successors().end(); ++succ_iter) {
583 E : const Successor& succ = *succ_iter;
584 E : if (succ.has_label()) {
585 E : BlockGraph::Label label;
586 E : CHECK_NE(BasicBlock::kNoOffset, succ.instruction_offset());
587 E : CHECK(original_block->GetLabel(succ.instruction_offset(), &label));
588 E : CHECK(succ.label() == label);
589 E : labels_found[succ.instruction_offset()] = true;
590 E : }
591 E : }
592 E : }
593 :
594 : // We should have the right number of labels_found (check if we added
595 : // something to the wrong place).
596 E : CHECK_EQ(original_labels.size(), labels_found.size());
597 :
598 : // Make sure all of the items in labels_found have been set to true.
599 E : std::map<Offset, bool>::const_iterator found_iter = labels_found.begin();
600 E : for (; found_iter != labels_found.end(); ++found_iter) {
601 E : CHECK(found_iter->second);
602 E : }
603 E : }
604 :
605 : bool BasicBlockDecomposer::InsertBasicBlockRange(AbsoluteAddress addr,
606 : size_t size,
607 E : BasicBlockType type) {
608 E : DCHECK(type == BasicBlock::BASIC_CODE_BLOCK || current_instructions_.empty());
609 E : DCHECK(type == BasicBlock::BASIC_CODE_BLOCK || current_successors_.empty());
610 :
611 E : BasicBlock::Offset offset = addr - code_addr_;
612 E : DCHECK_LE(0, offset);
613 :
614 : // Find or create a name for this basic block. Reserve the label, if any,
615 : // to propagate to the basic block if there are no instructions in the
616 : // block to carry the label(s).
617 E : BlockGraph::Label label;
618 E : std::string basic_block_name;
619 E : if (block_->GetLabel(offset, &label)) {
620 E : basic_block_name = label.ToString();
621 E : } else {
622 : basic_block_name =
623 : base::StringPrintf("<anonymous-%04X-%s>",
624 : addr.value(),
625 E : BasicBlock::BasicBlockTypeToString(type));
626 : }
627 :
628 : // Create the basic block.
629 : BasicBlock* new_basic_block = subgraph_->AddBasicBlock(
630 E : basic_block_name, type, offset, size, code_ + offset);
631 E : if (new_basic_block == NULL)
632 i : return false;
633 :
634 : // Code basic-blocks carry their labels in their instructions and successors.
635 : // Data basic-blocks carry their labels at the head of the basic blocks.
636 : // A padding basic-block might also be labeled if the block contains
637 : // unreachable code (for example, INT3 or NOP instructions following a call
638 : // to a non-returning function).
639 E : if (type != BasicBlock::BASIC_CODE_BLOCK && label.IsValid()) {
640 E : new_basic_block->set_label(label);
641 : }
642 :
643 : // Populate code basic-block with instructions and successors.
644 E : if (type == BasicBlock::BASIC_CODE_BLOCK) {
645 E : new_basic_block->instructions().swap(current_instructions_);
646 E : new_basic_block->successors().swap(current_successors_);
647 : }
648 :
649 E : return true;
650 E : }
651 :
652 E : bool BasicBlockDecomposer::SplitCodeBlocksAtBranchTargets() {
653 : // TODO(rogerm): Refactor the basic-block splitting inner-function to the
654 : // BasicBlockSubGraph. Note that the subgraph currently maintains a
655 : // picture of the original address space of the source block. This should
656 : // also be factored out; the original address space is only relevant to
657 : // the BasicBlockDecomposer.
658 E : AddressSet::const_iterator jump_target_iter(jump_targets_.begin());
659 E : for (; jump_target_iter != jump_targets_.end(); ++jump_target_iter) {
660 : // Resolve the target basic-block.
661 E : Offset target_offset = *jump_target_iter - code_addr_;
662 E : BasicBlock* target_bb = NULL;
663 E : BBAddressSpace::Range target_bb_range;
664 : CHECK(subgraph_->FindBasicBlock(target_offset, &target_bb,
665 E : &target_bb_range));
666 :
667 : // If we're jumping to the start of a basic block, there isn't any work
668 : // to do.
669 E : if (target_offset == target_bb_range.start())
670 E : continue;
671 :
672 : // Otherwise, we have found a basic-block that we need to split. Let's
673 : // create a backup copy of the target basic-block and remove the original
674 : // from the basic-block address space. We'll replace it with two new
675 : // blocks split at the target offset.
676 E : size_t left_split_size = target_offset - target_bb_range.start();
677 E : BasicBlock target_bb_copy(*target_bb);
678 E : subgraph_->original_address_space().Remove(target_bb_range);
679 E : subgraph_->basic_blocks().erase(target_bb->id());
680 E : target_bb = &target_bb_copy;
681 :
682 : // Now we split up containing_range into two new ranges and replace
683 : // containing_range with the two new entries.
684 :
685 : // Setup the first "half" of the basic block. Note that we are reusing
686 : // current_instructions_ and current_successors_ so that we can use
687 : // InsertBlockRange to create the new basic-blocks.
688 E : DCHECK(current_instructions_.empty());
689 E : DCHECK(current_successors_.empty());
690 : while (!target_bb->instructions().empty() &&
691 E : target_bb->instructions().front().offset() < target_offset) {
692 : current_instructions_.splice(current_instructions_.end(),
693 : target_bb->instructions(),
694 E : target_bb->instructions().begin());
695 E : }
696 :
697 : // The next offset (to an instruction or successor) should correspond to
698 : // the target offset.
699 E : if (!target_bb->instructions().empty()) {
700 E : DCHECK_EQ(target_offset, target_bb->instructions().front().offset());
701 E : } else {
702 i : DCHECK(!target_bb->successors().empty());
703 : DCHECK_EQ(target_offset,
704 i : target_bb->successors().front().instruction_offset());
705 : }
706 :
707 : // Set-up the flow-through successor for the first "half".
708 : current_successors_.push_back(Successor(
709 E : Successor::kConditionTrue, target_offset, BasicBlock::kNoOffset, 0));
710 :
711 : // Create the basic-block representing the first "half".
712 : if (!InsertBasicBlockRange(code_addr_ + target_bb_range.start(),
713 : left_split_size,
714 E : target_bb->type())) {
715 i : LOG(ERROR) << "Failed to insert first half of split block.";
716 i : return false;
717 : }
718 :
719 : // Create the basic-block representing the second "half".
720 E : DCHECK(current_instructions_.empty());
721 E : DCHECK(current_successors_.empty());
722 E : current_instructions_.swap(target_bb->instructions());
723 E : current_successors_.swap(target_bb->successors());
724 : if (!InsertBasicBlockRange(code_addr_ + target_offset,
725 : target_bb_range.size() - left_split_size,
726 E : target_bb->type())) {
727 i : LOG(ERROR) << "Failed to insert second half of split block.";
728 i : return false;
729 : }
730 E : }
731 :
732 E : return true;
733 E : }
734 :
735 E : bool BasicBlockDecomposer::FillInDataBlocks() {
736 E : BlockGraph::Block::LabelMap::const_iterator iter = block_->labels().begin();
737 E : BlockGraph::Block::LabelMap::const_iterator end = block_->labels().end();
738 E : for (; iter != end; ++iter) {
739 E : if (!iter->second.has_attributes(BlockGraph::DATA_LABEL))
740 E : continue;
741 :
742 E : BlockGraph::Block::LabelMap::const_iterator next = iter;
743 E : ++next;
744 :
745 E : BlockGraph::Offset bb_start = iter->first;
746 E : BlockGraph::Offset bb_end = (next == end) ? block_->size() : next->first;
747 E : size_t bb_size = bb_end - bb_start;
748 E : AbsoluteAddress bb_addr(code_addr_ + bb_start);
749 E : if (!InsertBasicBlockRange(bb_addr, bb_size, BasicBlock::BASIC_DATA_BLOCK))
750 i : return false;
751 E : }
752 E : return true;
753 E : }
754 :
755 E : bool BasicBlockDecomposer::FillInPaddingBlocks() {
756 : BBAddressSpace& basic_block_address_space =
757 E : subgraph_->original_address_space();
758 :
759 : // Add an initial interstitial if needed.
760 : size_t interstitial_size = basic_block_address_space.empty() ?
761 E : code_size_ : basic_block_address_space.begin()->first.start();
762 E : DCHECK_LE(0U, interstitial_size);
763 E : if (interstitial_size > 0) {
764 : if (!InsertBasicBlockRange(code_addr_,
765 : interstitial_size,
766 i : BasicBlock::BASIC_PADDING_BLOCK)) {
767 i : LOG(ERROR) << "Failed to insert initial padding block at 0";
768 i : return false;
769 : }
770 : }
771 :
772 : // Handle all remaining gaps, including the end.
773 E : RangeMapConstIter curr_range = basic_block_address_space.begin();
774 E : for (; curr_range != basic_block_address_space.end(); ++curr_range) {
775 E : RangeMapConstIter next_range = curr_range;
776 E : ++next_range;
777 : AbsoluteAddress curr_range_end =
778 E : code_addr_ + curr_range->first.start() + curr_range->first.size();
779 :
780 E : interstitial_size = 0;
781 E : if (next_range == basic_block_address_space.end()) {
782 E : DCHECK_LE(curr_range_end, code_addr_ + code_size_);
783 E : interstitial_size = code_addr_ + code_size_ - curr_range_end;
784 E : } else {
785 E : DCHECK_LE(curr_range_end, code_addr_ + next_range->first.start());
786 : interstitial_size =
787 E : code_addr_ + next_range->first.start() - curr_range_end;
788 : }
789 :
790 E : if (interstitial_size > 0) {
791 : if (!InsertBasicBlockRange(curr_range_end,
792 : interstitial_size,
793 E : BasicBlock::BASIC_PADDING_BLOCK)) {
794 i : LOG(ERROR) << "Failed to insert padding block at "
795 : << curr_range_end.value();
796 i : return false;
797 : }
798 : }
799 E : }
800 :
801 E : return true;
802 E : }
803 :
804 E : bool BasicBlockDecomposer::CopyExternalReferrers() {
805 E : const BlockGraph::Block::ReferrerSet& referrers = block_->referrers();
806 E : BlockGraph::Block::ReferrerSet::const_iterator iter = referrers.begin();
807 E : for (; iter != referrers.end(); ++iter) {
808 : // Find the reference this referrer record describes.
809 E : const BlockGraph::Block* referrer = iter->first;
810 E : DCHECK(referrer != NULL);
811 :
812 : // We only care about external referrers. All intra-block referrers and
813 : // references will be handled when we populate block_'s references for
814 : // basic code/padding blocks, instructions and successors.
815 E : if (referrer == block_)
816 E : continue;
817 :
818 : // This is an external referrer. Find the reference in the referring block.
819 E : Offset source_offset = iter->second;
820 E : BlockGraph::Reference reference;
821 E : bool found = referrer->GetReference(source_offset, &reference);
822 E : DCHECK(found);
823 :
824 : // Find the basic block the reference refers to. It can only have an
825 : // offset that's different from the base if it's not a code block.
826 E : BasicBlock* target_bb = subgraph_->GetBasicBlockAt(reference.base());
827 E : DCHECK(target_bb != NULL);
828 : DCHECK(reference.base() == reference.offset() ||
829 E : target_bb->type() != BasicBlock::BASIC_CODE_BLOCK);
830 :
831 : // Insert the referrer into the target bb's referrer set. Note that there
832 : // is no corresponding reference update to the referring block. The
833 : // target bb will track these so a BlockBuilder can properly update
834 : // the referrers when merging a subgraph back into the block-graph.
835 : bool inserted = target_bb->referrers().insert(
836 E : BasicBlockReferrer(referrer, source_offset)).second;
837 E : DCHECK(inserted);
838 E : }
839 :
840 E : return true;
841 E : }
842 :
843 : template<typename ItemType>
844 E : bool BasicBlockDecomposer::CopyReferences(ItemType* item) {
845 E : DCHECK(item != NULL);
846 :
847 : // Figure out the bounds of item.
848 E : BlockGraph::Offset start_offset = item->offset();
849 E : BlockGraph::Offset end_offset = start_offset + item->size();
850 :
851 : // Get iterators encompassing all references within the bounds of item.
852 : BlockGraph::Block::ReferenceMap::const_iterator ref_iter =
853 E : block_->references().lower_bound(start_offset);
854 : BlockGraph::Block::ReferenceMap::const_iterator end_iter =
855 E : block_->references().lower_bound(end_offset);
856 :
857 E : for (; ref_iter != end_iter; ++ref_iter) {
858 : // Calculate the local offset of this reference within item.
859 E : BlockGraph::Offset local_offset = ref_iter->first - start_offset;
860 E : const BlockGraph::Reference& reference = ref_iter->second;
861 :
862 : // We expect long references for everything except flow control.
863 E : CHECK_EQ(4U, reference.size());
864 E : DCHECK_LE(local_offset + reference.size(), item->GetMaxSize());
865 :
866 E : if (reference.referenced() != block_) {
867 : // For external references, we can directly reference the other block.
868 : bool inserted = item->SetReference(
869 : local_offset,
870 : BasicBlockReference(reference.type(), reference.size(),
871 : reference.referenced(), reference.offset(),
872 E : reference.base()));
873 E : DCHECK(inserted);
874 E : } else {
875 : // For intra block_ references, find the corresponding basic block in
876 : // the basic block address space.
877 E : BasicBlock* target_bb = subgraph_->GetBasicBlockAt(reference.base());
878 E : DCHECK(target_bb != NULL);
879 :
880 : // Create target basic-block relative values for the base and offset.
881 E : CHECK_EQ(reference.offset(), reference.base());
882 :
883 : // Insert a reference to the target basic block.
884 : bool inserted = item->SetReference(
885 : local_offset,
886 E : BasicBlockReference(reference.type(), reference.size(), target_bb));
887 E : DCHECK(inserted);
888 : }
889 E : }
890 E : return true;
891 E : }
892 :
893 E : bool BasicBlockDecomposer::CopyReferences() {
894 : // Copy the references for the source range of each basic-block (by
895 : // instruction for code basic-blocks). External referrers and successors are
896 : // handled in separate passes.
897 : BasicBlockSubGraph::BBCollection::iterator bb_iter =
898 E : subgraph_->basic_blocks().begin();
899 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
900 E : BasicBlock* bb = &bb_iter->second;
901 E : if (bb->type() == BasicBlock::BASIC_CODE_BLOCK) {
902 E : BasicBlock::Instructions::iterator inst_iter = bb->instructions().begin();
903 E : for (; inst_iter != bb->instructions().end(); ++inst_iter) {
904 E : if (!CopyReferences(&(*inst_iter)))
905 i : return false;
906 E : }
907 E : } else {
908 E : if (!CopyReferences(bb))
909 i : return false;
910 : }
911 E : }
912 E : return true;
913 E : }
914 :
915 E : bool BasicBlockDecomposer::ResolveSuccessors() {
916 : BasicBlockSubGraph::BBCollection::iterator bb_iter =
917 E : subgraph_->basic_blocks().begin();
918 E : for (; bb_iter != subgraph_->basic_blocks().end(); ++bb_iter) {
919 : // Only code basic-blocks have successors and instructions.
920 E : BasicBlock* bb = &bb_iter->second;
921 E : if (bb->type() != BasicBlock::BASIC_CODE_BLOCK) {
922 E : DCHECK(bb->successors().empty());
923 E : DCHECK(bb->instructions().empty());
924 E : continue;
925 : }
926 :
927 E : BasicBlock::Successors::iterator succ_iter = bb->successors().begin();
928 E : BasicBlock::Successors::iterator succ_iter_end = bb->successors().end();
929 E : for (; succ_iter != succ_iter_end; ++succ_iter) {
930 E : if (succ_iter->reference().IsValid()) {
931 : // The branch target is already resolved; it must have been inter-block.
932 E : DCHECK(succ_iter->reference().block() != block_);
933 E : DCHECK(succ_iter->reference().block() != NULL);
934 E : continue;
935 : }
936 :
937 : // Find the basic block the successor references.
938 : BasicBlock* target_bb =
939 E : subgraph_->GetBasicBlockAt(succ_iter->bb_target_offset());
940 E : DCHECK(target_bb != NULL);
941 :
942 : // We transform all successor branches into 4-byte pc-relative targets.
943 : bool inserted = succ_iter->SetReference(
944 E : BasicBlockReference(BlockGraph::PC_RELATIVE_REF, 4, target_bb));
945 E : DCHECK(inserted);
946 E : DCHECK(succ_iter->reference().IsValid());
947 E : }
948 E : }
949 :
950 E : return true;
951 E : }
952 :
953 : } // namespace block_graph
|