1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/reorder/basic_block_optimizer.h"
16 :
17 : #include <algorithm>
18 : #include <deque>
19 : #include <set>
20 :
21 : #include "syzygy/block_graph/basic_block.h"
22 : #include "syzygy/block_graph/basic_block_decomposer.h"
23 : #include "syzygy/block_graph/basic_block_subgraph.h"
24 : #include "syzygy/pe/find.h"
25 : #include "syzygy/pe/pe_utils.h"
26 :
27 : #include "mnemonics.h" // NOLINT
28 :
29 : namespace reorder {
30 :
31 : namespace {
32 :
33 : using block_graph::BlockGraph;
34 : using block_graph::ConstBlockVector;
35 : using block_graph::BasicBlock;
36 : using block_graph::BasicCodeBlock;
37 : using block_graph::BasicDataBlock;
38 : using block_graph::BasicEndBlock;
39 : using block_graph::BasicBlockDecomposer;
40 : using block_graph::BasicBlockSubGraph;
41 : using block_graph::Successor;
42 : using grinder::basic_block_util::EntryCountType;
43 : using grinder::basic_block_util::IndexedFrequencyInformation;
44 : using grinder::basic_block_util::IndexedFrequencyOffset;
45 : using grinder::basic_block_util::IndexedFrequencyMap;
46 : using grinder::basic_block_util::ModuleInformation;
47 : using grinder::basic_block_util::RelativeAddress;
48 : using grinder::basic_block_util::RelativeAddressRange;
49 : using grinder::basic_block_util::RelativeAddressRangeVector;
50 : using pe::PEFile;
51 : using pe::ImageLayout;
52 :
53 : typedef Reorderer::Order Order;
54 : typedef BlockGraph::Offset Offset;
55 : typedef BlockGraph::Size Size;
56 : typedef BlockGraph::AddressSpace::RangeMapConstIter RangeMapConstIter;
57 : typedef BlockGraph::AddressSpace::RangeMapConstIterPair RangeMapConstIterPair;
58 :
59 : const char kDefaultColdSectionName[] = ".ctext";
60 :
61 : // A helper to fill in (retaining relative ordering) any sections that are in
62 : // the original @p image_layout which are not mentioned in @p order.
63 : void PopulateMissingSections(
64 E : const ImageLayout& image_layout, Order* order) {
65 E : DCHECK(order != NULL);
66 :
67 : // Remember all of the sections mentioned by id.
68 E : std::set<BlockGraph::SectionId> mentioned;
69 E : for (size_t i = 0; i < order->sections.size(); ++i) {
70 E : if (order->sections[i].id != Order::SectionSpec::kNewSectionId) {
71 E : bool inserted = mentioned.insert(order->sections[i].id).second;
72 E : DCHECK(inserted);
73 : }
74 E : }
75 :
76 : // Iterate over all of the sections and append those not mentioned by ID.
77 E : for (BlockGraph::SectionId id = 0; id < image_layout.sections.size(); ++id) {
78 E : const ImageLayout::SectionInfo& info = image_layout.sections[id];
79 E : if (mentioned.count(id) == 0) {
80 E : order->sections.push_back(Order::SectionSpec());
81 E : order->sections.back().id = id;
82 E : order->sections.back().name = info.name;
83 E : order->sections.back().characteristics = info.characteristics;
84 : }
85 E : }
86 E : }
87 :
88 : // Initialize a collection of blocks which are explicitly mentioned by the
89 : // given @p order.
90 E : bool InitExplicitBlocks(const Order* order, ConstBlockVector* explicit_blocks) {
91 E : DCHECK(order != NULL);
92 E : DCHECK(explicit_blocks != NULL);
93 E : explicit_blocks->clear();
94 :
95 : // Put all of the explicitly referenced blocks into explicit_blocks.
96 E : for (size_t i = 0; i < order->sections.size(); ++i) {
97 E : const Order::SectionSpec& section = order->sections[i];
98 E : for (size_t k = 0; k < section.blocks.size(); ++k) {
99 E : if (!section.blocks[k].basic_block_offsets.empty()) {
100 i : LOG(ERROR) << "Expected a block-only ordering.";
101 i : return false;
102 : }
103 E : explicit_blocks->push_back(section.blocks[k].block);
104 E : }
105 E : }
106 :
107 : // If there are no explicit blocks then we are finished.
108 E : if (explicit_blocks->empty())
109 E : return true;
110 :
111 : // Sort the set of explicitly placed blocks. This lets us quickly check if
112 : // a block is in explicit_blocks using binary search.
113 E : std::sort(explicit_blocks->begin(), explicit_blocks->end());
114 :
115 : // Verify that no block has been mentioned more than once.
116 E : ConstBlockVector::const_iterator next_iter = explicit_blocks->begin();
117 E : ConstBlockVector::const_iterator end_iter = --explicit_blocks->end();
118 E : while (next_iter != end_iter) {
119 E : ConstBlockVector::const_iterator curr_iter = next_iter++;
120 E : if (*curr_iter == *next_iter) {
121 i : LOG(ERROR) << "Ordering references a block multiple times.";
122 i : return false;
123 : }
124 E : }
125 :
126 : // And we're done.
127 E : return true;
128 E : }
129 :
130 : // Check if @p block is in @p explicit_blocks.
131 : bool IsExplicitBlock(const ConstBlockVector& explicit_blocks,
132 E : const BlockGraph::Block* block) {
133 : return std::binary_search(explicit_blocks.begin(),
134 : explicit_blocks.end(),
135 E : block);
136 E : }
137 :
138 : // A helper to "cast" the given successor as a BasicCodeBlock.
139 E : const BasicCodeBlock* GetSuccessorBB(const Successor& successor) {
140 E : const BasicBlock* bb = successor.reference().basic_block();
141 :
142 : // This might be in inter block reference (i.e., refers to a block not
143 : // a basic-block).
144 E : if (bb == NULL)
145 E : return NULL;
146 :
147 : // If it's a basic-block then it must be a code basic-block.
148 E : const BasicCodeBlock* code_bb = BasicCodeBlock::Cast(bb);
149 E : DCHECK(code_bb != NULL);
150 E : return code_bb;
151 E : }
152 :
153 : typedef std::pair<EntryCountType, const BasicCodeBlock*> CountForBasicBlock;
154 :
155 : bool HasHigherEntryCount(const CountForBasicBlock& lhs,
156 E : const CountForBasicBlock& rhs) {
157 E : return lhs.first > rhs.first;
158 E : }
159 :
160 : bool GetEntryCountByOffset(const IndexedFrequencyInformation& entry_counts,
161 : const RelativeAddress& base_rva,
162 : Offset offset,
163 E : EntryCountType* entry_count) {
164 E : DCHECK_LE(0, offset);
165 E : DCHECK(entry_count != NULL);
166 :
167 E : *entry_count = 0;
168 E : IndexedFrequencyOffset key = std::make_pair(base_rva + offset, 0);
169 : IndexedFrequencyMap::const_iterator it =
170 E : entry_counts.frequency_map.find(key);
171 E : if (it != entry_counts.frequency_map.end())
172 E : *entry_count = it->second;
173 E : return true;
174 E : }
175 :
176 : } // namespace
177 :
178 : BasicBlockOptimizer::BasicBlockOrderer::BasicBlockOrderer(
179 : const BasicBlockSubGraph& subgraph,
180 : const RelativeAddress& addr,
181 : Size size,
182 : const IndexedFrequencyInformation& entry_counts)
183 : : subgraph_(subgraph),
184 : addr_(addr),
185 : size_(size),
186 E : entry_counts_(entry_counts) {
187 E : DCHECK_LT(0U, size);
188 E : }
189 :
190 : bool BasicBlockOptimizer::BasicBlockOrderer::GetBlockEntryCount(
191 E : EntryCountType* entry_count) const {
192 E : DCHECK(entry_count != NULL);
193 :
194 E : if (!GetEntryCountByOffset(entry_counts_, addr_, 0, entry_count))
195 i : return false;
196 :
197 E : return true;
198 E : }
199 :
200 : // TODO(rogerm): There are better longest path based orderings in the
201 : // literature, but they require more than just entry-count ordering.
202 : bool BasicBlockOptimizer::BasicBlockOrderer::GetBasicBlockOrderings(
203 : Order::OffsetVector* warm_basic_blocks,
204 E : Order::OffsetVector* cold_basic_blocks) const {
205 E : DCHECK(warm_basic_blocks != NULL);
206 E : DCHECK(cold_basic_blocks != NULL);
207 :
208 : // TODO(rogerm): Create weighted paths by entry-count following the successor
209 : // and predecessor links.
210 : //
211 : // Here's an outline for a potential improvement:
212 : // 1. Set the first bb aside.
213 : // 2. Find the bb with the largest entry_count, then expand out from
214 : // there by surrounding it with its largest predecessor and
215 : // successor. Continuing expanding on both ends until there are no
216 : // more successor/predecessor links to follow.
217 : // 3. If there are unplaced basic blocks, goto 2. This will create as
218 : // many chains as required to represent all of the bbs.
219 : // 4. The resulting ordering should be the first bb followed by each
220 : // path found above.
221 :
222 E : warm_basic_blocks->clear();
223 E : cold_basic_blocks->clear();
224 :
225 : // Place the basic-blocks in order such that each block is followed by its
226 : // successor having the greatest number of entries. We start with the bb
227 : // having offset 0.
228 E : DCHECK_EQ(1U, subgraph_.block_descriptions().size());
229 : const BasicBlockSubGraph::BasicBlockOrdering& original_order =
230 E : subgraph_.block_descriptions().front().basic_block_order;
231 :
232 : // Create a copy of the original ordering to serve as a queue. Henceforth
233 : // known as the BB Queue or bbq. Muahhahaha!
234 : std::deque<const BasicBlock*> bbq(original_order.begin(),
235 E : original_order.end());
236 E : DCHECK(!bbq.empty());
237 E : DCHECK_EQ(0, bbq.front()->offset());
238 :
239 : // The set of basic blocks that we have already placed. Warm basic blocks
240 : // may jump the queue if they were the most common successor of a previously
241 : // seen basic block.
242 E : BasicBlockSet placed_bbs;
243 :
244 : // The set of data basic-blocks referenced from an instruction in any
245 : // warm basic-block. This is used to determine where to place data blocks.
246 E : BasicBlockSet warm_references;
247 :
248 : // Consume the bbq.
249 E : bool have_seen_data_basic_block = false;
250 E : while (!bbq.empty()) {
251 : // Get the next basic block.
252 E : const BasicBlock* bb = bbq.front();
253 E : bbq.pop_front();
254 E : DCHECK(bb != NULL);
255 :
256 : // Insert it into the set of basic-blocks that have already been placed. If
257 : // the insertion is not a new element, then we've already handled this bb.
258 E : bool already_handled = placed_bbs.insert(bb).second == false;
259 E : if (already_handled)
260 E : continue;
261 :
262 : // If it's a code basic block then we send it to the warm or cold list
263 : // based on whether or not its entry count is zero. We also push its
264 : // most common successor to the head of bbq to let it be the next placed
265 : // basic-block. We should not see any more code basic blocks once we
266 : // have encountered a data basic block.
267 E : const BasicCodeBlock* code_bb = BasicCodeBlock::Cast(bb);
268 E : if (code_bb != NULL) {
269 E : DCHECK(!have_seen_data_basic_block);
270 :
271 E : EntryCountType entry_count = 0;
272 E : if (!GetBasicBlockEntryCount(code_bb, &entry_count)) {
273 i : LOG(ERROR) << "Failed to get entry count for " << code_bb->name()
274 : << " (offset=" << code_bb->offset() << ").";
275 i : return false;
276 : }
277 :
278 E : if (entry_count == 0) {
279 : // This is a cold basic-block. We simply add it to the cold
280 : // basic-block ordering.
281 : // TODO(rogerm): Sort cold blocks for minimal encoding size.
282 E : cold_basic_blocks->push_back(code_bb->offset());
283 E : } else {
284 : // This is a warm basic-block. Add it to the warm basic-block ordering.
285 E : warm_basic_blocks->push_back(code_bb->offset());
286 :
287 : // Remember the data blocks referenced from its instructions.
288 E : if (!AddWarmDataReferences(code_bb, &warm_references))
289 i : return false;
290 :
291 : // If the basic-block has one or more successors, schedule the warmest
292 : // not-yet-placed successor (if any) to be next. Otherwise, if the
293 : // basic-block ended with an indirect jump through a jump table, queue
294 : // up the destinations in decreasing entry count order.
295 E : if (!code_bb->successors().empty()) {
296 E : const BasicBlock* successor = NULL;
297 E : if (!GetWarmestSuccessor(code_bb, placed_bbs, &successor))
298 i : return false;
299 E : if (successor != NULL)
300 E : bbq.push_front(successor);
301 E : } else {
302 : // If the instruction is a jump, look for a jump table reference and
303 : // (if one is found) enqueue its referenced basic blocks in sorted
304 : // (by decreasing entry count) order.
305 E : DCHECK(!code_bb->instructions().empty());
306 E : const block_graph::Instruction& inst = code_bb->instructions().back();
307 E : if (inst.representation().opcode == I_JMP) {
308 E : std::vector<const BasicCodeBlock*> targets;
309 E : if (!GetSortedJumpTargets(inst, &targets))
310 i : return false;
311 E : bbq.insert(bbq.begin(), targets.begin(), targets.end());
312 E : }
313 : }
314 : }
315 : }
316 :
317 : // If it's a data basic block then we send it to the warm or cold list
318 : // depending on whether or not it was referenced by something in the warm
319 : // list. Note that the data basic-blocks are at the end of the basic-
320 : // block layout so we will have already seen all of the warm referrers
321 : // by the time this case succeeds.
322 E : const BasicDataBlock* data_bb = BasicDataBlock::Cast(bb);
323 E : if (data_bb != NULL) {
324 E : have_seen_data_basic_block = true;
325 E : if (warm_references.count(bb) != 0)
326 E : warm_basic_blocks->push_back(data_bb->offset());
327 E : else
328 E : cold_basic_blocks->push_back(data_bb->offset());
329 : }
330 :
331 : // If it's an end basic-block we simply ignore it.
332 E : const BasicEndBlock* end_bb = BasicEndBlock::Cast(bb);
333 :
334 E : DCHECK(code_bb != NULL || data_bb != NULL || end_bb != NULL);
335 E : }
336 :
337 : // TODO(rogerm): If we find that we haven't perturbed the basic-block
338 : // ordering then we could clear both lists and let the block simply
339 : // be copied/moved as is.
340 :
341 : DCHECK_EQ(subgraph_.basic_blocks().size(),
342 E : warm_basic_blocks->size() + cold_basic_blocks->size() + 1);
343 E : return true;
344 E : }
345 :
346 : bool BasicBlockOptimizer::BasicBlockOrderer::GetBasicBlockEntryCount(
347 E : const BasicCodeBlock* code_bb, EntryCountType* entry_count) const {
348 E : DCHECK(code_bb != NULL);
349 E : DCHECK(entry_count != NULL);
350 :
351 : if (!GetEntryCountByOffset(
352 E : entry_counts_, addr_, code_bb->offset(), entry_count)) {
353 i : return false;
354 : }
355 :
356 E : return true;
357 E : }
358 :
359 : bool BasicBlockOptimizer::BasicBlockOrderer::GetWarmestSuccessor(
360 : const BasicCodeBlock* code_bb,
361 : const BasicBlockSet& placed_bbs,
362 E : const BasicBlock** succ_bb) const {
363 E : DCHECK(code_bb != NULL);
364 E : DCHECK(succ_bb != NULL);
365 :
366 E : *succ_bb = NULL;
367 :
368 : // If there are no successors then there certainly isn't a warmest one.
369 E : if (code_bb->successors().empty())
370 i : return true;
371 :
372 : // If the first successor is a basic-block but it has already been seen,
373 : // then it is not a candidate for the warmest.
374 E : const BasicCodeBlock* succ1 = GetSuccessorBB(code_bb->successors().front());
375 E : if (succ1 != NULL && placed_bbs.count(succ1) != 0)
376 E : succ1 = NULL;
377 :
378 : // If that was the only successor, then return whatever we have thus far.
379 E : if (code_bb->successors().size() == 1) {
380 E : *succ_bb = succ1;
381 E : return true;
382 : }
383 :
384 E : DCHECK_EQ(2U, code_bb->successors().size());
385 :
386 : // If the second successor is a basic-block but it has already been seen,
387 : // then it is not a candidate for the warmest.
388 E : const BasicCodeBlock* succ2 = GetSuccessorBB(code_bb->successors().back());
389 E : if (succ2 != NULL && placed_bbs.count(succ2) != 0)
390 E : succ2 = NULL;
391 :
392 : // If the first successor is not a candidate, then we can return whatever we
393 : // have for the second successor.
394 E : if (succ1 == NULL) {
395 E : *succ_bb = succ2;
396 E : return true;
397 : }
398 :
399 E : DCHECK(succ1 != NULL);
400 :
401 : // If the second successor is not a candidate, then we can return whatever we
402 : // have for the first successor.
403 E : if (succ2 == NULL) {
404 E : *succ_bb = succ1;
405 E : return true;
406 : }
407 :
408 : // Both successors are valid candidates. We choose the one with the highest
409 : // entry count. Note that we keep the successors in the same order if the
410 : // entry counts are the same. By default we know that succ2 represents the
411 : // fall-through (branch-not-taken) path that immediately follows the
412 : // conditional.
413 E : DCHECK(succ1 != NULL);
414 E : DCHECK(succ2 != NULL);
415 :
416 E : EntryCountType succ1_entry_count = 0;
417 E : if (!GetBasicBlockEntryCount(succ1, &succ1_entry_count)) {
418 i : LOG(ERROR) << "Failed to get entry count for " << succ1->name()
419 : << " (offset=" << succ1->offset() << ").";
420 i : return false;
421 : }
422 :
423 E : EntryCountType succ2_entry_count = 0;
424 E : if (!GetBasicBlockEntryCount(succ2, &succ2_entry_count)) {
425 i : LOG(ERROR) << "Failed to get entry count for " << succ2->name()
426 : << " (offset=" << succ2->offset() << ").";
427 i : return false;
428 : }
429 :
430 E : if (succ1_entry_count > succ2_entry_count)
431 E : *succ_bb = succ1;
432 E : else
433 E : *succ_bb = succ2;
434 :
435 E : return true;
436 E : }
437 :
438 : bool BasicBlockOptimizer::BasicBlockOrderer::GetSortedJumpTargets(
439 : const block_graph::Instruction& jmp_inst,
440 E : std::vector<const BasicCodeBlock*>* targets) const {
441 E : DCHECK_EQ(I_JMP, jmp_inst.representation().opcode);
442 E : DCHECK(targets != NULL);
443 :
444 E : targets->clear();
445 :
446 : // We store the targets and their entry counts in a temporary vector that
447 : // we can sort by entry counts.
448 : typedef std::vector<CountForBasicBlock> TempTargets;
449 E : TempTargets temp_targets;
450 E : temp_targets.reserve(jmp_inst.references().size());
451 :
452 : // Find the jump-table reference.
453 : BasicBlock::BasicBlockReferenceMap::const_iterator ref_iter =
454 E : jmp_inst.references().begin();
455 E : for (; ref_iter != jmp_inst.references().end(); ++ref_iter) {
456 : // We're only interested in referred data basic blocks that are marked
457 : // as being a jump table.
458 : const BasicDataBlock* ref_bb =
459 E : BasicDataBlock::Cast(ref_iter->second.basic_block());
460 : if (ref_bb == NULL ||
461 : !ref_bb->label().IsValid() ||
462 E : !ref_bb->label().has_attributes(BlockGraph::JUMP_TABLE_LABEL)) {
463 i : continue;
464 : }
465 :
466 E : DCHECK(ref_bb != NULL);
467 E : DCHECK(ref_bb->label().IsValid());
468 E : DCHECK(ref_bb->label().has_attributes(BlockGraph::JUMP_TABLE_LABEL));
469 :
470 : // Populate temp_targets with each target and it's entry count.
471 : BasicBlock::BasicBlockReferenceMap::const_iterator target_iter =
472 E : ref_bb->references().begin();
473 E : for (; target_iter != ref_bb->references().end(); ++target_iter) {
474 : const BasicCodeBlock* target_bb =
475 E : BasicCodeBlock::Cast(target_iter->second.basic_block());
476 E : if (target_bb == NULL) {
477 i : LOG(ERROR) << "Found non-code-basic-block reference in a jump table.";
478 i : return false;
479 : }
480 : // Get the entry count.
481 E : EntryCountType entry_count = 0;
482 E : if (!GetBasicBlockEntryCount(target_bb, &entry_count))
483 i : return false;
484 :
485 : // Append the entry count and the target into the temp target vector.
486 E : temp_targets.push_back(std::make_pair(entry_count, target_bb));
487 E : }
488 E : }
489 :
490 : // Perform a stable sort of the temp target vector by decreasing entry count.
491 : std::stable_sort(
492 E : temp_targets.begin(), temp_targets.end(), &HasHigherEntryCount);
493 :
494 : // Copy the resulting basic block ordering into the target vector.
495 E : targets->reserve(temp_targets.size());
496 E : TempTargets::const_iterator temp_target_iter = temp_targets.begin();
497 E : for (; temp_target_iter != temp_targets.end(); ++temp_target_iter) {
498 E : if (temp_target_iter->first > 0)
499 E : targets->push_back(temp_target_iter->second);
500 E : }
501 :
502 E : return true;
503 E : }
504 :
505 : bool BasicBlockOptimizer::BasicBlockOrderer::AddWarmDataReferences(
506 E : const BasicCodeBlock* code_bb, BasicBlockSet* warm_references) const {
507 E : DCHECK(code_bb != NULL);
508 E : DCHECK(warm_references != NULL);
509 :
510 : // Iterate over all of the instructions.
511 : BasicBlock::Instructions::const_iterator inst_iter =
512 E : code_bb->instructions().begin();
513 E : for (; inst_iter != code_bb->instructions().end(); ++inst_iter) {
514 : // For each instruction, iterate over all references it makes.
515 : BasicBlock::BasicBlockReferenceMap::const_iterator ref_iter =
516 E : inst_iter->references().begin();
517 E : for (; ref_iter != inst_iter->references().end(); ++ref_iter) {
518 : // We're only interested in references that refer to basic-blocks.
519 E : const BasicBlock* bb = ref_iter->second.basic_block();
520 E : if (bb == NULL)
521 E : continue;
522 :
523 : // We tolerate code->code references only for the special case of
524 : // self-recursive functions.
525 E : const BasicCodeBlock* code_bb = BasicCodeBlock::Cast(bb);
526 E : if (code_bb != NULL) {
527 i : if (code_bb->offset() == 0) {
528 i : continue;
529 : }
530 :
531 i : DCHECK_NE(0, code_bb->offset());
532 i : LOG(ERROR) << "Invalid code to code reference from instruction.";
533 i : return false;
534 : }
535 :
536 : // For each data reference we recursively add all
537 : // basic data blocks reachable.
538 E : const BasicDataBlock* data_bb = BasicDataBlock::Cast(bb);
539 E : DCHECK(data_bb != NULL);
540 :
541 E : AddRecursiveDataReferences(data_bb, warm_references);
542 E : }
543 E : }
544 E : return true;
545 E : }
546 :
547 : void BasicBlockOptimizer::BasicBlockOrderer::AddRecursiveDataReferences(
548 E : const BasicDataBlock* data_bb, BasicBlockSet* warm_references) const {
549 E : DCHECK(data_bb != NULL);
550 E : DCHECK(warm_references != NULL);
551 :
552 : // Mark the current data basic-block as a warm reference. If this basic-
553 : // block is already in the warm references set, then we don't need to
554 : // recurse its references.
555 E : bool is_new_insertion = !warm_references->insert(data_bb).second;
556 E : if (!is_new_insertion)
557 E : return;
558 :
559 : // Otherwise, iterate over all of the references made from this data
560 : // basic-block. Note that the reference might point to code (a jump
561 : // table, for example). If the reference is to another data basic-block,
562 : // then recursively add its data references.
563 : BasicBlock::BasicBlockReferenceMap::const_iterator ref_iter =
564 i : data_bb->references().begin();
565 i : for (; ref_iter != data_bb->references().end(); ++ref_iter) {
566 : // We're only interested in references that refer to basic-blocks.
567 i : const BasicBlock* bb = ref_iter->second.basic_block();
568 i : if (bb == NULL)
569 i : continue;
570 : // We recurse into data basic-blocks.
571 i : const BasicDataBlock* referred_data_bb = BasicDataBlock::Cast(bb);
572 i : if (referred_data_bb != NULL)
573 i : AddRecursiveDataReferences(referred_data_bb, warm_references);
574 i : }
575 E : }
576 :
577 : BasicBlockOptimizer::BasicBlockOptimizer()
578 E : : cold_section_name_(kDefaultColdSectionName) {
579 E : }
580 :
581 : bool BasicBlockOptimizer::Optimize(
582 : const ImageLayout& image_layout,
583 : const IndexedFrequencyInformation& entry_counts,
584 E : Order* order) {
585 E : DCHECK(order != NULL);
586 :
587 : if (entry_counts.data_type !=
588 : ::common::IndexedFrequencyData::BASIC_BLOCK_ENTRY &&
589 : entry_counts.data_type !=
590 E : ::common::IndexedFrequencyData::BRANCH) {
591 i : LOG(ERROR) << "Invalid frequency data type.";
592 i : return false;
593 : }
594 :
595 : // Keep track of which blocks have been explicitly ordered. This will be used
596 : // when implicitly placing blocks.
597 E : ConstBlockVector explicit_blocks;
598 E : if (!InitExplicitBlocks(order, &explicit_blocks))
599 i : return false;
600 :
601 : // Fill in any sections which were not mentioned by the ordering.
602 E : PopulateMissingSections(image_layout, order);
603 :
604 : // Remember how many sections we started with.
605 E : size_t num_sections = order->sections.size();
606 :
607 : // Add a new section in which to put the cold blocks and basic-blocks.
608 E : order->sections.push_back(Order::SectionSpec());
609 E : Order::SectionSpec* cold_section_spec = &order->sections.back();
610 E : cold_section_spec->name = cold_section_name_;
611 E : cold_section_spec->id = Order::SectionSpec::kNewSectionId;
612 E : cold_section_spec->characteristics = pe::kCodeCharacteristics;
613 :
614 E : pe::PETransformPolicy policy;
615 :
616 : // Iterate over the sections in the original order and update their basic-
617 : // block orderings.
618 E : for (size_t i = 0; i < num_sections; ++i) {
619 E : Order::SectionSpec* section_spec = &order->sections[i];
620 E : Order::BlockSpecVector warm_block_specs;
621 E : Order::BlockSpecVector cold_block_specs;
622 :
623 : // Get the collection of warm and cold block spec for this section.
624 : if (!OptimizeSection(policy,
625 : image_layout,
626 : entry_counts,
627 : explicit_blocks,
628 : section_spec,
629 : &warm_block_specs,
630 E : &cold_block_specs)) {
631 i : return false;
632 : }
633 :
634 : // Replace the block specs in the original section with those found to
635 : // be warm, and append the cold blocks to the end of the cold section.
636 E : section_spec->blocks.swap(warm_block_specs);
637 : cold_section_spec->blocks.insert(cold_section_spec->blocks.end(),
638 : cold_block_specs.begin(),
639 E : cold_block_specs.end());
640 E : }
641 :
642 E : return true;
643 E : }
644 :
645 : // Get an ordered list of warm and cold basic blocks for the given @p block.
646 : bool BasicBlockOptimizer::OptimizeBlock(
647 : const pe::PETransformPolicy& policy,
648 : const BlockGraph::Block* block,
649 : const ImageLayout& image_layout,
650 : const IndexedFrequencyInformation& entry_counts,
651 : Order::BlockSpecVector* warm_block_specs,
652 E : Order::BlockSpecVector* cold_block_specs) {
653 E : DCHECK(block != NULL);
654 E : DCHECK(warm_block_specs != NULL);
655 E : DCHECK(cold_block_specs != NULL);
656 :
657 : // Leave data blocks untouched.
658 E : if (block->type() != BlockGraph::CODE_BLOCK) {
659 E : warm_block_specs->push_back(Order::BlockSpec(block));
660 E : return true;
661 : }
662 :
663 : // Find the start address of the block.
664 E : RelativeAddress addr;
665 E : if (!image_layout.blocks.GetAddressOf(block, &addr)) {
666 i : LOG(ERROR) << "Failed to find " << block->name() << " in image layout.";
667 i : return false;
668 : }
669 :
670 : // Determine the number of times the block has been entered. We use the
671 : // start of the block (with a zero offset) to find it's entry count.
672 E : EntryCountType entry_count = 0;
673 E : if (!GetEntryCountByOffset(entry_counts, addr, 0, &entry_count)) {
674 i : LOG(ERROR) << "Failed to find entry count for '" << block->name() << "'.";
675 i : return false;
676 : }
677 :
678 : // If the function was never invoked, we just move it as is to the cold set.
679 : // We have no information on which to base a basic-block optimization.
680 E : if (entry_count == 0) {
681 E : cold_block_specs->push_back(Order::BlockSpec(block));
682 E : return true;
683 : }
684 :
685 : // Handle non-decomposable code blocks as large opaque basic-blocks. We've
686 : // already established that the block is warm, so just place it as is.
687 E : if (!policy.BlockIsSafeToBasicBlockDecompose(block)) {
688 E : warm_block_specs->push_back(Order::BlockSpec(block));
689 E : return true;
690 : }
691 :
692 : // The function was called at least once and is basic-block decomposable.
693 : // Let's decompose and optimize it.
694 E : BasicBlockSubGraph subgraph;
695 E : BasicBlockDecomposer decomposer(block, &subgraph);
696 E : if (!decomposer.Decompose())
697 i : return false;
698 :
699 : // Create the basic-block orderer.
700 E : BasicBlockOrderer orderer(subgraph, addr, block->size(), entry_counts);
701 E : Order::OffsetVector warm_basic_blocks;
702 E : Order::OffsetVector cold_basic_blocks;
703 : if (!orderer.GetBasicBlockOrderings(&warm_basic_blocks,
704 E : &cold_basic_blocks)) {
705 i : return false;
706 : }
707 :
708 : // Note that we allow the ordering function to return an empty set of
709 : // warm basic-blocks. This denotes that the block should be placed into
710 : // the warm block specs without modification and also implies that there
711 : // should be no cold basic-blocks.
712 : //
713 : // Therefore the following should be true:
714 : // * If there are cold basic-blocks returned then there are also
715 : // warm basic-blocks returned.
716 : // * Either both returned sets are empty or the sum of the warm and
717 : // cold basic-blocks and an end-block equals the total number of
718 : // basic-blocks in the subgraph.
719 E : DCHECK(cold_basic_blocks.empty() || !warm_basic_blocks.empty());
720 : DCHECK((warm_basic_blocks.empty() && cold_basic_blocks.empty()) ||
721 : (warm_basic_blocks.size() + cold_basic_blocks.size() + 1 ==
722 E : subgraph.basic_blocks().size()));
723 :
724 : // We know the function was called at least once. Some part of it should
725 : // be into warm_block_specs.
726 E : warm_block_specs->push_back(Order::BlockSpec(block));
727 E : warm_block_specs->back().basic_block_offsets.swap(warm_basic_blocks);
728 :
729 : // But, there may or may not be a cold part.
730 E : if (!cold_basic_blocks.empty()) {
731 E : cold_block_specs->push_back(Order::BlockSpec(block));
732 E : cold_block_specs->back().basic_block_offsets.swap(cold_basic_blocks);
733 : }
734 :
735 E : return true;
736 E : }
737 :
738 : bool BasicBlockOptimizer::OptimizeSection(
739 : const pe::PETransformPolicy& policy,
740 : const ImageLayout& image_layout,
741 : const IndexedFrequencyInformation& entry_counts,
742 : const ConstBlockVector& explicit_blocks,
743 : Order::SectionSpec* orig_section_spec,
744 : Order::BlockSpecVector* warm_block_specs,
745 E : Order::BlockSpecVector* cold_block_specs) {
746 E : DCHECK(orig_section_spec != NULL);
747 E : DCHECK(warm_block_specs != NULL);
748 E : DCHECK(cold_block_specs != NULL);
749 :
750 : // Place all of the explicitly ordered blocks.
751 E : for (size_t i = 0; i < orig_section_spec->blocks.size(); ++i) {
752 E : Order::BlockSpec* block_spec = &orig_section_spec->blocks[i];
753 E : DCHECK(block_spec->block != NULL);
754 E : DCHECK(block_spec->basic_block_offsets.empty());
755 E : DCHECK(IsExplicitBlock(explicit_blocks, block_spec->block));
756 :
757 : if (!OptimizeBlock(policy,
758 : block_spec->block,
759 : image_layout,
760 : entry_counts,
761 : warm_block_specs,
762 E : cold_block_specs)) {
763 i : return false;
764 : }
765 E : }
766 :
767 : // If we are updating a preexisting section, then account for the rest of
768 : // the blocks in the section. We leave these in their original relative
769 : // ordering.
770 E : if (orig_section_spec->id != Order::SectionSpec::kNewSectionId) {
771 E : DCHECK_GT(image_layout.sections.size(), orig_section_spec->id);
772 : const ImageLayout::SectionInfo& section_info =
773 E : image_layout.sections[orig_section_spec->id];
774 :
775 : // Get an iterator pair denoting all of the blocks in the section.
776 : RangeMapConstIterPair iter_pair(
777 : image_layout.blocks.GetIntersectingBlocks(
778 E : RelativeAddress(section_info.addr), section_info.size));
779 E : for (RangeMapConstIter it = iter_pair.first; it != iter_pair.second; ++it) {
780 : // If the block is explicitly mentioned in the ordering then we don't
781 : // have to handle it here. Note that explicit_blocks is populated
782 : // before placing any blocks, so it already accounts for blocks that
783 : // move between sections.
784 E : if (IsExplicitBlock(explicit_blocks, it->second))
785 E : continue;
786 :
787 : // We apply the same optimization as for explicitly placed blocks.
788 : if (!OptimizeBlock(policy,
789 : it->second,
790 : image_layout,
791 : entry_counts,
792 : warm_block_specs,
793 E : cold_block_specs)) {
794 i : return false;
795 : }
796 E : }
797 : }
798 :
799 E : return true;
800 E : }
801 :
802 : } // namespace reorder
|