1 : // Copyright 2013 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/optimize/application_profile.h"
16 :
17 : #include <map>
18 : #include <queue>
19 :
20 : #include "syzygy/grinder/basic_block_util.h"
21 :
22 : namespace optimize {
23 :
24 : namespace {
25 :
26 : using block_graph::BlockGraph;
27 : using block_graph::BasicBlockSubGraph;
28 : using grinder::basic_block_util::IndexedFrequencyMap;
29 : using grinder::basic_block_util::IndexedFrequencyOffset;
30 :
31 : typedef ApplicationProfile::BlockProfile BlockProfile;
32 : typedef BasicBlockSubGraph::BasicBlockOrdering BasicBlockOrdering;
33 : typedef BasicBlockSubGraph::BasicCodeBlock BasicCodeBlock;
34 : typedef BasicBlockSubGraph::BlockDescriptionList BlockDescriptionList;
35 : typedef BasicBlockSubGraph::BasicBlock::Successors Successors;
36 : typedef BlockGraph::Offset Offset;
37 : typedef core::RelativeAddress RelativeAddress;
38 : typedef grinder::basic_block_util::EntryCountType EntryCountType;
39 : typedef pe::ImageLayout ImageLayout;
40 : typedef SubGraphProfile::BasicBlockProfile BasicBlockProfile;
41 :
42 : const size_t kEntryCountColumn = 0;
43 : const size_t kBranchTakenColumn = 1;
44 : const size_t kMissPredColumn = 2;
45 :
46 : // Compare two profiles. Used by STL containers.
47 : struct BlockProfileCompare {
48 E : bool operator()(const BlockProfile* a, const BlockProfile* b) const {
49 E : DCHECK_NE(reinterpret_cast<const BlockProfile*>(NULL), a);
50 E : DCHECK_NE(reinterpret_cast<const BlockProfile*>(NULL), b);
51 :
52 E : if (a->temperature() < b->temperature())
53 E : return true;
54 i : if (a->temperature() > b->temperature())
55 i : return false;
56 i : return a->count() < b->count();
57 E : }
58 : };
59 :
60 : // Retrieve a frequency in the IndexedFrequencyMap for |rva + offset, column|.
61 : bool GetFrequencyByOffset(const IndexedFrequencyMap& frequencies,
62 : const RelativeAddress& base_rva,
63 : Offset offset,
64 : size_t column,
65 E : EntryCountType* entry_count) {
66 E : DCHECK_LE(0, offset);
67 E : DCHECK_NE(reinterpret_cast<EntryCountType*>(NULL), entry_count);
68 :
69 E : *entry_count = 0;
70 E : IndexedFrequencyOffset key = std::make_pair(base_rva + offset, column);
71 E : IndexedFrequencyMap::const_iterator it = frequencies.find(key);
72 E : if (it != frequencies.end()) {
73 E : *entry_count = it->second;
74 E : return true;
75 : }
76 E : return false;
77 E : }
78 :
79 : // Retrieve the RVA of a block by looking in the image layout.
80 : bool GetAddressOfBlock(const BlockGraph::Block* block,
81 : const ImageLayout& image_layout,
82 E : RelativeAddress* addr) {
83 E : DCHECK_NE(reinterpret_cast<RelativeAddress*>(NULL), addr);
84 :
85 : // Find the start address of the block.
86 E : if (!image_layout.blocks.GetAddressOf(block, addr)) {
87 i : LOG(ERROR) << "Failed to find " << block->name() << " in image layout.";
88 i : return false;
89 : }
90 :
91 E : return true;
92 E : }
93 :
94 : } // namespace
95 :
96 : ApplicationProfile::ApplicationProfile(const ImageLayout* image_layout)
97 E : : image_layout_(image_layout), global_temperature_(0.0) {
98 E : empty_profile_.reset(new BlockProfile());
99 E : }
100 :
101 : const BlockProfile* ApplicationProfile::GetBlockProfile(
102 E : const BlockGraph::Block* block) const {
103 E : ProfileMap::const_iterator it = profiles_.find(block->id());
104 E : if (it != profiles_.end())
105 E : return &it->second;
106 :
107 E : return empty_profile_.get();
108 E : }
109 :
110 E : bool ApplicationProfile::ComputeGlobalProfile() {
111 E : DCHECK_NE(reinterpret_cast<const ImageLayout*>(NULL), image_layout_);
112 E : const BlockGraph* graph = image_layout_->blocks.graph();
113 E : DCHECK_NE(reinterpret_cast<const BlockGraph*>(NULL), graph);
114 :
115 : // Compute global temperature.
116 E : IndexedFrequencyMap::const_iterator freq = frequencies_.begin();
117 E : for (; freq != frequencies_.end(); ++freq) {
118 E : if (freq->first.second == kEntryCountColumn)
119 E : global_temperature_ += freq->second;
120 E : }
121 :
122 : // Compute profile for each block.
123 E : const BlockGraph::BlockMap& blocks = graph->blocks();
124 E : BlockGraph::BlockMap::const_iterator it = blocks.begin();
125 E : for (; it != blocks.end(); ++it) {
126 E : const BlockGraph::Block* block = &it->second;
127 E : bool valid = true;
128 :
129 : // Get the current block address.
130 E : RelativeAddress addr;
131 E : valid = GetAddressOfBlock(block, *image_layout_, &addr);
132 E : DCHECK(valid);
133 :
134 : // Retrieve the execution count of this function.
135 E : EntryCountType entry_count = 0;
136 : valid = GetFrequencyByOffset(frequencies_, addr, 0, kEntryCountColumn,
137 E : &entry_count);
138 :
139 : // Function is never executed.
140 E : if (!valid)
141 E : continue;
142 :
143 : // Compute the block temperature.
144 E : double temperature = 0;
145 E : IndexedFrequencyOffset key = std::make_pair(addr, kEntryCountColumn);
146 E : IndexedFrequencyMap::const_iterator it = frequencies_.find(key);
147 E : for (; it != frequencies_.end(); ++it) {
148 E : if (addr + block->size() <= it->first.first)
149 E : break;
150 E : if (it->first.second == kEntryCountColumn)
151 E : temperature += it->second;
152 E : }
153 :
154 : // An executed function must have a temperature higher than zero.
155 E : DCHECK_LT(0.0, temperature);
156 :
157 : // Insert the block profile into the profile map.
158 E : BlockProfile block_profile(entry_count, temperature);
159 :
160 : std::pair<ProfileMap::iterator, bool> result =
161 E : profiles_.insert(std::make_pair(block->id(), block_profile));
162 E : DCHECK(result.second);
163 E : }
164 :
165 : // Build a heap of profiles ordered by temperature.
166 : typedef std::priority_queue<BlockProfile*,
167 : std::vector<BlockProfile*>,
168 : BlockProfileCompare> ProfileQueue;
169 E : ProfileQueue queue;
170 E : ProfileMap::iterator profile = profiles_.begin();
171 E : for (; profile != profiles_.end(); ++profile)
172 E : queue.push(&profile->second);
173 :
174 : // Update the percentile by temperature order.
175 E : double sum = 0;
176 E : while (!queue.empty()) {
177 E : BlockProfile* top = queue.top();
178 E : queue.pop();
179 E : DCHECK_NE(reinterpret_cast<const BlockProfile*>(NULL), top);
180 :
181 E : top->set_percentile(sum / global_temperature_);
182 E : sum += top->temperature();
183 E : }
184 :
185 : // Force block never executed to be at the last percentile.
186 E : empty_profile_->set_percentile(1.0);
187 :
188 E : return true;
189 E : }
190 :
191 : bool ApplicationProfile::ImportFrequencies(
192 E : const IndexedFrequencyMap& frequencies) {
193 : // TODO(etienneb): Support importing multiple sets.
194 E : frequencies_ = frequencies;
195 E : return true;
196 E : }
197 :
198 : void ApplicationProfile::ComputeSubGraphProfile(
199 : const BasicBlockSubGraph* subgraph,
200 E : scoped_ptr<SubGraphProfile>* profile) {
201 E : DCHECK_NE(reinterpret_cast<const BasicBlockSubGraph*>(NULL), subgraph);
202 E : DCHECK_NE(reinterpret_cast<scoped_ptr<SubGraphProfile>*>(NULL), profile);
203 :
204 : // Create the resulting subgraph profile.
205 E : profile->reset(new SubGraphProfile());
206 :
207 : // Retrieve the original block.
208 E : const BlockGraph::Block* block = subgraph->original_block();
209 E : DCHECK_NE(reinterpret_cast<const BlockGraph::Block*>(NULL), block);
210 :
211 : // Get the current block address.
212 E : RelativeAddress addr;
213 E : bool valid = GetAddressOfBlock(block, *image_layout_, &addr);
214 E : DCHECK(valid);
215 :
216 E : const BlockDescriptionList& descriptions = subgraph->block_descriptions();
217 E : BlockDescriptionList::const_iterator descr_iter = descriptions.begin();
218 E : for (; descr_iter != descriptions.end(); ++descr_iter) {
219 E : const BasicBlockOrdering& original_order = descr_iter->basic_block_order;
220 E : BasicBlockOrdering::const_iterator order = original_order.begin();
221 E : for (; order != original_order.end(); ++order) {
222 : // Get the basic block.
223 E : const BasicCodeBlock* bb = BasicCodeBlock::Cast(*order);
224 E : if (bb == NULL)
225 E : continue;
226 :
227 : // Retrieve basic block information.
228 E : Offset offset = bb->offset();
229 E : EntryCountType count = 0;
230 E : EntryCountType taken = 0;
231 E : EntryCountType mispredicted = 0;
232 : GetFrequencyByOffset(frequencies_, addr, offset, kEntryCountColumn,
233 E : &count);
234 : GetFrequencyByOffset(frequencies_, addr, offset, kBranchTakenColumn,
235 E : &taken);
236 : GetFrequencyByOffset(frequencies_, addr, offset, kMissPredColumn,
237 E : &mispredicted);
238 :
239 E : DCHECK_GE(count, taken);
240 E : EntryCountType untaken = (count - taken);
241 :
242 : // Fill the basic block profile with the information.
243 E : BasicBlockProfile& bb_profile = (*profile)->basic_blocks_[bb];
244 E : bb_profile.count_ = count;
245 E : bb_profile.mispredicted_ = mispredicted;
246 :
247 : // Fill successors information.
248 E : BasicBlockOrdering::const_iterator next_order = order;
249 E : ++next_order;
250 E : const Successors& successors = bb->successors();
251 E : Successors::const_iterator succ = successors.begin();
252 E : for (; succ != successors.end(); ++succ) {
253 : const BasicCodeBlock* next_bb =
254 E : BasicCodeBlock::Cast(succ->reference().basic_block());
255 : bool is_untaken = (next_order != original_order.end() &&
256 E : BasicCodeBlock::Cast(*next_order) == next_bb);
257 E : bb_profile.successors_[next_bb] = (is_untaken ? untaken : taken);
258 E : }
259 E : }
260 E : }
261 E : }
262 :
263 E : SubGraphProfile::SubGraphProfile() {
264 E : empty_profile_.reset(new BasicBlockProfile());
265 E : }
266 :
267 : const BasicBlockProfile* SubGraphProfile::GetBasicBlockProfile(
268 E : const BasicCodeBlock* block) const {
269 E : DCHECK_NE(reinterpret_cast<const BasicCodeBlock*>(NULL), block);
270 : DCHECK_NE(reinterpret_cast<const BasicBlockProfile*>(NULL),
271 E : empty_profile_.get());
272 E : BasicBlockProfileMap::const_iterator look = basic_blocks_.find(block);
273 E : if (look == basic_blocks_.end())
274 E : return empty_profile_.get();
275 E : return &look->second;
276 E : }
277 :
278 E : double SubGraphProfile::BasicBlockProfile::GetMispredictedRatio() const {
279 E : return ((double)mispredicted_) / count_;
280 E : }
281 :
282 : EntryCountType SubGraphProfile::BasicBlockProfile::GetSuccessorCount(
283 E : const BasicCodeBlock* successor) const {
284 E : DCHECK_NE(reinterpret_cast<const BasicCodeBlock*>(NULL), successor);
285 E : SuccessorsCountMap::const_iterator look = successors_.find(successor);
286 E : if (look != successors_.end())
287 E : return look->second;
288 E : return 0;
289 E : }
290 :
291 : double SubGraphProfile::BasicBlockProfile::GetSuccessorRatio(
292 E : const BasicCodeBlock* successor) const {
293 E : DCHECK_NE(reinterpret_cast<const BasicCodeBlock*>(NULL), successor);
294 E : return ((double)GetSuccessorCount(successor)) / count_;
295 E : }
296 :
297 : } // namespace optimize
|