1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : #ifndef SYZYGY_GRINDER_GRINDERS_PROFILE_GRINDER_H_
15 : #define SYZYGY_GRINDER_GRINDERS_PROFILE_GRINDER_H_
16 :
17 : #include <dia2.h>
18 : #include <iostream>
19 : #include <map>
20 :
21 : #include "base/files/file_path.h"
22 : #include "base/win/scoped_comptr.h"
23 : #include "syzygy/grinder/grinder.h"
24 :
25 : namespace grinder {
26 : namespace grinders {
27 :
// An RVA, used here as an offset within a module (see CodeLocation).
typedef uint32 RVA;
29 :
30 : // A worker class to sink profile trace events and output the aggregate data in
31 : // KCacheGrind-compatible format.
32 : //
33 : // The profiler instrumentation captures the wall-clock time from entry to exit
34 : // for each pair of caller/function for each invocation. This is termed
35 : // "inclusive" time, as it includes the time spent in other functions called.
36 : //
37 : // The KCacheGrind file format also requires listing "exclusive" time for each
38 : // function, where exclusive time is the amount of time spent executing the
39 : // function itself, e.g. exclusive of the time spent calling other functions.
40 : //
41 : // The profile data is captured in a trace log. The trace log is a run of
42 : // records where each record in the log is associated with a particular thread
43 : // (and process), and contains a set of invocation records.
44 : // Each invocation record contains inclusive wall-clock time (and potentially
45 : // other inclusive metrics) for one or more invocations from a particular
46 : // caller address, to a particular function.
47 : // Note that the same caller/function pair may occur multiple times in a trace
48 : // log, even for the same thread, as the profile instrumentation spills a trace
49 : // record to the log when the number of caller/function pairs it's seen exceeds
50 : // the size of the thread-local buffer used to aggregate the data.
51 : //
52 : // This class aggregates the data from a trace log, and builds a graph of
53 : // function nodes and call edges. For each call edge, it aggregates the data
54 : // from one or more log records, by summing up the call counts and inclusive
55 : // metrics. For each function node, it also computes the exclusive cost, by
56 : // summing up the cost of the incoming edges, and subtracting the cost of the
57 : // outgoing edges.
58 : //
59 : // For information on the KCacheGrind file format, see:
60 : // http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
61 : class ProfileGrinder : public GrinderInterface {
62 : public:
63 : ProfileGrinder();
64 : ~ProfileGrinder();
65 :
66 : // @name Accessors and mutators.
67 : // @{
68 : // If thread_parts is true, the grinder will aggregate and output
69 : // separate parts for each thread seen in the trace file(s).
70 E : bool thread_parts() const { return thread_parts_; }
71 : void set_thread_parts(bool thread_parts) { thread_parts_ = thread_parts; }
72 : // @}
73 :
74 : // @name GrinderInterface implementation.
75 : // @{
76 : virtual bool ParseCommandLine(const CommandLine* command_line) OVERRIDE;
77 : virtual void SetParser(Parser* parser) OVERRIDE;
78 : virtual bool Grind() OVERRIDE;
79 : virtual bool OutputData(FILE* file) OVERRIDE;
80 : // @}
81 :
82 : // @name ParseEventHandler overrides.
83 : // @{
84 : virtual void OnInvocationBatch(
85 : base::Time time,
86 : DWORD process_id,
87 : DWORD thread_id,
88 : size_t num_invocations,
89 : const TraceBatchInvocationInfo* data) OVERRIDE;
90 : virtual void OnThreadName(base::Time time,
91 : DWORD process_id,
92 : DWORD thread_id,
93 : const base::StringPiece& thread_name) OVERRIDE;
94 : virtual void OnDynamicSymbol(DWORD process_id,
95 : uint32 symbol_id,
96 : const base::StringPiece& symbol_name) OVERRIDE;
97 : // @}
98 :
99 : protected:
100 : Parser* parser_;
101 :
102 : typedef sym_util::ModuleInformation ModuleInformation;
103 :
104 : // Forward declarations.
105 : struct PartData;
106 : class CodeLocation;
107 :
108 : // Represents the caller of a caller/callee pair.
109 : struct CallerLocation;
110 : // Represents the function of a caller/callee pair.
111 : struct FunctionLocation;
112 :
113 : struct Metrics;
114 : struct InvocationNode;
115 : struct InvocationEdge;
116 :
117 : // The key to the dynamic symbol map i
118 : typedef std::pair<uint32, uint32> DynamicSymbolKey;
119 : typedef std::map<DynamicSymbolKey, std::string> DynamicSymbolMap;
120 : typedef std::set<ModuleInformation,
121 : bool (*)(const ModuleInformation& a, const ModuleInformation& b)>
122 : ModuleInformationSet;
123 : typedef std::map<FunctionLocation, InvocationNode> InvocationNodeMap;
124 : typedef std::pair<FunctionLocation, CallerLocation> InvocationEdgeKey;
125 : typedef std::map<InvocationEdgeKey, InvocationEdge> InvocationEdgeMap;
126 :
127 : typedef base::win::ScopedComPtr<IDiaSession> SessionPtr;
128 : typedef std::map<const ModuleInformation*, SessionPtr> ModuleSessionMap;
129 :
130 : bool GetSessionForModule(const ModuleInformation* module,
131 : IDiaSession** session_out);
132 :
133 : // Finds or creates the part data for the given @p thread_id.
134 : PartData* FindOrCreatePart(DWORD process_id, DWORD thread_id);
135 :
136 : // Retrieves the function containing @p address.
137 : // @param symbol on success returns the function's private symbol, or
138 : // public symbol if no private symbol is available.
139 : // @returns true on success.
140 : bool GetFunctionSymbolByRVA(IDiaSession* session,
141 : RVA address,
142 : IDiaSymbol** symbol);
143 :
144 : // Resolves the function and line number a particular caller belongs to.
145 : // @param caller the location of the caller.
146 : // @param function on success returns the caller's function location.
147 : // @param line on success returns the caller's line number in @p function.
148 : bool GetFunctionForCaller(const CallerLocation& caller,
149 : FunctionLocation* function,
150 : size_t* line);
151 :
152 : bool GetInfoForFunction(const FunctionLocation& function,
153 : std::wstring* function_name,
154 : std::wstring* file_name,
155 : size_t* line);
156 :
157 : // Converts an absolute address to an RVA.
158 : void ConvertToModuleRVA(uint32 process_id,
159 : trace::parser::AbsoluteAddress64 addr,
160 : CodeLocation* rva);
161 :
162 : // Aggregates a single invocation info and/or creates a new node and edge.
163 : void AggregateEntryToPart(const FunctionLocation& function,
164 : const CallerLocation& caller,
165 : const InvocationInfo& info,
166 : PartData* part);
167 :
168 : // This functions adds all caller edges to each function node's linked list of
169 : // callers. In so doing, it also computes each function node's inclusive cost.
170 : // @returns true on success, false on failure.
171 : bool ResolveCallers();
172 :
173 : // Resolves callers for @p part.
174 : bool ResolveCallersForPart(PartData* part);
175 :
176 : // Outputs data for @p part to @p file.
177 : bool OutputDataForPart(const PartData& part, FILE* file);
178 :
179 : // Keeps track of the dynamic symbols seen.
180 : DynamicSymbolMap dynamic_symbols_;
181 :
182 : // Stores the modules we encounter.
183 : ModuleInformationSet modules_;
184 :
185 : // Stores the DIA session objects we have going for each module.
186 : ModuleSessionMap module_sessions_;
187 :
188 : // The parts we store. If thread_parts_ is false, we store only a single
189 : // part with id 0. The parts are keyed on process id/thread id.
190 : typedef std::pair<uint32, uint32> PartKey;
191 : typedef std::map<PartKey, PartData> PartDataMap;
192 : PartDataMap parts_;
193 :
194 : // If true, data is aggregated and output per-thread.
195 : bool thread_parts_;
196 : };
197 :
198 : // The data we store for each part.
199 : struct ProfileGrinder::PartData {
200 : PartData();
201 :
202 : // The thread name for this part.
203 : std::string thread_name_;
204 :
205 : // The process ID for this part.
206 : uint32 process_id_;
207 :
208 : // The thread ID for this part.
209 : uint32 thread_id_;
210 :
211 : // Stores the invocation nodes, aka the functions.
212 : InvocationNodeMap nodes_;
213 :
214 : // Stores the invocation edges.
215 : InvocationEdgeMap edges_;
216 : };
217 :
218 : // A code location is one of two things:
219 : //
220 : // 1. An RVA in a module, e.g. a module + offset.
221 : // 2. A ProcessId/SymbolId pair with an optional offset.
222 : //
223 : // The first represents native code, where module/RVA makes a canonical "name"
224 : // for a code location (whether function or call site) across multiple
225 : // processes. Note that the module should be a canonical pointer to the module
226 : // information to make this comparable against other RVAs in the same module.
227 : //
228 : // The second represents a dynamic symbol, which is always scoped by process,
229 : // here represented by process id.
230 : class ProfileGrinder::CodeLocation {
231 : public:
232 : // Initializes an empty code location.
233 : CodeLocation();
234 :
235 : // Set to a symbol location with @p process_id, @p symbol_id and
236 : // @p symbol_offset.
237 : void Set(uint32 process_id, uint32 symbol_id, size_t symbol_offset);
238 : // Set to a module/rva location with @p module and @p rva.
239 : void Set(const sym_util::ModuleInformation* module, RVA rva);
240 :
241 : // Returns true iff the code location is valid.
242 : bool IsValid() { return is_symbol_ || (rva_ != 0 && module_ != NULL); }
243 :
244 : // Returns a human-readable string representing this instance.
245 : std::string ToString() const;
246 :
247 : // @name Accessors
248 : // @{
249 E : bool is_symbol() const { return is_symbol_; }
250 :
251 : // @name Only valid when is_symbol() == true.
252 E : uint32 process_id() const { return process_id_; }
253 E : uint32 symbol_id() const { return symbol_id_; }
254 E : size_t symbol_offset() const { return symbol_offset_; }
255 :
256 : // @name Only valid when is_symbol() == false.
257 E : const sym_util::ModuleInformation* module() const { return module_; }
258 E : RVA rva() const { return rva_; }
259 : // @}
260 :
261 : bool operator<(const CodeLocation& o) const;
262 : void operator=(const CodeLocation& o);
263 :
264 : bool operator>(const CodeLocation& o) const {
265 : return o < *this;
266 : }
267 E : bool operator==(const CodeLocation& o) const {
268 E : return !(o < *this || *this < o);
269 E : }
270 E : bool operator!=(const CodeLocation& o) const {
271 E : return !(*this == o);
272 E : }
273 :
274 : private:
275 : union {
276 : uint32 process_id_;
277 : const sym_util::ModuleInformation* module_;
278 : };
279 : union {
280 : RVA rva_;
281 : uint32 symbol_id_;
282 : };
283 : size_t symbol_offset_;
284 : bool is_symbol_;
285 : };
286 :
287 : // Represents the address of a function.
288 : struct ProfileGrinder::FunctionLocation : public ProfileGrinder::CodeLocation {
289 : };
290 :
291 : // Represents the address of a caller.
292 : struct ProfileGrinder::CallerLocation : public ProfileGrinder::CodeLocation {
293 : };
294 :
295 : // The metrics we capture per function and per caller.
296 : struct ProfileGrinder::Metrics {
297 E : Metrics() : num_calls(0), cycles_min(0), cycles_max(0), cycles_sum(0) {
298 E : }
299 :
300 : uint64 num_calls;
301 : uint64 cycles_min;
302 : uint64 cycles_max;
303 : uint64 cycles_sum;
304 : };
305 :
306 : // An invocation node represents a function.
307 : struct ProfileGrinder::InvocationNode {
308 E : InvocationNode() : first_call(NULL) {
309 E : }
310 :
311 : // Location of the function this instance represents.
312 : FunctionLocation function;
313 :
314 : // The metrics we've aggregated for this function.
315 : Metrics metrics;
316 :
317 : // Linked list of all the edges where the caller resolves to us.
318 : InvocationEdge* first_call;
319 : };
320 :
321 : // An invocation edge represents a caller->function pair.
322 : struct ProfileGrinder::InvocationEdge {
323 E : InvocationEdge() : caller_function(NULL), line(0), next_call(NULL) {
324 E : }
325 :
326 : // The function/caller pair we denote.
327 : FunctionLocation function;
328 : CallerLocation caller;
329 :
330 : // Line number of the caller.
331 : size_t line;
332 : Metrics metrics;
333 :
334 : // The calling function - resolved from caller.
335 : InvocationNode* caller_function;
336 : // Chains to the next edge resolving to the
337 : // same calling function.
338 : InvocationEdge* next_call;
339 : };
340 :
341 : } // namespace grinders
342 : } // namespace grinder
343 :
344 : #endif // SYZYGY_GRINDER_GRINDERS_PROFILE_GRINDER_H_
|