1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : #ifndef SYZYGY_GRINDER_GRINDERS_PROFILE_GRINDER_H_
15 : #define SYZYGY_GRINDER_GRINDERS_PROFILE_GRINDER_H_
16 :
#include <dia2.h>
#include <stdio.h>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>

#include "base/files/file_path.h"
#include "base/win/scoped_comptr.h"
#include "syzygy/grinder/grinder.h"
24 :
25 : namespace grinder {
26 : namespace grinders {
27 :
28 : typedef uint32 RVA;
29 :
30 : // A worker class to sink profile trace events and output the aggregate data in
31 : // KCacheGrind-compatible format.
32 : //
33 : // The profiler instrumentation captures the wall-clock time from entry to exit
34 : // for each pair of caller/function for each invocation. This is termed
35 : // "inclusive" time, as it includes the time spent in other functions called.
36 : //
37 : // The KCacheGrind file format also requires listing "exclusive" time for each
38 : // function, where exclusive time is the amount of time spent executing the
39 : // function itself, e.g. exclusive of the time spent calling other functions.
40 : //
41 : // The profile data is captured in a trace log. The trace log is a run of
42 : // records where each record in the log is associated with a particular thread
43 : // (and process), and contains a set of invocation records.
44 : // Each invocation record contains inclusive wall-clock time (and potentially
45 : // other inclusive metrics) for one or more invocations from a particular
46 : // caller address, to a particular function.
47 : // Note that the same caller/function pair may occur multiple times in a trace
48 : // log, even for the same thread, as the profile instrumentation spills a trace
49 : // record to the log when the number of caller/function pairs it's seen exceeds
50 : // the size of the thread-local buffer used to aggregate the data.
51 : //
52 : // This class aggregates the data from a trace log, and builds a graph of
53 : // function nodes and call edges. For each call edge, it aggregates the data
54 : // from one or more log records, by summing up the call counts and inclusive
55 : // metrics. For each function node, it also computes the exclusive cost, by
56 : // summing up the cost of the incoming edges, and subtracting the cost of the
57 : // outgoing edges.
58 : //
59 : // For information on the KCacheGrind file format, see:
60 : // http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
61 : class ProfileGrinder : public GrinderInterface {
62 : public:
63 : ProfileGrinder();
64 : ~ProfileGrinder();
65 :
66 : // @name Accessors and mutators.
67 : // @{
68 : // If thread_parts is true, the grinder will aggregate and output
69 : // separate parts for each thread seen in the trace file(s).
70 E : bool thread_parts() const { return thread_parts_; }
71 E : void set_thread_parts(bool thread_parts) { thread_parts_ = thread_parts; }
72 : // @}
73 :
74 : // @name GrinderInterface implementation.
75 : // @{
76 : virtual bool ParseCommandLine(const base::CommandLine* command_line) override;
77 : virtual void SetParser(Parser* parser) override;
78 : virtual bool Grind() override;
79 : virtual bool OutputData(FILE* file) override;
80 : // @}
81 :
82 : // @name ParseEventHandler overrides.
83 : // @{
84 : virtual void OnInvocationBatch(base::Time time,
85 : DWORD process_id,
86 : DWORD thread_id,
87 : size_t num_invocations,
88 : const TraceBatchInvocationInfo* data) override;
89 : virtual void OnThreadName(base::Time time,
90 : DWORD process_id,
91 : DWORD thread_id,
92 : const base::StringPiece& thread_name) override;
93 : virtual void OnDynamicSymbol(DWORD process_id,
94 : uint32 symbol_id,
95 : const base::StringPiece& symbol_name) override;
96 : // @}
97 :
98 : protected:
99 : Parser* parser_;
100 :
101 : typedef pe::ModuleInformation ModuleInformation;
102 :
103 : // Forward declarations.
104 : struct PartData;
105 : class CodeLocation;
106 :
107 : // Represents the caller of a caller/callee pair.
108 : struct CallerLocation;
109 : // Represents the function of a caller/callee pair.
110 : struct FunctionLocation;
111 :
112 : struct Metrics;
113 : struct InvocationNode;
114 : struct InvocationEdge;
115 :
116 : // The key to the dynamic symbol map i
117 : typedef std::pair<uint32, uint32> DynamicSymbolKey;
118 : typedef std::map<DynamicSymbolKey, std::string> DynamicSymbolMap;
119 : typedef std::set<ModuleInformation,
120 : bool (*)(const ModuleInformation& a, const ModuleInformation& b)>
121 : ModuleInformationSet;
122 : typedef std::map<FunctionLocation, InvocationNode> InvocationNodeMap;
123 : typedef std::pair<FunctionLocation, CallerLocation> InvocationEdgeKey;
124 : typedef std::map<InvocationEdgeKey, InvocationEdge> InvocationEdgeMap;
125 :
126 : typedef base::win::ScopedComPtr<IDiaSession> SessionPtr;
127 : typedef std::map<const ModuleInformation*, SessionPtr> ModuleSessionMap;
128 :
129 : bool GetSessionForModule(const ModuleInformation* module,
130 : IDiaSession** session_out);
131 :
132 : // Finds or creates the part data for the given @p thread_id.
133 : PartData* FindOrCreatePart(DWORD process_id, DWORD thread_id);
134 :
135 : // Retrieves the function containing @p address.
136 : // @param symbol on success returns the function's private symbol, or
137 : // public symbol if no private symbol is available.
138 : // @returns true on success.
139 : bool GetFunctionSymbolByRVA(IDiaSession* session,
140 : RVA address,
141 : IDiaSymbol** symbol);
142 :
143 : // Resolves the function and line number a particular caller belongs to.
144 : // @param caller the location of the caller.
145 : // @param function on success returns the caller's function location.
146 : // @param line on success returns the caller's line number in @p function.
147 : bool GetFunctionForCaller(const CallerLocation& caller,
148 : FunctionLocation* function,
149 : size_t* line);
150 :
151 : bool GetInfoForFunction(const FunctionLocation& function,
152 : std::wstring* function_name,
153 : std::wstring* file_name,
154 : size_t* line);
155 :
156 : // Converts an absolute address to an RVA.
157 : void ConvertToModuleRVA(uint32 process_id,
158 : trace::parser::AbsoluteAddress64 addr,
159 : CodeLocation* rva);
160 :
161 : // Aggregates a single invocation info and/or creates a new node and edge.
162 : void AggregateEntryToPart(const FunctionLocation& function,
163 : const CallerLocation& caller,
164 : const InvocationInfo& info,
165 : PartData* part);
166 :
167 : // This functions adds all caller edges to each function node's linked list of
168 : // callers. In so doing, it also computes each function node's inclusive cost.
169 : // @returns true on success, false on failure.
170 : bool ResolveCallers();
171 :
172 : // Resolves callers for @p part.
173 : bool ResolveCallersForPart(PartData* part);
174 :
175 : // Outputs data for @p part to @p file.
176 : bool OutputDataForPart(const PartData& part, FILE* file);
177 :
178 : // Keeps track of the dynamic symbols seen.
179 : DynamicSymbolMap dynamic_symbols_;
180 :
181 : // Stores the modules we encounter.
182 : ModuleInformationSet modules_;
183 :
184 : // Stores the DIA session objects we have going for each module.
185 : ModuleSessionMap module_sessions_;
186 :
187 : // The parts we store. If thread_parts_ is false, we store only a single
188 : // part with id 0. The parts are keyed on process id/thread id.
189 : typedef std::pair<uint32, uint32> PartKey;
190 : typedef std::map<PartKey, PartData> PartDataMap;
191 : PartDataMap parts_;
192 :
193 : // If true, data is aggregated and output per-thread.
194 : bool thread_parts_;
195 : };
196 :
197 : // The data we store for each part.
198 : struct ProfileGrinder::PartData {
199 : PartData();
200 :
201 : // The thread name for this part.
202 : std::string thread_name_;
203 :
204 : // The process ID for this part.
205 : uint32 process_id_;
206 :
207 : // The thread ID for this part.
208 : uint32 thread_id_;
209 :
210 : // Stores the invocation nodes, aka the functions.
211 : InvocationNodeMap nodes_;
212 :
213 : // Stores the invocation edges.
214 : InvocationEdgeMap edges_;
215 : };
216 :
217 : // A code location is one of two things:
218 : //
219 : // 1. An RVA in a module, e.g. a module + offset.
220 : // 2. A ProcessId/SymbolId pair with an optional offset.
221 : //
222 : // The first represents native code, where module/RVA makes a canonical "name"
223 : // for a code location (whether function or call site) across multiple
224 : // processes. Note that the module should be a canonical pointer to the module
225 : // information to make this comparable against other RVAs in the same module.
226 : //
227 : // The second represents a dynamic symbol, which is always scoped by process
228 : // here represented by process id.
229 : class ProfileGrinder::CodeLocation {
230 : public:
231 : // Initializes an empty code location.
232 : CodeLocation();
233 :
234 : // Set to a symbol location with @p process_id, @p symbol_id and
235 : // @p symbol_offset.
236 : void Set(uint32 process_id, uint32 symbol_id, size_t symbol_offset);
237 : // Set to a module/rva location with @p module and @p rva.
238 : void Set(const pe::ModuleInformation* module, RVA rva);
239 :
240 : // Returns true iff the code location is valid.
241 : bool IsValid() { return is_symbol_ || (rva_ != 0 && module_ != NULL); }
242 :
243 : // Returns a human-readable string representing this instance.
244 : std::string ToString() const;
245 :
246 : // @name Accessors
247 : // @{
248 E : bool is_symbol() const { return is_symbol_; }
249 :
250 : // @name Only valid when is_symbol() == true.
251 E : uint32 process_id() const { return process_id_; }
252 E : uint32 symbol_id() const { return symbol_id_; }
253 E : size_t symbol_offset() const { return symbol_offset_; }
254 :
255 : // @name Only valid when is_symbol() == false.
256 E : const pe::ModuleInformation* module() const { return module_; }
257 E : RVA rva() const { return rva_; }
258 : // @}
259 :
260 : bool operator<(const CodeLocation& o) const;
261 : void operator=(const CodeLocation& o);
262 :
263 : bool operator>(const CodeLocation& o) const {
264 : return o < *this;
265 : }
266 E : bool operator==(const CodeLocation& o) const {
267 E : return !(o < *this || *this < o);
268 E : }
269 E : bool operator!=(const CodeLocation& o) const {
270 E : return !(*this == o);
271 E : }
272 :
273 : private:
274 : union {
275 : uint32 process_id_;
276 : const pe::ModuleInformation* module_;
277 : };
278 : union {
279 : RVA rva_;
280 : uint32 symbol_id_;
281 : };
282 : size_t symbol_offset_;
283 : bool is_symbol_;
284 : };
285 :
286 : // Reprents the address of a function.
287 : struct ProfileGrinder::FunctionLocation : public ProfileGrinder::CodeLocation {
288 : };
289 :
290 : // Reprents the address of a caller.
291 : struct ProfileGrinder::CallerLocation : public ProfileGrinder::CodeLocation {
292 : };
293 :
294 : // The metrics we capture per function and per caller.
295 : struct ProfileGrinder::Metrics {
296 E : Metrics() : num_calls(0), cycles_min(0), cycles_max(0), cycles_sum(0) {
297 E : }
298 :
299 : uint64 num_calls;
300 : uint64 cycles_min;
301 : uint64 cycles_max;
302 : uint64 cycles_sum;
303 : };
304 :
305 : // An invocation node represents a function.
306 : struct ProfileGrinder::InvocationNode {
307 E : InvocationNode() : first_call(NULL) {
308 E : }
309 :
310 : // Location of the function this instance represents.
311 : FunctionLocation function;
312 :
313 : // The metrics we've aggregated for this function.
314 : Metrics metrics;
315 :
316 : // Linked list of all the edges where the caller resolves to us.
317 : InvocationEdge* first_call;
318 : };
319 :
320 : // An invocation edge represents a caller->function pair.
321 : struct ProfileGrinder::InvocationEdge {
322 E : InvocationEdge() : caller_function(NULL), line(0), next_call(NULL) {
323 E : }
324 :
325 : // The function/caller pair we denote.
326 : FunctionLocation function;
327 : CallerLocation caller;
328 :
329 : // Line number of the caller.
330 : size_t line;
331 : Metrics metrics;
332 :
333 : // The calling function - resolved from caller.
334 : InvocationNode* caller_function;
335 : // Chains to the next edge resolving to the
336 : // same calling function.
337 : InvocationEdge* next_call;
338 : };
339 :
340 : } // namespace grinders
341 : } // namespace grinder
342 :
343 : #endif // SYZYGY_GRINDER_GRINDERS_PROFILE_GRINDER_H_
|