1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : #ifndef SYZYGY_GRINDER_PROFILE_GRINDER_H_
15 : #define SYZYGY_GRINDER_PROFILE_GRINDER_H_
16 :
#include <dia2.h>
#include <stdio.h>
#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>

#include "base/files/file_path.h"
#include "base/win/scoped_comptr.h"
#include "syzygy/grinder/grinder.h"
24 :
25 : namespace grinder {
26 :
// Type used to hold an RVA, i.e. an address expressed relative to a module
// rather than as an absolute address (see ConvertToModuleRVA and ModuleRVA
// below).
typedef uint32 RVA;
28 :
29 : // A worker class to sink profile trace events and output the aggregate data in
30 : // KCacheGrind-compatible format.
31 : //
32 : // The profiler instrumentation captures the wall-clock time from entry to exit
33 : // for each pair of caller/function for each invocation. This is termed
34 : // "inclusive" time, as it includes the time spent in other functions called.
35 : //
36 : // The KCacheGrind file format also requires listing "exclusive" time for each
37 : // function, where exclusive time is the amount of time spent executing the
38 : // function itself, e.g. exclusive of the time spent calling other functions.
39 : //
40 : // The profile data is captured in a trace log. The trace log is a run of
41 : // records where each record in the log is associated with a particular thread
42 : // (and process), and contains a set of invocation records.
43 : // Each invocation record contains inclusive wall-clock time (and potentially
44 : // other inclusive metrics) for one or more invocations from a particular
45 : // caller address, to a particular function.
46 : // Note that the same caller/function pair may occur multiple times in a trace
47 : // log, even for the same thread, as the profile instrumentation spills a trace
48 : // record to the log when the number of caller/function pairs it's seen exceeds
49 : // the size of the thread-local buffer used to aggregate the data.
50 : //
51 : // This class aggregates the data from a trace log, and builds a graph of
52 : // function nodes and call edges. For each call edge, it aggregates the data
53 : // from one or more log records, by summing up the call counts and inclusive
54 : // metrics. For each function node, it also computes the exclusive cost, by
55 : // summing up the cost of the incoming edges, and subtracting the cost of the
56 : // outgoing edges.
57 : //
58 : // For information on the KCacheGrind file format, see:
59 : // http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
60 : class ProfileGrinder : public GrinderInterface {
61 : public:
62 : ProfileGrinder();
63 : ~ProfileGrinder();
64 :
65 : // @name Accessors and mutators.
66 : // @{
67 : // If thread_parts is true, the grinder will aggregate and output
68 : // separate parts for each thread seen in the trace file(s).
69 E : bool thread_parts() const { return thread_parts_; }
70 : void set_thread_parts(bool thread_parts) { thread_parts_ = thread_parts; }
71 : // @}
72 :
73 : // @name GrinderInterface implementation.
74 : // @{
75 : virtual bool ParseCommandLine(const CommandLine* command_line) OVERRIDE;
76 : virtual void SetParser(Parser* parser) OVERRIDE;
77 : virtual bool Grind() OVERRIDE;
78 : virtual bool OutputData(FILE* file) OVERRIDE;
79 : // @}
80 :
81 : // @name ParseEventHandler overrides.
82 : // @{
83 : virtual void OnInvocationBatch(
84 : base::Time time,
85 : DWORD process_id,
86 : DWORD thread_id,
87 : size_t num_invocations,
88 : const TraceBatchInvocationInfo* data) OVERRIDE;
89 : virtual void OnThreadName(base::Time time,
90 : DWORD process_id,
91 : DWORD thread_id,
92 : const base::StringPiece& thread_name) OVERRIDE;
93 : // @}
94 :
95 : protected:
96 : Parser* parser_;
97 :
98 : private:
99 : typedef sym_util::ModuleInformation ModuleInformation;
100 :
101 : // Forward declarations.
102 : struct PartData;
103 : struct ModuleRVA;
104 :
105 : // Represents the caller of a caller/callee pair.
106 : struct CallerAddress;
107 : // Represents the function of a caller/callee pair.
108 : struct FunctionAddress;
109 :
110 : struct Metrics;
111 : struct InvocationNode;
112 : struct InvocationEdge;
113 :
114 : typedef std::set<ModuleInformation,
115 : bool (*)(const ModuleInformation& a, const ModuleInformation& b)>
116 : ModuleInformationSet;
117 : typedef std::map<FunctionAddress, InvocationNode> InvocationNodeMap;
118 : typedef std::pair<FunctionAddress, CallerAddress> InvocationEdgeKey;
119 : typedef std::map<InvocationEdgeKey, InvocationEdge> InvocationEdgeMap;
120 :
121 : typedef base::win::ScopedComPtr<IDiaSession> SessionPtr;
122 : typedef std::map<const ModuleInformation*, SessionPtr> ModuleSessionMap;
123 :
124 : bool GetSessionForModule(const ModuleInformation* module,
125 : IDiaSession** session_out);
126 :
127 : // Finds or creates the part data for the given @p thread_id.
128 : PartData* FindOrCreatePart(DWORD process_id, DWORD thread_id);
129 :
130 : // Retrieves the function containing @p address.
131 : // @param symbol on success returns the function's private symbol, or
132 : // public symbol if no private symbol is available.
133 : // @returns true on success.
134 : bool GetFunctionSymbolByRVA(IDiaSession* session,
135 : RVA address,
136 : IDiaSymbol** symbol);
137 :
138 : // Resolves the function and line number a particular caller belongs to.
139 : // @param caller the location of the caller.
140 : // @param function on success returns the caller's function location.
141 : // @param line on success returns the caller's line number in @p function.
142 : bool GetFunctionForCaller(const CallerAddress& caller,
143 : FunctionAddress* function,
144 : size_t* line);
145 :
146 : bool GetInfoForFunction(const FunctionAddress& function,
147 : std::wstring* function_name,
148 : std::wstring* file_name,
149 : size_t* line);
150 :
151 : // Converts an absolute address to an RVA.
152 : void ConvertToModuleRVA(uint32 process_id,
153 : trace::parser::AbsoluteAddress64 addr,
154 : ModuleRVA* rva);
155 :
156 : // Aggregates a single invocation info and/or creates a new node and edge.
157 : void AggregateEntryToPart(const FunctionAddress& function,
158 : const CallerAddress& caller,
159 : const InvocationInfo& info,
160 : PartData* part);
161 :
162 : // This functions adds all caller edges to each function node's linked list of
163 : // callers. In so doing, it also computes each function node's inclusive cost.
164 : // @returns true on success, false on failure.
165 : bool ResolveCallers();
166 :
167 : // Resolves callers for @p part.
168 : bool ResolveCallersForPart(PartData* part);
169 :
170 : // Outputs data for @p part to @p file.
171 : bool OutputDataForPart(const PartData& part, FILE* file);
172 :
173 : // Stores the modules we encounter.
174 : ModuleInformationSet modules_;
175 :
176 : // Stores the DIA session objects we have going for each module.
177 : ModuleSessionMap module_sessions_;
178 :
179 : // The parts we store. If thread_parts_ is false, we store only a single
180 : // part with id 0.
181 : typedef std::map<uint32, PartData> PartDataMap;
182 : PartDataMap parts_;
183 :
184 : // If true, data is aggregated and output per-thread.
185 : bool thread_parts_;
186 : };
187 :
188 : // The data we store for each part.
189 : struct ProfileGrinder::PartData {
190 : PartData();
191 :
192 : // The thread name for this part.
193 : std::string thread_name_;
194 :
195 : // The process ID for this part.
196 : uint32 process_id_;
197 :
198 : // The thread ID for this part.
199 : uint32 thread_id_;
200 :
201 : // Stores the invocation nodes, aka the functions.
202 : InvocationNodeMap nodes_;
203 :
204 : // Stores the invocation edges.
205 : InvocationEdgeMap edges_;
206 : };
207 :
208 : // RVA in a module. The module should be a canonical pointer
209 : // to the module information to make this comparable against
210 : // other RVAs in the same module.
211 : struct ProfileGrinder::ModuleRVA {
212 E : ModuleRVA() : module(NULL), rva(0) {
213 E : }
214 :
215 E : bool operator < (const ModuleRVA& o) const {
216 E : if (module > o.module)
217 i : return false;
218 E : if (module < o.module)
219 i : return true;
220 E : return rva < o.rva;
221 E : }
222 : bool operator > (const ModuleRVA& o) const {
223 : return o < *this;
224 : }
225 : bool operator == (const ModuleRVA& o) const {
226 : return !(o < *this || *this < o);
227 : }
228 : bool operator != (const ModuleRVA& o) const {
229 : return !(*this == o);
230 : }
231 :
232 : const sym_util::ModuleInformation* module;
233 : RVA rva;
234 : };
235 :
236 : // Reprents the address of a function.
237 : struct ProfileGrinder::FunctionAddress : public ProfileGrinder::ModuleRVA {
238 : };
239 :
240 : // Reprents the address of a caller.
241 : struct ProfileGrinder::CallerAddress : public ProfileGrinder::ModuleRVA {
242 : };
243 :
244 : // The metrics we capture per function and per caller.
245 : struct ProfileGrinder::Metrics {
246 E : Metrics() : num_calls(0), cycles_min(0), cycles_max(0), cycles_sum(0) {
247 E : }
248 :
249 : uint64 num_calls;
250 : uint64 cycles_min;
251 : uint64 cycles_max;
252 : uint64 cycles_sum;
253 : };
254 :
255 : // An invocation node represents a function.
256 : struct ProfileGrinder::InvocationNode {
257 E : InvocationNode() : first_call(NULL) {
258 E : }
259 :
260 : // Location of the function this instance represents.
261 : FunctionAddress function;
262 :
263 : // The metrics we've aggregated for this function.
264 : Metrics metrics;
265 :
266 : // Linked list of all the edges where the caller resolves to us.
267 : InvocationEdge* first_call;
268 : };
269 :
270 : // An invocation edge represents a caller->function pair.
271 : struct ProfileGrinder::InvocationEdge {
272 E : InvocationEdge() : caller_function(NULL), line(0), next_call(NULL) {
273 E : }
274 :
275 : // The function/caller pair we denote.
276 : FunctionAddress function;
277 : CallerAddress caller;
278 :
279 : // Line number of the caller.
280 : size_t line;
281 : Metrics metrics;
282 :
283 : // The calling function - resolved from caller.
284 : InvocationNode* caller_function;
285 : // Chains to the next edge resolving to the
286 : // same calling function.
287 : InvocationEdge* next_call;
288 : };
289 :
290 : } // namespace grinder
291 :
292 : #endif // SYZYGY_GRINDER_PROFILE_GRINDER_H_
|