1 : // Copyright 2012 Google Inc.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : #ifndef SYZYGY_GRINDER_PROFILE_GRINDER_H_
15 : #define SYZYGY_GRINDER_PROFILE_GRINDER_H_
16 :
#include <dia2.h>
#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <string>

#include "base/file_path.h"
#include "base/win/scoped_comptr.h"
#include "syzygy/grinder/grinder.h"
24 :
25 : namespace grinder {
26 :
// The type used to hold an RVA, i.e. an address expressed relative to a
// module (see ModuleRVA and ConvertToModuleRVA in this file).
typedef uint32 RVA;
28 :
29 : // A worker class to sink profile trace events and output the aggregate data in
30 : // KCacheGrind-compatible format.
31 : //
32 : // The profiler instrumentation captures the wall-clock time from entry to exit
33 : // for each pair of caller/function for each invocation. This is termed
34 : // "inclusive" time, as it includes the time spent in other functions called.
35 : //
36 : // The KCacheGrind file format also requires listing "exclusive" time for each
37 : // function, where exclusive time is the amount of time spent executing the
38 : // function itself, e.g. exclusive of the time spent calling other functions.
39 : //
40 : // The profile data is captured in a trace log. The trace log is a run of
41 : // records where each record in the log is associated with a particular thread
42 : // (and process), and contains a set of invocation records.
43 : // Each invocation record contains inclusive wall-clock time (and potentially
44 : // other inclusive metrics) for one or more invocations from a particular
45 : // caller address, to a particular function.
// Note that the same caller/function pair may occur multiple times in a trace
// log, even for the same thread, as the profile instrumentation spills a trace
// record to the log when the number of caller/function pairs it's seen exceeds
// the size of the thread-local buffer used to aggregate the data.
50 : //
51 : // This class aggregates the data from a trace log, and builds a graph of
52 : // function nodes and call edges. For each call edge, it aggregates the data
53 : // from one or more log records, by summing up the call counts and inclusive
54 : // metrics. For each function node, it also computes the exclusive cost, by
55 : // summing up the cost of the incoming edges, and subtracting the cost of the
56 : // outgoing edges.
57 : //
58 : // For information on the KCacheGrind file format, see:
59 : // http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
60 : class ProfileGrinder : public GrinderInterface {
61 : public:
62 : ProfileGrinder();
63 : ~ProfileGrinder();
64 :
65 : // @name Accessors and mutators.
66 : // @{
67 : // If thread_parts is true, the grinder will aggregate and output
68 : // separate parts for each thread seen in the trace file(s).
69 : bool thread_parts() const { return thread_parts_; }
70 : void set_thread_parts(bool thread_parts) { thread_parts_ = thread_parts; }
71 : // @}
72 :
73 : // @name GrinderInterface implementation.
74 : // @{
75 : virtual bool ParseCommandLine(const CommandLine* command_line) OVERRIDE;
76 : virtual void SetParser(Parser* parser) OVERRIDE;
77 : virtual bool Grind() OVERRIDE;
78 : virtual bool OutputData(FILE* file) OVERRIDE;
79 : // @}
80 :
81 : // @name ParseEventHandler overrides.
82 : // @{
83 : virtual void OnInvocationBatch(
84 : base::Time time,
85 : DWORD process_id,
86 : DWORD thread_id,
87 : size_t num_invocations,
88 : const TraceBatchInvocationInfo* data) OVERRIDE;
89 : virtual void OnThreadName(base::Time time,
90 : DWORD process_id,
91 : DWORD thread_id,
92 : const base::StringPiece& thread_name) OVERRIDE;
93 : // @}
94 :
95 : private:
96 : typedef sym_util::ModuleInformation ModuleInformation;
97 :
98 : // Forward declarations.
99 : struct PartData;
100 : struct ModuleRVA;
101 : struct Metrics;
102 : struct InvocationNode;
103 : struct InvocationEdge;
104 :
105 : typedef std::set<ModuleInformation,
106 : bool (*)(const ModuleInformation& a, const ModuleInformation& b)>
107 : ModuleInformationSet;
108 : typedef std::map<ModuleRVA, InvocationNode> InvocationNodeMap;
109 : typedef std::pair<ModuleRVA, ModuleRVA> InvocationEdgeKey;
110 : typedef std::map<InvocationEdgeKey, InvocationEdge> InvocationEdgeMap;
111 :
112 : typedef base::win::ScopedComPtr<IDiaSession> SessionPtr;
113 : typedef std::map<const ModuleInformation*, SessionPtr> ModuleSessionMap;
114 :
115 : bool GetSessionForModule(const ModuleInformation* module,
116 : IDiaSession** session_out);
117 :
118 : // Finds or creates the part data for the given @p thread_id.
119 : PartData* FindOrCreatePart(DWORD process_id, DWORD thread_id);
120 :
121 : // Retrieves the function containing @p address.
122 : // @param symbol on success returns the function's private symbol, or
123 : // public symbol if no private symbol is available.
124 : // @returns true on success.
125 : bool GetFunctionByRVA(IDiaSession* session,
126 : RVA address,
127 : IDiaSymbol** symbol);
128 : bool GetInfoForCallerRVA(const ModuleRVA& caller,
129 : RVA* function_rva,
130 : size_t* line);
131 :
132 : bool GetInfoForFunctionRVA(const ModuleRVA& function,
133 : std::wstring* function_name,
134 : std::wstring* file_name,
135 : size_t* line);
136 :
137 : // Converts an absolute address to an RVA.
138 : void ConvertToModuleRVA(uint32 process_id,
139 : trace::parser::AbsoluteAddress64 addr,
140 : ModuleRVA* rva);
141 :
142 : // Aggregates a single invocation info and/or creates a new node and edge.
143 : void AggregateEntryToPart(const ModuleRVA& function_rva,
144 : const ModuleRVA& caller_rva,
145 : const InvocationInfo& info,
146 : PartData* part);
147 :
148 : // This functions adds all caller edges to each function node's linked list of
149 : // callers. In so doing, it also computes each function node's inclusive cost.
150 : // @returns true on success, false on failure.
151 : bool ResolveCallers();
152 :
153 : // Resolves callers for @p part.
154 : bool ResolveCallersForPart(PartData* part);
155 :
156 : // Outputs data for @p part to @p file.
157 : bool OutputDataForPart(const PartData& part, FILE* file);
158 :
159 : // Stores the modules we encounter.
160 : ModuleInformationSet modules_;
161 :
162 : // Stores the DIA session objects we have going for each module.
163 : ModuleSessionMap module_sessions_;
164 :
165 : // The parts we store. If thread_parts_ is false, we store only a single
166 : // part with id 0.
167 : typedef std::map<uint32, PartData> PartDataMap;
168 : PartDataMap parts_;
169 :
170 : // If true, data is aggregated and output per-thread.
171 : bool thread_parts_;
172 :
173 : Parser* parser_;
174 : };
175 :
176 : // The data we store for each part.
177 : struct ProfileGrinder::PartData {
178 : PartData();
179 :
180 : // The thread name for this part.
181 : std::string thread_name_;
182 :
183 : // The process ID for this part.
184 : uint32 process_id_;
185 :
186 : // The thread ID for this part.
187 : uint32 thread_id_;
188 :
189 : // Stores the invocation nodes, aka the functions.
190 : InvocationNodeMap nodes_;
191 :
192 : // Stores the invocation edges.
193 : InvocationEdgeMap edges_;
194 : };
195 :
196 : // RVA in a module. The module should be a canonical pointer
197 : // to the module information to make this comparable against
198 : // other RVAs in the same module.
199 : struct ProfileGrinder::ModuleRVA {
200 E : ModuleRVA() : module(NULL), rva(0) {
201 E : }
202 :
203 E : bool operator < (const ModuleRVA& o) const {
204 E : if (module > o.module)
205 i : return false;
206 E : if (module < o.module)
207 i : return true;
208 E : return rva < o.rva;
209 E : }
210 : bool operator > (const ModuleRVA& o) const {
211 : return o < *this;
212 : }
213 : bool operator == (const ModuleRVA& o) const {
214 : return !(o < *this || *this < o);
215 : }
216 : bool operator != (const ModuleRVA& o) const {
217 : return !(*this == o);
218 : }
219 :
220 : const sym_util::ModuleInformation* module;
221 : RVA rva;
222 : };
223 :
224 : // The metrics we capture per function and per caller.
225 : struct ProfileGrinder::Metrics {
226 E : Metrics() : num_calls(0), cycles_min(0), cycles_max(0), cycles_sum(0) {
227 E : }
228 :
229 : uint64 num_calls;
230 : uint64 cycles_min;
231 : uint64 cycles_max;
232 : uint64 cycles_sum;
233 : };
234 :
235 : // An invocation node represents a function.
236 : struct ProfileGrinder::InvocationNode {
237 E : InvocationNode() : first_call(NULL) {
238 E : }
239 :
240 : // RVA for the function this instance represents.
241 : ModuleRVA function;
242 :
243 : // The metrics we've aggregated for this function.
244 : Metrics metrics;
245 :
246 : // Linked list of all the edges where the caller resolves to us.
247 : InvocationEdge* first_call;
248 : };
249 :
250 : // An invocation edge represents a caller->function pair.
251 : struct ProfileGrinder::InvocationEdge {
252 E : InvocationEdge() : caller_function(NULL), line(0), next_call(NULL) {
253 E : }
254 :
255 : // The function/caller pair we denote.
256 : ModuleRVA function;
257 : ModuleRVA caller;
258 :
259 : // Line number of the caller.
260 : size_t line;
261 : Metrics metrics;
262 :
263 : // The calling function - resolved from caller.
264 : InvocationNode* caller_function;
265 : // Chains to the next edge resolving to the
266 : // same calling function.
267 : InvocationEdge* next_call;
268 : };
269 :
270 : } // namespace grinder
271 :
272 : #endif // SYZYGY_GRINDER_PROFILE_GRINDER_H_
|