1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : #ifndef SYZYGY_GRINDER_PROFILE_GRINDER_H_
15 : #define SYZYGY_GRINDER_PROFILE_GRINDER_H_
16 :
#include <dia2.h>
#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>

#include "base/file_path.h"
#include "base/win/scoped_comptr.h"
#include "syzygy/grinder/grinder.h"
24 :
25 : namespace grinder {
26 :
27 : typedef uint32 RVA;
28 :
29 : // A worker class to sink profile trace events and output the aggregate data in
30 : // KCacheGrind-compatible format.
31 : //
32 : // The profiler instrumentation captures the wall-clock time from entry to exit
33 : // for each pair of caller/function for each invocation. This is termed
34 : // "inclusive" time, as it includes the time spent in other functions called.
35 : //
36 : // The KCacheGrind file format also requires listing "exclusive" time for each
37 : // function, where exclusive time is the amount of time spent executing the
38 : // function itself, e.g. exclusive of the time spent calling other functions.
39 : //
40 : // The profile data is captured in a trace log. The trace log is a run of
41 : // records where each record in the log is associated with a particular thread
42 : // (and process), and contains a set of invocation records.
43 : // Each invocation record contains inclusive wall-clock time (and potentially
44 : // other inclusive metrics) for one or more invocations from a particular
45 : // caller address, to a particular function.
46 : // Note that the same caller/function pair may occur multiple times in a trace
47 : // log, even for the same thread, as the profile instrumentation spills a trace
48 : // record to the log when the number of caller/function pairs it's seen exceeds
49 : // the size of the thread-local buffer used to aggregate the data.
50 : //
51 : // This class aggregates the data from a trace log, and builds a graph of
52 : // function nodes and call edges. For each call edge, it aggregates the data
53 : // from one or more log records, by summing up the call counts and inclusive
54 : // metrics. For each function node, it also computes the exclusive cost, by
55 : // summing up the cost of the incoming edges, and subtracting the cost of the
56 : // outgoing edges.
57 : //
58 : // For information on the KCacheGrind file format, see:
59 : // http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
60 : class ProfileGrinder : public GrinderInterface {
61 : public:
62 : ProfileGrinder();
63 : ~ProfileGrinder();
64 :
65 : // @name Accessors and mutators.
66 : // @{
67 : // If thread_parts is true, the grinder will aggregate and output
68 : // separate parts for each thread seen in the trace file(s).
69 E : bool thread_parts() const { return thread_parts_; }
70 : void set_thread_parts(bool thread_parts) { thread_parts_ = thread_parts; }
71 : // @}
72 :
73 : // @name GrinderInterface implementation.
74 : // @{
75 : virtual bool ParseCommandLine(const CommandLine* command_line) OVERRIDE;
76 : virtual void SetParser(Parser* parser) OVERRIDE;
77 : virtual bool Grind() OVERRIDE;
78 : virtual bool OutputData(FILE* file) OVERRIDE;
79 : // @}
80 :
81 : // @name ParseEventHandler overrides.
82 : // @{
83 : virtual void OnInvocationBatch(
84 : base::Time time,
85 : DWORD process_id,
86 : DWORD thread_id,
87 : size_t num_invocations,
88 : const TraceBatchInvocationInfo* data) OVERRIDE;
89 : virtual void OnThreadName(base::Time time,
90 : DWORD process_id,
91 : DWORD thread_id,
92 : const base::StringPiece& thread_name) OVERRIDE;
93 : // @}
94 :
95 : protected:
96 : Parser* parser_;
97 :
98 : private:
99 : typedef sym_util::ModuleInformation ModuleInformation;
100 :
101 : // Forward declarations.
102 : struct PartData;
103 : struct ModuleRVA;
104 : struct Metrics;
105 : struct InvocationNode;
106 : struct InvocationEdge;
107 :
108 : typedef std::set<ModuleInformation,
109 : bool (*)(const ModuleInformation& a, const ModuleInformation& b)>
110 : ModuleInformationSet;
111 : typedef std::map<ModuleRVA, InvocationNode> InvocationNodeMap;
112 : typedef std::pair<ModuleRVA, ModuleRVA> InvocationEdgeKey;
113 : typedef std::map<InvocationEdgeKey, InvocationEdge> InvocationEdgeMap;
114 :
115 : typedef base::win::ScopedComPtr<IDiaSession> SessionPtr;
116 : typedef std::map<const ModuleInformation*, SessionPtr> ModuleSessionMap;
117 :
118 : bool GetSessionForModule(const ModuleInformation* module,
119 : IDiaSession** session_out);
120 :
121 : // Finds or creates the part data for the given @p thread_id.
122 : PartData* FindOrCreatePart(DWORD process_id, DWORD thread_id);
123 :
124 : // Retrieves the function containing @p address.
125 : // @param symbol on success returns the function's private symbol, or
126 : // public symbol if no private symbol is available.
127 : // @returns true on success.
128 : bool GetFunctionByRVA(IDiaSession* session,
129 : RVA address,
130 : IDiaSymbol** symbol);
131 : bool GetInfoForCallerRVA(const ModuleRVA& caller,
132 : RVA* function_rva,
133 : size_t* line);
134 :
135 : bool GetInfoForFunctionRVA(const ModuleRVA& function,
136 : std::wstring* function_name,
137 : std::wstring* file_name,
138 : size_t* line);
139 :
140 : // Converts an absolute address to an RVA.
141 : void ConvertToModuleRVA(uint32 process_id,
142 : trace::parser::AbsoluteAddress64 addr,
143 : ModuleRVA* rva);
144 :
145 : // Aggregates a single invocation info and/or creates a new node and edge.
146 : void AggregateEntryToPart(const ModuleRVA& function_rva,
147 : const ModuleRVA& caller_rva,
148 : const InvocationInfo& info,
149 : PartData* part);
150 :
151 : // This functions adds all caller edges to each function node's linked list of
152 : // callers. In so doing, it also computes each function node's inclusive cost.
153 : // @returns true on success, false on failure.
154 : bool ResolveCallers();
155 :
156 : // Resolves callers for @p part.
157 : bool ResolveCallersForPart(PartData* part);
158 :
159 : // Outputs data for @p part to @p file.
160 : bool OutputDataForPart(const PartData& part, FILE* file);
161 :
162 : // Stores the modules we encounter.
163 : ModuleInformationSet modules_;
164 :
165 : // Stores the DIA session objects we have going for each module.
166 : ModuleSessionMap module_sessions_;
167 :
168 : // The parts we store. If thread_parts_ is false, we store only a single
169 : // part with id 0.
170 : typedef std::map<uint32, PartData> PartDataMap;
171 : PartDataMap parts_;
172 :
173 : // If true, data is aggregated and output per-thread.
174 : bool thread_parts_;
175 : };
176 :
177 : // The data we store for each part.
178 : struct ProfileGrinder::PartData {
179 : PartData();
180 :
181 : // The thread name for this part.
182 : std::string thread_name_;
183 :
184 : // The process ID for this part.
185 : uint32 process_id_;
186 :
187 : // The thread ID for this part.
188 : uint32 thread_id_;
189 :
190 : // Stores the invocation nodes, aka the functions.
191 : InvocationNodeMap nodes_;
192 :
193 : // Stores the invocation edges.
194 : InvocationEdgeMap edges_;
195 : };
196 :
197 : // RVA in a module. The module should be a canonical pointer
198 : // to the module information to make this comparable against
199 : // other RVAs in the same module.
200 : struct ProfileGrinder::ModuleRVA {
201 E : ModuleRVA() : module(NULL), rva(0) {
202 E : }
203 :
204 E : bool operator < (const ModuleRVA& o) const {
205 E : if (module > o.module)
206 i : return false;
207 E : if (module < o.module)
208 i : return true;
209 E : return rva < o.rva;
210 E : }
211 : bool operator > (const ModuleRVA& o) const {
212 : return o < *this;
213 : }
214 : bool operator == (const ModuleRVA& o) const {
215 : return !(o < *this || *this < o);
216 : }
217 : bool operator != (const ModuleRVA& o) const {
218 : return !(*this == o);
219 : }
220 :
221 : const sym_util::ModuleInformation* module;
222 : RVA rva;
223 : };
224 :
225 : // The metrics we capture per function and per caller.
226 : struct ProfileGrinder::Metrics {
227 E : Metrics() : num_calls(0), cycles_min(0), cycles_max(0), cycles_sum(0) {
228 E : }
229 :
230 : uint64 num_calls;
231 : uint64 cycles_min;
232 : uint64 cycles_max;
233 : uint64 cycles_sum;
234 : };
235 :
236 : // An invocation node represents a function.
237 : struct ProfileGrinder::InvocationNode {
238 E : InvocationNode() : first_call(NULL) {
239 E : }
240 :
241 : // RVA for the function this instance represents.
242 : ModuleRVA function;
243 :
244 : // The metrics we've aggregated for this function.
245 : Metrics metrics;
246 :
247 : // Linked list of all the edges where the caller resolves to us.
248 : InvocationEdge* first_call;
249 : };
250 :
251 : // An invocation edge represents a caller->function pair.
252 : struct ProfileGrinder::InvocationEdge {
253 E : InvocationEdge() : caller_function(NULL), line(0), next_call(NULL) {
254 E : }
255 :
256 : // The function/caller pair we denote.
257 : ModuleRVA function;
258 : ModuleRVA caller;
259 :
260 : // Line number of the caller.
261 : size_t line;
262 : Metrics metrics;
263 :
264 : // The calling function - resolved from caller.
265 : InvocationNode* caller_function;
266 : // Chains to the next edge resolving to the
267 : // same calling function.
268 : InvocationEdge* next_call;
269 : };
270 :
271 : } // namespace grinder
272 :
273 : #endif // SYZYGY_GRINDER_PROFILE_GRINDER_H_
|