1 : // Copyright 2015 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Declares the MemReplayGrinder class, which processes trace files
16 : // containing the list of heap accesses and outputs a test scenario
17 : // for replay.
18 : #ifndef SYZYGY_GRINDER_GRINDERS_MEM_REPLAY_GRINDER_H_
19 : #define SYZYGY_GRINDER_GRINDERS_MEM_REPLAY_GRINDER_H_
20 :
21 : #include <deque>
22 : #include <map>
23 : #include <memory>
24 : #include <set>
25 : #include <string>
26 : #include <unordered_map>
27 : #include <unordered_set>
28 : #include <utility>
29 : #include <vector>
30 :
31 : #include "syzygy/bard/event.h"
32 : #include "syzygy/bard/story.h"
33 : #include "syzygy/bard/events/linked_event.h"
34 : #include "syzygy/grinder/grinder.h"
35 :
36 : namespace grinder {
37 : namespace grinders {
38 :
39 : // This class processes trace files containing the raw history of
40 : // of heap allocations and deallocations, and generates a reduced
41 : // trace file to be used as a test scenario.
42 : class MemReplayGrinder : public GrinderInterface {
43 : public:
44 : MemReplayGrinder();
45 E : ~MemReplayGrinder() override {}
46 :
47 : // @name GrinderInterface implementation.
48 : // @{
49 : bool ParseCommandLine(const base::CommandLine* command_line) override;
50 : void SetParser(Parser* parser) override;
51 : bool Grind() override;
52 : bool OutputData(FILE* file) override;
53 : // @}
54 :
55 : // @name ParserEventHandler implementation.
56 : // @{
57 : void OnFunctionNameTableEntry(
58 : base::Time time,
59 : DWORD process_id,
60 : const TraceFunctionNameTableEntry* data) override;
61 : void OnDetailedFunctionCall(base::Time time,
62 : DWORD process_id,
63 : DWORD thread_id,
64 : const TraceDetailedFunctionCall* data) override;
65 : void OnProcessHeap(base::Time time,
66 : DWORD process_id,
67 : const TraceProcessHeap* data) override;
68 : // @}
69 :
70 : // Protected for unittesting.
71 : protected:
72 : using EventInterface = bard::EventInterface;
73 : using EventType = EventInterface::EventType;
74 :
75 : // See below for comments and definitions.
76 : class PendingDetailedFunctionCall;
77 : class ObjectInfo;
78 : struct EventObjects;
79 : struct ProcessData;
80 : struct ThreadData;
81 : struct ThreadDataIterator;
82 : struct ThreadDataIteratorHashFunctor;
83 :
84 : using PendingDetailedFunctionCalls = std::deque<PendingDetailedFunctionCall>;
85 : // Associates objects by addresses in the trace file to the event in which
86 : // they are created/destroyed, and if they are alive or not. The event is
87 : // encoded by the ThreadDataIterator referring to it.
88 : using ObjectMap = std::unordered_map<const void*, ObjectInfo>;
89 : // A collection of objects describing a dependency.
90 : using Deps =
91 : std::unordered_set<ThreadDataIterator, ThreadDataIteratorHashFunctor>;
92 : // Tracks dependencies that have already been explicitly encoded. A thread
93 : // |i| that has already waited on a thread |j| will store the most recent
94 : // event waited on in the map associated with key (i, j).
95 : using PlotLinePair =
96 : std::pair<const bard::Story::PlotLine*, const bard::Story::PlotLine*>;
97 : using WaitedMap = std::map<PlotLinePair, ThreadDataIterator>;
98 :
99 : // Loads the function_enum_map_ with SyzyASan function names.
100 : void LoadAsanFunctionNames();
101 : // Parses a detailed function call record.
102 : bool ParseDetailedFunctionCall(base::Time time,
103 : DWORD thread_id,
104 : const TraceDetailedFunctionCall* data,
105 : ProcessData* proc_data);
106 : // Sets parse_error_ to true.
107 : void SetParseError();
108 : // Finds or creates the process data for a given process.
109 : // @param process_id The ID of the process.
110 : // @returns the associated ProcessData.
111 : ProcessData* FindOrCreateProcessData(DWORD process_id);
112 : // Finds or creates the thread data in the provided process data, for the
113 : // provided thread.
114 : // @param proc_data The process data.
115 : // @param thread_id The ID of the thread.
116 : // @returns the associated ThreadData.
117 : ThreadData* FindOrCreateThreadData(ProcessData* proc_data, DWORD thread_id);
118 :
119 : // Ensures that the given event is a LinkedEvent, and thus able to support
120 : // dependencies.
121 : // @param iter The iterator pointing to the event to be converted.
122 : void EnsureLinkedEvent(const ThreadDataIterator& iter);
123 : // Populates the list of objects that are created/destroyed/used by a given
124 : // event.
125 : // @param iter The iterator pointing to the event to be queried.
126 : // @param objects Pointer to the list of objects to be populated.
127 : void GetEventObjects(const ThreadDataIterator& iter, EventObjects* objects);
128 : // Gets the set of dependencies for the given event from the given object
129 : // map.
130 : // @param iter The iterator pointing to the event to be queried.
131 : // @param objects The objects created/destroyed/used by the event.
132 : // @param object_map The map describing the state of all known objects.
133 : // @param deps A list of events that are dependencies to the event in
134 : // @p iter.
135 : // @returns true on success, false otherwise.
136 : bool GetDeps(const ThreadDataIterator& iter,
137 : const EventObjects& objects,
138 : const ObjectMap& object_map,
139 : Deps* deps);
140 : // Updates @p deps with a dependency from @p iter to the provided @p input.
141 : // Does some analysis to omit adding redundant dependencies.
142 : // @param iter The iterator pointing to the event with dependencies.
143 : // @param input The iterator pointing to the input dependency event.
144 : // @param deps The list of dependencies to be modified.
145 : void AddDep(const ThreadDataIterator& iter,
146 : const ThreadDataIterator& input,
147 : Deps* deps);
148 : // Applies the given set of dependencies to provided event, updating the
149 : // @p waited_map.
150 : // @param iter The iterator pointing to the event with dependencies.
151 : // @param object_map The map describing the state of all known objects.
152 : // @param deps The list of input dependencies.
153 : // @param waited_map The map of already expressed dependencies to be updated.
154 : bool ApplyDeps(const ThreadDataIterator& iter,
155 : const ObjectMap& object_map,
156 : const Deps& deps,
157 : WaitedMap* waited_map);
158 : // Updates the provided @p live_object_map and @p dead_object_map with
159 : // information from the event pointed to by @p iter.
160 : // @param iter The iterator pointing to the event being processed.
161 : // @param objects The list of objects touched by the event.
162 : // @param object_map The map describing the state of all known objects that
163 : // is to be updated.
164 : // @returns true on success, false otherwise.
165 : bool UpdateObjectMap(const ThreadDataIterator& iter,
166 : const EventObjects& objects,
167 : ObjectMap* object_map);
168 :
169 : // A map of recognized function names to EventType. If it's name isn't
170 : // in this map before grinding starts then the function will not be parsed.
171 : std::map<std::string, EventType> function_enum_map_;
172 : // The set of unrecognized function names. Any functions that are encountered
173 : // but not found in |function_enum_map_| will be recorded here for logging
174 : // purposes.
175 : std::set<std::string> missing_events_;
176 :
177 : // Storage for stories and plotlines. Stories are kept separate from the
178 : // ProcessData that indexes them so that the ProcessData can remain easily
179 : // copyable and compatible with STL containers.
180 : ScopedVector<bard::Story> stories_;
181 : std::map<DWORD, ProcessData> process_data_map_;
182 :
183 : // Set to true if a parse error occurs.
184 : bool parse_error_;
185 :
186 : private:
187 : DISALLOW_COPY_AND_ASSIGN(MemReplayGrinder);
188 : };
189 :
190 : // DetailedFunctionCall records can only be parsed if the required
191 : // FunctionNameTable record has already been parsed. Unfortunately, these can
192 : // arrive out of order so sometimes the function call parsing needs to be
193 : // deferred. This structure houses the necessary information (effectively the
194 : // input arguments to OnDetailedFunctionCall).
195 : class MemReplayGrinder::PendingDetailedFunctionCall {
196 : public:
197 : PendingDetailedFunctionCall(base::Time time,
198 : DWORD thread_id,
199 : const TraceDetailedFunctionCall* data);
200 :
201 E : const base::Time& time() const { return time_; }
202 E : DWORD thread_id() const { return thread_id_; }
203 E : const TraceDetailedFunctionCall* data() const {
204 E : return reinterpret_cast<const TraceDetailedFunctionCall*>(data_.data());
205 E : }
206 :
207 : private:
208 : base::Time time_;
209 : DWORD thread_id_;
210 : std::vector<uint8_t> data_;
211 : };
212 :
213 : // Houses timestamps and PlotLine data associated with a thread ID. This is
214 : // indexed by thread ID in a containing ProcessData.
215 : struct MemReplayGrinder::ThreadData {
216 E : ThreadData() : plot_line(nullptr) {}
217 :
218 : // The timestamps associated with the events in the plot line.
219 : std::vector<uint64_t> timestamps;
220 : // The PlotLine representing the events in this thread.
221 : bard::Story::PlotLine* plot_line;
222 : };
223 :
224 : // Houses all data associated with a single process during grinding. This is
225 : // indexed in a map by |process_id|.
226 : struct MemReplayGrinder::ProcessData {
227 E : ProcessData() : process_id(0), story(nullptr) {}
228 :
229 : // The process ID.
230 : DWORD process_id;
231 : // All pre-existing heaps. The first is the process heap.
232 : std::vector<const void*> existing_heaps;
233 : // Map from trace file function ID to EventType enumeration.
234 : std::map<uint32_t, EventType> function_id_map;
235 : // The set of function IDs for which definitions have not yet been seen.
236 : // When this set is drained all the pending_calls can be processed.
237 : std::unordered_set<uint32_t> pending_function_ids;
238 : // The list of detailed function calls that is pending processing.
239 : PendingDetailedFunctionCalls pending_calls;
240 : // The story holding events for this process. Ownership is external
241 : // to this object.
242 : bard::Story* story;
243 : // A map of thread ID to the associated thread data.
244 : std::map<DWORD, ThreadData> thread_data_map;
245 : };
246 :
247 : // An iterator-like object for events in a story, sorted by their associated
248 : // timestamp.
249 : struct MemReplayGrinder::ThreadDataIterator {
250 E : uint64_t timestamp() const { return thread_data->timestamps[index]; }
251 :
252 E : bard::Story::PlotLine* plot_line() const { return thread_data->plot_line; }
253 :
254 E : bard::EventInterface* event() const {
255 E : return (*thread_data->plot_line)[index];
256 E : }
257 :
258 E : const bard::EventInterface* inner_event() const {
259 E : auto evt = event();
260 E : if (evt->type() != EventInterface::kLinkedEvent)
261 E : return evt;
262 i : auto e = reinterpret_cast<const bard::events::LinkedEvent*>(evt);
263 i : return e->event();
264 E : }
265 :
266 : // STL heaps are max heaps, so the comparison operator is reversed to create
267 : // a min heap.
268 E : bool operator<(const ThreadDataIterator& rhs) const {
269 E : return timestamp() > rhs.timestamp();
270 E : }
271 :
272 : // Increments this iterator. Returns true if there are events remaining
273 : // in the associated plot line.
274 E : bool increment() {
275 E : ++index;
276 E : return index < thread_data->timestamps.size();
277 E : }
278 :
279 : // Comparison operator required for use in unordered_set.
280 i : bool operator==(const ThreadDataIterator& rhs) const {
281 i : return thread_data == rhs.thread_data && index == rhs.index;
282 i : }
283 :
284 : ThreadData* thread_data;
285 : size_t index;
286 : };
287 :
288 : struct MemReplayGrinder::ThreadDataIteratorHashFunctor {
289 E : size_t operator()(const ThreadDataIterator& tdi) const {
290 E : return reinterpret_cast<size_t>(tdi.thread_data) ^ tdi.index;
291 E : }
292 : };
293 :
294 : // A small structure for housing information about an object during
295 : // grinding.
296 : class MemReplayGrinder::ObjectInfo {
297 : public:
298 : using LastUseMap = std::unordered_map<ThreadData*, size_t>;
299 :
300 : explicit ObjectInfo(const ThreadDataIterator& iter);
301 :
302 : // Gets events associated with this object.
303 E : bool alive() const { return alive_; }
304 E : const ThreadDataIterator& created() const { return created_; }
305 i : const ThreadDataIterator& destroyed() const { return destroyed_; }
306 E : const LastUseMap& last_use() const { return last_use_; }
307 :
308 : void SetCreated(const ThreadDataIterator& iter);
309 : void SetLastUse(const ThreadDataIterator& iter);
310 : void SetDestroyed(const ThreadDataIterator& iter);
311 :
312 : private:
313 : // Disallow use of the default constructor.
314 : ObjectInfo() {}
315 :
316 : // Indicates whether or not the object is alive.
317 : bool alive_;
318 :
319 : // The events that create and destroy this object.
320 : ThreadDataIterator created_;
321 : ThreadDataIterator destroyed_;
322 :
323 : // Per thread, the most recent use of this object. This map uses an exploded
324 : // ThreadDataIterator as key and value.
325 : LastUseMap last_use_;
326 :
327 : // Copy and assignment is allowed in order to keep this object compatible
328 : // with STL containers.
329 : };
330 :
331 : // Houses inputs and outputs of an input, by type.
332 : struct MemReplayGrinder::EventObjects {
333 : void* created;
334 : void* destroyed;
335 : std::vector<void*> used;
336 : };
337 :
338 : } // namespace grinders
339 : } // namespace grinder
340 :
341 : #endif // SYZYGY_GRINDER_GRINDERS_MEM_REPLAY_GRINDER_H_
|