1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // The DiaBrowser browses a Debug Interface Access (DIA) data source according
16 : // to a set of registered patterns of interest, returning the encountered
17 : // symbols to the provided callback. Patterns are constructed using the
18 : // PatternBuilder class, which themselves are built using the factory functions
19 : // in the 'builder' namespace.
20 : //
21 : // For more information on DIA, refer to the MSDN documentation:
22 : // http://msdn.microsoft.com/en-us/library/x93ctkx8(v=vs.80).aspx
23 : //
24 : // TODO(chrisha): If needed, we could allow the assignment of a pattern to a
25 : // class, and use 'single visit per class' semantics. In the absence of
26 : // a class, each pattern would be given its own class.
27 :
28 : #ifndef SYZYGY_PE_DIA_BROWSER_H_
29 : #define SYZYGY_PE_DIA_BROWSER_H_
30 :
31 : #include <dia2.h>
32 : #include <limits.h>
33 : #include <windows.h>
34 : #include <bitset>
35 : #include <set>
36 : #include <utility>
37 : #include <vector>
38 :
39 : #include "base/callback.h"
40 : #include "base/memory/scoped_ptr.h"
41 : #include "base/win/scoped_comptr.h"
42 :
43 : namespace pe {
44 :
45 : // We declare a few constants to make it easy to iterate through the SymTagEnum
46 : // (declared in cvconst.h).
47 : enum SymTagConstants {
48 : kSymTagBegin = SymTagExe,
49 : kSymTagEnd = SymTagMax,
50 : kSymTagCount = kSymTagEnd - kSymTagBegin
51 : };
52 :
53 : typedef enum SymTagEnum SymTag;
54 : typedef std::bitset<kSymTagCount> SymTagBitSet;
55 :
56 : extern const SymTag kSymTagInvalid;
57 :
58 : namespace builder {
59 : class Proxy;
60 : } // namespace builder
61 :
62 : // The DiaBrowser browses a DIA data source; see the comment at the top of this
63 : // header file for more detail.
64 : class DiaBrowser {
65 : public:
66 : typedef base::win::ScopedComPtr<IDiaSymbol> SymbolPtr;
67 : typedef std::vector<SymbolPtr> SymbolPtrVector;
68 : typedef std::vector<SymTag> SymTagVector;
69 :
70 : // Used by the callback to provide feedback regarding how the search should
71 : // proceed from the point of a partial match checkpoint, or a full match.
72 : enum BrowserDirective {
73 : // Continue browsing as per normal.
74 : kBrowserContinue,
75 :
76 : // Stop browsing on this particular search path for this pattern.
77 : kBrowserTerminatePath,
78 :
79 : // Stop browsing for any further matches to this pattern.
80 : kBrowserTerminatePattern,
81 :
82 : // Stop the browser entirely.
83 : kBrowserTerminateAll,
84 :
85 : // Stop the browser and return in error.
86 : kBrowserAbort
87 : };
88 :
89 : // The match callback is invoked for each symbol on a matched pattern element
90 : // that has a callback. The callback receives the following parameters:
91 : // 1. const DiaBrowser& dia_browser the invoking DiaBrowser
92 : // 2. const SymTagVector& tag_lineage the stack of matched tags.
93 : // 3. const SymbolPtrVector& symbol_lineage the stack of matched symbols.
94 : // It returns a BrowserDirective, telling DiaBrowser how to proceed.
95 : typedef base::Callback<BrowserDirective(
96 : const DiaBrowser&,
97 : const SymTagVector&,
98 : const SymbolPtrVector&)> MatchCallback;
99 :
100 : // The basic element of a pattern.
101 : struct PatternElement;
102 :
103 : // Acts as a lightweight interface for building patterns.
104 : // Patterns are regex-like constructions that attempt to match paths in
105 : // a tree of IDiaSymbols.
106 : class PatternBuilder;
107 :
108 : ~DiaBrowser();
109 :
110 : // Adds a pattern to the DiaBrowser. Returns false if the given pattern
111 : // can't be added. Currently, this will only occur if the given pattern
112 : // allows a null match. More precisely, a pattern will match null if the
113 : // root node of the pattern is an exit node of the entire pattern. Such
114 : // patterns are strictly forbidden. An example of such a pattern:
115 : //
116 : // Opt(SymTagCompiland)
117 : //
118 : // Similarly, any pattern that can never match will be ignored. An example
119 : // of such a pattern is: Not(SymTagNull).
120 : //
121 : // For full details on how to construct patterns, see the comment preceding
122 : // the 'builder' namespace.
123 : // @param pattern_builder_proxy The pattern to register, built by an
124 : // instance of builder::Proxy.
125 : // @param push_callback The callback that will be invoked when the symbol is
126 : // matched and initially visited.
127 : // @param pop_callback If provided this callback will be invoked when the
128 : // matched element is popped off the stack of matches as the browser
129 : // retreats up the stack in its depth-first search. Allows clients to
130 : // maintain a shadow stack with metadata.
131 : bool AddPattern(const builder::Proxy& pattern_builder_proxy,
132 : const MatchCallback& push_callback);
133 : bool AddPattern(const builder::Proxy& pattern_builder_proxy,
134 : const MatchCallback& push_callback,
135 : const MatchCallback& pop_callback);
136 :
137 : // Browses through the DIA tree starting from the given root,
138 : // matching existing patterns and calling their callbacks.
139 : // Returns true when the browse terminates naturally, false if any errors
140 : // were encountered. You can not call Browse twice simultaneously on the
141 : // same DiaBrowser! (In other words, don't call it again from within a
142 : // callback.)
143 : bool Browse(IDiaSymbol* root);
144 :
145 : protected:
146 : // The following protected functions are intended for use by the GTest
147 : // fixture only.
148 :
149 : // Tests a vector of SymTags to see if they would match a given pattern.
150 : // Returns the number of ways this sequence matched any of the patterns.
151 : size_t TestMatch(const SymTagVector& sym_tags) const;
152 :
153 : private:
154 : // This initializes the search front and other bookkeeping structures.
155 : void PrepareForBrowse();
156 :
157 : // Cleans up our various bookkeeping data structures. After calling this
158 : // the DiaBrowser can be reused.
159 : void Reset();
160 :
161 : // This advances our search front by trying to advance the symbol with
162 : // @p sym_tag and @p symbol_id along all possible paths. Any associated
163 : // callbacks will also be invoked and their directives handled. It also
164 : // populates sym_tags with the set of tags that will match at the next level
165 : // of recursion in the search.
166 : // This can return a reduced subset of BrowserDirective, namely:
167 : // kBrowserContinue, kBrowserTerminatePath, kBrowserTerminateAll,
168 : // or kBrowserAbort.
169 : BrowserDirective PushMatch(SymTag sym_tag, uint32 symbol_id,
170 : SymTagBitSet* sym_tags);
171 :
172 : // This rolls back our search stack by one level, calling pop callbacks.
173 : DiaBrowser::BrowserDirective PopMatch(bool do_callbacks);
174 :
175 : // The actual implementation of Browse, modulo some startup stuff.
176 : // This can return a reduced subset of BrowserDirective, namely:
177 : // kBrowserContinue, kBrowserTerminateAll, or kBrowserAbort.
178 : BrowserDirective BrowseImpl(IDiaSymbol* root, size_t depth);
179 :
180 : // This iterates all symbols with symbol tag @p sym_tag that are immediate
181 : // descendants of @p root.
182 : // This can return a reduced subset of BrowserDirective, namely:
183 : // kBrowserContinue, kBrowserTerminateAll, or kBrowserAbort.
184 : BrowserDirective BrowseEnum(IDiaSymbol* root, size_t depth, SymTag sym_tag);
185 :
186 : // The set of visited nodes. The first parameter is the address of the
187 : // element that matched, the second is the actual ID of the visited node.
188 : // The PDB has cyclic connections, so we must use some form of limiting to
189 : // ensure that cycles get broken. However, we want the symbol to be reachable
190 : // via each matching pattern and not just via the first one walked.
191 : // TODO(chrisha): Use a hash_map here instead, to minimize allocations?
192 : std::set<std::pair<const PatternElement*, uint32> > visited_;
193 :
194 : // The search patterns we're using. All patterns stored here must be valid.
195 : // We manually manage memory because we require a 'delete []' to be called
196 : // on each pattern, something which scoped_array does not do. Similarly,
197 : // scoped_ptr is unsuitable for use in a std::vector.
198 : std::vector<PatternElement*> patterns_;
199 :
200 : // Stores the path of matched symbol tags.
201 : SymTagVector tag_lineage_;
202 : // Stores the path of matched symbols.
203 : SymbolPtrVector symbol_lineage_;
204 :
205 : // The front of our advancing search. This stores those PatternElements that
206 : // are active.
207 : std::vector<PatternElement*> front_;
208 :
209 : // This stores the size of the front at the given search stack depth. During
210 : // the search this will never be empty.
211 : std::vector<size_t> front_size_;
212 :
213 : // This indicates which of the search patterns have been stopped.
214 : std::vector<bool> stopped_;
215 :
216 : // A stack of SymTagBitSet objects used for guiding the search at each
217 : // level of recursion. We use a single vector of these to minimize
218 : // reallocation at every call to BrowseImpl.
219 : std::vector<SymTagBitSet> sym_tags_;
220 : };
221 :
222 : // The builder namespace contains the factory functions that are used to create
223 : // search patterns. They are in their own namespace so as not to pollute the
224 : // base namespace with their short and potentially common functions names, but
225 : // also so that they can be more easily used with 'using builder' or
226 : // 'using builder::<function name>' declarations.
227 : namespace builder {
228 :
229 : typedef DiaBrowser::PatternBuilder PatternBuilder;
230 : typedef DiaBrowser::MatchCallback MatchCallback;
231 :
232 : // A lightweight proxy class for inputting arguments to the PatternBuilder
233 : // factories. This proxy allows us to hide the PatternBuilder declaration in
234 : // the .cc file.
235 : class Proxy {
236 : public:
237 : Proxy();
238 : explicit Proxy(const PatternBuilder& proxy);
239 :
240 : // These are left implicit so that SymTags and SymTagBitSets can be used
241 : // natively in the 'builder' factories.
242 : Proxy(SymTag sym_tag); // NOLINT
243 : Proxy(SymTagBitSet sym_tags); // NOLINT
244 :
245 : ~Proxy();
246 :
247 : // Dereferencing and casting operators that give access to the underlying
248 : // PatternBuilder object.
249 E : const PatternBuilder* operator->() const { return pattern_builder_; }
250 : operator const PatternBuilder*() const { return pattern_builder_; }
251 E : const PatternBuilder& operator*() const { return *pattern_builder_; }
252 E : operator const PatternBuilder&() const { return *pattern_builder_; }
253 :
254 : private:
255 : PatternBuilder* pattern_builder_;
256 : };
257 :
258 : // The functions in this namespace act as factories for PatternBuilders.
259 : // Each DIA symbol has associated with it a path of symbol tags. For a full list
260 : // of DIA symbols, refer to cvconst.h or the MSDN documentation here:
261 : // http://msdn.microsoft.com/en-us/library/bkedss5f(v=vs.80).aspx
262 : //
263 : // By convention we represent a path of symbol tags in the following manner:
264 : //
265 : // Compiland.Function.Block.Block.Data
266 : //
267 : // PatternBuilder is used to build regex-like expressions over these paths,
268 : // where each tag is treated somewhat like a letter in a standard string
269 : // regex. For example, the following pattern would exactly match the example
270 : // path, and only the example path:
271 : //
272 : // Seq(Compiland, Function, Block, Block, Data)
273 : //
274 : // Suppose we also wanted to match
275 : //
276 : // Compiland.Function.Block.Data, and
277 : // Compiland.Function.Data
278 : //
279 : // To do so, we wish to make the Block tag free to be matched zero or more
280 : // times, accomplished by the following pattern:
281 : //
282 : // Seq(Compiland, Function, Star(Block), Data)
283 : //
284 : // The patterns created by the pattern builder have an implicit '^' anchor
285 : // at the beginning, forcing them to match from the beginning of the symbol
286 : // path. In all other senses, they behave identically to their standard regex
287 : // counterparts.
288 : //
289 : // We specifically disallow any pattern that would cause a successful match
290 : // of the null string. For example, all of the following patterns would
291 : // successfully match the null string:
292 : //
293 : // Opt(Compiland)
294 : // Or(Opt(Compiland),Opt(Data))
295 : // Opt(Or(Compiland, Data))
296 : // Star(Data)
297 : //
298 : // Such patterns can be created, but will fail to be inserted into a
299 : // DiaBrowser instance.
300 : //
301 : // In order to maintain consistency with IDiaSymbol::findChildren, we treat
302 : // the special value SymTagNull as a wild-card, matching any of the other
303 : // SymTagEnum values. See MSDN documentation for more info:
304 : // http://msdn.microsoft.com/en-us/library/yfx1573w(v=vs.80).aspx
305 :
306 : // Represents a pattern that matches a single SymTag. Equivalent to
307 : // regex /a/.
308 : Proxy Tag(SymTag sym_tag);
309 :
310 : // Represents a pattern that matches a set of SymTags represented by
311 : // a SymTagBitSet. Equivalent to regex /(a|b|c)/.
312 : Proxy Tags(SymTagBitSet sym_tags);
313 :
314 : // Represents a pattern that matches at least two tags. Equivalent to
315 : // regex /(a|b|c)/.
316 : Proxy Tags(SymTag st0, SymTag st1,
317 : SymTag st2 = kSymTagInvalid, SymTag st3 = kSymTagInvalid,
318 : SymTag st4 = kSymTagInvalid, SymTag st5 = kSymTagInvalid,
319 : SymTag st6 = kSymTagInvalid, SymTag st7 = kSymTagInvalid);
320 :
321 : // Represents a pattern that matches all but the SymTags represented
322 : // by the given SymTagBitSet. Equivalent to regex pattern /[^abc]/.
323 : Proxy Not(SymTagBitSet sym_tags);
324 :
325 : // Represents a pattern that matches all but the indicated SymTags.
326 : // Equivalent to regex pattern /[^abc]/. Careful, doing Not(SymTagNull)
327 : // will allow you to build an empty SymTagSet which will match nothing.
328 : // This will fail on AddPattern, however.
329 : Proxy Not(SymTag st0,
330 : SymTag st1 = kSymTagInvalid,
331 : SymTag st2 = kSymTagInvalid, SymTag st3 = kSymTagInvalid,
332 : SymTag st4 = kSymTagInvalid, SymTag st5 = kSymTagInvalid,
333 : SymTag st6 = kSymTagInvalid, SymTag st7 = kSymTagInvalid);
334 :
335 : // Represents a pattern that matches the given sub-patterns in order.
336 : // Equivalent to regex /abc/.
337 : Proxy Seq(const Proxy& p0, const Proxy& p1,
338 : const Proxy& p2 = Proxy(), const Proxy& p3 = Proxy(),
339 : const Proxy& p4 = Proxy(), const Proxy& p5 = Proxy(),
340 : const Proxy& p6 = Proxy(), const Proxy& p7 = Proxy());
341 :
342 : // Represents a pattern that matches exactly one of the given sub-patterns.
343 : // Equivalent to regex /(a|b|c)/.
344 : Proxy Or(const Proxy& p0, const Proxy& p1,
345 : const Proxy& p2 = Proxy(), const Proxy& p3 = Proxy(),
346 : const Proxy& p4 = Proxy(), const Proxy& p5 = Proxy(),
347 : const Proxy& p6 = Proxy(), const Proxy& p7 = Proxy());
348 :
349 : // Represents a pattern that may or may not match the given sub-pattern.
350 : // Equivalent to regex /a?/.
351 : Proxy Opt(const Proxy& p);
352 :
353 : // Represents a pattern that may be matched one or more times.
354 : // Equivalent to regex /a+/.
355 : Proxy Plus(const Proxy& p);
356 :
357 : // Represents a pattern that may be matched zero or more times.
358 : // Equivalent to regex /a*/.
359 : Proxy Star(const Proxy& p);
360 :
361 : // Represents a pattern which, when fully matched, will invoke the given
362 : // @p callback. This callback can direct the behaviour of the match,
363 : // causing it to terminate early. See BrowserDirective for more details.
364 : // If two callbacks are provided the first is called when the pattern is
365 : // matched and the second is called when the matching element is popped off
366 : // the symbol stack as the DFS retreats.
367 : Proxy Callback(const Proxy& p, const MatchCallback& push_callback);
368 : Proxy Callback(const Proxy& p,
369 : const MatchCallback& push_callback,
370 : const MatchCallback& pop_callback);
371 :
372 :
373 : } // namespace builder
374 :
375 : } // namespace pe
376 :
377 : #endif // SYZYGY_PE_DIA_BROWSER_H_
|