1 : // Copyright 2012 Google Inc.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // The DiaBrowser browses a Debug Interface Access (DIA) data source according
16 : // to a set of registered patterns of interest, returning the encountered
17 : // symbols to the provided callback. Patterns are constructed using the
18 : // PatternBuilder class, which themselves are built using the factory functions
19 : // in the 'builder' namespace.
20 : //
21 : // For more information on DIA, refer to the MSDN documentation:
22 : // http://msdn.microsoft.com/en-us/library/x93ctkx8(v=vs.80).aspx
23 : //
24 : // TODO(chrisha): If needed, we could allow the assignment of a pattern to a
25 : // class, and use 'single visit per class' semantics. In the absence of
26 : // a class, each pattern would be given its own class.
27 :
28 : #ifndef SYZYGY_PE_DIA_BROWSER_H_
29 : #define SYZYGY_PE_DIA_BROWSER_H_
30 :
31 : #include <dia2.h>
32 : #include <limits.h>
33 : #include <windows.h>
34 : #include <bitset>
35 : #include <set>
36 : #include <utility>
37 : #include <vector>
38 :
39 : #include "base/callback.h"
40 : #include "base/memory/scoped_ptr.h"
41 : #include "base/win/scoped_comptr.h"
42 :
43 : namespace pe {
44 :
45 : // We declare a few constants to make it easy to iterate through the SymTagEnum
46 : // (declared in cvconst.h).
47 : enum SymTagConstants {
48 : kSymTagBegin = SymTagExe,
49 : kSymTagEnd = SymTagMax,
50 : kSymTagCount = kSymTagEnd - kSymTagBegin
51 : };
52 :
53 : typedef enum SymTagEnum SymTag;
54 : typedef std::bitset<kSymTagCount> SymTagBitSet;
55 :
56 : extern const SymTag kSymTagInvalid;
57 :
58 : namespace builder {
59 : class Proxy;
60 : } // namespace builder
61 :
62 : // The DiaBrowser browses a DIA data source; see the comment at the top of this
63 : // header file for more detail.
64 : class DiaBrowser {
65 : public:
66 : typedef base::win::ScopedComPtr<IDiaSymbol> SymbolPtr;
67 : typedef std::vector<SymbolPtr> SymbolPtrVector;
68 : typedef std::vector<SymTag> SymTagVector;
69 :
70 : // Used by the callback to provide feedback regarding how the search should
71 : // proceed from the point of a partial match checkpoint, or a full match.
72 : enum BrowserDirective {
73 : // Continue browsing as per normal.
74 : kBrowserContinue,
75 :
76 : // Stop browsing on this particular search path for this pattern.
77 : kBrowserTerminatePath,
78 :
79 : // Stop browsing for any further matches to this pattern.
80 : kBrowserTerminatePattern,
81 :
82 : // Stop the browser entirely.
83 : kBrowserTerminateAll,
84 :
85 : // Stop the browser and return in error.
86 : kBrowserAbort
87 : };
88 :
89 : // The match callback is invoked for each symbol on a matched pattern element
90 : // that has a callback. The callback receives the following parameters:
91 : // 1. const DiaBrowser& dia_browser the invoking DiaBrowser
92 : // 2. const SymTagVector& tag_lineage the stack of matched tags.
93 : // 3. const SymbolPtrVector& symbol_lineage the stack of matched symbols.
94 : // It returns a BrowserDirective, telling DiaBrowser how to proceed.
95 : typedef base::Callback<BrowserDirective(
96 : const DiaBrowser&,
97 : const SymTagVector&,
98 : const SymbolPtrVector&)> MatchCallback;
99 :
100 : // The basic element of a pattern.
101 : struct PatternElement;
102 :
103 : // Acts as a lightweight interface for building patterns.
104 : // Patterns are regex-like constructions that attempt to match paths in
105 : // a tree of IDiaSymbols.
106 : class PatternBuilder;
107 :
108 : ~DiaBrowser();
109 :
110 : // Adds a pattern to the DiaBrowser. Returns false if the given pattern
111 : // can't be added. Currently, this will only occur if the given pattern
112 : // allows a null match. More precisely, a pattern will match null if the
113 : // root node of the pattern is an exit node of the entire pattern. Such
114 : // patterns are strictly forbidden. An example of such a pattern:
115 : //
116 : // Opt(SymTagCompiland)
117 : //
118 : // Similarly, any pattern that can never match will be ignored. An example
119 : // of such a pattern is: Not(SymTagNull).
120 : //
121 : // For full details on how to construct patterns, see the comment preceding
122 : // the 'builder' namespace.
123 : bool AddPattern(const builder::Proxy& pattern_builder_proxy,
124 : const MatchCallback& callback);
125 :
126 : // Browses through the DIA tree starting from the given root,
127 : // matching existing patterns and calling their callbacks.
128 : // Returns true when the browse terminates naturally, false if any errors
129 : // were encountered. You can not call Browse twice simultaneously on the
130 : // same DiaBrowser! (In other words, don't call it again from within a
131 : // callback.)
132 : bool Browse(IDiaSymbol* root);
133 :
134 : protected:
135 : // The following protected functions are intended for use by the GTest
136 : // fixture only.
137 :
138 : // Tests a vector of SymTags to see if they would match a given pattern.
139 : // Returns the number of ways this sequence matched any of the patterns.
140 : size_t TestMatch(const SymTagVector& sym_tags) const;
141 :
142 : private:
143 : // This initializes the search front and other bookkeeping structures.
144 : void PrepareForBrowse();
145 :
146 : // Cleans up our various bookkeeping data structures. After calling this
147 : // the DiaBrowser can be reused.
148 : void Reset();
149 :
150 : // This advances our search front by trying to advance the symbol with
151 : // @p sym_tag and @p symbol_id along all possible paths. Any associated
152 : // callbacks will also be invoked and their directives handled. It also
153 : // populates sym_tags with the set of tags that will match at the next level
154 : // of recursion in the search.
155 : // This can return a reduced subset of BrowserDirective, namely:
156 : // kBrowserContinue, kBrowserTerminatePath, kBrowserTerminateAll,
157 : // or kBrowserAbort.
158 : BrowserDirective PushMatch(SymTag sym_tag, uint32 symbol_id,
159 : SymTagBitSet* sym_tags);
160 :
161 : // This rolls back our search stack by one level.
162 : void PopMatch();
163 :
164 : // The actual implementation of Browse, modulo some startup stuff.
165 : // This can return a reduced subset of BrowserDirective, namely:
166 : // kBrowserContinue, kBrowserTerminateAll, or kBrowserAbort.
167 : BrowserDirective BrowseImpl(IDiaSymbol* root, size_t depth);
168 :
169 : // This iterates all symbols with symbol tag @p sym_tag that are immediate
170 : // descendants of @p root.
171 : // This can return a reduced subset of BrowserDirective, namely:
172 : // kBrowserContinue, kBrowserTerminateAll, or kBrowserAbort.
173 : BrowserDirective BrowseEnum(IDiaSymbol* root, size_t depth, SymTag sym_tag);
174 :
175 : // The set of visited nodes. The first parameter is the address of the
176 : // element that matched, the second is the actual ID of the visited node.
177 : // The PDB has cyclic connections, so we must use some form of limiting to
178 : // ensure that cycles get broken. However, we want the symbol to be reachable
179 : // via each matching pattern and not just via the first one walked.
180 : // TODO(chrisha): Use a hash_map here instead, to minimize allocations?
181 : std::set<std::pair<const PatternElement*, uint32> > visited_;
182 :
183 : // The search patterns we're using. All patterns stored here must be valid.
184 : // We manually manage memory because we require a 'delete []' to be called
185 : // on each pattern, something which scoped_array does not do. Similarly,
186 : // scoped_ptr is unsuitable for use in a std::vector.
187 : std::vector<PatternElement*> patterns_;
188 :
189 : // Stores the path of matched symbol tags.
190 : SymTagVector tag_lineage_;
191 : // Stores the path of matched symbols.
192 : SymbolPtrVector symbol_lineage_;
193 :
194 : // The front of our advancing search. This stores those PatternElements that
195 : // are active.
196 : std::vector<PatternElement*> front_;
197 :
198 : // This stores the size of the front at the given search stack depth. During
199 : // the search this will never be empty.
200 : std::vector<size_t> front_size_;
201 :
202 : // This indicates which of the search patterns have been stopped.
203 : std::vector<bool> stopped_;
204 :
205 : // A stack of SymTagBitSet objects used for guiding the search at each
206 : // level of recursion. We use a single vector of these to minimize
207 : // reallocation at every call to BrowseImpl.
208 : std::vector<SymTagBitSet> sym_tags_;
209 : };
210 :
211 : // The builder namespace contains the factory functions that are used to create
212 : // search patterns. They are in their own namespace so as not to pollute the
213 : // base namespace with their short and potentially common functions names, but
214 : // also so that they can be more easily used with 'using builder' or
215 : // 'using builder::<function name>' declarations.
216 : namespace builder {
217 :
218 : typedef DiaBrowser::PatternBuilder PatternBuilder;
219 : typedef DiaBrowser::MatchCallback MatchCallback;
220 :
221 : // A lightweight proxy class for inputting arguments to the PatternBuilder
222 : // factories. This proxy allows us to hide the PatternBuilder declaration in
223 : // the .cc file.
224 : class Proxy {
225 : public:
226 : Proxy();
227 : explicit Proxy(const PatternBuilder& proxy);
228 :
229 : // These are left implicit so that SymTags and SymTagBitSets can be used
230 : // natively in the 'builder' factories.
231 : Proxy(SymTag sym_tag); // NOLINT
232 : Proxy(SymTagBitSet sym_tags); // NOLINT
233 :
234 : ~Proxy();
235 :
236 : // Dereferencing and casting operators that give access to the underlying
237 : // PatternBuilder object.
238 E : const PatternBuilder* operator->() const { return pattern_builder_; }
239 : operator const PatternBuilder*() const { return pattern_builder_; }
240 E : const PatternBuilder& operator*() const { return *pattern_builder_; }
241 E : operator const PatternBuilder&() const { return *pattern_builder_; }
242 :
243 : private:
244 : PatternBuilder* pattern_builder_;
245 : };
246 :
247 : // The functions in this namespace act as factories for PatternBuilders.
248 : // Each DIA symbol has associated with it a path of symbol tags. For a full list
249 : // of DIA symbols, refer to cvconst.h or the MSDN documentation here:
250 : // http://msdn.microsoft.com/en-us/library/bkedss5f(v=vs.80).aspx
251 : //
252 : // By convention we represent a path of symbol tags in the following manner:
253 : //
254 : // Compiland.Function.Block.Block.Data
255 : //
256 : // PatternBuilder is used to build regex-like expressions over these paths,
257 : // where each tag is treated somewhat like a letter in a standard string
258 : // regex. For example, the following pattern would exactly match the example
259 : // path, and only the example path:
260 : //
261 : // Seq(Compiland, Function, Block, Block, Data)
262 : //
263 : // Suppose we also wanted to match
264 : //
265 : // Compiland.Function.Block.Data, and
266 : // Compiland.Function.Data
267 : //
268 : // To do so, we wish to make the Block tag free to be matched zero or more
269 : // times, accomplished by the following pattern:
270 : //
271 : // Seq(Compiland, Function, Star(Block), Data)
272 : //
273 : // The patterns created by the pattern builder have an implicit '^' anchor
274 : // at the beginning, forcing them to match from the beginning of the symbol
275 : // path. In all other senses, they behave identically to their standard regex
276 : // counterparts.
277 : //
278 : // We specifically disallow any pattern that would cause a successful match
279 : // of the null string. For example, all of the following patterns would
280 : // successfully match the null string:
281 : //
282 : // Opt(Compiland)
283 : // Or(Opt(Compiland),Opt(Data))
284 : // Opt(Or(Compiland, Data))
285 : // Star(Data)
286 : //
287 : // Such patterns can be created, but will fail to be inserted into a
288 : // DiaBrowser instance.
289 : //
290 : // In order to maintain consistency with IDiaSymbol::findChildren, we treat
291 : // the special value SymTagNull as a wild-card, matching any of the other
292 : // SymTagEnum values. See MSDN documentation for more info:
293 : // http://msdn.microsoft.com/en-us/library/yfx1573w(v=vs.80).aspx
294 :
295 : // Represents a pattern that matches a single SymTag. Equivalent to
296 : // regex /a/.
297 : Proxy Tag(SymTag sym_tag);
298 :
299 : // Represents a pattern that matches a set of SymTags represented by
300 : // a SymTagBitSet. Equivalent to regex /(a|b|c)/.
301 : Proxy Tags(SymTagBitSet sym_tags);
302 :
303 : // Represents a pattern that matches at least two tags. Equivalent to
304 : // regex /(a|b|c)/.
305 : Proxy Tags(SymTag st0, SymTag st1,
306 : SymTag st2 = kSymTagInvalid, SymTag st3 = kSymTagInvalid,
307 : SymTag st4 = kSymTagInvalid, SymTag st5 = kSymTagInvalid,
308 : SymTag st6 = kSymTagInvalid, SymTag st7 = kSymTagInvalid);
309 :
310 : // Represents a pattern that matches all but the SymTags represented
311 : // by the given SymTagBitSet. Equivalent to regex pattern /[^abc]/.
312 : Proxy Not(SymTagBitSet sym_tags);
313 :
314 : // Represents a pattern that matches all but the indicated SymTags.
315 : // Equivalent to regex pattern /[^abc]/. Careful, doing Not(SymTagNull)
316 : // will allow you to build an empty SymTagSet which will match nothing.
317 : // This will fail on AddPattern, however.
318 : Proxy Not(SymTag st0,
319 : SymTag st1 = kSymTagInvalid,
320 : SymTag st2 = kSymTagInvalid, SymTag st3 = kSymTagInvalid,
321 : SymTag st4 = kSymTagInvalid, SymTag st5 = kSymTagInvalid,
322 : SymTag st6 = kSymTagInvalid, SymTag st7 = kSymTagInvalid);
323 :
324 : // Represents a pattern that matches the given sub-patterns in order.
325 : // Equivalent to regex /abc/.
326 : Proxy Seq(const Proxy& p0, const Proxy& p1,
327 : const Proxy& p2 = Proxy(), const Proxy& p3 = Proxy(),
328 : const Proxy& p4 = Proxy(), const Proxy& p5 = Proxy(),
329 : const Proxy& p6 = Proxy(), const Proxy& p7 = Proxy());
330 :
331 : // Represents a pattern that matches exactly one of the given sub-patterns.
332 : // Equivalent to regex /(a|b|c)/.
333 : Proxy Or(const Proxy& p0, const Proxy& p1,
334 : const Proxy& p2 = Proxy(), const Proxy& p3 = Proxy(),
335 : const Proxy& p4 = Proxy(), const Proxy& p5 = Proxy(),
336 : const Proxy& p6 = Proxy(), const Proxy& p7 = Proxy());
337 :
338 : // Represents a pattern that may or may not match the given sub-pattern.
339 : // Equivalent to regex /a?/.
340 : Proxy Opt(const Proxy& p);
341 :
342 : // Represents a pattern that may be matched one or more times.
343 : // Equivalent to regex /a+/.
344 : Proxy Plus(const Proxy& p);
345 :
346 : // Represents a pattern that may be matched zero or more times.
347 : // Equivalent to regex /a*/.
348 : Proxy Star(const Proxy& p);
349 :
350 : // Represents a pattern which, when fully matched, will invoke the given
351 : // @p callback. This callback can direct the behaviour of the match,
352 : // causing it to terminate early. See BrowserDirective for more details.
353 : Proxy Callback(const Proxy& p, const MatchCallback& callback);
354 :
355 : } // namespace builder
356 :
357 : } // namespace pe
358 :
359 : #endif // SYZYGY_PE_DIA_BROWSER_H_
|