1 : // Copyright 2013 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/genfilter/filter_compiler.h"
16 :
17 : #include <stdio.h>
18 :
19 : #include "base/bind.h"
20 : #include "base/files/file_util.h"
21 : #include "base/strings/string_util.h"
22 : #include "base/strings/stringprintf.h"
23 : #include "base/strings/utf_string_conversions.h"
24 : #include "base/win/scoped_bstr.h"
25 : #include "base/win/scoped_com_initializer.h"
26 : #include "syzygy/common/com_utils.h"
27 : #include "syzygy/pe/dia_util.h"
28 : #include "syzygy/pe/find.h"
29 :
30 : namespace genfilter {
31 :
32 : namespace {
33 :
34 : const char kFunction[] = "function";
35 : const char kPublicSymbol[] = "public_symbol";
36 :
37 : const char* kRuleTypeStrings[] = { kFunction, kPublicSymbol };
38 : static_assert(arraysize(kRuleTypeStrings) == FilterCompiler::kRuleTypeCount,
39 : "Rule type string out of sync.");
40 :
// Reads a single line from |file| into |line|, replacing any previous content
// of |line|. The terminating newline, if one was read, is kept as the last
// character of |line|. Reaching end-of-file is not an error: whatever was read
// before it (possibly nothing) is returned. Returns false only if the stream
// reports a read error.
bool ReadLine(FILE* file, std::string* line) {
  DCHECK(file != NULL);
  DCHECK(line != NULL);

  line->clear();
  while (true) {
    int c = ::fgetc(file);
    if (c == EOF) {
      // fgetc returns EOF both at end-of-file and on a read error. The
      // stream's error indicator is the documented way to tell the two apart;
      // errno is not reliable here as library calls may modify it even when
      // they succeed.
      return ::ferror(file) == 0;
    }

    line->append(1, static_cast<char>(c));
    if (c == '\n')
      return true;
  }
}
62 :
// Strips a comment from |s| in place: the first '#' and everything following
// it are removed. A string that contains no '#' is left untouched.
void TrimComment(std::string* s) {
  DCHECK(s != NULL);
  const std::string::size_type hash_pos = s->find('#');
  if (hash_pos != std::string::npos)
    s->erase(hash_pos);
}
72 :
73 : } // namespace
74 :
75 E : bool FilterCompiler::Init(const base::FilePath& image_path) {
76 E : return Init(image_path, base::FilePath());
77 E : }
78 :
79 : bool FilterCompiler::Init(const base::FilePath& image_path,
80 E : const base::FilePath& pdb_path) {
81 E : image_path_ = image_path;
82 E : pdb_path_ = pdb_path;
83 :
84 : // Get the PDB path if none was provided.
85 E : if (pdb_path_.empty()) {
86 : // This logs verbosely for us on failure.
87 E : if (!pe::FindPdbForModule(image_path_, &pdb_path_))
88 E : return false;
89 :
90 E : if (pdb_path_.empty()) {
91 i : LOG(ERROR) << "Unable to find PDB for image: " << image_path_.value();
92 i : return false;
93 : }
94 E : } else {
95 : // If a PDB path was provided make sure it matches the image file.
96 E : if (!pe::PeAndPdbAreMatched(image_path_, pdb_path_)) {
97 E : LOG(ERROR) << "PDB file \"" << pdb_path_.value() << "\" does not match "
98 : << "image file \"" << image_path_.value() << "\".";
99 E : return false;
100 : }
101 : }
102 :
103 : // Get the module signature.
104 E : pe::PEFile pe_file;
105 E : if (!pe_file.Init(image_path)) {
106 i : LOG(ERROR) << "Unable to read module: " << image_path_.value();
107 i : return false;
108 : }
109 E : pe_file.GetSignature(&image_signature_);
110 :
111 E : return true;
112 E : }
113 :
114 : bool FilterCompiler::AddRule(ModificationType modification_type,
115 : RuleType rule_type,
116 E : const base::StringPiece& description) {
117 E : DCHECK_LE(0, rule_type);
118 E : DCHECK_GT(kRuleTypeCount, rule_type);
119 :
120 : // Generate source information for this rule.
121 E : std::string source_info("(no source file): ");
122 E : source_info.append(1, modification_type == kAddToFilter ? '+' : '-');
123 E : source_info.append(kRuleTypeStrings[rule_type]);
124 E : source_info.append(1, ':');
125 E : source_info.append(description.as_string());
126 :
127 E : if (!AddRule(modification_type, rule_type, description, source_info))
128 E : return false;
129 :
130 E : return true;
131 E : }
132 :
133 E : bool FilterCompiler::ParseFilterDescriptionFile(const base::FilePath& path) {
134 E : base::ScopedFILE file(base::OpenFile(path, "rb"));
135 E : if (file.get() == NULL) {
136 E : LOG(ERROR) << "Unable to open \"" << path.value() << "\" for reading.";
137 E : return false;
138 : }
139 :
140 E : static const RE kRuleRegex("^([+-])\\s*([a-zA-Z_]+)\\s*:\\s*(.+)$");
141 :
142 : // Convert the path to ASCII.
143 E : std::string path_utf8 = base::WideToUTF8(path.value());
144 :
145 : // Process the file one line at a time.
146 E : std::string line;
147 E : size_t line_number = 0;
148 E : size_t rules_added = 0;
149 E : while (!::feof(file.get())) {
150 E : ++line_number;
151 E : if (!ReadLine(file.get(), &line)) {
152 i : LOG(ERROR) << "Error reading from \"" << path.value() << "\".";
153 i : return false;
154 : }
155 E : TrimComment(&line);
156 E : base::TrimWhitespaceASCII(line, base::TRIM_ALL, &line);
157 :
158 : // Skip empty lines.
159 E : if (line.empty())
160 E : continue;
161 :
162 : // Parse the rule.
163 E : std::string mod, type, desc;
164 E : if (!kRuleRegex.FullMatch(line, &mod, &type, &desc)) {
165 E : LOG(ERROR) << "Unable to parse rule at line " << line_number
166 : << " of \"" << path.value() << "\".";
167 E : LOG(ERROR) << " Content: " << line;
168 E : return false;
169 : }
170 :
171 : // We are guaranteed that |mod| is "+" or "-" if the regex matches.
172 E : DCHECK_EQ(1u, mod.size());
173 : ModificationType mod_type =
174 E : (mod[0] == '+' ? kAddToFilter : kSubtractFromFilter);
175 :
176 : // Get the rule type.
177 E : RuleType rule_type = kFunctionRule;
178 E : type = base::ToLowerASCII(type);
179 E : if (type == kFunction) {
180 E : rule_type = kFunctionRule;
181 E : } else if (type == kPublicSymbol) {
182 E : rule_type = kPublicSymbolRule;
183 E : } else {
184 E : LOG(ERROR) << "Unknown rule type \"" << type << "\" at line "
185 : << line_number << " of \"" << path.value() << "\".";
186 E : return false;
187 : }
188 :
189 : // Generate the source information. This is so that we can have meaningful
190 : // log messages.
191 E : std::string source_info(path_utf8);
192 E : source_info.append(base::StringPrintf("(%d): ", line_number));
193 E : source_info.append(line);
194 :
195 : // Add the rule.
196 E : DCHECK(!desc.empty());
197 E : if (!AddRule(mod_type, rule_type, desc, source_info))
198 E : return false;
199 E : ++rules_added;
200 E : }
201 :
202 E : LOG(INFO) << "Added " << rules_added << " rule(s) from \"" << path.value()
203 : << "\".";
204 :
205 E : return true;
206 E : }
207 :
208 E : bool FilterCompiler::Compile(ImageFilter* filter) {
209 E : DCHECK(filter != NULL);
210 :
211 E : if (!CrawlSymbols())
212 i : return false;
213 :
214 E : if (!FillFilter(filter))
215 i : return false;
216 :
217 E : return true;
218 E : }
219 :
220 : bool FilterCompiler::AddRule(ModificationType modification_type,
221 : RuleType rule_type,
222 : const base::StringPiece& description,
223 E : const base::StringPiece& source_info) {
224 E : DCHECK_LE(0, rule_type);
225 E : DCHECK_GT(kRuleTypeCount, rule_type);
226 :
227 E : size_t index = rule_map_.size();
228 : Rule rule(index, modification_type, rule_type, image_signature_,
229 E : description, source_info);
230 :
231 E : if (!rule.regex.error().empty()) {
232 E : LOG(ERROR) << "Error adding rule.";
233 E : LOG(ERROR) << " Source: " << source_info;
234 E : LOG(ERROR) << " Error: " << rule.regex.error();
235 E : return false;
236 : }
237 :
238 : RuleMap::iterator rule_it =
239 E : rule_map_.insert(std::make_pair(index, rule)).first;
240 E : Rule* rule_ptr = &rule_it->second;
241 :
242 : // Update the vectors of rules by type.
243 E : rules_by_type_[rule_type].push_back(rule_ptr);
244 :
245 E : return true;
246 E : }
247 :
248 E : bool FilterCompiler::CrawlSymbols() {
249 : // We can bail early if there's no work to do.
250 E : if (rule_map_.empty())
251 i : return true;
252 :
253 E : base::win::ScopedComPtr<IDiaDataSource> data_source;
254 E : if (!pe::CreateDiaSource(data_source.Receive()))
255 i : return false;
256 :
257 E : base::win::ScopedComPtr<IDiaSession> session;
258 E : if (!pe::CreateDiaSession(pdb_path_, data_source.get(), session.Receive()))
259 i : return false;
260 :
261 : // Visit all compilands looking for symbols if we need to.
262 E : if (!rules_by_type_[kFunctionRule].empty()) {
263 E : pe::CompilandVisitor compiland_visitor(session.get());
264 : if (!compiland_visitor.VisitAllCompilands(
265 : base::Bind(&FilterCompiler::OnCompiland,
266 E : base::Unretained(this)))) {
267 i : return false;
268 : }
269 E : }
270 :
271 : // Visit public symbols if necessary.
272 E : if (!rules_by_type_[kPublicSymbolRule].empty()) {
273 : // Grab the global scope
274 E : base::win::ScopedComPtr<IDiaSymbol> global;
275 E : HRESULT hr = session->get_globalScope(global.Receive());
276 E : if (FAILED(hr)) {
277 i : LOG(ERROR) << "Failed to get the DIA global scope: "
278 : << common::LogHr(hr) << ".";
279 i : return false;
280 : }
281 :
282 E : pe::ChildVisitor public_symbol_visitor(global.get(), SymTagPublicSymbol);
283 : if (!public_symbol_visitor.VisitChildren(
284 : base::Bind(&FilterCompiler::OnPublicSymbol,
285 E : base::Unretained(this)))) {
286 i : return false;
287 : }
288 E : }
289 :
290 E : return true;
291 E : }
292 :
293 E : bool FilterCompiler::FillFilter(ImageFilter* filter) {
294 E : DCHECK(filter != NULL);
295 :
296 E : filter->signature = image_signature_;
297 : filter->filter = RelativeAddressFilter(
298 E : Range(RelativeAddress(0), image_signature_.module_size));
299 :
300 E : RuleMap::const_iterator it = rule_map_.begin();
301 E : size_t unmatched_rules = 0;
302 E : for (; it != rule_map_.end(); ++it) {
303 E : const Rule& rule = it->second;
304 E : if (rule.ranges.empty()) {
305 E : ++unmatched_rules;
306 E : LOG(WARNING) << "Unmatched rule: " << rule.source_info;
307 E : continue;
308 : }
309 :
310 : // Update the global filter with ranges matching this rule.
311 E : if (rule.modification_type == kAddToFilter) {
312 E : filter->filter.Union(rule.ranges, &filter->filter);
313 E : } else {
314 E : DCHECK_EQ(kSubtractFromFilter, rule.modification_type);
315 E : filter->filter.Subtract(rule.ranges, &filter->filter);
316 : }
317 E : }
318 :
319 E : if (unmatched_rules)
320 E : LOG(WARNING) << "There were " << unmatched_rules << " unmatched rule(s).";
321 :
322 E : return true;
323 E : }
324 :
325 E : bool FilterCompiler::OnCompiland(IDiaSymbol* compiland) {
326 E : DCHECK(compiland != NULL);
327 E : pe::ChildVisitor function_visitor(compiland, SymTagFunction);
328 : if (!function_visitor.VisitChildren(
329 : base::Bind(&FilterCompiler::OnFunction,
330 E : base::Unretained(this)))) {
331 i : return false;
332 : }
333 E : return true;
334 E : }
335 :
336 E : bool FilterCompiler::OnFunction(IDiaSymbol* function) {
337 E : DCHECK(function != NULL);
338 E : if (!MatchRulesBySymbolName(rules_by_type_[kFunctionRule], function))
339 i : return false;
340 E : return true;
341 E : }
342 :
343 E : bool FilterCompiler::OnPublicSymbol(IDiaSymbol* public_symbol) {
344 E : DCHECK(public_symbol != NULL);
345 E : if (!MatchRulesBySymbolName(rules_by_type_[kPublicSymbolRule], public_symbol))
346 i : return false;
347 E : return true;
348 E : }
349 :
350 : bool FilterCompiler::MatchRulesBySymbolName(const RulePointers& rules,
351 E : IDiaSymbol* symbol) {
352 E : DCHECK(symbol != NULL);
353 :
354 : // Get the symbol properties.
355 E : base::win::ScopedBstr name_bstr;
356 E : DWORD rva = 0;
357 E : ULONGLONG length = 0;
358 E : HRESULT hr = E_FAIL;
359 : if ((hr = symbol->get_name(name_bstr.Receive())) != S_OK ||
360 : (hr = symbol->get_relativeVirtualAddress(&rva)) != S_OK ||
361 E : (hr = symbol->get_length(&length)) != S_OK) {
362 : // For some public symbols get_relativeVirtualAddress fails. We can safely
363 : // ignore these failures.
364 E : return true;
365 : }
366 :
367 : // Convert the name to ASCII.
368 E : std::string name;
369 E : if (!base::WideToUTF8(name_bstr, name_bstr.Length(), &name)) {
370 i : LOG(ERROR) << "Failed to convert symbol name to UTF8: "
371 : << common::ToString(name_bstr);
372 i : return false;
373 : }
374 :
375 : // Look for any matching rules and update the associated image ranges.
376 E : RulePointers::const_iterator it = rules.begin();
377 E : for (; it != rules.end(); ++it) {
378 E : Rule* rule = *it;
379 E : if (rule->regex.FullMatch(name))
380 E : rule->ranges.Mark(Range(RelativeAddress(rva), length));
381 E : }
382 :
383 E : return true;
384 E : }
385 :
386 : } // namespace genfilter
|