1 : // Copyright 2013 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/grinder/indexed_frequency_data_serializer.h"
16 :
17 : #include <string>
18 : #include <utility>
19 :
20 : #include "base/files/file_path.h"
21 : #include "base/json/json_reader.h"
22 : #include "base/strings/stringprintf.h"
23 : #include "syzygy/common/indexed_frequency_data.h"
24 : #include "syzygy/core/json_file_writer.h"
25 : #include "syzygy/pdb/pdb_reader.h"
26 : #include "syzygy/pdb/pdb_util.h"
27 : #include "syzygy/pe/find.h"
28 : #include "syzygy/pe/metadata.h"
29 : #include "syzygy/pe/pe_file.h"
30 :
31 : namespace grinder {
32 :
33 : namespace {
34 :
35 : using basic_block_util::EntryCountType;
36 : using basic_block_util::IndexedFrequencyInformation;
37 : using basic_block_util::IndexedFrequencyMap;
38 : using basic_block_util::ModuleIndexedFrequencyMap;
39 : using basic_block_util::ModuleInformation;
40 : using core::JSONFileWriter;
41 : using core::RelativeAddress;
42 :
// Keys of the entries found in each per-module JSON dictionary.
const char kMetadata[] = "metadata";
const char kDescription[] = "description";
const char kFrequencies[] = "frequencies";

// Keys of the fields stored inside the |kDescription| dictionary.
const char kNumEntriesKey[] = "num_entries";
const char kNumColumnsKey[] = "num_columns";
const char kDataTypeKey[] = "data_type";
const char kFrequencySizeKey[] = "frequency_size";
50 :
51 : bool OutputFrequencyData(
52 : JSONFileWriter* writer,
53 : const ModuleInformation& module_information,
54 E : const IndexedFrequencyInformation& frequency_info) {
55 E : DCHECK(writer != NULL);
56 :
57 : // Start a new dictionary.
58 E : if (!writer->OpenDict())
59 i : return false;
60 :
61 : // Pour the module information into a PE Metadata object, for convenient
62 : // JSON serialization.
63 E : pe::Metadata metadata;
64 E : if (!metadata.Init(pe::PEFile::Signature(module_information)))
65 i : return false;
66 :
67 : // Output the module metadata.
68 E : if (!writer->OutputKey(kMetadata) || !metadata.SaveToJSON(writer))
69 i : return false;
70 :
71 : // Output the module information.
72 E : std::string data_type_str;
73 : if (!common::IndexedFrequencyDataTypeToString(frequency_info.data_type,
74 E : &data_type_str)) {
75 i : return false;
76 : }
77 : if (!writer->OutputComment("Indexed frequency data module description.") ||
78 : !writer->OutputKey(kDescription) ||
79 : !writer->OpenDict() ||
80 : !writer->OutputKey(kNumEntriesKey) ||
81 : !writer->OutputInteger(frequency_info.num_entries) ||
82 : !writer->OutputKey(kNumColumnsKey) ||
83 : !writer->OutputInteger(frequency_info.num_columns) ||
84 : !writer->OutputKey(kDataTypeKey) ||
85 : !writer->OutputString(data_type_str) ||
86 : !writer->OutputKey(kFrequencySizeKey) ||
87 : !writer->OutputInteger(frequency_info.frequency_size) ||
88 E : !writer->CloseDict()) {
89 i : return false;
90 : }
91 :
92 : // Output the frequency array.
93 E : const IndexedFrequencyMap& frequencies = frequency_info.frequency_map;
94 : if (!writer->OutputComment(base::StringPrintf(
95 : "%d basic-block frequencies.", frequencies.size()).c_str()) ||
96 : !writer->OutputKey(kFrequencies) ||
97 E : !writer->OpenList()) {
98 i : return false;
99 : }
100 :
101 : // Build a set of keys to output.
102 E : size_t num_columns = 0;
103 E : std::set<RelativeAddress> keys;
104 E : IndexedFrequencyMap::const_iterator it = frequencies.begin();
105 E : for (; it != frequencies.end(); ++it) {
106 E : RelativeAddress addr = it->first.first;
107 E : size_t column = it->first.second;
108 E : if (it->second != 0) {
109 E : keys.insert(addr);
110 E : num_columns = std::max(num_columns, column + 1);
111 : }
112 E : }
113 :
114 : // For each key with at least one non-zero column, output a block with each
115 : // column.
116 E : std::set<RelativeAddress>::iterator key = keys.begin();
117 E : for (; key != keys.end(); ++key) {
118 E : if (!writer->OpenList() || !writer->OutputInteger(key->value()))
119 i : return false;
120 E : for (size_t column = 0; column < num_columns; ++column) {
121 : IndexedFrequencyMap::const_iterator data =
122 E : frequencies.find(std::make_pair(*key, column));
123 E : int32 value = 0;
124 E : if (data != frequencies.end())
125 E : value = data->second;
126 E : if (!writer->OutputInteger(value))
127 i : return false;
128 E : }
129 E : if (!writer->CloseList())
130 i : return false;
131 E : }
132 :
133 : // Close the entry count array.
134 E : if (!writer->CloseList())
135 i : return false;
136 :
137 : // Close the dictionary.
138 E : if (!writer->CloseDict())
139 i : return false;
140 :
141 : // And we're done.
142 E : return true;
143 E : }
144 :
145 : bool ReadFrequencyData(const base::DictionaryValue* dict_value,
146 E : ModuleIndexedFrequencyMap* module_frequency_map) {
147 E : DCHECK(dict_value != NULL);
148 E : DCHECK(module_frequency_map != NULL);
149 :
150 : // Load the metadata about the image.
151 E : const base::DictionaryValue* metadata_dict = NULL;
152 E : if (!dict_value->GetDictionary(kMetadata, &metadata_dict)) {
153 E : LOG(ERROR) << "Missing or invalid " << kMetadata << " entry.";
154 E : return false;
155 : }
156 :
157 E : pe::Metadata metadata;
158 E : if (!metadata.LoadFromJSON(*metadata_dict)) {
159 : // The loader will log any errors.
160 i : return false;
161 : }
162 :
163 : // Extract the information list.
164 E : const base::DictionaryValue* information_dict = NULL;
165 E : if (!dict_value->GetDictionary(kDescription, &information_dict)) {
166 E : LOG(ERROR) << "Missing or invalid " << kDescription << " entry.";
167 E : return false;
168 : }
169 :
170 : // Extract the frequencies list.
171 E : const base::ListValue* frequency_list = NULL;
172 E : if (!dict_value->GetList(kFrequencies, &frequency_list)) {
173 E : LOG(ERROR) << "Missing or invalid " << kFrequencies << " entry.";
174 E : return false;
175 : }
176 :
177 : // Insert a new IndexedFrequencyMap record for this module.
178 E : const ModuleInformation& module_information = metadata.module_signature();
179 : std::pair<ModuleIndexedFrequencyMap::iterator, bool> result =
180 : module_frequency_map->insert(std::make_pair(
181 E : module_information, IndexedFrequencyInformation()));
182 :
183 : // Validate that we really did insert a new module into the map.
184 E : if (!result.second) {
185 i : LOG(ERROR) << "Found duplicate entries for " << module_information.path
186 : << ".";
187 i : return false;
188 : }
189 :
190 : // Populate frequency information.
191 E : IndexedFrequencyInformation& frequency_info = result.first->second;
192 E : int32 info_num_entries = 0;
193 E : int32 info_num_columns = 0;
194 E : std::string info_data_type_str;
195 E : int32 info_frequency_size = 0;
196 : if (!information_dict->GetInteger(kNumEntriesKey, &info_num_entries) ||
197 : !information_dict->GetInteger(kNumColumnsKey, &info_num_columns) ||
198 : !information_dict->GetString(kDataTypeKey, &info_data_type_str) ||
199 E : !information_dict->GetInteger(kFrequencySizeKey, &info_frequency_size)) {
200 i : return false;
201 : }
202 E : frequency_info.num_entries = info_num_entries;
203 E : frequency_info.num_columns = info_num_columns;
204 : if (!common::ParseFrequencyDataType(info_data_type_str,
205 E : &frequency_info.data_type)) {
206 i : return false;
207 : }
208 E : frequency_info.frequency_size = info_frequency_size;
209 :
210 : // Populate the IndexedFrequencyMap with the values in the list.
211 E : IndexedFrequencyMap& values = result.first->second.frequency_map;
212 E : size_t num_entries = frequency_list->GetSize();
213 E : for (size_t i = 0; i < num_entries; ++i) {
214 E : const base::ListValue* entry = NULL;
215 E : if (!frequency_list->GetList(i, &entry))
216 E : return false;
217 E : size_t num_columns = entry->GetSize();
218 E : if (num_columns == 0)
219 i : return false;
220 :
221 : // Get the basic block RVA.
222 E : int32 address = 0;
223 E : if (!entry->GetInteger(0, &address))
224 i : return false;
225 E : if (address < 0) {
226 i : LOG(ERROR) << "Invalid relative address in frequency list.";
227 i : return false;
228 : }
229 :
230 : // Retrieve each column.
231 E : for (size_t column = 1; column < num_columns; ++column) {
232 E : basic_block_util::EntryCountType entry_count = 0;
233 E : if (!entry->GetInteger(column, &entry_count))
234 i : return false;
235 E : if (entry_count < 0) {
236 i : LOG(ERROR) << "Invalid value in frequency list.";
237 i : return false;
238 : }
239 :
240 : // Add this entry to our map.
241 : if (!values.insert(std::make_pair(std::make_pair(
242 E : core::RelativeAddress(address), column - 1), entry_count)).second) {
243 i : LOG(ERROR) << "Duplicate basic block address in frequency list.";
244 i : return false;
245 : }
246 E : }
247 E : }
248 :
249 : // And we're done.
250 E : return true;
251 E : }
252 :
253 : } // namespace
254 :
255 : IndexedFrequencyDataSerializer::IndexedFrequencyDataSerializer()
256 E : : pretty_print_(false) {
257 E : }
258 :
259 : bool IndexedFrequencyDataSerializer::SaveAsJson(
260 E : const ModuleIndexedFrequencyMap& frequency_map, FILE* file) {
261 E : DCHECK(file != NULL);
262 E : core::JSONFileWriter writer(file, pretty_print_);
263 :
264 : // Open the list;
265 E : if (!writer.OpenList())
266 i : return false;
267 :
268 : // Output each entry;
269 E : ModuleIndexedFrequencyMap::const_iterator it = frequency_map.begin();
270 E : for (; it != frequency_map.end(); ++it) {
271 E : if (!OutputFrequencyData(&writer, it->first, it->second))
272 i : return false;
273 E : }
274 :
275 : // Close the list.
276 E : if (!writer.CloseList())
277 i : return false;
278 :
279 E : return true;
280 E : }
281 :
282 : bool IndexedFrequencyDataSerializer::SaveAsJson(
283 : const ModuleIndexedFrequencyMap& frequency_map,
284 E : const base::FilePath& path) {
285 E : DCHECK(!path.empty());
286 E : base::ScopedFILE file(base::OpenFile(path, "wb"));
287 E : if (file.get() == NULL) {
288 i : LOG(ERROR) << "Failed to open " << path.value() << " for reading.";
289 i : return false;
290 : }
291 :
292 E : if (!SaveAsJson(frequency_map, file.get())) {
293 i : LOG(ERROR) << "Failed to write JSON data to " << path.value() << ".";
294 i : return false;
295 : }
296 :
297 E : return true;
298 E : }
299 :
300 : bool IndexedFrequencyDataSerializer::LoadFromJson(
301 : const base::FilePath& path,
302 E : ModuleIndexedFrequencyMap* module_frequency_map) {
303 E : DCHECK(module_frequency_map != NULL);
304 E : DCHECK(!path.empty());
305 :
306 E : std::string json_string;
307 E : if (!base::ReadFileToString(path, &json_string)) {
308 E : LOG(ERROR) << "Failed to read '" << path.value() << "'.";
309 E : return false;
310 : }
311 :
312 E : base::JSONReader json_reader;
313 E : std::string error_msg;
314 : scoped_ptr<base::Value> json_value(
315 : json_reader.ReadAndReturnError(
316 E : json_string, base::JSON_ALLOW_TRAILING_COMMAS, NULL, &error_msg));
317 E : if (json_value.get() == NULL) {
318 E : LOG(ERROR) << "Failed to parse '" << path.value() << "' as JSON ("
319 : << error_msg << ").";
320 E : return false;
321 : }
322 :
323 E : if (!PopulateFromJsonValue(json_value.get(), module_frequency_map))
324 i : return false;
325 :
326 E : return true;
327 E : }
328 :
329 : bool IndexedFrequencyDataSerializer::PopulateFromJsonValue(
330 : const base::Value* json_value,
331 E : ModuleIndexedFrequencyMap* module_frequency_map) {
332 E : DCHECK(json_value != NULL);
333 E : DCHECK(module_frequency_map != NULL);
334 :
335 E : module_frequency_map->clear();
336 :
337 : // Extract the top level list of module.
338 E : const base::ListValue* module_list = NULL;
339 E : if (!json_value->GetAsList(&module_list)) {
340 E : LOG(ERROR) << "Expected a list as the top level JSON construct.";
341 E : return false;
342 : }
343 :
344 : // Extract each module.
345 E : size_t num_modules = module_list->GetSize();
346 E : for (size_t i = 0; i < num_modules; ++i) {
347 E : const base::DictionaryValue* dict_value = NULL;
348 E : if (!module_list->GetDictionary(i, &dict_value)) {
349 E : LOG(ERROR) << "Invalid type for entry " << i << ".";
350 E : return false;
351 : }
352 E : if (!ReadFrequencyData(dict_value, module_frequency_map)) {
353 : // ReadFrequencyData() has already logged the error.
354 E : return false;
355 : }
356 E : }
357 :
358 E : return true;
359 E : }
360 :
361 : } // namespace grinder
|