1 : // Copyright 2013 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/grinder/indexed_frequency_data_serializer.h"
16 :
#include <algorithm>
#include <set>
#include <string>
#include <utility>
19 :
20 : #include "base/stringprintf.h"
21 : #include "base/files/file_path.h"
22 : #include "base/json/json_reader.h"
23 : #include "syzygy/common/indexed_frequency_data.h"
24 : #include "syzygy/core/json_file_writer.h"
25 : #include "syzygy/pdb/pdb_reader.h"
26 : #include "syzygy/pdb/pdb_util.h"
27 : #include "syzygy/pe/find.h"
28 : #include "syzygy/pe/metadata.h"
29 : #include "syzygy/pe/pe_file.h"
30 :
31 : namespace grinder {
32 :
33 : namespace {
34 :
35 : using basic_block_util::EntryCountType;
36 : using basic_block_util::IndexedFrequencyInformation;
37 : using basic_block_util::IndexedFrequencyMap;
38 : using basic_block_util::ModuleIndexedFrequencyMap;
39 : using basic_block_util::ModuleInformation;
40 : using core::JSONFileWriter;
41 : using core::RelativeAddress;
42 :
43 : const char kMetadata[] = "metadata";
44 : const char kFrequencies[] = "frequencies";
45 : const char kDescription[] = "description";
46 : const char kNumEntriesKey[] = "num_entries";
47 : const char kNumColumnsKey[] = "num_columns";
48 : const char kDataTypeKey[] = "data_type";
49 : const char kFrequencySizeKey[] = "frequency_size";
50 :
51 : bool OutputFrequencyData(
52 : JSONFileWriter* writer,
53 : const ModuleInformation& module_information,
54 E : const IndexedFrequencyInformation& frequency_info) {
55 E : DCHECK(writer != NULL);
56 :
57 : // Start a new dictionary.
58 E : if (!writer->OpenDict())
59 i : return false;
60 :
61 : // Pour the module information into a PE Metadata object, for convenient
62 : // JSON serialization.
63 E : pe::Metadata metadata;
64 E : if (!metadata.Init(pe::PEFile::Signature(module_information)))
65 i : return false;
66 :
67 : // Output the module metadata.
68 E : if (!writer->OutputKey(kMetadata) || !metadata.SaveToJSON(writer))
69 i : return false;
70 :
71 : // Output the module information.
72 E : std::string data_type_str;
73 : if (!common::IndexedFrequencyDataTypeToString(frequency_info.data_type,
74 E : &data_type_str)) {
75 i : return false;
76 : }
77 : if (!writer->OutputComment("Indexed frequency data module description.") ||
78 : !writer->OutputKey(kDescription) ||
79 : !writer->OpenDict() ||
80 : !writer->OutputKey(kNumEntriesKey) ||
81 : !writer->OutputInteger(frequency_info.num_entries) ||
82 : !writer->OutputKey(kNumColumnsKey) ||
83 : !writer->OutputInteger(frequency_info.num_columns) ||
84 : !writer->OutputKey(kDataTypeKey) ||
85 : !writer->OutputString(data_type_str) ||
86 : !writer->OutputKey(kFrequencySizeKey) ||
87 : !writer->OutputInteger(frequency_info.frequency_size) ||
88 E : !writer->CloseDict()) {
89 i : return false;
90 : }
91 :
92 : // Output the frequency array.
93 E : const IndexedFrequencyMap& frequencies = frequency_info.frequency_map;
94 : if (!writer->OutputComment(base::StringPrintf(
95 : "%d basic-block frequencies.", frequencies.size()).c_str()) ||
96 : !writer->OutputKey(kFrequencies) ||
97 E : !writer->OpenList()) {
98 i : return false;
99 : }
100 :
101 : // Build a set of keys to output.
102 E : size_t num_columns = 0;
103 E : std::set<RelativeAddress> keys;
104 E : IndexedFrequencyMap::const_iterator it = frequencies.begin();
105 E : for (; it != frequencies.end(); ++it) {
106 E : RelativeAddress addr = it->first.first;
107 E : size_t column = it->first.second;
108 E : if (it->second != 0) {
109 E : keys.insert(addr);
110 E : num_columns = std::max(num_columns, column + 1);
111 : }
112 E : }
113 :
114 : // For each key with at least one non-zero column, output a block with each
115 : // column.
116 E : std::set<RelativeAddress>::iterator key = keys.begin();
117 E : for (; key != keys.end(); ++key) {
118 E : if (!writer->OpenList() || !writer->OutputInteger(key->value()))
119 i : return false;
120 E : for (size_t column = 0; column < num_columns; ++column) {
121 : IndexedFrequencyMap::const_iterator data =
122 E : frequencies.find(std::make_pair(*key, column));
123 E : int32 value = 0;
124 E : if (data != frequencies.end())
125 E : value = data->second;
126 E : if (!writer->OutputInteger(value))
127 i : return false;
128 E : }
129 E : if (!writer->CloseList())
130 i : return false;
131 E : }
132 :
133 : // Close the entry count array.
134 E : if (!writer->CloseList())
135 i : return false;
136 :
137 : // Close the dictionary.
138 E : if (!writer->CloseDict())
139 i : return false;
140 :
141 : // And we're done.
142 E : return true;
143 E : }
144 :
145 : bool ReadFrequencyData(const base::DictionaryValue* dict_value,
146 E : ModuleIndexedFrequencyMap* module_frequency_map) {
147 E : DCHECK(dict_value != NULL);
148 E : DCHECK(module_frequency_map != NULL);
149 :
150 : // Load the metadata about the image.
151 E : const base::DictionaryValue* metadata_dict = NULL;
152 E : if (!dict_value->GetDictionary(kMetadata, &metadata_dict)) {
153 E : LOG(ERROR) << "Missing or invalid " << kMetadata << " entry.";
154 E : return false;
155 : }
156 :
157 E : pe::Metadata metadata;
158 E : if (!metadata.LoadFromJSON(*metadata_dict)) {
159 : // The loader will log any errors.
160 i : return false;
161 : }
162 :
163 : // Extract the information list.
164 E : const base::DictionaryValue* information_dict = NULL;
165 E : if (!dict_value->GetDictionary(kDescription, &information_dict)) {
166 E : LOG(ERROR) << "Missing or invalid " << kDescription << " entry.";
167 E : return false;
168 : }
169 :
170 : // Extract the frequencies list.
171 E : const base::ListValue* frequency_list = NULL;
172 E : if (!dict_value->GetList(kFrequencies, &frequency_list)) {
173 E : LOG(ERROR) << "Missing or invalid " << kFrequencies << " entry.";
174 E : return false;
175 : }
176 :
177 : // Convert the signature into a ModuleInformation struct.
178 E : const pe::PEFile::Signature& signature = metadata.module_signature();
179 E : ModuleInformation module_information;
180 E : module_information.base_address = signature.base_address.value();
181 E : module_information.image_checksum = signature.module_checksum;
182 E : module_information.image_file_name = signature.path;
183 E : module_information.module_size = signature.module_size;
184 E : module_information.time_date_stamp = signature.module_time_date_stamp;
185 :
186 : // Insert a new IndexedFrequencyMap record for this module.
187 : std::pair<ModuleIndexedFrequencyMap::iterator, bool> result =
188 : module_frequency_map->insert(std::make_pair(
189 E : module_information, IndexedFrequencyInformation()));
190 :
191 : // Validate that we really did insert a new module into the map.
192 E : if (!result.second) {
193 i : LOG(ERROR) << "Found duplicate entries for " << signature.path << ".";
194 i : return false;
195 : }
196 :
197 : // Populate frequency information.
198 E : IndexedFrequencyInformation& frequency_info = result.first->second;
199 E : int32 info_num_entries = 0;
200 E : int32 info_num_columns = 0;
201 E : std::string info_data_type_str;
202 E : int32 info_frequency_size = 0;
203 : if (!information_dict->GetInteger(kNumEntriesKey, &info_num_entries) ||
204 : !information_dict->GetInteger(kNumColumnsKey, &info_num_columns) ||
205 : !information_dict->GetString(kDataTypeKey, &info_data_type_str) ||
206 E : !information_dict->GetInteger(kFrequencySizeKey, &info_frequency_size)) {
207 i : return false;
208 : }
209 E : frequency_info.num_entries = info_num_entries;
210 E : frequency_info.num_columns = info_num_columns;
211 : if (!common::ParseFrequencyDataType(info_data_type_str,
212 E : &frequency_info.data_type)) {
213 i : return false;
214 : }
215 E : frequency_info.frequency_size = info_frequency_size;
216 :
217 : // Populate the IndexedFrequencyMap with the values in the list.
218 E : IndexedFrequencyMap& values = result.first->second.frequency_map;
219 E : size_t num_entries = frequency_list->GetSize();
220 E : for (size_t i = 0; i < num_entries; ++i) {
221 E : const base::ListValue* entry = NULL;
222 E : if (!frequency_list->GetList(i, &entry))
223 E : return false;
224 E : size_t num_columns = entry->GetSize();
225 E : if (num_columns == 0)
226 i : return false;
227 :
228 : // Get the basic block RVA.
229 E : int32 address = 0;
230 E : if (!entry->GetInteger(0, &address))
231 i : return false;
232 E : if (address < 0) {
233 i : LOG(ERROR) << "Invalid relative address in frequency list.";
234 i : return false;
235 : }
236 :
237 : // Retrieve each column.
238 E : for (size_t column = 1; column < num_columns; ++column) {
239 E : basic_block_util::EntryCountType entry_count = 0;
240 E : if (!entry->GetInteger(column, &entry_count))
241 i : return false;
242 E : if (entry_count < 0) {
243 i : LOG(ERROR) << "Invalid value in frequency list.";
244 i : return false;
245 : }
246 :
247 : // Add this entry to our map.
248 : if (!values.insert(std::make_pair(std::make_pair(
249 E : core::RelativeAddress(address), column - 1), entry_count)).second) {
250 i : LOG(ERROR) << "Duplicate basic block address in frequency list.";
251 i : return false;
252 : }
253 E : }
254 E : }
255 :
256 : // And we're done.
257 E : return true;
258 E : }
259 :
260 : } // namespace
261 :
262 : IndexedFrequencyDataSerializer::IndexedFrequencyDataSerializer()
263 E : : pretty_print_(false) {
264 E : }
265 :
266 : bool IndexedFrequencyDataSerializer::SaveAsJson(
267 E : const ModuleIndexedFrequencyMap& frequency_map, FILE* file) {
268 E : DCHECK(file != NULL);
269 E : core::JSONFileWriter writer(file, pretty_print_);
270 :
271 : // Open the list;
272 E : if (!writer.OpenList())
273 i : return false;
274 :
275 : // Output each entry;
276 E : ModuleIndexedFrequencyMap::const_iterator it = frequency_map.begin();
277 E : for (; it != frequency_map.end(); ++it) {
278 E : if (!OutputFrequencyData(&writer, it->first, it->second))
279 i : return false;
280 E : }
281 :
282 : // Close the list.
283 E : if (!writer.CloseList())
284 i : return false;
285 :
286 E : return true;
287 E : }
288 :
289 : bool IndexedFrequencyDataSerializer::SaveAsJson(
290 : const ModuleIndexedFrequencyMap& frequency_map,
291 E : const base::FilePath& path) {
292 E : DCHECK(!path.empty());
293 E : file_util::ScopedFILE file(file_util::OpenFile(path, "wb"));
294 E : if (file.get() == NULL) {
295 i : LOG(ERROR) << "Failed to open " << path.value() << " for reading.";
296 i : return false;
297 : }
298 :
299 E : if (!SaveAsJson(frequency_map, file.get())) {
300 i : LOG(ERROR) << "Failed to write JSON data to " << path.value() << ".";
301 i : return false;
302 : }
303 :
304 E : return true;
305 E : }
306 :
307 : bool IndexedFrequencyDataSerializer::LoadFromJson(
308 : const base::FilePath& path,
309 E : ModuleIndexedFrequencyMap* module_frequency_map) {
310 E : DCHECK(module_frequency_map != NULL);
311 E : DCHECK(!path.empty());
312 :
313 E : std::string json_string;
314 E : if (!file_util::ReadFileToString(path, &json_string)) {
315 E : LOG(ERROR) << "Failed to read '" << path.value() << "'.";
316 E : return false;
317 : }
318 :
319 E : base::JSONReader json_reader;
320 E : std::string error_msg;
321 : scoped_ptr<base::Value> json_value(
322 : json_reader.ReadAndReturnError(
323 E : json_string, base::JSON_ALLOW_TRAILING_COMMAS, NULL, &error_msg));
324 E : if (json_value.get() == NULL) {
325 E : LOG(ERROR) << "Failed to parse '" << path.value() << "' as JSON ("
326 : << error_msg << ").";
327 E : return false;
328 : }
329 :
330 E : if (!PopulateFromJsonValue(json_value.get(), module_frequency_map))
331 i : return false;
332 :
333 E : return true;
334 E : }
335 :
336 : bool IndexedFrequencyDataSerializer::PopulateFromJsonValue(
337 : const base::Value* json_value,
338 E : ModuleIndexedFrequencyMap* module_frequency_map) {
339 E : DCHECK(json_value != NULL);
340 E : DCHECK(module_frequency_map != NULL);
341 :
342 E : module_frequency_map->clear();
343 :
344 : // Extract the top level list of module.
345 E : const base::ListValue* module_list = NULL;
346 E : if (!json_value->GetAsList(&module_list)) {
347 E : LOG(ERROR) << "Expected a list as the top level JSON construct.";
348 E : return false;
349 : }
350 :
351 : // Extract each module.
352 E : size_t num_modules = module_list->GetSize();
353 E : for (size_t i = 0; i < num_modules; ++i) {
354 E : const base::DictionaryValue* dict_value = NULL;
355 E : if (!module_list->GetDictionary(i, &dict_value)) {
356 E : LOG(ERROR) << "Invalid type for entry " << i << ".";
357 E : return false;
358 : }
359 E : if (!ReadFrequencyData(dict_value, module_frequency_map)) {
360 : // ReadFrequencyData() has already logged the error.
361 E : return false;
362 : }
363 E : }
364 :
365 E : return true;
366 E : }
367 :
368 : } // namespace grinder
|