1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/serialization.h"
16 :
17 : #include "base/bind.h"
18 : #include "base/files/file_util.h"
19 : #include "syzygy/block_graph/typed_block.h"
20 : #include "syzygy/pe/find.h"
21 : #include "syzygy/pe/image_layout.h"
22 : #include "syzygy/pe/metadata.h"
23 : #include "syzygy/pe/pe_file.h"
24 :
25 : namespace pe {
26 :
27 : namespace {
28 :
29 : using block_graph::BlockGraph;
30 : using block_graph::BlockGraphSerializer;
31 :
32 : // Used for versioning the serialized stream. Be sure to change this if
33 : // non-backwards compatible changes are made to the stream layout.
34 : static const uint32 kSerializedBlockGraphAndImageLayoutVersion = 0;
35 :
36 E : bool MetadataMatchesPEFile(const Metadata& metadata, const PEFile& pe_file) {
37 E : PEFile::Signature pe_signature;
38 E : pe_file.GetSignature(&pe_signature);
39 :
40 : // We are careful to use PEFile::Signature::IsConsistent rather than
41 : // Metadata::IsConsistent. This is because we explicitly want to handle
42 : // backwards compatibility with differing versions of the toolchain. Instead,
43 : // we version the whole serialized stream and enforce consistency in
44 : // LoadBlockGraphAndImageLayout.
45 E : if (metadata.module_signature().IsConsistent(pe_signature))
46 E : return true;
47 :
48 : // If the PE signature doesn't match outright, it's perhaps because the PE
49 : // file has been modified after we captured it's metadata. This can happen in
50 : // the case where e.g. a file is signed, which updates the data directory
51 : // to point to the signatures.
52 i : if (metadata.module_signature().IsConsistentExceptForChecksum(pe_signature)) {
53 i : LOG(WARNING) << "Matching PE module with modified checksum. "
54 : "Beware that this may be unsafe if the module has been "
55 : "significantly modified.\n"
56 : "Significant modification includes e.g. modifying "
57 : "resources.\n"
58 : "Signing files does, however, not constitute significant "
59 : "modification, so if you're e.g. instrumenting official "
60 : "Chrome binaries, you'll be fine.";
61 :
62 i : return true;
63 : }
64 :
65 i : return false;
66 E : }
67 :
68 E : bool FindPEFile(const Metadata& metadata, PEFile* pe_file) {
69 E : DCHECK(pe_file != NULL);
70 :
71 E : LOG(INFO) << "Searching for module to use in deserialization.";
72 :
73 : // We search for a PE file in the following sequence:
74 : // (1) If pe_file is already initialized, try to use it.
75 : // (2) Look for a PE file using the path stored in metadata.
76 : // (3) Search for a matching PE file in the already initialized pe_file
77 : // directory (if provided), and the metadata directory.
78 : // (4) Search for a matching PE file using a system wide search.
79 E : std::wstring search_path;
80 :
81 : // Approach 1: If we already have a PE file initialized, see if it matches the
82 : // signature of the one we serialized.
83 E : if (!pe_file->path().empty()) {
84 i : LOG(INFO) << "Attempting to use provided module in deserialization: "
85 : << pe_file->path().value();
86 :
87 i : if (MetadataMatchesPEFile(metadata, *pe_file))
88 i : return true;
89 :
90 : // Save the directory of the provided PE file in the search path.
91 i : search_path.append(pe_file->path().DirName().value());
92 i : search_path.append(L";");
93 i : LOG(WARNING) << "Metadata signature does not match provided module: "
94 : << pe_file->path().value();
95 : }
96 :
97 : // Approach 2: Try to use the path provided in the metadata itself.
98 E : base::FilePath metadata_path(metadata.module_signature().path);
99 E : LOG(INFO) << "Attempting to use metadata path in deserialization: "
100 : << metadata_path.value();
101 E : if (!base::PathExists(metadata_path) || !pe_file->Init(metadata_path)) {
102 i : LOG(WARNING) << "Unable to read module:" << metadata_path.value();
103 i : } else {
104 E : if (MetadataMatchesPEFile(metadata, *pe_file))
105 E : return true;
106 :
107 : // Append the directory to the search path if it exists.
108 i : base::FilePath dir = metadata_path.DirName();
109 i : if (base::DirectoryExists(dir))
110 i : search_path.append(metadata_path.DirName().value());
111 :
112 i : LOG(WARNING) << "Metadata signature does not match metadata module: "
113 : << metadata_path.value();
114 i : }
115 :
116 i : base::FilePath module_path;
117 :
118 : // Approach 3: Use an explicit search in the provided paths.
119 i : if (!search_path.empty()) {
120 i : LOG(INFO) << "Searching for module in provided paths: " << search_path;
121 : if (!FindModuleBySignature(metadata.module_signature(),
122 : search_path.c_str(),
123 i : &module_path)) {
124 i : LOG(WARNING) << "FindModuleBySignature failed.";
125 : }
126 : }
127 :
128 : // Approach 4: Do a system-wide search.
129 i : if (module_path.empty()) {
130 i : LOG(INFO) << "Searching for module using system paths.";
131 : if (!FindModuleBySignature(metadata.module_signature(),
132 i : &module_path)) {
133 i : LOG(ERROR) << "FindModuleBySignature failed.";
134 i : return false;
135 : }
136 : }
137 :
138 : // No module found in either of the above two searches?
139 i : if (module_path.empty()) {
140 i : LOG(ERROR) << "No module found in FindModuleBySignature.";
141 i : return false;
142 : }
143 :
144 : // If we get here, we've found a module. However, we don't just accept that
145 : // fact.
146 :
147 i : if (!pe_file->Init(module_path)) {
148 i : LOG(ERROR) << "Failed to read module: " << module_path.value();
149 i : return false;
150 : }
151 :
152 i : if (!MetadataMatchesPEFile(metadata, *pe_file)) {
153 i : LOG(ERROR) << "Metadata signature does not match found module: "
154 : << module_path.value();
155 i : return false;
156 : }
157 :
158 i : LOG(INFO) << "Found module with matching signature: " << module_path.value();
159 :
160 i : return true;
161 E : }
162 :
163 : // This callback is used to save the data in a block by simply savings its
164 : // address in the image-layout.
165 : bool SaveBlockData(const ImageLayout* image_layout,
166 : bool data_already_saved,
167 : const BlockGraph::Block& block,
168 E : core::OutArchive* out_archive) {
169 E : DCHECK(image_layout != NULL);
170 E : DCHECK(out_archive != NULL);
171 :
172 : // We're always in OUTPUT_NO_DATA mode, so either the data hasn't yet been
173 : // saved or there was no data to save.
174 E : DCHECK(block.data_size() == 0 || !data_already_saved);
175 :
176 E : core::RelativeAddress block_addr;
177 E : if (!image_layout->blocks.GetAddressOf(&block, &block_addr)) {
178 i : LOG(ERROR) << "Block with id " << block.id() << " not in image-layout.";
179 i : return false;
180 : }
181 :
182 : // Save the address of the block wrt to the provided image-layout. This will
183 : // be sufficient for us to lookup the block data in the PE file afterwards.
184 E : if (!out_archive->Save(block_addr)) {
185 i : LOG(ERROR) << "Unable to save address of block with id " << block.id()
186 : << ".";
187 i : return false;
188 : }
189 :
190 E : return true;
191 E : }
192 :
193 : // This callback is used to load the data in a block. It also simultaneously
194 : // constructs the image-layout.
195 : bool LoadBlockData(const PEFile* pe_file,
196 : ImageLayout* image_layout,
197 : bool need_to_set_data,
198 : size_t data_size,
199 : BlockGraph::Block* block,
200 E : core::InArchive* in_archive) {
201 E : DCHECK(pe_file != NULL);
202 E : DCHECK(image_layout != NULL);
203 E : DCHECK(block != NULL);
204 E : DCHECK(in_archive != NULL);
205 :
206 E : core::RelativeAddress block_addr;
207 E : if (!in_archive->Load(&block_addr)) {
208 i : LOG(ERROR) << "Unable to load address in image-layout of block with id "
209 : << block->id() << ".";
210 i : return false;
211 : }
212 :
213 : // Insert the block in the image layout.
214 E : if (!image_layout->blocks.InsertBlock(block_addr, block)) {
215 i : LOG(ERROR) << "Unable to insert block with id " << block->id() << " into "
216 : << "image-layout.";
217 i : return false;
218 : }
219 :
220 : // If we have no data in this block then there's no need to load any.
221 E : if (data_size == 0)
222 E : return true;
223 :
224 : // We're in OUTPUT_NO_DATA mode, so we should always be responsible for
225 : // setting the block data.
226 E : DCHECK(need_to_set_data);
227 E : DCHECK_EQ(0u, block->data_size());
228 E : DCHECK(block->data() == NULL);
229 :
230 E : const uint8* data = pe_file->GetImageData(block_addr, data_size);
231 E : if (data == NULL) {
232 i : LOG(ERROR) << "Unable to get data from PE file for block with id "
233 : << block->id() << ".";
234 i : return false;
235 : }
236 :
237 E : block->SetData(data, data_size);
238 :
239 E : return true;
240 E : }
241 :
242 : bool LoadBlockGraphAndImageLayout(
243 : const PEFile& pe_file,
244 : PEFile* pe_file_ptr,
245 : block_graph::BlockGraphSerializer::Attributes* attributes,
246 : ImageLayout* image_layout,
247 E : core::InArchive* in_archive) {
248 E : DCHECK(pe_file_ptr == NULL || pe_file_ptr == &pe_file);
249 E : DCHECK(image_layout != NULL);
250 E : DCHECK(in_archive != NULL);
251 :
252 E : BlockGraph* block_graph = image_layout->blocks.graph();
253 :
254 : // Load and check the stream version. This is where we could dispatch to
255 : // different handlers for old versions of the stream if we wish to maintain
256 : // backwards compatibility.
257 E : uint32 stream_version = 0;
258 E : if (!in_archive->Load(&stream_version)) {
259 i : LOG(ERROR) << "Unable to load serialized stream version.";
260 i : return false;
261 : }
262 E : if (stream_version != kSerializedBlockGraphAndImageLayoutVersion) {
263 E : LOG(ERROR) << "Invalid stream version " << stream_version << ", expected "
264 : << kSerializedBlockGraphAndImageLayoutVersion << ".";
265 E : return false;
266 : }
267 :
268 : // Load the metadata.
269 E : Metadata metadata;
270 E : if (!in_archive->Load(&metadata)) {
271 i : LOG(ERROR) << "Unable to load metadata.";
272 i : return false;
273 : }
274 :
275 E : if (pe_file_ptr != NULL) {
276 : // If we've been given a modifiable PE-file, then we can be more intelligent
277 : // about our search. This call logs verbosely on failure so we don't have
278 : // to.
279 E : if (!FindPEFile(metadata, pe_file_ptr))
280 i : return false;
281 E : } else {
282 E : if (!MetadataMatchesPEFile(metadata, pe_file)) {
283 i : LOG(ERROR) << "Provided PE file does not match signature in serialized "
284 : << "stream.";
285 i : return false;
286 : }
287 : }
288 :
289 : // Set up the serializer.
290 E : BlockGraphSerializer bgs;
291 : bgs.set_load_block_data_callback(
292 : base::Bind(&LoadBlockData,
293 : base::Unretained(&pe_file),
294 E : base::Unretained(image_layout)));
295 :
296 : // Now deserialize the block-graph. This will simultaneously deserialize the
297 : // image-layout address-space.
298 E : if (!bgs.Load(block_graph, in_archive)) {
299 i : LOG(ERROR) << "Unable to load block-graph.";
300 i : return false;
301 : }
302 :
303 : // Return the attributes if asked to.
304 E : if (attributes != NULL)
305 E : *attributes = bgs.attributes();
306 :
307 : // We can now recreate the rest of the image-layout from the block-graph.
308 : // Start by retrieving the DOS header block, which is always at the start of
309 : // the image.
310 : BlockGraph::Block* dos_header_block =
311 E : image_layout->blocks.GetBlockByAddress(core::RelativeAddress());
312 E : if (dos_header_block == NULL) {
313 i : LOG(ERROR) << "Unable to find DOS header in image-layout address-space.";
314 i : return false;
315 : }
316 :
317 : // Cast this as an IMAGE_DOS_HEADER.
318 E : block_graph::ConstTypedBlock<IMAGE_DOS_HEADER> dos_header;
319 E : if (!dos_header.Init(0, dos_header_block)) {
320 i : LOG(ERROR) << "Unable to cast DOS header block to IMAGE_DOS_HEADER.";
321 i : return false;
322 : }
323 :
324 : // Get the NT headers.
325 E : block_graph::ConstTypedBlock<IMAGE_NT_HEADERS> nt_headers;
326 E : if (!dos_header.Dereference(dos_header->e_lfanew, &nt_headers)) {
327 i : LOG(ERROR) << "Unable to dereference NT headers from DOS header.";
328 i : return false;
329 : }
330 :
331 : // Finally, use these headers to populate the section info vector of the
332 : // image-layout.
333 E : if (!CopyHeaderToImageLayout(nt_headers.block(), image_layout)) {
334 i : LOG(ERROR) << "Unable to copy NT headers to image-layout.";
335 i : return false;
336 : }
337 :
338 E : return true;
339 E : }
340 :
341 : } // namespace
342 :
343 : bool SaveBlockGraphAndImageLayout(
344 : const PEFile& pe_file,
345 : block_graph::BlockGraphSerializer::Attributes attributes,
346 : const ImageLayout& image_layout,
347 E : core::OutArchive* out_archive) {
348 E : DCHECK(out_archive != NULL);
349 :
350 E : const BlockGraph& block_graph = *image_layout.blocks.graph();
351 :
352 E : if (!out_archive->Save(kSerializedBlockGraphAndImageLayoutVersion)) {
353 i : LOG(ERROR) << "Unable to save serialized stream version.";
354 i : return false;
355 : }
356 :
357 : // Get the metadata for this module and the toolchain. This will
358 : // allow us to validate input files in other pieces of the toolchain.
359 E : Metadata metadata;
360 E : PEFile::Signature pe_file_signature;
361 E : pe_file.GetSignature(&pe_file_signature);
362 E : if (!metadata.Init(pe_file_signature)) {
363 i : LOG(ERROR) << "Unable to initialize metadata for PE file \""
364 : << pe_file.path().value() << "\".";
365 i : return false;
366 : }
367 :
368 : // Save the metadata.
369 E : if (!out_archive->Save(metadata)) {
370 i : LOG(ERROR) << "Unable to save metadata for PE file \""
371 : << pe_file.path().value() << "\".";
372 i : return false;
373 : }
374 :
375 : // Initialize the serializer. We don't save any of the data because it can all
376 : // be retrieved from the PE file.
377 E : BlockGraphSerializer bgs;
378 E : bgs.set_data_mode(BlockGraphSerializer::OUTPUT_NO_DATA);
379 E : bgs.set_attributes(attributes);
380 : bgs.set_save_block_data_callback(base::Bind(
381 : &SaveBlockData,
382 E : base::Unretained(&image_layout)));
383 :
384 : // Write the block-graph. This also simultaneously serializes the
385 : // address-space portion of the image-layout.
386 E : if (!bgs.Save(block_graph, out_archive)) {
387 i : LOG(ERROR) << "Unable to save block-graph.";
388 i : return false;
389 : }
390 :
391 E : return true;
392 E : }
393 :
394 : bool LoadBlockGraphAndImageLayout(
395 : const PEFile& pe_file,
396 : block_graph::BlockGraphSerializer::Attributes* attributes,
397 : ImageLayout* image_layout,
398 E : core::InArchive* in_archive) {
399 : if (!LoadBlockGraphAndImageLayout(pe_file, NULL, attributes,
400 E : image_layout, in_archive)) {
401 i : return false;
402 : }
403 :
404 E : return true;
405 E : }
406 :
407 : bool LoadBlockGraphAndImageLayout(
408 : PEFile* pe_file,
409 : block_graph::BlockGraphSerializer::Attributes* attributes,
410 : ImageLayout* image_layout,
411 E : core::InArchive* in_archive) {
412 E : DCHECK(pe_file != NULL);
413 E : DCHECK(image_layout != NULL);
414 E : DCHECK(in_archive != NULL);
415 :
416 : if (!LoadBlockGraphAndImageLayout(*pe_file, pe_file, attributes,
417 E : image_layout, in_archive)) {
418 E : return false;
419 : }
420 :
421 E : return true;
422 E : }
423 :
424 : } // namespace pe
|