1 : // Copyright 2011 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/core/file_util.h"
16 :
17 : #include "base/files/file_util.h"
18 : #include "base/win/scoped_handle.h"
19 : #include "syzygy/common/com_utils.h"
20 : #include "syzygy/core/serialization.h"
21 :
22 : namespace core {
23 :
24 : namespace {
25 :
// The possible outcomes of an attempt to open a file and query its
// information.
enum FileInformationResult {
  // The file, or some component of the path leading to it, does not exist.
  kFileNotFound = 0,
  // The file was opened and its information was retrieved.
  kSuccess = 1,
  // Opening the file or querying its information failed for another reason.
  kFailure = 2
};
31 :
32 : // Gets a handle to a file, and the file information for it. Leaves the handle
33 : // open.
34 : FileInformationResult GetFileInformation(
35 : const base::FilePath& path,
36 : base::win::ScopedHandle* handle,
37 E : BY_HANDLE_FILE_INFORMATION* file_info) {
38 : // Open the file in the least restrictive possible way.
39 E : handle->Set(
40 : ::CreateFile(path.value().c_str(),
41 : SYNCHRONIZE,
42 : FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
43 : NULL,
44 : OPEN_EXISTING,
45 : FILE_ATTRIBUTE_NORMAL,
46 : NULL));
47 E : if (!handle->IsValid()) {
48 : // The file not being found is a special case.
49 E : DWORD error = ::GetLastError();
50 E : if (error == ERROR_FILE_NOT_FOUND || error == ERROR_PATH_NOT_FOUND)
51 E : return kFileNotFound;
52 :
53 i : LOG(ERROR) << "Unable to open \"" << path.value() << "\": "
54 : << common::LogWe(error);
55 i : return kFailure;
56 : }
57 :
58 E : if (!::GetFileInformationByHandle(handle->Get(), file_info)) {
59 i : DWORD error = ::GetLastError();
60 i : LOG(ERROR) << "GetFileInformationByHandle failed for \"" << path.value()
61 : << "\": " << common::LogWe(error);
62 i : return kFailure;
63 : }
64 :
65 E : return kSuccess;
66 E : }
67 :
68 : } // namespace
69 :
70 : FilePathCompareResult CompareFilePaths(const base::FilePath& path1,
71 E : const base::FilePath& path2) {
72 : // Now we try opening both files for reading to see if they point to the same
73 : // underlying volume and file index. We open both files simultaneously to
74 : // avoid a race condition whereby the file could be moved/removed in between
75 : // the two calls to GetFileInformation.
76 :
77 E : base::win::ScopedHandle handle1;
78 E : BY_HANDLE_FILE_INFORMATION info1 = {};
79 E : FileInformationResult result1 = GetFileInformation(path1, &handle1, &info1);
80 E : if (result1 == kFailure)
81 i : return kFilePathCompareError;
82 :
83 E : base::win::ScopedHandle handle2;
84 E : BY_HANDLE_FILE_INFORMATION info2 = {};
85 E : FileInformationResult result2 = GetFileInformation(path2, &handle2, &info2);
86 E : if (result2 == kFailure)
87 i : return kFilePathCompareError;
88 :
89 : // If neither file exists we can't really compare them based on anything
90 : // other than the path itself.
91 E : if (result1 == kFileNotFound && result2 == kFileNotFound) {
92 E : base::FilePath abs1(MakeAbsoluteFilePath(path1));
93 E : base::FilePath abs2(MakeAbsoluteFilePath(path2));
94 :
95 E : if (abs1.empty() || abs2.empty())
96 i : return kUnableToCompareFilePaths;
97 :
98 E : if (abs1 == abs2)
99 E : return kEquivalentFilePaths;
100 :
101 E : return kUnableToCompareFilePaths;
102 : }
103 :
104 : // If only one of them exists, then they can't possibly be the same file.
105 E : if (result1 == kFileNotFound || result2 == kFileNotFound)
106 E : return kDistinctFilePaths;
107 :
108 : // If they both exist we compare the details of where they live on disk.
109 : bool identical = info1.dwVolumeSerialNumber == info2.dwVolumeSerialNumber &&
110 E : info1.nFileIndexLow == info2.nFileIndexLow &&
111 : info1.nFileIndexHigh == info2.nFileIndexHigh;
112 :
113 E : return identical ? kEquivalentFilePaths : kDistinctFilePaths;
114 E : }
115 :
116 : namespace {
117 :
118 : // A struct for storing magic signatures for a given file type.
119 : struct FileMagic {
120 : FileType file_type;
121 : size_t magic_size;
122 : const uint8_t* magic;
123 : };
124 :
125 : // Macros for defining magic signatures for files.
126 : #define DEFINE_BINARY_MAGIC(type, bin) \
127 : { type, arraysize(bin), bin }
128 : #define DEFINE_STRING_MAGIC(type, str) \
129 : { type, arraysize(str) - 1, str } // Ignores the trailing NUL.
130 :
131 : // Magic signatures used by various file types.
132 : // Archive (.lib) files begin with a simple string.
133 : const uint8_t kArchiveFileMagic[] = "!<arch>";
134 : // Machine independent COFF files begin with 0x00 0x00, and then two bytes
135 : // that aren't 0xFF 0xFF. Anonymous object files (unsupported) are followed by
136 : // 0xFF 0xFF, and then two bytes containing type information.
137 : // - Export definitions are type 0x00 0x00.
138 : // - Object files containing LTCG intermediate code appear to be type 0x01 0x00.
139 : const uint8_t kCoffFileMagic0[] = {0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00};
140 : const uint8_t kCoffFileMagic1[] = {0x00, 0x00, 0xFF, 0xFF};
141 : const uint8_t kCoffFileMagic2[] = {0x00, 0x00};
142 : // X86 COFF files begin with 0x4c 0x01.
143 : const uint8_t kCoffFileMagic3[] = {0x4C, 0x01};
144 : // X86-64 COFF files begin with 0x64 0x86.
145 : const uint8_t kCoffFileMagic4[] = {0x64, 0x86};
146 : const uint8_t kPdbFileMagic[] = "Microsoft C/C++ MSF ";
147 : // PE files all contain DOS stubs, and the first two bytes of 16-bit DOS
148 : // exectuables are always "MZ".
149 : const uint8_t kPeFileMagic[] = "MZ";
150 : // This is a dummy resource file entry that also reads as an invalid 16-bit
151 : // resource. This allows MS tools to distinguish between 16-bit and 32-bit
152 : // resources. We only care about 32-bit resources, and this is sufficient for
153 : // us to distinguish between a resource file and a COFF object file.
154 : const uint8_t kResourceFileMagic[] = {
155 : 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
156 : 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
157 : 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
158 :
159 : // Simple magic signatures for files.
160 : const FileMagic kFileMagics[] = {
161 : DEFINE_BINARY_MAGIC(kResourceFileType, kResourceFileMagic),
162 : DEFINE_STRING_MAGIC(kPdbFileType, kPdbFileMagic),
163 : DEFINE_STRING_MAGIC(kArchiveFileType, kArchiveFileMagic),
164 : // This effectively emulates a more complicated if-then-else expression,
165 : // by mapping some COFF files to an unknown file type.
166 : DEFINE_BINARY_MAGIC(kImportDefinitionFileType, kCoffFileMagic0),
167 : DEFINE_BINARY_MAGIC(kAnonymousCoffFileType, kCoffFileMagic1),
168 : DEFINE_BINARY_MAGIC(kCoffFileType, kCoffFileMagic2),
169 : DEFINE_BINARY_MAGIC(kCoffFileType, kCoffFileMagic3),
170 : DEFINE_BINARY_MAGIC(kCoff64FileType, kCoffFileMagic4),
171 : DEFINE_STRING_MAGIC(kPeFileType, kPeFileMagic),
172 : };
173 :
174 : #undef DEFINE_BINARY_MAGIC
175 : #undef DEFINE_STRING_MAGIC
176 :
177 E : bool GuessFileTypeImpl(size_t length, InStream* stream, FileType* file_type) {
178 E : DCHECK_NE(reinterpret_cast<InStream*>(NULL), stream);
179 E : DCHECK_NE(reinterpret_cast<FileType*>(NULL), file_type);
180 :
181 E : *file_type = kUnknownFileType;
182 :
183 : // No point trying to identify an empty file.
184 E : if (length == 0)
185 i : return true;
186 :
187 : // Check all of the magic signatures.
188 E : std::vector<uint8_t> magic;
189 E : for (size_t i = 0; i < arraysize(kFileMagics); ++i) {
190 E : const FileMagic& file_magic = kFileMagics[i];
191 :
192 : // Try to read sufficient data for the current signature, bounded by the
193 : // available data in the file.
194 E : if (magic.size() < length && magic.size() < file_magic.magic_size) {
195 E : size_t old_size = magic.size();
196 E : size_t new_size = std::min(length, file_magic.magic_size);
197 E : DCHECK_LT(old_size, new_size);
198 E : magic.resize(new_size);
199 E : size_t missing = new_size - old_size;
200 E : if (!stream->Read(missing, magic.data() + old_size)) {
201 i : LOG(ERROR) << "Failed to read magic bytes from stream.";
202 i : return false;
203 : }
204 : }
205 :
206 : // There is insufficient data to compare with this signature.
207 E : if (magic.size() < file_magic.magic_size)
208 E : continue;
209 :
210 : // If the signature matches then we can return the recognized type.
211 E : if (::memcmp(magic.data(), file_magic.magic, file_magic.magic_size) == 0) {
212 E : *file_type = file_magic.file_type;
213 E : return true;
214 : }
215 E : }
216 :
217 E : DCHECK_EQ(kUnknownFileType, *file_type);
218 E : return true;
219 E : }
220 :
221 : } // namespace
222 :
223 E : bool GuessFileType(const base::FilePath& path, FileType* file_type) {
224 E : DCHECK(!path.empty());
225 E : DCHECK(file_type != NULL);
226 :
227 E : *file_type = kUnknownFileType;
228 :
229 E : if (!base::PathExists(path)) {
230 E : LOG(ERROR) << "File does not exist: " << path.value();
231 E : return false;
232 : }
233 :
234 E : size_t file_size = 0;
235 : {
236 E : int64_t temp_file_size = 0;
237 E : if (!base::GetFileSize(path, &temp_file_size)) {
238 i : LOG(ERROR) << "Unable to get file size: " << path.value();
239 i : return false;
240 : }
241 E : DCHECK_LE(0, temp_file_size);
242 E : file_size = static_cast<size_t>(temp_file_size);
243 : }
244 :
245 : // No point trying to identify an empty file.
246 E : if (file_size == 0)
247 i : return true;
248 :
249 E : base::ScopedFILE file(base::OpenFile(path, "rb"));
250 E : if (file.get() == NULL) {
251 E : LOG(ERROR) << "Unable to open file for reading: " << path.value();
252 E : return false;
253 : }
254 :
255 E : FileInStream stream(file.get());
256 E : if (!GuessFileTypeImpl(file_size, &stream, file_type))
257 i : return false;
258 :
259 E : return true;
260 E : }
261 :
262 : bool GuessFileType(const uint8_t* contents,
263 : size_t length,
264 E : FileType* file_type) {
265 E : DCHECK_NE(reinterpret_cast<uint8_t*>(NULL), contents);
266 E : DCHECK_NE(reinterpret_cast<FileType*>(NULL), file_type);
267 :
268 E : ByteInStream<const uint8_t*> stream(contents, contents + length);
269 E : if (!GuessFileTypeImpl(length, &stream, file_type))
270 i : return false;
271 :
272 E : return true;
273 E : }
274 :
275 : } // namespace core
|