1 : // Copyright 2011 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/core/file_util.h"
16 :
17 : #include "base/files/file_util.h"
18 : #include "base/win/scoped_handle.h"
19 : #include "syzygy/common/com_utils.h"
20 : #include "syzygy/core/serialization.h"
21 :
22 : namespace core {
23 :
24 : namespace {
25 :
// The possible outcomes of a call to GetFileInformation.
enum FileInformationResult {
  // The file, or some component of the path leading to it, does not exist.
  kFileNotFound,
  // The file was opened and its information was retrieved.
  kSuccess,
  // Any other error; the details have already been logged.
  kFailure
};
31 :
32 : // Gets a handle to a file, and the file information for it. Leaves the handle
33 : // open.
34 : FileInformationResult GetFileInformation(
35 : const base::FilePath& path,
36 : base::win::ScopedHandle* handle,
37 E : BY_HANDLE_FILE_INFORMATION* file_info) {
38 : // Open the file in the least restrictive possible way.
39 : handle->Set(
40 : ::CreateFile(path.value().c_str(),
41 : SYNCHRONIZE,
42 : FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
43 : NULL,
44 : OPEN_EXISTING,
45 : FILE_ATTRIBUTE_NORMAL,
46 E : NULL));
47 E : if (!handle->IsValid()) {
48 : // The file not being found is a special case.
49 E : DWORD error = ::GetLastError();
50 E : if (error == ERROR_FILE_NOT_FOUND || error == ERROR_PATH_NOT_FOUND)
51 E : return kFileNotFound;
52 :
53 i : LOG(ERROR) << "Unable to open \"" << path.value() << "\": "
54 : << common::LogWe(error);
55 i : return kFailure;
56 : }
57 :
58 E : if (!::GetFileInformationByHandle(handle->Get(), file_info)) {
59 i : DWORD error = ::GetLastError();
60 i : LOG(ERROR) << "GetFileInformationByHandle failed for \"" << path.value()
61 : << "\": " << common::LogWe(error);
62 i : return kFailure;
63 : }
64 :
65 E : return kSuccess;
66 E : }
67 :
68 : } // namespace
69 :
70 : FilePathCompareResult CompareFilePaths(const base::FilePath& path1,
71 E : const base::FilePath& path2) {
72 : // Now we try opening both files for reading to see if they point to the same
73 : // underlying volume and file index. We open both files simultaneously to
74 : // avoid a race condition whereby the file could be moved/removed in between
75 : // the two calls to GetFileInformation.
76 :
77 E : base::win::ScopedHandle handle1;
78 E : BY_HANDLE_FILE_INFORMATION info1 = {};
79 E : FileInformationResult result1 = GetFileInformation(path1, &handle1, &info1);
80 E : if (result1 == kFailure)
81 i : return kFilePathCompareError;
82 :
83 E : base::win::ScopedHandle handle2;
84 E : BY_HANDLE_FILE_INFORMATION info2 = {};
85 E : FileInformationResult result2 = GetFileInformation(path2, &handle2, &info2);
86 E : if (result2 == kFailure)
87 i : return kFilePathCompareError;
88 :
89 : // If neither file exists we can't really compare them based on anything
90 : // other than the path itself.
91 E : if (result1 == kFileNotFound && result2 == kFileNotFound) {
92 E : base::FilePath abs1(MakeAbsoluteFilePath(path1));
93 E : base::FilePath abs2(MakeAbsoluteFilePath(path2));
94 :
95 E : if (abs1.empty() || abs2.empty())
96 i : return kUnableToCompareFilePaths;
97 :
98 E : if (abs1 == abs2)
99 E : return kEquivalentFilePaths;
100 :
101 E : return kUnableToCompareFilePaths;
102 : }
103 :
104 : // If only one of them exists, then they can't possibly be the same file.
105 E : if (result1 == kFileNotFound || result2 == kFileNotFound)
106 E : return kDistinctFilePaths;
107 :
108 : // If they both exist we compare the details of where they live on disk.
109 : bool identical = info1.dwVolumeSerialNumber == info2.dwVolumeSerialNumber &&
110 : info1.nFileIndexLow == info2.nFileIndexLow &&
111 E : info1.nFileIndexHigh == info2.nFileIndexHigh;
112 :
113 E : return identical ? kEquivalentFilePaths : kDistinctFilePaths;
114 E : }
115 :
116 : namespace {
117 :
118 : // A struct for storing magic signatures for a given file type.
119 : struct FileMagic {
120 : FileType file_type;
121 : size_t magic_size;
122 : const uint8* magic;
123 : };
124 :
125 : // Macros for defining magic signatures for files.
126 : #define DEFINE_BINARY_MAGIC(type, bin) \
127 : { type, arraysize(bin), bin }
128 : #define DEFINE_STRING_MAGIC(type, str) \
129 : { type, arraysize(str) - 1, str } // Ignores the trailing NUL.
130 :
131 : // Magic signatures used by various file types.
132 : // Archive (.lib) files begin with a simple string.
133 : const uint8 kArchiveFileMagic[] = "!<arch>";
134 : // Machine independent COFF files begin with 0x00 0x00, and then two bytes
135 : // that aren't 0xFF 0xFF. Anonymous object files (unsupported) are followed by
136 : // 0xFF 0xFF, and then two bytes containing type information.
137 : // - Export definitions are type 0x00 0x00.
138 : // - Object files containing LTCG intermediate code appear to be type 0x01 0x00.
139 : const uint8 kCoffFileMagic0[] = { 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00 };
140 : const uint8 kCoffFileMagic1[] = { 0x00, 0x00, 0xFF, 0xFF };
141 : const uint8 kCoffFileMagic2[] = { 0x00, 0x00 };
142 : // X86 COFF files begin with 0x4c 0x01.
143 : const uint8 kCoffFileMagic3[] = { 0x4C, 0x01 };
144 : // X86-64 COFF files begin with 0x64 0x86.
145 : const uint8 kCoffFileMagic4[] = { 0x64, 0x86 };
146 : const uint8 kPdbFileMagic[] = "Microsoft C/C++ MSF ";
147 : // PE files all contain DOS stubs, and the first two bytes of 16-bit DOS
148 : // exectuables are always "MZ".
149 : const uint8 kPeFileMagic[] = "MZ";
150 : // This is a dummy resource file entry that also reads as an invalid 16-bit
151 : // resource. This allows MS tools to distinguish between 16-bit and 32-bit
152 : // resources. We only care about 32-bit resources, and this is sufficient for
153 : // us to distinguish between a resource file and a COFF object file.
154 : const uint8 kResourceFileMagic[] = {
155 : 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
156 : 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00,
157 : 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
158 : 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
159 :
160 : // Simple magic signatures for files.
161 : const FileMagic kFileMagics[] = {
162 : DEFINE_BINARY_MAGIC(kResourceFileType, kResourceFileMagic),
163 : DEFINE_STRING_MAGIC(kPdbFileType, kPdbFileMagic),
164 : DEFINE_STRING_MAGIC(kArchiveFileType, kArchiveFileMagic),
165 : // This effectively emulates a more complicated if-then-else expression,
166 : // by mapping some COFF files to an unknown file type.
167 : DEFINE_BINARY_MAGIC(kImportDefinitionFileType, kCoffFileMagic0),
168 : DEFINE_BINARY_MAGIC(kAnonymousCoffFileType, kCoffFileMagic1),
169 : DEFINE_BINARY_MAGIC(kCoffFileType, kCoffFileMagic2),
170 : DEFINE_BINARY_MAGIC(kCoffFileType, kCoffFileMagic3),
171 : DEFINE_BINARY_MAGIC(kCoff64FileType, kCoffFileMagic4),
172 : DEFINE_STRING_MAGIC(kPeFileType, kPeFileMagic),
173 : };
174 :
175 : #undef DEFINE_BINARY_MAGIC
176 : #undef DEFINE_STRING_MAGIC
177 :
178 E : bool GuessFileTypeImpl(size_t length, InStream* stream, FileType* file_type) {
179 E : DCHECK_NE(reinterpret_cast<InStream*>(NULL), stream);
180 E : DCHECK_NE(reinterpret_cast<FileType*>(NULL), file_type);
181 :
182 E : *file_type = kUnknownFileType;
183 :
184 : // No point trying to identify an empty file.
185 E : if (length == 0)
186 i : return true;
187 :
188 : // Check all of the magic signatures.
189 E : std::vector<uint8> magic;
190 E : for (size_t i = 0; i < arraysize(kFileMagics); ++i) {
191 E : const FileMagic& file_magic = kFileMagics[i];
192 :
193 : // Try to read sufficient data for the current signature, bounded by the
194 : // available data in the file.
195 E : if (magic.size() < length && magic.size() < file_magic.magic_size) {
196 E : size_t old_size = magic.size();
197 E : size_t new_size = std::min(length, file_magic.magic_size);
198 E : DCHECK_LT(old_size, new_size);
199 E : magic.resize(new_size);
200 E : size_t missing = new_size - old_size;
201 E : if (!stream->Read(missing, magic.data() + old_size)) {
202 i : LOG(ERROR) << "Failed to read magic bytes from stream.";
203 i : return false;
204 : }
205 : }
206 :
207 : // There is insufficient data to compare with this signature.
208 E : if (magic.size() < file_magic.magic_size)
209 E : continue;
210 :
211 : // If the signature matches then we can return the recognized type.
212 E : if (::memcmp(magic.data(), file_magic.magic, file_magic.magic_size) == 0) {
213 E : *file_type = file_magic.file_type;
214 E : return true;
215 : }
216 E : }
217 :
218 E : DCHECK_EQ(kUnknownFileType, *file_type);
219 E : return true;
220 E : }
221 :
222 : } // namespace
223 :
224 E : bool GuessFileType(const base::FilePath& path, FileType* file_type) {
225 E : DCHECK(!path.empty());
226 E : DCHECK(file_type != NULL);
227 :
228 E : *file_type = kUnknownFileType;
229 :
230 E : if (!base::PathExists(path)) {
231 E : LOG(ERROR) << "File does not exist: " << path.value();
232 E : return false;
233 : }
234 :
235 E : size_t file_size = 0;
236 : {
237 E : int64 temp_file_size = 0;
238 E : if (!base::GetFileSize(path, &temp_file_size)) {
239 i : LOG(ERROR) << "Unable to get file size: " << path.value();
240 i : return false;
241 : }
242 E : DCHECK_LE(0, temp_file_size);
243 E : file_size = static_cast<size_t>(temp_file_size);
244 : }
245 :
246 : // No point trying to identify an empty file.
247 E : if (file_size == 0)
248 i : return true;
249 :
250 E : base::ScopedFILE file(base::OpenFile(path, "rb"));
251 E : if (file.get() == NULL) {
252 E : LOG(ERROR) << "Unable to open file for reading: " << path.value();
253 E : return false;
254 : }
255 :
256 E : FileInStream stream(file.get());
257 E : if (!GuessFileTypeImpl(file_size, &stream, file_type))
258 i : return false;
259 :
260 E : return true;
261 E : }
262 :
263 E : bool GuessFileType(const uint8* contents, size_t length, FileType* file_type) {
264 E : DCHECK_NE(reinterpret_cast<uint8*>(NULL), contents);
265 E : DCHECK_NE(reinterpret_cast<FileType*>(NULL), file_type);
266 :
267 E : ByteInStream<const uint8*> stream(contents, contents + length);
268 E : if (!GuessFileTypeImpl(length, &stream, file_type))
269 i : return false;
270 :
271 E : return true;
272 E : }
273 :
274 : } // namespace core
|