1 : // Copyright 2014 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/ar/ar_reader.h"
16 :
17 : #include <set>
18 :
19 : #include "base/logging.h"
20 : #include "base/strings/string_number_conversions.h"
21 : #include "syzygy/common/align.h"
22 :
23 : namespace ar {
24 :
25 : namespace {
26 :
27 : typedef ArReader::FileOffsetVector FileOffsetVector;
28 :
// Calculates the length of the space-padded string stored in the fixed-size
// array |s|: the array length with every trailing ' ' character removed.
// Only trailing spaces are stripped; a space that is followed by a non-space
// character is counted as part of the string. Returns 0 if the array holds
// nothing but spaces.
//
// No null check is made on |s|: it is a reference to an array, so the address
// of its first element can never be NULL.
template<size_t kArrayLength>
size_t ArStringLength(const char (&s)[kArrayLength]) {
  size_t length = kArrayLength;
  while (length > 0 && s[length - 1] == ' ')
    --length;
  return length;
}
38 :
39 : // Parses an unsigned integer from a space-terminated string. The output -1 is
40 : // reserved to indicate an empty string, ie: no value.
41 : template<size_t kArrayLength, typename OutputType>
42 E : bool ParseArNumber(const char (&s)[kArrayLength], OutputType* output) {
43 E : DCHECK_NE(reinterpret_cast<const char*>(NULL), &s[0]);
44 E : DCHECK_NE(reinterpret_cast<OutputType*>(NULL), output);
45 :
46 : // Ensure the output size is sufficiently big for the string we're parsing.
47 : // bits log(2) / log(10) >= digits
48 : // digits <= 0.3 * bits
49 : // 10 * digits <= 3 * bits
50 : static_assert(10 * kArrayLength <= 3 * 8 * sizeof(OutputType),
51 : "Output type is to small for input string.");
52 :
53 E : size_t l = ArStringLength(s);
54 E : if (l == 0) {
55 i : *output = ~0UL;
56 i : return true;
57 : }
58 :
59 E : OutputType value = 0;
60 E : for (size_t i = 0; i < l; ++i) {
61 E : value *= 10;
62 E : if (s[i] < '0' || s[i] > '9') {
63 i : LOG(ERROR) << "Invalid number in archive file header.";
64 i : return false;
65 : }
66 E : value += s[i] - '0';
67 E : }
68 :
69 E : *output = value;
70 E : return true;
71 E : }
72 :
73 : bool ParseArFileHeader(const ArFileHeader& header,
74 E : ParsedArFileHeader* parsed_header) {
75 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), parsed_header);
76 :
77 E : size_t name_len = ArStringLength(header.name);
78 E : parsed_header->name = std::string(header.name, name_len);
79 :
80 : // The time is in seconds since epoch.
81 : uint64 timestamp;
82 E : if (!ParseArNumber(header.timestamp, ×tamp))
83 i : return false;
84 : parsed_header->timestamp = base::Time::FromDoubleT(
85 E : static_cast<double>(timestamp));
86 :
87 E : if (!ParseArNumber(header.mode, &parsed_header->mode))
88 i : return false;
89 :
90 E : if (!ParseArNumber(header.size, &parsed_header->size))
91 i : return false;
92 :
93 E : return true;
94 E : }
95 :
96 : template<typename Object>
97 E : bool ReadObject(FILE* file, Object* object) {
98 E : DCHECK_NE(reinterpret_cast<FILE*>(NULL), file);
99 E : DCHECK_NE(reinterpret_cast<Object*>(NULL), object);
100 E : if (::fread(object, sizeof(Object), 1, file) != 1) {
101 i : LOG(ERROR) << "Failed to read from archive.";
102 i : return false;
103 : }
104 E : return true;
105 E : }
106 :
107 : bool ParseSecondarySymbolTable(
108 : size_t file_size,
109 : const uint8* data,
110 : size_t length,
111 : SymbolIndexMap* symbols,
112 E : FileOffsetVector* file_offsets) {
113 E : DCHECK_NE(reinterpret_cast<const uint8*>(NULL), data);
114 E : DCHECK_NE(reinterpret_cast<SymbolIndexMap*>(NULL), symbols);
115 E : DCHECK_NE(reinterpret_cast<FileOffsetVector*>(NULL), file_offsets);
116 :
117 E : if (length < sizeof(uint32)) {
118 i : LOG(ERROR) << "Secondary symbol table contains no file count.";
119 i : return false;
120 : }
121 :
122 : // Validate the size of the secondary symbol table.
123 E : const uint32* offsets = reinterpret_cast<const uint32*>(data) + 1;
124 E : size_t file_count = offsets[-1];
125 E : if (length < (file_count + 2) * sizeof(uint32)) {
126 i : LOG(ERROR) << "Secondary symbol table file offsets are truncated.";
127 i : return false;
128 : }
129 :
130 E : size_t symbol_count = offsets[file_count];
131 :
132 : // Get pointers to the various parts of the symbol table.
133 : const uint16* indices = reinterpret_cast<const uint16*>(
134 E : offsets + file_count + 1);
135 E : const char* names = reinterpret_cast<const char*>(indices + symbol_count);
136 E : const char* names_end = reinterpret_cast<const char*>(data + length);
137 E : if (names > names_end) {
138 i : LOG(ERROR) << "Secondary symbol table indices are truncated.";
139 i : return false;
140 : }
141 :
142 : // Read and validate the file offsets. It is possible for this table to be
143 : // larger than necessary, and invalid or deleted files are represented with a
144 : // zero offset. We track these, and also build a map to a reduced set of file
145 : // indices.
146 : typedef std::map<size_t, size_t> FileIndexMap;
147 E : FileIndexMap file_index_map;
148 E : file_offsets->resize(0);
149 E : file_offsets->reserve(file_count);
150 E : for (size_t i = 0; i < file_count; ++i) {
151 : // Skip invalid/deleted files.
152 E : if (offsets[i] == 0)
153 E : continue;
154 :
155 E : if (offsets[i] >= file_size) {
156 i : LOG(ERROR) << "Invalid symbol offset encountered in archive.";
157 i : return false;
158 : }
159 :
160 : // File indices are 1-indexed in the archive, but we use 0-indexing.
161 E : size_t reduced_file_index = file_index_map.size();
162 E : file_index_map.insert(std::make_pair(i, reduced_file_index));
163 E : file_offsets->push_back(offsets[i]);
164 E : }
165 :
166 : // Read the file indices for each symbol.
167 E : std::set<std::string> symbol_names;
168 E : for (size_t i = 0; i < symbol_count; ++i) {
169 E : size_t name_len = ::strnlen(names, names_end - names);
170 E : if (name_len == 0) {
171 i : LOG(ERROR) << "Symbol " << i << " has an invalid name.";
172 i : return false;
173 : }
174 :
175 E : uint16 file_index = indices[i];
176 E : std::string name = std::string(names, name_len);
177 E : names += name_len + 1;
178 :
179 E : if (file_index == 0 || file_index > file_count) {
180 i : LOG(ERROR) << "Invalid file index " << file_index << " for symbol "
181 : << i << ": " << name;
182 i : return false;
183 : }
184 :
185 : // Use the raw file index to find the reduced file index, using
186 : // 0-indexing.
187 : FileIndexMap::const_iterator index_it = file_index_map.find(
188 E : file_index - 1);
189 E : if (index_it == file_index_map.end()) {
190 i : LOG(ERROR) << "Encountered a symbol referring to an invalid file index.";
191 i : return false;
192 : }
193 E : file_index = index_it->second;
194 :
195 : // Insert the symbol. We log a warning if there's a duplicate symbol, but
196 : // this is not strictly illegal.
197 E : if (!symbols->insert(std::make_pair(name, file_index)).second)
198 i : LOG(WARNING) << "Duplicate symbol encountered in archive.";
199 E : }
200 :
201 E : return true;
202 E : }
203 :
204 : } // namespace
205 :
206 : ArReader::ArReader()
207 E : : length_(0), offset_(0), index_(0), start_of_object_files_(0) {
208 E : }
209 :
210 E : bool ArReader::Init(const base::FilePath& ar_path) {
211 E : DCHECK(path_.empty());
212 :
213 E : path_ = ar_path;
214 E : file_.reset(base::OpenFile(path_, "rb"));
215 E : if (file_.get() == NULL) {
216 E : LOG(ERROR) << "Failed to open file for reading: " << path_.value();
217 E : return false;
218 : }
219 :
220 E : if (!base::GetFileSize(path_, reinterpret_cast<int64*>(&length_))) {
221 i : LOG(ERROR) << "Unable to get the archive file size.";
222 i : return false;
223 : }
224 :
225 : // Parse the global header.
226 E : ArGlobalHeader global_header = {};
227 E : if (!ReadObject(file_.get(), &global_header))
228 i : return false;
229 : if (::memcmp(global_header.magic,
230 : kArGlobalMagic,
231 E : sizeof(kArGlobalMagic)) != 0) {
232 i : LOG(ERROR) << "Invalid archive file global header.";
233 i : return false;
234 : }
235 E : offset_ += sizeof(global_header);
236 :
237 : // Read (and ignore) the primary symbol table. This needs to be present but
238 : // it contains data that is also to be found in the secondary symbol table,
239 : // with higher fidelity.
240 E : ParsedArFileHeader header;
241 E : if (!ReadNextFile(&header, NULL)) {
242 i : LOG(ERROR) << "Failed to read primary symbol table.";
243 i : return false;
244 : }
245 E : if (header.name != "/") {
246 i : LOG(ERROR) << "Did not find primary symbol table in archive.";
247 i : return false;
248 : }
249 :
250 : // Read and parse the secondary symbol table.
251 E : DataBuffer data;
252 E : if (!ReadNextFile(&header, &data)) {
253 i : LOG(ERROR) << "Failed to read secondary symbol table.";
254 i : return false;
255 : }
256 E : if (header.name != "/") {
257 i : LOG(ERROR) << "Did not find secondary symbol table in archive.";
258 i : return false;
259 : }
260 : if (!ParseSecondarySymbolTable(length_, data.data(), data.size(),
261 E : &symbols_, &offsets_)) {
262 i : LOG(ERROR) << "Failed to parse secondary symbol table.";
263 i : return false;
264 : }
265 :
266 : // Remember where we are. The object files may start at this location, or we
267 : // may encounter an optional filename table.
268 E : start_of_object_files_ = offset_;
269 :
270 E : if (!ReadNextFile(&header, &data)) {
271 i : LOG(ERROR) << "Failed to read filename table or first archive member.";
272 i : return false;
273 : }
274 E : if (header.name == "//") {
275 E : std::swap(data, filenames_);
276 E : start_of_object_files_ = offset_;
277 : }
278 :
279 : // Create an inverse of the offsets_ vector.
280 E : for (size_t i = 0; i < offsets_.size(); ++i)
281 E : CHECK(offsets_inverse_.insert(std::make_pair(offsets_[i], i)).second);
282 :
283 : // Make sure we're at the beginning of the first file in the archive.
284 E : if (!SeekIndex(0))
285 i : return false;
286 :
287 E : return true;
288 E : }
289 :
290 E : bool ArReader::BuildFileIndex() {
291 E : DCHECK(files_.empty());
292 E : DCHECK(files_inverse_.empty());
293 :
294 E : size_t old_index = index_;
295 :
296 E : if (!SeekIndex(0))
297 i : return false;
298 :
299 E : files_.reserve(offsets_.size());
300 :
301 E : while (HasNext()) {
302 E : size_t index = index_;
303 :
304 : // Read the file and get its translated name.
305 E : ParsedArFileHeader header;
306 E : if (!ExtractNext(&header, NULL))
307 i : return false;
308 :
309 E : files_.push_back(header.name);
310 E : CHECK(files_inverse_.insert(std::make_pair(header.name, index)).second);
311 E : }
312 :
313 E : if (!SeekIndex(old_index))
314 i : return false;
315 :
316 E : return true;
317 E : }
318 :
319 E : bool ArReader::SeekIndex(size_t index) {
320 E : if (index >= offsets_.size())
321 i : return false;
322 :
323 E : size_t offset = offsets_[index];
324 E : if (offset_ == offset)
325 E : return true;
326 :
327 E : if (::fseek(file_.get(), offset, SEEK_SET) != 0) {
328 i : LOG(ERROR) << "Failed to seek to archive file " << index
329 : << " at offset " << offset << ".";
330 i : return false;
331 : }
332 E : offset_ = offset;
333 E : index_ = index;
334 :
335 E : return true;
336 E : }
337 :
338 E : bool ArReader::HasNext() const {
339 E : if (index_ < offsets_.size())
340 E : return true;
341 E : return false;
342 E : }
343 :
344 : bool ArReader::ExtractNext(ParsedArFileHeader* header,
345 E : DataBuffer* data) {
346 E : DCHECK_LT(index_, offsets_.size());
347 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), header);
348 :
349 : // If all has gone well then the cursor should have been left at the
350 : // beginning of a valid archive file, or the end of the file.
351 E : if (offset_ < length_) {
352 E : OffsetIndexMap::const_iterator index_it = offsets_inverse_.find(offset_);
353 E : if (index_it == offsets_inverse_.end()) {
354 i : LOG(ERROR) << "Encoded file offsets do not match archive contents.";
355 i : return false;
356 : }
357 : }
358 :
359 : // Seek to the beginning of the next archive file if we're not already there.
360 E : if (offset_ != offsets_[index_]) {
361 E : if (::fseek(file_.get(), offsets_[index_], SEEK_SET) != 0) {
362 i : LOG(ERROR) << "Failed to seek to file " << index_ << ".";
363 i : return false;
364 : }
365 E : offset_ = offsets_[index_];
366 : }
367 E : DCHECK_LT(offset_, length_);
368 :
369 E : if (!ReadNextFile(header, data))
370 i : return false;
371 E : ++index_;
372 :
373 : // Store the actual filename in the header.
374 E : std::string filename;
375 E : if (!TranslateFilename(header->name, &filename))
376 i : return false;
377 E : header->name = filename;
378 :
379 E : return true;
380 E : }
381 :
382 : bool ArReader::Extract(size_t index,
383 : ParsedArFileHeader* header,
384 E : DataBuffer* data) {
385 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), header);
386 :
387 E : if (index >= offsets_.size())
388 i : return false;
389 :
390 : // Seek to the file in question.
391 E : if (::fseek(file_.get(), offsets_[index], SEEK_SET) != 0) {
392 i : LOG(ERROR) << "Failed to seek to file " << index << ".";
393 i : return false;
394 : }
395 E : offset_ = offsets_[index];
396 E : index_ = index;
397 :
398 E : if (!ExtractNext(header, data))
399 i : return false;
400 :
401 E : return true;
402 E : }
403 :
404 : bool ArReader::ReadNextFile(ParsedArFileHeader* header,
405 E : DataBuffer* data) {
406 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), header);
407 :
408 : // Read and parse the file header.
409 E : ArFileHeader raw_header = {};
410 E : if (!ReadObject(file_.get(), &raw_header))
411 i : return false;
412 E : if (!ParseArFileHeader(raw_header, header))
413 i : return false;
414 E : offset_ += sizeof(raw_header);
415 :
416 : uint64 aligned_size = common::AlignUp64(header->size,
417 E : kArFileAlignment);
418 E : uint64 seek_size = aligned_size;
419 :
420 : // Read the actual file contents if necessary.
421 E : if (data != NULL) {
422 E : seek_size = aligned_size - header->size;
423 E : data->resize(header->size);
424 : if (::fread(data->data(), 1, header->size, file_.get()) !=
425 E : header->size) {
426 i : LOG(ERROR) << "Failed to read file \"" << header->name
427 : << "\" at offset " << offset_ << " of archive \""
428 : << path_.value() << "\".";
429 i : return false;
430 : }
431 E : offset_ += header->size;
432 : }
433 :
434 : // Seek to the beginning of the next file.
435 E : if (seek_size > 0 && ::fseek(file_.get(), seek_size, SEEK_CUR) != 0) {
436 i : LOG(ERROR) << "Failed to seek to next file at offset " << offset_
437 : << " of archive \"" << path_.value() << "\".";
438 i : return false;
439 : }
440 E : offset_ += seek_size;
441 :
442 E : return true;
443 E : }
444 :
445 : bool ArReader::TranslateFilename(const std::string& internal_name,
446 E : std::string* full_name) {
447 E : DCHECK_NE(reinterpret_cast<std::string*>(NULL), full_name);
448 :
449 E : if (internal_name.empty()) {
450 i : LOG(ERROR) << "Invalid internal archive filename: " << internal_name;
451 i : return false;
452 : }
453 :
454 : // If there is no leading slash then the name is directly encoded in the
455 : // header.
456 E : if (internal_name[0] != '/') {
457 E : if (internal_name.back() != '/') {
458 i : LOG(ERROR) << "Invalid filename: " << internal_name;
459 i : return false;
460 : }
461 : *full_name = std::string(internal_name.begin(),
462 E : internal_name.end() - 1);
463 E : return true;
464 : }
465 :
466 E : uint32 filename_offset = 0;
467 E : if (!base::StringToUint(internal_name.c_str() + 1, &filename_offset)) {
468 i : LOG(ERROR) << "Unable to parse filename offset: " << internal_name;
469 i : return false;
470 : }
471 :
472 E : if (filename_offset >= filenames_.size()) {
473 i : LOG(ERROR) << "Invalid filename offset: " << filename_offset;
474 i : return false;
475 : }
476 :
477 E : const char* data = reinterpret_cast<char*>(filenames_.data());
478 : size_t filename_length = ::strnlen(data + filename_offset,
479 E : filenames_.size() - filename_offset);
480 E : *full_name = std::string(data + filename_offset, filename_length);
481 :
482 E : return true;
483 E : }
484 :
485 : } // namespace ar
|