1 : // Copyright 2014 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/ar/ar_reader.h"
16 :
17 : #include <set>
18 :
19 : #include "base/logging.h"
20 : #include "base/strings/string_number_conversions.h"
21 : #include "syzygy/common/align.h"
22 :
23 : namespace ar {
24 :
25 : namespace {
26 :
27 : typedef ArReader::FileOffsetVector FileOffsetVector;
28 :
// Returns the length of a fixed-width, space-padded archive header field, that
// is, the number of characters left once all trailing spaces are stripped.
// Only trailing padding is removed: spaces that are followed by a non-space
// character are counted. A field made up entirely of spaces has length zero.
template<size_t kArrayLength>
size_t ArStringLength(const char (&s)[kArrayLength]) {
  // |s| is a reference to an array and therefore can never be NULL, so there
  // is nothing to validate here.
  size_t l = kArrayLength;
  while (l > 0 && s[l - 1] == ' ')
    --l;
  return l;
}
38 :
39 : // Parses an unsigned integer from a space-terminated string. The output -1 is
40 : // reserved to indicate an empty string, ie: no value.
41 : template<size_t kArrayLength, typename OutputType>
42 E : bool ParseArNumber(const char (&s)[kArrayLength], OutputType* output) {
43 E : DCHECK_NE(reinterpret_cast<const char*>(NULL), &s[0]);
44 E : DCHECK_NE(reinterpret_cast<OutputType*>(NULL), output);
45 :
46 : // Ensure the output size is sufficiently big for the string we're parsing.
47 : // bits log(2) / log(10) >= digits
48 : // digits <= 0.3 * bits
49 : // 10 * digits <= 3 * bits
50 : COMPILE_ASSERT(10 * kArrayLength <= 3 * 8 * sizeof(OutputType),
51 : output_type_to_small_for_input_string);
52 :
53 E : size_t l = ArStringLength(s);
54 E : if (l == 0) {
55 i : *output = ~0;
56 i : return true;
57 : }
58 :
59 E : OutputType value = 0;
60 E : for (size_t i = 0; i < l; ++i) {
61 E : value *= 10;
62 E : if (s[i] < '0' || s[i] > '9') {
63 i : LOG(ERROR) << "Invalid number in archive file header.";
64 i : return false;
65 : }
66 E : value += s[i] - '0';
67 E : }
68 :
69 E : *output = value;
70 E : return true;
71 E : }
72 :
73 : bool ParseArFileHeader(const ArFileHeader& header,
74 E : ParsedArFileHeader* parsed_header) {
75 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), parsed_header);
76 :
77 E : size_t name_len = ArStringLength(header.name);
78 E : parsed_header->name = std::string(header.name, name_len);
79 :
80 : // The time is in seconds since epoch.
81 : uint64 timestamp;
82 E : if (!ParseArNumber(header.timestamp, ×tamp))
83 i : return false;
84 : parsed_header->timestamp = base::Time::FromDoubleT(
85 E : static_cast<double>(timestamp));
86 :
87 E : if (!ParseArNumber(header.mode, &parsed_header->mode))
88 i : return false;
89 :
90 E : if (!ParseArNumber(header.size, &parsed_header->size))
91 i : return false;
92 :
93 E : return true;
94 E : }
95 :
96 : template<typename Object>
97 E : bool ReadObject(FILE* file, Object* object) {
98 E : DCHECK_NE(reinterpret_cast<FILE*>(NULL), file);
99 E : DCHECK_NE(reinterpret_cast<Object*>(NULL), object);
100 E : if (::fread(object, sizeof(Object), 1, file) != 1) {
101 i : LOG(ERROR) << "Failed to read from archive.";
102 i : return false;
103 : }
104 E : return true;
105 E : }
106 :
107 : bool ParseSecondarySymbolTable(
108 : size_t file_size,
109 : const uint8* data,
110 : size_t length,
111 : SymbolIndexMap* symbols,
112 E : FileOffsetVector* file_offsets) {
113 E : DCHECK_NE(reinterpret_cast<const uint8*>(NULL), data);
114 E : DCHECK_NE(reinterpret_cast<SymbolIndexMap*>(NULL), symbols);
115 E : DCHECK_NE(reinterpret_cast<FileOffsetVector*>(NULL), file_offsets);
116 :
117 E : if (length < sizeof(uint32)) {
118 i : LOG(ERROR) << "Secondary symbol table contains no file count.";
119 i : return false;
120 : }
121 :
122 : // Validate the size of the secondary symbol table.
123 E : const uint32* offsets = reinterpret_cast<const uint32*>(data) + 1;
124 E : size_t file_count = offsets[-1];
125 E : if (length < (file_count + 2) * sizeof(uint32)) {
126 i : LOG(ERROR) << "Secondary symbol table file offsets are truncated.";
127 i : return false;
128 : }
129 :
130 E : size_t symbol_count = offsets[file_count];
131 :
132 : // Get pointers to the various parts of the symbol table.
133 : const uint16* indices = reinterpret_cast<const uint16*>(
134 E : offsets + file_count + 1);
135 E : const uint16* indices_end = indices + symbol_count;
136 E : const char* names = reinterpret_cast<const char*>(indices + symbol_count);
137 E : const char* names_end = reinterpret_cast<const char*>(data + length);
138 E : if (names > names_end) {
139 i : LOG(ERROR) << "Secondary symbol table indices are truncated.";
140 i : return false;
141 : }
142 :
143 : // Read and validate the file offsets. It is possible for this table to be
144 : // larger than necessary, and invalid or deleted files are represented with a
145 : // zero offset. We track these, and also build a map to a reduced set of file
146 : // indices.
147 : typedef std::map<size_t, size_t> FileIndexMap;
148 E : FileIndexMap file_index_map;
149 E : file_offsets->resize(0);
150 E : file_offsets->reserve(file_count);
151 E : for (size_t i = 0; i < file_count; ++i) {
152 : // Skip invalid/deleted files.
153 E : if (offsets[i] == 0)
154 E : continue;
155 :
156 E : if (offsets[i] >= file_size) {
157 i : LOG(ERROR) << "Invalid symbol offset encountered in archive.";
158 i : return false;
159 : }
160 :
161 : // File indices are 1-indexed in the archive, but we use 0-indexing.
162 E : size_t reduced_file_index = file_index_map.size();
163 E : file_index_map.insert(std::make_pair(i, reduced_file_index));
164 E : file_offsets->push_back(offsets[i]);
165 E : }
166 :
167 : // Read the file indices for each symbol.
168 E : std::set<std::string> symbol_names;
169 E : for (size_t i = 0; i < symbol_count; ++i) {
170 E : size_t name_len = ::strnlen(names, names_end - names);
171 E : if (name_len == 0) {
172 i : LOG(ERROR) << "Symbol " << i << " has an invalid name.";
173 i : return false;
174 : }
175 :
176 E : uint16 file_index = indices[i];
177 E : std::string name = std::string(names, name_len);
178 E : names += name_len + 1;
179 :
180 E : if (file_index == 0 || file_index > file_count) {
181 i : LOG(ERROR) << "Invalid file index " << file_index << " for symbol "
182 : << i << ": " << name;
183 i : return false;
184 : }
185 :
186 : // Use the raw file index to find the reduced file index, using
187 : // 0-indexing.
188 : FileIndexMap::const_iterator index_it = file_index_map.find(
189 E : file_index - 1);
190 E : if (index_it == file_index_map.end()) {
191 i : LOG(ERROR) << "Encountered a symbol referring to an invalid file index.";
192 i : return false;
193 : }
194 E : file_index = index_it->second;
195 :
196 : // Insert the symbol. We log a warning if there's a duplicate symbol, but
197 : // this is not strictly illegal.
198 E : if (!symbols->insert(std::make_pair(name, file_index)).second)
199 i : LOG(WARNING) << "Duplicate symbol encountered in archive.";
200 E : }
201 :
202 E : return true;
203 E : }
204 :
205 : } // namespace
206 :
207 : ArReader::ArReader()
208 E : : length_(0), offset_(0), index_(0), start_of_object_files_(0) {
209 E : }
210 :
211 E : bool ArReader::Init(const base::FilePath& ar_path) {
212 E : DCHECK(path_.empty());
213 :
214 E : path_ = ar_path;
215 E : file_.reset(base::OpenFile(path_, "rb"));
216 E : if (file_.get() == NULL) {
217 E : LOG(ERROR) << "Failed to open file for reading: " << path_.value();
218 E : return false;
219 : }
220 :
221 E : if (!base::GetFileSize(path_, reinterpret_cast<int64*>(&length_))) {
222 i : LOG(ERROR) << "Unable to get the archive file size.";
223 i : return false;
224 : }
225 :
226 : // Parse the global header.
227 E : ArGlobalHeader global_header = {};
228 E : if (!ReadObject(file_.get(), &global_header))
229 i : return false;
230 : if (::memcmp(global_header.magic,
231 : kArGlobalMagic,
232 E : sizeof(kArGlobalMagic)) != 0) {
233 i : LOG(ERROR) << "Invalid archive file global header.";
234 i : return false;
235 : }
236 E : offset_ += sizeof(global_header);
237 :
238 : // Read (and ignore) the primary symbol table. This needs to be present but
239 : // it contains data that is also to be found in the secondary symbol table,
240 : // with higher fidelity.
241 E : ParsedArFileHeader header;
242 E : if (!ReadNextFile(&header, NULL)) {
243 i : LOG(ERROR) << "Failed to read primary symbol table.";
244 i : return false;
245 : }
246 E : if (header.name != "/") {
247 i : LOG(ERROR) << "Did not find primary symbol table in archive.";
248 i : return false;
249 : }
250 :
251 : // Read and parse the secondary symbol table.
252 E : DataBuffer data;
253 E : if (!ReadNextFile(&header, &data)) {
254 i : LOG(ERROR) << "Failed to read secondary symbol table.";
255 i : return false;
256 : }
257 E : if (header.name != "/") {
258 i : LOG(ERROR) << "Did not find secondary symbol table in archive.";
259 i : return false;
260 : }
261 : if (!ParseSecondarySymbolTable(length_, data.data(), data.size(),
262 E : &symbols_, &offsets_)) {
263 i : LOG(ERROR) << "Failed to parse secondary symbol table.";
264 i : return false;
265 : }
266 :
267 : // Remember where we are. The object files may start at this location, or we
268 : // may encounter an optional filename table.
269 E : start_of_object_files_ = offset_;
270 :
271 E : if (!ReadNextFile(&header, &data)) {
272 i : LOG(ERROR) << "Failed to read filename table or first archive member.";
273 i : return false;
274 : }
275 E : if (header.name == "//") {
276 E : std::swap(data, filenames_);
277 E : start_of_object_files_ = offset_;
278 : }
279 :
280 : // Create an inverse of the offsets_ vector.
281 E : for (size_t i = 0; i < offsets_.size(); ++i)
282 E : CHECK(offsets_inverse_.insert(std::make_pair(offsets_[i], i)).second);
283 :
284 : // Make sure we're at the beginning of the first file in the archive.
285 E : if (!SeekIndex(0))
286 i : return false;
287 :
288 E : return true;
289 E : }
290 :
291 E : bool ArReader::BuildFileIndex() {
292 E : DCHECK(files_.empty());
293 E : DCHECK(files_inverse_.empty());
294 :
295 E : size_t old_index = index_;
296 :
297 E : if (!SeekIndex(0))
298 i : return false;
299 :
300 E : files_.reserve(offsets_.size());
301 :
302 E : while (HasNext()) {
303 E : size_t index = index_;
304 :
305 : // Read the file and get its translated name.
306 E : ParsedArFileHeader header;
307 E : if (!ExtractNext(&header, NULL))
308 i : return false;
309 :
310 E : files_.push_back(header.name);
311 E : CHECK(files_inverse_.insert(std::make_pair(header.name, index)).second);
312 E : }
313 :
314 E : if (!SeekIndex(old_index))
315 i : return false;
316 :
317 E : return true;
318 E : }
319 :
320 E : bool ArReader::SeekIndex(size_t index) {
321 E : if (index >= offsets_.size())
322 i : return false;
323 :
324 E : size_t offset = offsets_[index];
325 E : if (offset_ == offset)
326 E : return true;
327 :
328 E : if (::fseek(file_.get(), offset, SEEK_SET) != 0) {
329 i : LOG(ERROR) << "Failed to seek to archive file " << index
330 : << " at offset " << offset << ".";
331 i : return false;
332 : }
333 E : offset_ = offset;
334 E : index_ = index;
335 :
336 E : return true;
337 E : }
338 :
339 E : bool ArReader::HasNext() const {
340 E : if (index_ < offsets_.size())
341 E : return true;
342 E : return false;
343 E : }
344 :
345 : bool ArReader::ExtractNext(ParsedArFileHeader* header,
346 E : DataBuffer* data) {
347 E : DCHECK_LT(index_, offsets_.size());
348 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), header);
349 :
350 : // If all has gone well then the cursor should have been left at the
351 : // beginning of a valid archive file, or the end of the file.
352 E : if (offset_ < length_) {
353 E : OffsetIndexMap::const_iterator index_it = offsets_inverse_.find(offset_);
354 E : if (index_it == offsets_inverse_.end()) {
355 i : LOG(ERROR) << "Encoded file offsets do not match archive contents.";
356 i : return false;
357 : }
358 : }
359 :
360 : // Seek to the beginning of the next archive file if we're not already there.
361 E : if (offset_ != offsets_[index_]) {
362 E : if (::fseek(file_.get(), offsets_[index_], SEEK_SET) != 0) {
363 i : LOG(ERROR) << "Failed to seek to file " << index_ << ".";
364 i : return false;
365 : }
366 E : offset_ = offsets_[index_];
367 : }
368 E : DCHECK_LT(offset_, length_);
369 :
370 E : if (!ReadNextFile(header, data))
371 i : return false;
372 E : ++index_;
373 :
374 : // Store the actual filename in the header.
375 E : std::string filename;
376 E : if (!TranslateFilename(header->name, &filename))
377 i : return false;
378 E : header->name = filename;
379 :
380 E : return true;
381 E : }
382 :
383 : bool ArReader::Extract(size_t index,
384 : ParsedArFileHeader* header,
385 E : DataBuffer* data) {
386 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), header);
387 :
388 E : if (index >= offsets_.size())
389 i : return false;
390 :
391 : // Seek to the file in question.
392 E : if (::fseek(file_.get(), offsets_[index], SEEK_SET) != 0) {
393 i : LOG(ERROR) << "Failed to seek to file " << index << ".";
394 i : return false;
395 : }
396 E : offset_ = offsets_[index];
397 E : index_ = index;
398 :
399 E : if (!ExtractNext(header, data))
400 i : return false;
401 :
402 E : return true;
403 E : }
404 :
405 : bool ArReader::ReadNextFile(ParsedArFileHeader* header,
406 E : DataBuffer* data) {
407 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), header);
408 :
409 : // Read and parse the file header.
410 E : ArFileHeader raw_header = {};
411 E : if (!ReadObject(file_.get(), &raw_header))
412 i : return false;
413 E : if (!ParseArFileHeader(raw_header, header))
414 i : return false;
415 E : offset_ += sizeof(raw_header);
416 :
417 : uint64 aligned_size = common::AlignUp64(header->size,
418 E : kArFileAlignment);
419 E : uint64 seek_size = aligned_size;
420 :
421 : // Read the actual file contents if necessary.
422 E : if (data != NULL) {
423 E : seek_size = aligned_size - header->size;
424 E : data->resize(header->size);
425 : if (::fread(data->data(), 1, header->size, file_.get()) !=
426 E : header->size) {
427 i : LOG(ERROR) << "Failed to read file \"" << header->name
428 : << "\" at offset " << offset_ << " of archive \""
429 : << path_.value() << "\".";
430 i : return false;
431 : }
432 E : offset_ += header->size;
433 : }
434 :
435 : // Seek to the beginning of the next file.
436 E : if (seek_size > 0 && ::fseek(file_.get(), seek_size, SEEK_CUR) != 0) {
437 i : LOG(ERROR) << "Failed to seek to next file at offset " << offset_
438 : << " of archive \"" << path_.value() << "\".";
439 i : return false;
440 : }
441 E : offset_ += seek_size;
442 :
443 E : return true;
444 E : }
445 :
446 : bool ArReader::TranslateFilename(const std::string& internal_name,
447 E : std::string* full_name) {
448 E : DCHECK_NE(reinterpret_cast<std::string*>(NULL), full_name);
449 :
450 E : if (internal_name.empty()) {
451 i : LOG(ERROR) << "Invalid internal archive filename: " << internal_name;
452 i : return false;
453 : }
454 :
455 : // If there is no leading slash then the name is directly encoded in the
456 : // header.
457 E : if (internal_name[0] != '/') {
458 E : if (internal_name.back() != '/') {
459 i : LOG(ERROR) << "Invalid filename: " << internal_name;
460 i : return false;
461 : }
462 : *full_name = std::string(internal_name.begin(),
463 E : internal_name.end() - 1);
464 E : return true;
465 : }
466 :
467 E : uint32 filename_offset = 0;
468 E : if (!base::StringToUint(internal_name.c_str() + 1, &filename_offset)) {
469 i : LOG(ERROR) << "Unable to parse filename offset: " << internal_name;
470 i : return false;
471 : }
472 :
473 E : if (filename_offset >= filenames_.size()) {
474 i : LOG(ERROR) << "Invalid filename offset: " << filename_offset;
475 i : return false;
476 : }
477 :
478 E : const char* data = reinterpret_cast<char*>(filenames_.data());
479 : size_t filename_length = ::strnlen(data + filename_offset,
480 E : filenames_.size() - filename_offset);
481 E : *full_name = std::string(data + filename_offset, filename_length);
482 :
483 E : return true;
484 E : }
485 :
486 : } // namespace ar
|