1 : // Copyright 2014 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/ar/ar_reader.h"
16 :
17 : #include <set>
18 :
19 : #include "base/logging.h"
20 : #include "base/strings/string_number_conversions.h"
21 : #include "syzygy/common/align.h"
22 :
23 : namespace ar {
24 :
25 : namespace {
26 :
27 : typedef ArReader::FileOffsetVector FileOffsetVector;
28 :
// Calculates the length of a space-padded string stored in a fixed-size
// character array: the array length minus the trailing run of spaces. The
// array is not expected to be NUL-terminated. Returns 0 if the array consists
// entirely of spaces. Spaces that are followed by a non-space character are
// counted as part of the string.
template<size_t kArrayLength>
size_t ArStringLength(const char (&s)[kArrayLength]) {
  // |s| is a reference to an array and therefore can never be NULL, so no
  // pointer validation is needed here.
  size_t l = kArrayLength;
  while (l > 0 && s[l - 1] == ' ')
    --l;
  return l;
}
38 :
39 : // Parses an unsigned integer from a space-terminated string. The output -1 is
40 : // reserved to indicate an empty string, ie: no value.
41 : template<size_t kArrayLength, typename OutputType>
42 E : bool ParseArNumber(const char (&s)[kArrayLength], OutputType* output) {
43 E : DCHECK_NE(reinterpret_cast<const char*>(NULL), &s[0]);
44 E : DCHECK_NE(reinterpret_cast<OutputType*>(NULL), output);
45 :
46 : // Ensure the output size is sufficiently big for the string we're parsing.
47 : // bits log(2) / log(10) >= digits
48 : // digits <= 0.3 * bits
49 : // 10 * digits <= 3 * bits
50 : static_assert(10 * kArrayLength <= 3 * 8 * sizeof(OutputType),
51 : "Output type is to small for input string.");
52 :
53 E : size_t l = ArStringLength(s);
54 E : if (l == 0) {
55 i : *output = ~0UL;
56 i : return true;
57 : }
58 :
59 E : OutputType value = 0;
60 E : for (size_t i = 0; i < l; ++i) {
61 E : value *= 10;
62 E : if (s[i] < '0' || s[i] > '9') {
63 i : LOG(ERROR) << "Invalid number in archive file header.";
64 i : return false;
65 : }
66 E : value += s[i] - '0';
67 E : }
68 :
69 E : *output = value;
70 E : return true;
71 E : }
72 :
73 : bool ParseArFileHeader(const ArFileHeader& header,
74 E : ParsedArFileHeader* parsed_header) {
75 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), parsed_header);
76 :
77 E : size_t name_len = ArStringLength(header.name);
78 E : parsed_header->name = std::string(header.name, name_len);
79 :
80 : // The time is in seconds since epoch.
81 : uint64_t timestamp;
82 E : if (!ParseArNumber(header.timestamp, ×tamp))
83 i : return false;
84 E : parsed_header->timestamp = base::Time::FromDoubleT(
85 : static_cast<double>(timestamp));
86 :
87 E : if (!ParseArNumber(header.mode, &parsed_header->mode))
88 i : return false;
89 :
90 E : if (!ParseArNumber(header.size, &parsed_header->size))
91 i : return false;
92 :
93 E : return true;
94 E : }
95 :
96 : template<typename Object>
97 E : bool ReadObject(FILE* file, Object* object) {
98 E : DCHECK_NE(reinterpret_cast<FILE*>(NULL), file);
99 E : DCHECK_NE(reinterpret_cast<Object*>(NULL), object);
100 E : if (::fread(object, sizeof(Object), 1, file) != 1) {
101 i : LOG(ERROR) << "Failed to read from archive.";
102 i : return false;
103 : }
104 E : return true;
105 E : }
106 :
107 : bool ParseSecondarySymbolTable(size_t file_size,
108 : const uint8_t* data,
109 : size_t length,
110 : SymbolIndexMap* symbols,
111 E : FileOffsetVector* file_offsets) {
112 E : DCHECK_NE(reinterpret_cast<const uint8_t*>(NULL), data);
113 E : DCHECK_NE(reinterpret_cast<SymbolIndexMap*>(NULL), symbols);
114 E : DCHECK_NE(reinterpret_cast<FileOffsetVector*>(NULL), file_offsets);
115 :
116 E : if (length < sizeof(uint32_t)) {
117 i : LOG(ERROR) << "Secondary symbol table contains no file count.";
118 i : return false;
119 : }
120 :
121 : // Validate the size of the secondary symbol table.
122 E : const uint32_t* offsets = reinterpret_cast<const uint32_t*>(data) + 1;
123 E : size_t file_count = offsets[-1];
124 E : if (length < (file_count + 2) * sizeof(uint32_t)) {
125 i : LOG(ERROR) << "Secondary symbol table file offsets are truncated.";
126 i : return false;
127 : }
128 :
129 E : size_t symbol_count = offsets[file_count];
130 :
131 : // Get pointers to the various parts of the symbol table.
132 : const uint16_t* indices =
133 E : reinterpret_cast<const uint16_t*>(offsets + file_count + 1);
134 E : const char* names = reinterpret_cast<const char*>(indices + symbol_count);
135 E : const char* names_end = reinterpret_cast<const char*>(data + length);
136 E : if (names > names_end) {
137 i : LOG(ERROR) << "Secondary symbol table indices are truncated.";
138 i : return false;
139 : }
140 :
141 : // Read and validate the file offsets. It is possible for this table to be
142 : // larger than necessary, and invalid or deleted files are represented with a
143 : // zero offset. We track these, and also build a map to a reduced set of file
144 : // indices.
145 : typedef std::map<size_t, size_t> FileIndexMap;
146 E : FileIndexMap file_index_map;
147 E : file_offsets->resize(0);
148 E : file_offsets->reserve(file_count);
149 E : for (size_t i = 0; i < file_count; ++i) {
150 : // Skip invalid/deleted files.
151 E : if (offsets[i] == 0)
152 E : continue;
153 :
154 E : if (offsets[i] >= file_size) {
155 i : LOG(ERROR) << "Invalid symbol offset encountered in archive.";
156 i : return false;
157 : }
158 :
159 : // File indices are 1-indexed in the archive, but we use 0-indexing.
160 E : size_t reduced_file_index = file_index_map.size();
161 E : file_index_map.insert(std::make_pair(i, reduced_file_index));
162 E : file_offsets->push_back(offsets[i]);
163 E : }
164 :
165 : // Read the file indices for each symbol.
166 E : std::set<std::string> symbol_names;
167 E : for (size_t i = 0; i < symbol_count; ++i) {
168 E : size_t name_len = ::strnlen(names, names_end - names);
169 E : if (name_len == 0) {
170 i : LOG(ERROR) << "Symbol " << i << " has an invalid name.";
171 i : return false;
172 : }
173 :
174 E : uint16_t file_index = indices[i];
175 E : std::string name = std::string(names, name_len);
176 E : names += name_len + 1;
177 :
178 E : if (file_index == 0 || file_index > file_count) {
179 i : LOG(ERROR) << "Invalid file index " << file_index << " for symbol "
180 : << i << ": " << name;
181 i : return false;
182 : }
183 :
184 : // Use the raw file index to find the reduced file index, using
185 : // 0-indexing.
186 E : FileIndexMap::const_iterator index_it = file_index_map.find(
187 : file_index - 1);
188 E : if (index_it == file_index_map.end()) {
189 i : LOG(ERROR) << "Encountered a symbol referring to an invalid file index.";
190 i : return false;
191 : }
192 E : file_index = static_cast<uint16_t>(index_it->second);
193 :
194 : // Insert the symbol. We log a warning if there's a duplicate symbol, but
195 : // this is not strictly illegal.
196 E : if (!symbols->insert(std::make_pair(name, file_index)).second)
197 i : LOG(WARNING) << "Duplicate symbol encountered in archive.";
198 E : }
199 :
200 E : return true;
201 E : }
202 :
203 : } // namespace
204 :
205 : ArReader::ArReader()
206 E : : length_(0), offset_(0), index_(0), start_of_object_files_(0) {
207 E : }
208 :
209 E : bool ArReader::Init(const base::FilePath& ar_path) {
210 E : DCHECK(path_.empty());
211 :
212 E : path_ = ar_path;
213 E : file_.reset(base::OpenFile(path_, "rb"));
214 E : if (file_.get() == NULL) {
215 E : LOG(ERROR) << "Failed to open file for reading: " << path_.value();
216 E : return false;
217 : }
218 :
219 E : if (!base::GetFileSize(path_, reinterpret_cast<int64_t*>(&length_))) {
220 i : LOG(ERROR) << "Unable to get the archive file size.";
221 i : return false;
222 : }
223 :
224 : // Parse the global header.
225 E : ArGlobalHeader global_header = {};
226 E : if (!ReadObject(file_.get(), &global_header))
227 i : return false;
228 : if (::memcmp(global_header.magic,
229 : kArGlobalMagic,
230 E : sizeof(kArGlobalMagic)) != 0) {
231 i : LOG(ERROR) << "Invalid archive file global header.";
232 i : return false;
233 : }
234 E : offset_ += sizeof(global_header);
235 :
236 : // Read (and ignore) the primary symbol table. This needs to be present but
237 : // it contains data that is also to be found in the secondary symbol table,
238 : // with higher fidelity.
239 E : ParsedArFileHeader header;
240 E : if (!ReadNextFile(&header, NULL)) {
241 i : LOG(ERROR) << "Failed to read primary symbol table.";
242 i : return false;
243 : }
244 E : if (header.name != "/") {
245 i : LOG(ERROR) << "Did not find primary symbol table in archive.";
246 i : return false;
247 : }
248 :
249 : // Read and parse the secondary symbol table.
250 E : DataBuffer data;
251 E : if (!ReadNextFile(&header, &data)) {
252 i : LOG(ERROR) << "Failed to read secondary symbol table.";
253 i : return false;
254 : }
255 E : if (header.name != "/") {
256 i : LOG(ERROR) << "Did not find secondary symbol table in archive.";
257 i : return false;
258 : }
259 E : if (!ParseSecondarySymbolTable(length_, data.data(), data.size(),
260 : &symbols_, &offsets_)) {
261 i : LOG(ERROR) << "Failed to parse secondary symbol table.";
262 i : return false;
263 : }
264 :
265 : // Remember where we are. The object files may start at this location, or we
266 : // may encounter an optional filename table.
267 E : start_of_object_files_ = offset_;
268 :
269 E : if (!ReadNextFile(&header, &data)) {
270 i : LOG(ERROR) << "Failed to read filename table or first archive member.";
271 i : return false;
272 : }
273 E : if (header.name == "//") {
274 E : std::swap(data, filenames_);
275 E : start_of_object_files_ = offset_;
276 : }
277 :
278 : // Create an inverse of the offsets_ vector.
279 E : for (size_t i = 0; i < offsets_.size(); ++i)
280 E : CHECK(offsets_inverse_.insert(std::make_pair(offsets_[i], i)).second);
281 :
282 : // Make sure we're at the beginning of the first file in the archive.
283 E : if (!SeekIndex(0))
284 i : return false;
285 :
286 E : return true;
287 E : }
288 :
289 E : bool ArReader::BuildFileIndex() {
290 E : DCHECK(files_.empty());
291 E : DCHECK(files_inverse_.empty());
292 :
293 E : size_t old_index = index_;
294 :
295 E : if (!SeekIndex(0))
296 i : return false;
297 :
298 E : files_.reserve(offsets_.size());
299 :
300 E : while (HasNext()) {
301 E : size_t index = index_;
302 :
303 : // Read the file and get its translated name.
304 E : ParsedArFileHeader header;
305 E : if (!ExtractNext(&header, NULL))
306 i : return false;
307 :
308 E : files_.push_back(header.name);
309 E : CHECK(files_inverse_.insert(std::make_pair(header.name, index)).second);
310 E : }
311 :
312 E : if (!SeekIndex(old_index))
313 i : return false;
314 :
315 E : return true;
316 E : }
317 :
318 E : bool ArReader::SeekIndex(size_t index) {
319 E : if (index >= offsets_.size())
320 i : return false;
321 :
322 E : size_t offset = offsets_[index];
323 E : if (offset_ == offset)
324 E : return true;
325 :
326 E : if (::fseek(file_.get(), offset, SEEK_SET) != 0) {
327 i : LOG(ERROR) << "Failed to seek to archive file " << index
328 : << " at offset " << offset << ".";
329 i : return false;
330 : }
331 E : offset_ = offset;
332 E : index_ = index;
333 :
334 E : return true;
335 E : }
336 :
337 E : bool ArReader::HasNext() const {
338 E : if (index_ < offsets_.size())
339 E : return true;
340 E : return false;
341 E : }
342 :
343 : bool ArReader::ExtractNext(ParsedArFileHeader* header,
344 E : DataBuffer* data) {
345 E : DCHECK_LT(index_, offsets_.size());
346 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), header);
347 :
348 : // If all has gone well then the cursor should have been left at the
349 : // beginning of a valid archive file, or the end of the file.
350 E : if (offset_ < length_) {
351 E : OffsetIndexMap::const_iterator index_it = offsets_inverse_.find(offset_);
352 E : if (index_it == offsets_inverse_.end()) {
353 i : LOG(ERROR) << "Encoded file offsets do not match archive contents.";
354 i : return false;
355 : }
356 : }
357 :
358 : // Seek to the beginning of the next archive file if we're not already there.
359 E : if (offset_ != offsets_[index_]) {
360 E : if (::fseek(file_.get(), offsets_[index_], SEEK_SET) != 0) {
361 i : LOG(ERROR) << "Failed to seek to file " << index_ << ".";
362 i : return false;
363 : }
364 E : offset_ = offsets_[index_];
365 : }
366 E : DCHECK_LT(offset_, length_);
367 :
368 E : if (!ReadNextFile(header, data))
369 i : return false;
370 E : ++index_;
371 :
372 : // Store the actual filename in the header.
373 E : std::string filename;
374 E : if (!TranslateFilename(header->name, &filename))
375 i : return false;
376 E : header->name = filename;
377 :
378 E : return true;
379 E : }
380 :
381 : bool ArReader::Extract(size_t index,
382 : ParsedArFileHeader* header,
383 E : DataBuffer* data) {
384 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), header);
385 :
386 E : if (index >= offsets_.size())
387 i : return false;
388 :
389 : // Seek to the file in question.
390 E : if (::fseek(file_.get(), offsets_[index], SEEK_SET) != 0) {
391 i : LOG(ERROR) << "Failed to seek to file " << index << ".";
392 i : return false;
393 : }
394 E : offset_ = offsets_[index];
395 E : index_ = index;
396 :
397 E : if (!ExtractNext(header, data))
398 i : return false;
399 :
400 E : return true;
401 E : }
402 :
403 : bool ArReader::ReadNextFile(ParsedArFileHeader* header,
404 E : DataBuffer* data) {
405 E : DCHECK_NE(reinterpret_cast<ParsedArFileHeader*>(NULL), header);
406 :
407 : // Read and parse the file header.
408 E : ArFileHeader raw_header = {};
409 E : if (!ReadObject(file_.get(), &raw_header))
410 i : return false;
411 E : if (!ParseArFileHeader(raw_header, header))
412 i : return false;
413 E : offset_ += sizeof(raw_header);
414 :
415 E : uint64_t aligned_size = common::AlignUp64(header->size, kArFileAlignment);
416 E : uint64_t seek_size = aligned_size;
417 :
418 : // Read the actual file contents if necessary.
419 E : if (data != NULL) {
420 E : seek_size = aligned_size - header->size;
421 E : data->resize(header->size);
422 E : if (::fread(data->data(), 1, header->size, file_.get()) !=
423 : header->size) {
424 i : LOG(ERROR) << "Failed to read file \"" << header->name
425 : << "\" at offset " << offset_ << " of archive \""
426 : << path_.value() << "\".";
427 i : return false;
428 : }
429 E : offset_ += header->size;
430 : }
431 :
432 : // Seek to the beginning of the next file.
433 E : if (seek_size > 0 && ::fseek(file_.get(), seek_size, SEEK_CUR) != 0) {
434 i : LOG(ERROR) << "Failed to seek to next file at offset " << offset_
435 : << " of archive \"" << path_.value() << "\".";
436 i : return false;
437 : }
438 E : offset_ += seek_size;
439 :
440 E : return true;
441 E : }
442 :
443 : bool ArReader::TranslateFilename(const std::string& internal_name,
444 E : std::string* full_name) {
445 E : DCHECK_NE(reinterpret_cast<std::string*>(NULL), full_name);
446 :
447 E : if (internal_name.empty()) {
448 i : LOG(ERROR) << "Invalid internal archive filename: " << internal_name;
449 i : return false;
450 : }
451 :
452 : // If there is no leading slash then the name is directly encoded in the
453 : // header.
454 E : if (internal_name[0] != '/') {
455 E : if (internal_name.back() != '/') {
456 i : LOG(ERROR) << "Invalid filename: " << internal_name;
457 i : return false;
458 : }
459 E : *full_name = std::string(internal_name.begin(),
460 : internal_name.end() - 1);
461 E : return true;
462 : }
463 :
464 E : uint32_t filename_offset = 0;
465 E : if (!base::StringToUint(internal_name.c_str() + 1, &filename_offset)) {
466 i : LOG(ERROR) << "Unable to parse filename offset: " << internal_name;
467 i : return false;
468 : }
469 :
470 E : if (filename_offset >= filenames_.size()) {
471 i : LOG(ERROR) << "Invalid filename offset: " << filename_offset;
472 i : return false;
473 : }
474 :
475 E : const char* data = reinterpret_cast<char*>(filenames_.data());
476 E : size_t filename_length = ::strnlen(data + filename_offset,
477 : filenames_.size() - filename_offset);
478 E : *full_name = std::string(data + filename_offset, filename_length);
479 :
480 E : return true;
481 E : }
482 :
483 : } // namespace ar
|