1 : // Copyright 2014 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Declares a class for reading/extracting files from a lib file in the 'ar'
16 : // archive format.
17 : //
18 : // A MSVS library file uses the standard archive file format that is used
19 : // by most toolchains everywhere. More specifically it observes the same format
20 : // as the GNU variant, with some extensions. The format is well documented here:
21 : //
22 : // http://kishorekumar.net/pecoff_v8.1.htm
23 : //
24 : // The archive contains three special metadata files, occurring as the first
25 : // three files in the archive.
26 : //
27 : // "/" : This file contains a concatenation of all symbol information
28 : // across all object files in the library. This is divided into
29 : // 3 parts:
30 : // - a big-endian 32-bit integer encoding the number of symbols.
31 : // - big-endian 32-bit integers encoding the offset in the archive
32 : // of the file containing the symbol. This must be in increasing
33 : // order.
34 : // - a concatenation of null-terminated ASCII-encoded symbol
35 : // names. These are implicitly ordered due to the ordering of the
36 : // offsets.
37 : // This table only ends up pointing to object files that actually
38 : // contain symbols, so it can undercount the true number of files in
39 : // the archive. This is present for backwards compatibility with
40 : // older linkers (and the GCC format), but is not actively used by
41 : // MSVS.
42 : // "/" : If a second file with the name "/" is present this is a MSVS
43 : // custom table that encodes the number of files in the archive, and
44 : // their absolute locations.
45 : // - a little endian 32-bit integer indicating the number of object
46 : // files in the archive. This includes object files that do not
47 : // contain symbols.
48 : // - little-endian 32-bit integers encoding the offset in the archive
49 : // of each object file.
50 : // - a little-endian 32-bit integer encoding the number of symbols.
51 : // - a run of little-endian 16-bit integers indicating the file in
52 : // which the symbol is located (1 indexed).
53 : // - a concatenation of null-terminated ASCII-encoded symbol
54 : // names. These are in increasing lexical order.
55 : // "//": This file contains extended filenames of all object files in the
56 : // library. These are simply a concatenation of null-terminated
57 : // ASCII-encoded filenames. This has been observed to always be in
58 : // the same order as the files in the archive itself.
59 : // NOTE: This file does not always have the name '//', sometimes
60 : // appearing as a 3rd '/' table.
61 : //
62 : // All of the above mentioned special files must exist in the archive, and must
63 : // be the first 3 files.
64 : //
65 : // The actual object files are stored with names like "/<some-number>".
66 : // Their true full path names are available at offset <some-number> in the "//"
67 : // extended path name stream. These have been observed to be in strictly
68 : // increasing order, with the filenames themselves in no particular order.
69 : //
70 : // It is worth noting that the file offset table in the "/" file need not be in
71 : // order of increasing offset, although it usually is. If it isn't, then the
72 : // actual order of the files in the archive should be ignored, and the order
73 : // implied by the offset table used when navigating. This is the order that
74 : // reflects the way symbols have been added to the symbol table. This is mostly
75 : // important when reading files from one archive and writing them into another;
76 : // to maintain proper symbol information we must ensure we iterate over the
77 : // files in the order they are specified in the offset table.
78 :
79 : #ifndef SYZYGY_AR_AR_READER_H_
80 : #define SYZYGY_AR_AR_READER_H_
81 :
82 : #include <map>
83 : #include <vector>
84 :
85 : #include "base/files/file_path.h"
86 : #include "base/files/file_util.h"
87 : #include "syzygy/ar/ar_common.h"
88 :
89 : namespace ar {
90 :
91 : // Class for extracting files from archive files. This currently does not
92 : // expose the parsed symbol information in any meaningful way.
93 : class ArReader {
94 : public:
95 : // Stores the offsets of each file object, by their index.
96 : typedef std::vector<uint32> FileOffsetVector;
97 : // Stores the inverse of a FileOffsetVector.
98 : typedef std::map<uint32, size_t> OffsetIndexMap;
99 : // Maps sorted object filenames to their index in the archive. This is a
100 : // multimap as multiple files may exist with the same name.
101 : typedef std::set<std::pair<std::string, size_t>> FileNameMap;
102 : // Stores filenames indexed by the file number.
103 : typedef std::vector<std::string> FileNameVector;
104 :
105 : ArReader();
106 :
107 : // Opens the provided file, validating that it is indeed an archive file,
108 : // parsing its headers and populating symbol and filename information. Logs
109 : // verbosely on failure.
110 : // @param ar_path The path to the file to be opened.
111 : // @returns true on success, false otherwise.
112 : bool Init(const base::FilePath& ar_path);
113 :
114 : // Determines the full names of all files in the archive, populating the
115 : // file-name map. This must be called in order to find a file by name. This
116 : // incurs a linear scan of the entire archive.
117 : // @returns true on success, false otherwise.
118 : // @note Can only be called after a successful call to Init. This should only
119 : // be called once.
120 : bool BuildFileIndex();
121 :
122 : // @returns the path of the file being read.
123 E : const base::FilePath& path() const { return path_; }
124 :
125 : // @returns the map of symbols contained in the various object files in the
126 : // archive. The symbol name is mapped to the index of the object file
127 : // containing it.
128 E : const SymbolIndexMap& symbols() const { return symbols_; }
129 :
130 : // @returns the offsets of files in the archive. This is only valid after a
131 : // successful call to Init.
132 E : const FileOffsetVector& offsets() const { return offsets_; }
133 :
134 : // @returns the vector of file names, by their index in the archive.
135 : // This is only valid after a successful call to BuildFileIndex.
136 E : const FileNameVector& files() const { return files_; }
137 :
138 : // @returns the map of files present in the archive, and their
139 : // indices within it. This is only valid after a successful call to
140 : // BuildFileIndex.
141 E : const FileNameMap& files_inverse() const { return files_inverse_; }
142 :
143 : // Seeks the start of the given file.
144 : // @param index The index of the file to seek to.
145 : bool SeekIndex(size_t index);
146 :
147 : // @returns true if there is a next file in the archive to extract.
148 : bool HasNext() const;
149 :
150 : // Extracts the next file to a buffer, and advances the cursor to the
151 : // following file in the archive.
152 : // @param header The header to be populated.
153 : // @param data The buffer to be populated. May be NULL, in which case
154 : // only the header will be filled in.
155 : // @returns true on success, false otherwise.
156 : bool ExtractNext(ParsedArFileHeader* header, DataBuffer* data);
157 :
158 : // Extracts the specified file to a buffer. Leaves the cursor pointing
159 : // at the next file in the archive.
160 : // @param index The index of the file to be extracted.
161 : // @param header The header to be populated.
162 : // @param data The buffer to be populated.
163 : // @returns true on success, false otherwise.
164 : bool Extract(size_t index,
165 : ParsedArFileHeader* header,
166 : DataBuffer* data);
167 :
168 : protected:
169 : // Reads the next file from the archive, advancing the cursor. Returns true
170 : // on success, false otherwise. Does not translate the internal name to an
171 : // external filename. Doesn't update 'index_'.
172 : bool ReadNextFile(ParsedArFileHeader* header, DataBuffer* data);
173 :
174 : // Translates an archive internal filename to the full extended filename.
175 : bool TranslateFilename(const std::string& internal_name,
176 : std::string* full_name);
177 :
178 : // The file that is being read.
179 : base::FilePath path_;
180 : base::ScopedFILE file_;
181 :
182 : // Data regarding the archive.
183 : uint64 length_;
184 : uint64 offset_; // The cursor in the archive's byte stream.
185 : size_t index_; // The index of the archive member the cursor points at.
186 : uint64 start_of_object_files_;
187 :
188 : // Parsed header information.
189 : SymbolIndexMap symbols_;
190 : FileOffsetVector offsets_;
191 : OffsetIndexMap offsets_inverse_;
192 : // The raw file names, concatenated into a single buffer.
193 : DataBuffer filenames_;
194 : // Maps filenames to their indices in the archive. This is populated by
195 : // BuildFileIndex.
196 : FileNameVector files_;
197 : FileNameMap files_inverse_;
198 :
199 : private:
200 : DISALLOW_COPY_AND_ASSIGN(ArReader);
201 : };
202 :
203 : } // namespace ar
204 :
205 : #endif // SYZYGY_AR_AR_READER_H_
|