Coverage for /Syzygy/ar/ar_reader.h

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
100.0% | 5 / 5 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2014 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // Declares a class for reading/extracting files from a lib file in the 'ar'
  16    :  // archive format.
  17    :  //
  18    :  // A MSVS library file uses the standard archive file format that is used
  19    :  // by most toolchains everywhere. More specifically it observes the same format
  20    :  // as the GNU variant, with some extensions. The format is well documented here:
  21    :  //
  22    :  //   http://kishorekumar.net/pecoff_v8.1.htm
  23    :  //
  24    :  // The archive contains three special metadata files, occurring as the first
  25    :  // three files in the archive.
  26    :  //
  27    :  //   "/" : This file contains a concatenation of all symbol information
  28    :  //         across all object files in the library. This is divided into
  29    :  //         3 parts:
  30    :  //         - a big-endian 32-bit integer encoding the number of symbols.
  31    :  //         - big-endian 32-bit integers encoding the offset in the archive
  32    :  //           of the file containing the symbol. This must be in increasing
  33    :  //           order.
  34    :  //         - a concatenation of null-terminated ASCII-encoded symbol
  35    :  //           names. These are implicitly ordered due to the ordering of the
  36    :  //           offsets.
  37    :  //         This table only ends up pointing to object files that actually
  38    :  //         contain symbols, so it can undercount the true number of files
  39    :  //         in the archive. It is present for backwards compatibility with
  40    :  //         older linkers (and the GCC format), but is not actively used by
  41    :  //         MSVS.
  42    :  //   "/" : If a second file with the name "/" is present this is a MSVS
  43    :  //         custom table that encodes the number of files in the archive, and
  44    :  //         their absolute locations.
  45    :  //         - a little-endian 32-bit integer indicating the number of object
  46    :  //           files in the archive. This includes object files that do not
  47    :  //           contain symbols.
  48    :  //         - little-endian 32-bit integers encoding the offset in the archive
  49    :  //           of each object file.
  50    :  //         - a little-endian 32-bit integer encoding the number of symbols.
  51    :  //         - a run of little-endian 16-bit integers indicating the file in
  52    :  //           which the symbol is located (1 indexed).
  53    :  //         - a concatenation of null-terminated ASCII-encoded symbol
  54    :  //           names. These are in increasing lexical order.
  55    :  //   "//": This file contains extended filenames of all object files in the
  56    :  //         library. These are simply a concatenation of null-terminated
  57    :  //         ASCII-encoded filenames. This has been observed to always be in
  58    :  //         the same order as the files in the archive itself.
  59    :  //         NOTE: This file does not always have the name '//', sometimes
  60    :  //               appearing as a 3rd '/' table.
  61    :  //
  62    :  // All of the above mentioned special files must exist in the archive, and must
  63    :  // be the first 3 files.
  64    :  //
  65    :  // The actual object files are stored with names like "/<some-number>".
  66    :  // Their true full path names are available at offset <some-number> in the "//"
  67    :  // extended path name stream. These have been observed to be in strictly
  68    :  // increasing order, with the filenames themselves in no particular order.
  69    :  //
  70    :  // It is worth noting that the file offset table in the "/" file need not be in
  71    :  // order of increasing offset, although it usually is. If it isn't, then the
  72    :  // actual order of the files in the archive should be ignored, and the order
  73    :  // implied by the offset table used when navigating. This is the order that
  74    :  // reflects the way symbols have been added to the symbol table. This is mostly
  75    :  // important when reading files from one archive and writing them into another;
  76    :  // to maintain proper symbol information we must ensure we iterate over the
  77    :  // files in the order they are specified in the offset table.
  78    :  
  79    :  #ifndef SYZYGY_AR_AR_READER_H_
  80    :  #define SYZYGY_AR_AR_READER_H_
  81    :  
  82    :  #include <map>
  83    :  #include <vector>
  84    :  
  85    :  #include "base/files/file_path.h"
  86    :  #include "base/files/file_util.h"
  87    :  #include "syzygy/ar/ar_common.h"
  88    :  
  89    :  namespace ar {
  90    :  
  91    :  // Class for extracting files from archive files. This currently does not
  92    :  // expose the parsed symbol information in any meaningful way.
  93    :  class ArReader {
  94    :   public:
  95    :    // Stores the offsets of each file object, by their index.
  96    :    typedef std::vector<uint32> FileOffsetVector;
  97    :    // Stores the inverse of a FileOffsetVector.
  98    :    typedef std::map<uint32, size_t> OffsetIndexMap;
  99    :    // Sorted set of (object filename, archive index) pairs. Pairs are stored
 100    :    // rather than map entries as multiple files may exist with the same name.
 101    :    typedef std::set<std::pair<std::string, size_t>> FileNameMap;
 102    :    // Stores filenames indexed by the file number.
 103    :    typedef std::vector<std::string> FileNameVector;
 104    :  
 105    :    ArReader();
 106    :  
 107    :    // Opens the provided file, validating that it is indeed an archive file,
 108    :    // parsing its headers and populating symbol and filename information. Logs
 109    :    // verbosely on failure.
 110    :    // @param ar_path The path to the file to be opened.
 111    :    // @returns true on success, false otherwise.
 112    :    bool Init(const base::FilePath& ar_path);
 113    :  
 114    :    // Determines the full names of all files in the archive, populating the
 115    :    // file-name map. This must be called in order to find a file by name. This
 116    :    // incurs a linear scan of the entire archive.
 117    :    // @returns true on success, false otherwise.
 118    :    // @note Can only be called after a successful call to Init. This should only
 119    :    //     be called once.
 120    :    bool BuildFileIndex();
 121    :  
 122    :    // @returns the path of the file being read.
 123  E :    const base::FilePath& path() const { return path_; }
 124    :  
 125    :    // @returns the map of symbols contained in the various object files in the
 126    :    //     archive. The symbol name is mapped to the index of the object file
 127    :    //     containing it.
 128  E :    const SymbolIndexMap& symbols() const { return symbols_; }
 129    :  
 130    :    // @returns the offsets of files in the archive. This is only valid after a
 131    :    //     successful call to Init.
 132  E :    const FileOffsetVector& offsets() const { return offsets_; }
 133    :  
 134    :    // @returns the vector of file names, by their index in the archive.
 135    :    //     This is only valid after a successful call to BuildFileIndex.
 136  E :    const FileNameVector& files() const { return files_; }
 137    :  
 138    :    // @returns the map of files present in the archive, and their
 139    :    //     indices within it. This is only valid after a successful call to
 140    :    //     BuildFileIndex.
 141  E :    const FileNameMap& files_inverse() const { return files_inverse_; }
 142    :  
 143    :    // Seeks to the start of the given file.
 144    :    // @param index The index of the file to seek to.
 145    :    bool SeekIndex(size_t index);
 146    :  
 147    :    // @returns true if there is a next file in the archive to extract.
 148    :    bool HasNext() const;
 149    :  
 150    :    // Extracts the next file to a buffer, and advances the cursor to the
 151    :    // following file in the archive.
 152    :    // @param header The header to be populated.
 153    :    // @param data The buffer to be populated. May be NULL, in which case
 154    :    //     only the header will be filled in.
 155    :    // @returns true on success, false otherwise.
 156    :    bool ExtractNext(ParsedArFileHeader* header, DataBuffer* data);
 157    :  
 158    :    // Extracts the specified file to a buffer. Leaves the cursor pointing
 159    :    // at the next file in the archive.
 160    :    // @param index The index of the file to be extracted.
 161    :    // @param header The header to be populated.
 162    :    // @param data The buffer to be populated.
 163    :    // @returns true on success, false otherwise.
 164    :    bool Extract(size_t index,
 165    :                 ParsedArFileHeader* header,
 166    :                 DataBuffer* data);
 167    :  
 168    :   protected:
 169    :    // Reads the next file from the archive, advancing the cursor. Returns true
 170    :    // on success, false otherwise. Does not translate the internal name to an
 171    :    // external filename. Doesn't update 'index_'.
 172    :    bool ReadNextFile(ParsedArFileHeader* header, DataBuffer* data);
 173    :  
 174    :    // Translates an archive internal filename to the full extended filename.
 175    :    bool TranslateFilename(const std::string& internal_name,
 176    :                           std::string* full_name);
 177    :  
 178    :    // The file that is being read.
 179    :    base::FilePath path_;
 180    :    base::ScopedFILE file_;
 181    :  
 182    :    // Data regarding the archive.
 183    :    uint64 length_;
 184    :    uint64 offset_;  // The cursor in the archive's byte stream.
 185    :    size_t index_;  // The index of the archive member the cursor points at.
 186    :    uint64 start_of_object_files_;
 187    :  
 188    :    // Parsed header information.
 189    :    SymbolIndexMap symbols_;
 190    :    FileOffsetVector offsets_;
 191    :    OffsetIndexMap offsets_inverse_;
 192    :    // The raw file names, concatenated into a single buffer.
 193    :    DataBuffer filenames_;
 194    :    // Filenames by index, and the inverse lookup from filename to index. Both
 195    :    // are populated by BuildFileIndex.
 196    :    FileNameVector files_;
 197    :    FileNameMap files_inverse_;
 198    :  
 199    :   private:
 200    :    DISALLOW_COPY_AND_ASSIGN(ArReader);
 201    :  };
 202    :  
 203    :  }  // namespace ar
 204    :  
 205    :  #endif  // SYZYGY_AR_AR_READER_H_

Coverage information generated Thu Jan 14 17:40:38 2016.