Coverage for /Syzygy/refinery/detectors/lfh_entry_detector.cc

CoverageLines executed / instrumented / missingexe / inst / missLanguageGroup
91.8%1011100.C++source

Line-by-line coverage:

   1    :  // Copyright 2015 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/refinery/detectors/lfh_entry_detector.h"
  16    :  
  17    :  #include "base/logging.h"
  18    :  #include "base/containers/hash_tables.h"
  19    :  #include "syzygy/common/align.h"
  20    :  #include "syzygy/refinery/types/typed_data.h"
  21    :  
  22    :  namespace refinery {
  23    :  
  24  E :  LFHEntryDetector::LFHEntryDetector() : bit_source_(nullptr) {
  25  E :  }
  26    :  
  27  E :  bool LFHEntryDetector::Init(TypeRepository* repo, BitSource* bit_source) {
  28  E :    DCHECK(repo);
  29  E :    DCHECK(bit_source);
  30  E :    DCHECK(bit_source_ == nullptr);
  31    :  
  32  E :    for (auto type : *repo) {
  33  E :      if (type->name() == L"_HEAP_ENTRY") {
  34  E :        if (!type->CastTo(&entry_type_))
  35  i :          return false;
  36  E :        break;
  37    :      }
  38  E :    }
  39    :  
  40  E :    if (!entry_type_)
  41  E :      return false;
  42    :  
  43  E :    bit_source_ = bit_source;
  44    :  
  45  E :    return true;
  46  E :  }
  47    :  
  48    :  bool LFHEntryDetector::Detect(const AddressRange& range,
  49  E :                                LFHEntryRuns* found_runs) {
  50  E :    DCHECK(range.IsValid());
  51  E :    DCHECK(found_runs);
  52  E :    DCHECK(bit_source_);
  53  E :    DCHECK(entry_type_);
  54    :  
  55  E :    found_runs->clear();
  56    :  
  57    :    // This will be 8 or 16 depending on bitness.
  58    :    // TODO(siggi): Fix this code for 64 bit.
  59  E :    const size_t kEntrySize = entry_type_->size();
  60  E :    const Address start = common::AlignUp(range.start(), kEntrySize);
  61    :    const Address end =
  62  E :        common::AlignDown(range.end() - entry_type_->size(), kEntrySize);
  63  E :    DCHECK_EQ(0, (end - start) % kEntrySize);
  64    :  
  65    :    // TODO(siggi): This is ~O(N^2) and so is wasteful for large ranges.
  66    :    //     A better approach might be to process the entire range, count up all
  67    :    //     the subsegment codes that occur, with the first occurrence of each.
  68    :    //     This will then allow processing the range in closer to O(N), as a
  69    :    //     search will only be done where a code occurs more than once, and then
  70    :    //     from the first occurrence of that code.
  71  E :    SubsegmentSet used_subsegments;
  72  E :    for (Address curr = start; curr < end; curr += kEntrySize) {
  73    :      LFHEntryRun found_run;
  74    :  
  75    :      if (ScanForEntryMatch(AddressRange(curr, end - curr), &used_subsegments,
  76  E :                            &found_run)) {
  77  E :        found_runs->push_back(found_run);
  78    :      }
  79  E :    }
  80    :  
  81  E :    return true;
  82  E :  }
  83    :  
  84    :  bool LFHEntryDetector::GetDecodedLFHEntrySubsegment(
  85    :      const TypedData& lfh_heap_entry,
  86  E :      uint64_t* decoded_subseg) {
  87  E :    TypedData subseg_field;
  88  E :    if (!lfh_heap_entry.GetNamedField(L"SubSegmentCode", &subseg_field)) {
  89  i :      VLOG(1) << "Getting LFHEntry SubSegmentCode field failed.";
  90  i :      return false;
  91    :    }
  92  E :    uint64_t entry_subseg = 0;
  93  E :    if (!subseg_field.GetUnsignedValue(&entry_subseg)) {
  94  E :      VLOG(1) << "Getting LFHEntry SubSegmentCode value failed.";
  95  E :      return false;
  96    :    }
  97    :    // Back out he XORed address of the entry itself.
  98  E :    *decoded_subseg = entry_subseg ^ (lfh_heap_entry.addr() >> 3);
  99    :  
 100  E :    return true;
 101  E :  }
 102    :  
 103    :  bool LFHEntryDetector::ScanForEntryMatch(const AddressRange& range,
 104    :                                           SubsegmentSet* used_subsegments,
 105  E :                                           LFHEntryRun* found_run) {
 106  E :    DCHECK(range.IsValid());
 107  E :    DCHECK(found_run);
 108  E :    DCHECK(used_subsegments);
 109  E :    DCHECK(bit_source_);
 110  E :    DCHECK(entry_type_);
 111    :  
 112    :    // Cast the start of the range to a HEAP_ENTRY.
 113  E :    TypedData lfh_heap_entry(bit_source_, entry_type_, range.start());
 114  E :    uint64_t subseg = 0;
 115  E :    if (!GetDecodedLFHEntrySubsegment(lfh_heap_entry, &subseg)) {
 116  E :      VLOG(1) << "Failed to get subsegment from base entry.";
 117  E :      return false;
 118    :    }
 119    :  
 120    :    // See whether we've already discovered this subsegment.
 121  E :    if (used_subsegments->find(subseg) != used_subsegments->end())
 122  E :      return false;
 123    :  
 124    :    // Validate the entry to the extent possible at this point.
 125  E :    TypedData extended_block_signature_field;
 126    :    if (!lfh_heap_entry.GetNamedField(L"ExtendedBlockSignature",
 127  E :                                      &extended_block_signature_field)) {
 128  i :      LOG(ERROR) << "No ExtendedBlockSignature field in entry.";
 129  i :      return false;
 130    :    }
 131    :  
 132  E :    uint64_t extended_block_signature = 0;
 133    :    if (!extended_block_signature_field.GetUnsignedValue(
 134  E :            &extended_block_signature)) {
 135  i :      VLOG(1) << "Failed to get extended_block_signature from base entry.";
 136  i :      return false;
 137    :    }
 138    :  
 139    :    // Check that the LFH flag is set on the entry.
 140  E :    const uint16_t kLFHBlockFlag = 0x80;
 141  E :    if ((extended_block_signature & kLFHBlockFlag) == 0)
 142  E :      return false;
 143    :  
 144    :    // Check that the rest of the entry is sane. Free blocks have the remaining
 145    :    // bits set, whereas used blocks use the remaining bits to encode the number
 146    :    // of unused bytes in the block, plus 8.
 147  E :    const uint16_t kLFHUnusedBytesMask = 0x7F;
 148    :    if ((extended_block_signature & kLFHUnusedBytesMask) != 0 &&
 149  E :        (extended_block_signature & kLFHUnusedBytesMask) < 8) {
 150  E :      return false;
 151    :    }
 152    :  
 153    :    // Now that the entry has passed initial validation, record that we're
 154    :    // processing this subsegment value.
 155  E :    used_subsegments->insert(subseg);
 156    :  
 157    :    // The distance histogram is used to pick an entry size by simple majority
 158    :    // vote. This yields some resilience to corruption and false positive
 159    :    // matches.
 160    :    using DistanceHistogram = base::hash_map<size_t, size_t>;
 161  E :    DistanceHistogram distances;
 162  E :    Address last_match = range.start();
 163    :  
 164    :    // Bound the search to the size of the range we're given.
 165  E :    size_t end = range.size() / lfh_heap_entry.type()->size();
 166  E :    for (size_t i = 2; i < end; ++i) {
 167    :      // Walk forward to the next candidate.
 168  E :      TypedData candidate;
 169  E :      if (!lfh_heap_entry.OffsetAndCast(i, lfh_heap_entry.type(), &candidate))
 170  i :        break;
 171    :  
 172  E :      uint64_t candidate_subseg = 0;
 173  E :      if (!GetDecodedLFHEntrySubsegment(candidate, &candidate_subseg))
 174  i :        break;
 175    :  
 176    :      // TODO(siggi): It may make sense to validate the entries to cut down on
 177    :      //     false positives.
 178  E :      if (subseg == candidate_subseg) {
 179  E :        const size_t kDistance = candidate.addr() - last_match;
 180  E :        last_match = candidate.addr();
 181    :  
 182    :        // Record the distance from the last match.
 183  E :        ++distances[kDistance];
 184    :      }
 185  E :    }
 186    :  
 187  E :    if (distances.size() == 0)
 188  E :      return false;
 189    :  
 190  E :    size_t voted_size = 0;
 191  E :    size_t voted_count = 0;
 192  E :    size_t num_votes = 0;
 193  E :    for (const auto& entry : distances) {
 194  E :      const auto& size = entry.first;
 195  E :      const auto& count = entry.second;
 196    :      // Voting count ties are broken by the lowest size, as corruption in a run
 197    :      // of entries of size D will show as kD.
 198  E :      if (count > voted_count || (count == voted_count && size < voted_size)) {
 199  E :        voted_size = size;
 200  E :        voted_count = count;
 201    :      }
 202  E :      num_votes += count;
 203  E :    }
 204    :  
 205    :    // Record the found run.
 206  E :    found_run->first_entry = range.start();
 207  E :    found_run->last_entry = last_match;
 208  E :    found_run->entry_distance_bytes = voted_size;
 209  E :    found_run->size_votes = voted_count;
 210  E :    found_run->entries_found = num_votes + 1;
 211  E :    found_run->subsegment_code = subseg;
 212    :  
 213  E :    return true;
 214  E :  }
 215    :  
 216    :  }  // namespace refinery

Coverage information generated Thu Jan 14 17:40:38 2016.