1 : // Copyright 2015 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/refinery/detectors/lfh_entry_detector.h"
16 :
17 : #include "base/logging.h"
18 : #include "base/containers/hash_tables.h"
19 : #include "syzygy/common/align.h"
20 : #include "syzygy/refinery/types/typed_data.h"
21 :
22 : namespace refinery {
23 :
24 E : LFHEntryDetector::LFHEntryDetector() : bit_source_(nullptr) {
25 E : }
26 :
27 E : bool LFHEntryDetector::Init(TypeRepository* repo, BitSource* bit_source) {
28 E : DCHECK(repo);
29 E : DCHECK(bit_source);
30 E : DCHECK(bit_source_ == nullptr);
31 :
32 E : for (auto type : *repo) {
33 E : if (type->name() == L"_HEAP_ENTRY") {
34 E : if (!type->CastTo(&entry_type_))
35 i : return false;
36 E : break;
37 : }
38 E : }
39 :
40 E : if (!entry_type_)
41 E : return false;
42 :
43 E : bit_source_ = bit_source;
44 :
45 E : return true;
46 E : }
47 :
48 : bool LFHEntryDetector::Detect(const AddressRange& range,
49 E : LFHEntryRuns* found_runs) {
50 E : DCHECK(range.IsValid());
51 E : DCHECK(found_runs);
52 E : DCHECK(bit_source_);
53 E : DCHECK(entry_type_);
54 :
55 E : found_runs->clear();
56 :
57 : // This will be 8 or 16 depending on bitness.
58 : // TODO(siggi): Fix this code for 64 bit.
59 E : const size_t kEntrySize = entry_type_->size();
60 E : const Address start = common::AlignUp(range.start(), kEntrySize);
61 : const Address end =
62 E : common::AlignDown(range.end() - entry_type_->size(), kEntrySize);
63 E : DCHECK_EQ(0, (end - start) % kEntrySize);
64 :
65 : // TODO(siggi): This is ~O(N^2) and so is wasteful for large ranges.
66 : // A better approach might be to process the entire range, count up all
67 : // the subsegment codes that occur, with the first occurrence of each.
68 : // This will then allow processing the range in closer to O(N), as a
69 : // search will only be done where a code occurs more than once, and then
70 : // from the first occurrence of that code.
71 E : SubsegmentSet used_subsegments;
72 E : for (Address curr = start; curr < end; curr += kEntrySize) {
73 : LFHEntryRun found_run;
74 :
75 : if (ScanForEntryMatch(AddressRange(curr, end - curr), &used_subsegments,
76 E : &found_run)) {
77 E : found_runs->push_back(found_run);
78 : }
79 E : }
80 :
81 E : return true;
82 E : }
83 :
84 : bool LFHEntryDetector::GetDecodedLFHEntrySubsegment(
85 : const TypedData& lfh_heap_entry,
86 E : uint64_t* decoded_subseg) {
87 E : TypedData subseg_field;
88 E : if (!lfh_heap_entry.GetNamedField(L"SubSegmentCode", &subseg_field)) {
89 i : VLOG(1) << "Getting LFHEntry SubSegmentCode field failed.";
90 i : return false;
91 : }
92 E : uint64_t entry_subseg = 0;
93 E : if (!subseg_field.GetUnsignedValue(&entry_subseg)) {
94 E : VLOG(1) << "Getting LFHEntry SubSegmentCode value failed.";
95 E : return false;
96 : }
97 : // Back out he XORed address of the entry itself.
98 E : *decoded_subseg = entry_subseg ^ (lfh_heap_entry.addr() >> 3);
99 :
100 E : return true;
101 E : }
102 :
103 : bool LFHEntryDetector::ScanForEntryMatch(const AddressRange& range,
104 : SubsegmentSet* used_subsegments,
105 E : LFHEntryRun* found_run) {
106 E : DCHECK(range.IsValid());
107 E : DCHECK(found_run);
108 E : DCHECK(used_subsegments);
109 E : DCHECK(bit_source_);
110 E : DCHECK(entry_type_);
111 :
112 : // Cast the start of the range to a HEAP_ENTRY.
113 E : TypedData lfh_heap_entry(bit_source_, entry_type_, range.start());
114 E : uint64_t subseg = 0;
115 E : if (!GetDecodedLFHEntrySubsegment(lfh_heap_entry, &subseg)) {
116 E : VLOG(1) << "Failed to get subsegment from base entry.";
117 E : return false;
118 : }
119 :
120 : // See whether we've already discovered this subsegment.
121 E : if (used_subsegments->find(subseg) != used_subsegments->end())
122 E : return false;
123 :
124 : // Validate the entry to the extent possible at this point.
125 E : TypedData extended_block_signature_field;
126 : if (!lfh_heap_entry.GetNamedField(L"ExtendedBlockSignature",
127 E : &extended_block_signature_field)) {
128 i : LOG(ERROR) << "No ExtendedBlockSignature field in entry.";
129 i : return false;
130 : }
131 :
132 E : uint64_t extended_block_signature = 0;
133 : if (!extended_block_signature_field.GetUnsignedValue(
134 E : &extended_block_signature)) {
135 i : VLOG(1) << "Failed to get extended_block_signature from base entry.";
136 i : return false;
137 : }
138 :
139 : // Check that the LFH flag is set on the entry.
140 E : const uint16_t kLFHBlockFlag = 0x80;
141 E : if ((extended_block_signature & kLFHBlockFlag) == 0)
142 E : return false;
143 :
144 : // Check that the rest of the entry is sane. Free blocks have the remaining
145 : // bits set, whereas used blocks use the remaining bits to encode the number
146 : // of unused bytes in the block, plus 8.
147 E : const uint16_t kLFHUnusedBytesMask = 0x7F;
148 : if ((extended_block_signature & kLFHUnusedBytesMask) != 0 &&
149 E : (extended_block_signature & kLFHUnusedBytesMask) < 8) {
150 E : return false;
151 : }
152 :
153 : // Now that the entry has passed initial validation, record that we're
154 : // processing this subsegment value.
155 E : used_subsegments->insert(subseg);
156 :
157 : // The distance histogram is used to pick an entry size by simple majority
158 : // vote. This yields some resilience to corruption and false positive
159 : // matches.
160 : using DistanceHistogram = base::hash_map<size_t, size_t>;
161 E : DistanceHistogram distances;
162 E : Address last_match = range.start();
163 :
164 : // Bound the search to the size of the range we're given.
165 E : size_t end = range.size() / lfh_heap_entry.type()->size();
166 E : for (size_t i = 2; i < end; ++i) {
167 : // Walk forward to the next candidate.
168 E : TypedData candidate;
169 E : if (!lfh_heap_entry.OffsetAndCast(i, lfh_heap_entry.type(), &candidate))
170 i : break;
171 :
172 E : uint64_t candidate_subseg = 0;
173 E : if (!GetDecodedLFHEntrySubsegment(candidate, &candidate_subseg))
174 i : break;
175 :
176 : // TODO(siggi): It may make sense to validate the entries to cut down on
177 : // false positives.
178 E : if (subseg == candidate_subseg) {
179 E : const size_t kDistance = candidate.addr() - last_match;
180 E : last_match = candidate.addr();
181 :
182 : // Record the distance from the last match.
183 E : ++distances[kDistance];
184 : }
185 E : }
186 :
187 E : if (distances.size() == 0)
188 E : return false;
189 :
190 E : size_t voted_size = 0;
191 E : size_t voted_count = 0;
192 E : size_t num_votes = 0;
193 E : for (const auto& entry : distances) {
194 E : const auto& size = entry.first;
195 E : const auto& count = entry.second;
196 : // Voting count ties are broken by the lowest size, as corruption in a run
197 : // of entries of size D will show as kD.
198 E : if (count > voted_count || (count == voted_count && size < voted_size)) {
199 E : voted_size = size;
200 E : voted_count = count;
201 : }
202 E : num_votes += count;
203 E : }
204 :
205 : // Record the found run.
206 E : found_run->first_entry = range.start();
207 E : found_run->last_entry = last_match;
208 E : found_run->entry_distance_bytes = voted_size;
209 E : found_run->size_votes = voted_count;
210 E : found_run->entries_found = num_votes + 1;
211 E : found_run->subsegment_code = subseg;
212 :
213 E : return true;
214 E : }
215 :
216 : } // namespace refinery
|