1 : // Copyright 2015 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/refinery/analyzers/heap_analyzer.h"
16 :
17 : #include "base/strings/string_util.h"
18 : #include "base/strings/stringprintf.h"
19 : #include "syzygy/refinery/detectors/lfh_entry_detector.h"
20 : #include "syzygy/refinery/process_state/process_state_util.h"
21 :
22 : namespace refinery {
23 :
24 : namespace {
25 :
26 : scoped_refptr<TypeRepository> GetNtdllTypes(ProcessState* process_state,
27 E : SymbolProvider* symbol_provider) {
28 E : ModuleLayerPtr modules;
29 E : if (!process_state->FindLayer(&modules)) {
30 i : LOG(ERROR) << "No modules layer.";
31 i : return nullptr;
32 : }
33 :
34 E : for (const auto& module_sig : modules->data().signatures()) {
35 : if (base::EndsWith(module_sig.path, L"ntdll.dll",
36 E : base::CompareCase::INSENSITIVE_ASCII)) {
37 : pe::PEFile::Signature signature(
38 : module_sig.path, core::AbsoluteAddress(0U), module_sig.module_size,
39 E : module_sig.module_checksum, module_sig.module_time_date_stamp);
40 E : scoped_refptr<TypeRepository> ret;
41 :
42 E : if (symbol_provider->FindOrCreateTypeRepository(signature, &ret))
43 E : return ret;
44 i : }
45 E : }
46 :
47 i : return nullptr;
48 E : }
49 :
50 : bool RecordFoundRun(const LFHEntryDetector::LFHEntryRun& run,
51 : UserDefinedTypePtr entry_type,
52 E : ProcessState* process_state) {
53 E : HeapMetadataLayerPtr meta_layer;
54 E : process_state->FindOrCreateLayer(&meta_layer);
55 E : HeapAllocationLayerPtr alloc_layer;
56 E : process_state->FindOrCreateLayer(&alloc_layer);
57 :
58 E : for (Address entry_address = run.first_entry; entry_address <= run.last_entry;
59 E : entry_address += run.entry_distance_bytes) {
60 : // Check the state of the entry for the metadata and to record the state
61 : // and size of the allocation.
62 E : TypedData entry(process_state, entry_type, entry_address);
63 E : TypedData extended_block_signature_field;
64 : if (!entry.GetNamedField(L"ExtendedBlockSignature",
65 E : &extended_block_signature_field)) {
66 : // If the field is missing from the type, that's an error.
67 i : return false;
68 : }
69 :
70 E : const uint16_t kLFHBlockFlag = 0x80;
71 E : uint64_t extended_block_signature = 0;
72 E : DCHECK_LT(entry_type->size(), run.entry_distance_bytes);
73 E : size_t allocation_size = run.entry_distance_bytes - entry_type->size();
74 E : bool entry_is_corrupt = false;
75 E : uint64_t decoded_subsegment = 0;
76 : if (!LFHEntryDetector::GetDecodedLFHEntrySubsegment(entry,
77 E : &decoded_subsegment)) {
78 : // This really shouldn't happen.
79 i : NOTREACHED() << "Unable to get decoded LFH subsegment.";
80 i : return false;
81 : }
82 E : if (decoded_subsegment != run.subsegment_code) {
83 : // If the subsegment code doesn't match, the entry is corrupt.
84 E : entry_is_corrupt = true;
85 : }
86 :
87 E : bool alloc_is_free = true;
88 : if (!extended_block_signature_field.GetUnsignedValue(
89 : &extended_block_signature) ||
90 E : (extended_block_signature & kLFHBlockFlag) == 0) {
91 : // If we can't retrieve the value, or the high bit is clear, we assume
92 : // a corrupt entry.
93 E : entry_is_corrupt = true;
94 E : DCHECK_EQ(true, alloc_is_free);
95 E : } else {
96 : // Mask off the flag bit, the remainder should be unused bytes + 8 or
97 : // zero - zero marking an unused (free) block.
98 E : extended_block_signature &= ~kLFHBlockFlag;
99 E : if (extended_block_signature == 0) {
100 : // It's a free block, no header corruption that we can determine.
101 E : DCHECK_EQ(true, alloc_is_free);
102 E : } else if (extended_block_signature < 8) {
103 : // The header is corrupt.
104 i : entry_is_corrupt = true;
105 i : } else {
106 E : size_t unused_bytes = extended_block_signature - 8;
107 E : if (unused_bytes >= allocation_size) {
108 : // This is un-possible, must be corruption.
109 E : entry_is_corrupt = true;
110 E : DCHECK_EQ(true, alloc_is_free);
111 E : } else {
112 : // Unused bytes is reasonable, record this as a used block.
113 E : allocation_size -= unused_bytes;
114 E : alloc_is_free = false;
115 : }
116 : }
117 : }
118 :
119 : // Create the record for the entry's metadata.
120 E : AddressRange entry_range(entry_address, entry_type->size());
121 E : HeapMetadataRecordPtr meta_record;
122 E : meta_layer->CreateRecord(entry_range, &meta_record);
123 E : HeapMetadata* meta_data = meta_record->mutable_data();
124 E : meta_data->set_corrupt(entry_is_corrupt);
125 :
126 : // Record the allocation itself.
127 E : AddressRange alloc_range(entry_range.end(), allocation_size);
128 E : HeapAllocationRecordPtr alloc_record;
129 E : alloc_layer->CreateRecord(alloc_range, &alloc_record);
130 E : HeapAllocation* allocation = alloc_record->mutable_data();
131 E : allocation->set_is_free(alloc_is_free);
132 E : }
133 :
134 E : return true;
135 E : }
136 :
137 : bool RecordFoundRuns(const LFHEntryDetector::LFHEntryRuns& found_runs,
138 : UserDefinedTypePtr entry_type,
139 E : ProcessState* process_state) {
140 E : DCHECK_NE(0U, found_runs.size());
141 :
142 E : for (const auto& run : found_runs) {
143 : // For now, simply record all runs of three or more entries. A run of two
144 : // likely means that we've scored on the birthday paradox.
145 : // TODO(siggi): Improve on this.
146 : // One possibility is to build the max likelyhood view, where something to
147 : // watch out for is the elimination of strong findings that are extended at
148 : // either end by a false positive match. Adding LFH userdata header
149 : // detection
150 : // into the mix will add another degree of matching to this.
151 : if (run.entries_found > 2 &&
152 E : !RecordFoundRun(run, entry_type, process_state))
153 i : return false;
154 E : }
155 :
156 E : return true;
157 E : }
158 :
159 : } // namespace
160 :
161 : // static
162 : const char HeapAnalyzer::kHeapAnalyzerName[] = "HeapAnalyzer";
163 :
164 : HeapAnalyzer::HeapAnalyzer(scoped_refptr<SymbolProvider> symbol_provider)
165 E : : symbol_provider_(symbol_provider) {
166 E : }
167 :
168 : Analyzer::AnalysisResult HeapAnalyzer::Analyze(
169 : const minidump::Minidump& minidump,
170 E : ProcessState* process_state) {
171 : // TODO(siggi): At present this won't work for XP, figure out how to reject
172 : // XP dumps?
173 : // Start by finding the NTDLL module record and symbols, as that's where we
174 : // come by the symbols that describe the heap.
175 : scoped_refptr<TypeRepository> ntdll_repo =
176 E : GetNtdllTypes(process_state, symbol_provider_.get());
177 E : if (!ntdll_repo) {
178 i : LOG(ERROR) << "Couldn't get types for NTDLL.";
179 i : return ANALYSIS_ERROR;
180 : }
181 :
182 E : LFHEntryDetector detector;
183 E : if (!detector.Init(ntdll_repo.get(), process_state)) {
184 i : LOG(ERROR) << "Failed to initialize LFH detector.";
185 i : return ANALYSIS_ERROR;
186 : }
187 :
188 E : BytesLayerPtr bytes_layer;
189 E : if (!process_state->FindLayer(&bytes_layer)) {
190 i : LOG(ERROR) << "Failed to find bytes layer.";
191 i : return ANALYSIS_ERROR;
192 : }
193 :
194 : // Perform detection on the records from the bytes layer.
195 E : for (const auto& record : *bytes_layer) {
196 : // TODO(siggi): Skip stacks, and perhaps modules here.
197 E : LFHEntryDetector::LFHEntryRuns found_runs;
198 E : if (!detector.Detect(record->range(), &found_runs)) {
199 i : LOG(ERROR) << "Detection failed.";
200 i : return ANALYSIS_ERROR;
201 : }
202 :
203 E : if (found_runs.size()) {
204 E : if (!RecordFoundRuns(found_runs, detector.entry_type(), process_state)) {
205 i : LOG(ERROR) << "Failed to record found runs.";
206 : // TODO(siggi): Is this the right thing to do?
207 i : return ANALYSIS_ERROR;
208 : }
209 : }
210 E : }
211 :
212 E : return ANALYSIS_COMPLETE;
213 E : }
214 :
215 : } // namespace refinery
|