1 : // Copyright 2015 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/refinery/analyzers/heap_analyzer.h"
16 :
17 : #include "base/strings/string_util.h"
18 : #include "base/strings/stringprintf.h"
19 : #include "syzygy/refinery/detectors/lfh_entry_detector.h"
20 : #include "syzygy/refinery/process_state/process_state_util.h"
21 :
22 m : namespace refinery {
23 :
24 m : namespace {
25 :
26 m : scoped_refptr<TypeRepository> GetNtdllTypes(ProcessState* process_state,
27 m : SymbolProvider* symbol_provider) {
28 m : ModuleLayerPtr modules;
29 m : if (!process_state->FindLayer(&modules)) {
30 m : LOG(ERROR) << "No modules layer.";
31 m : return nullptr;
32 m : }
33 :
34 m : for (const auto& module_sig : modules->data().signatures()) {
35 m : if (base::EndsWith(module_sig.path, L"ntdll.dll",
36 m : base::CompareCase::INSENSITIVE_ASCII)) {
37 m : pe::PEFile::Signature signature(
38 m : module_sig.path, core::AbsoluteAddress(0U), module_sig.module_size,
39 m : module_sig.module_checksum, module_sig.module_time_date_stamp);
40 m : scoped_refptr<TypeRepository> ret;
41 :
42 m : if (symbol_provider->FindOrCreateTypeRepository(signature, &ret))
43 m : return ret;
44 m : }
45 m : }
46 :
47 m : return nullptr;
48 m : }
49 :
50 m : bool RecordFoundRun(const LFHEntryDetector::LFHEntryRun& run,
51 m : UserDefinedTypePtr entry_type,
52 m : ProcessState* process_state) {
53 m : HeapMetadataLayerPtr meta_layer;
54 m : process_state->FindOrCreateLayer(&meta_layer);
55 m : HeapAllocationLayerPtr alloc_layer;
56 m : process_state->FindOrCreateLayer(&alloc_layer);
57 :
58 m : for (Address entry_address = run.first_entry; entry_address <= run.last_entry;
59 m : entry_address += run.entry_distance_bytes) {
60 : // Check the state of the entry for the metadata and to record the state
61 : // and size of the allocation.
62 m : TypedData entry(process_state, entry_type, entry_address);
63 m : TypedData extended_block_signature_field;
64 m : if (!entry.GetNamedField(L"ExtendedBlockSignature",
65 m : &extended_block_signature_field)) {
66 : // If the field is missing from the type, that's an error.
67 m : return false;
68 m : }
69 :
70 m : const uint16_t kLFHBlockFlag = 0x80;
71 m : uint64_t extended_block_signature = 0;
72 m : DCHECK_LT(entry_type->size(), run.entry_distance_bytes);
73 m : size_t allocation_size = run.entry_distance_bytes - entry_type->size();
74 m : bool entry_is_corrupt = false;
75 m : uint64_t decoded_subsegment = 0;
76 m : if (!LFHEntryDetector::GetDecodedLFHEntrySubsegment(entry,
77 m : &decoded_subsegment)) {
78 : // This really shouldn't happen.
79 m : NOTREACHED() << "Unable to get decoded LFH subsegment.";
80 m : return false;
81 m : }
82 m : if (decoded_subsegment != run.subsegment_code) {
83 : // If the subsegment code doesn't match, the entry is corrupt.
84 m : entry_is_corrupt = true;
85 m : }
86 :
87 m : bool alloc_is_free = true;
88 m : if (!extended_block_signature_field.GetUnsignedValue(
89 m : &extended_block_signature) ||
90 m : (extended_block_signature & kLFHBlockFlag) == 0) {
91 : // If we can't retrieve the value, or the high bit is clear, we assume
92 : // a corrupt entry.
93 m : entry_is_corrupt = true;
94 m : DCHECK_EQ(true, alloc_is_free);
95 m : } else {
96 : // Mask off the flag bit, the remainder should be unused bytes + 8 or
97 : // zero - zero marking an unused (free) block.
98 m : extended_block_signature &= ~kLFHBlockFlag;
99 m : if (extended_block_signature == 0) {
100 : // It's a free block, no header corruption that we can determine.
101 m : DCHECK_EQ(true, alloc_is_free);
102 m : } else if (extended_block_signature < 8) {
103 : // The header is corrupt.
104 m : entry_is_corrupt = true;
105 m : } else {
106 m : size_t unused_bytes = extended_block_signature - 8;
107 m : if (unused_bytes >= allocation_size) {
108 : // This is un-possible, must be corruption.
109 m : entry_is_corrupt = true;
110 m : DCHECK_EQ(true, alloc_is_free);
111 m : } else {
112 : // Unused bytes is reasonable, record this as a used block.
113 m : allocation_size -= unused_bytes;
114 m : alloc_is_free = false;
115 m : }
116 m : }
117 m : }
118 :
119 : // Create the record for the entry's metadata.
120 m : AddressRange entry_range(entry_address, entry_type->size());
121 m : HeapMetadataRecordPtr meta_record;
122 m : meta_layer->CreateRecord(entry_range, &meta_record);
123 m : HeapMetadata* meta_data = meta_record->mutable_data();
124 m : meta_data->set_corrupt(entry_is_corrupt);
125 :
126 : // Record the allocation itself.
127 m : AddressRange alloc_range(entry_range.end(), allocation_size);
128 m : HeapAllocationRecordPtr alloc_record;
129 m : alloc_layer->CreateRecord(alloc_range, &alloc_record);
130 m : HeapAllocation* allocation = alloc_record->mutable_data();
131 m : allocation->set_is_free(alloc_is_free);
132 m : }
133 :
134 m : return true;
135 m : }
136 :
137 m : bool RecordFoundRuns(const LFHEntryDetector::LFHEntryRuns& found_runs,
138 m : UserDefinedTypePtr entry_type,
139 m : ProcessState* process_state) {
140 m : DCHECK_NE(0U, found_runs.size());
141 :
142 m : for (const auto& run : found_runs) {
143 : // For now, simply record all runs of three or more entries. A run of two
144 : // likely means that we've scored on the birthday paradox.
145 : // TODO(siggi): Improve on this.
146 : // One possibility is to build the max likelyhood view, where something to
147 : // watch out for is the elimination of strong findings that are extended at
148 : // either end by a false positive match. Adding LFH userdata header
149 : // detection
150 : // into the mix will add another degree of matching to this.
151 m : if (run.entries_found > 2 &&
152 m : !RecordFoundRun(run, entry_type, process_state))
153 m : return false;
154 m : }
155 :
156 m : return true;
157 m : }
158 :
159 m : } // namespace
160 :
161 : // static
162 m : const char HeapAnalyzer::kHeapAnalyzerName[] = "HeapAnalyzer";
163 :
164 m : HeapAnalyzer::HeapAnalyzer() {
165 m : }
166 :
167 m : Analyzer::AnalysisResult HeapAnalyzer::Analyze(
168 m : const minidump::Minidump& minidump,
169 m : const ProcessAnalysis& process_analysis) {
170 m : DCHECK(process_analysis.process_state() != nullptr);
171 m : DCHECK(process_analysis.symbol_provider() != nullptr);
172 :
173 : // TODO(siggi): At present this won't work for XP, figure out how to reject
174 : // XP dumps?
175 : // Start by finding the NTDLL module record and symbols, as that's where we
176 : // come by the symbols that describe the heap.
177 m : scoped_refptr<TypeRepository> ntdll_repo =
178 m : GetNtdllTypes(process_analysis.process_state(),
179 m : process_analysis.symbol_provider().get());
180 m : if (!ntdll_repo) {
181 m : LOG(ERROR) << "Couldn't get types for NTDLL.";
182 m : return ANALYSIS_ERROR;
183 m : }
184 :
185 m : LFHEntryDetector detector;
186 m : if (!detector.Init(ntdll_repo.get(), process_analysis.process_state())) {
187 m : LOG(ERROR) << "Failed to initialize LFH detector.";
188 m : return ANALYSIS_ERROR;
189 m : }
190 :
191 m : BytesLayerPtr bytes_layer;
192 m : if (!process_analysis.process_state()->FindLayer(&bytes_layer)) {
193 m : LOG(ERROR) << "Failed to find bytes layer.";
194 m : return ANALYSIS_ERROR;
195 m : }
196 :
197 : // Perform detection on the records from the bytes layer.
198 m : for (const auto& record : *bytes_layer) {
199 : // TODO(siggi): Skip stacks, and perhaps modules here.
200 m : LFHEntryDetector::LFHEntryRuns found_runs;
201 m : if (!detector.Detect(record->range(), &found_runs)) {
202 m : LOG(ERROR) << "Detection failed.";
203 m : return ANALYSIS_ERROR;
204 m : }
205 :
206 m : if (found_runs.size()) {
207 m : if (!RecordFoundRuns(found_runs, detector.entry_type(),
208 m : process_analysis.process_state())) {
209 m : LOG(ERROR) << "Failed to record found runs.";
210 : // TODO(siggi): Is this the right thing to do?
211 m : return ANALYSIS_ERROR;
212 m : }
213 m : }
214 m : }
215 :
216 m : return ANALYSIS_COMPLETE;
217 m : }
218 :
219 m : } // namespace refinery
|