1 : // Copyright 2015 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #ifndef SYZYGY_REFINERY_DETECTORS_LFH_ENTRY_DETECTOR_H_
16 : #define SYZYGY_REFINERY_DETECTORS_LFH_ENTRY_DETECTOR_H_
17 :
18 : #include <set>
19 : #include <vector>
20 :
21 : #include "base/macros.h"
22 :
23 : #include "syzygy/refinery/types/type.h"
24 : #include "syzygy/refinery/types/type_repository.h"
25 : #include "syzygy/refinery/types/typed_data.h"
26 :
27 m : namespace refinery {
28 :
29 : // This class attempts to decode Windows low fragmentation heap (LFH) entries.
30 : // See also LFHBinDetector for more context.
31 : //
32 : // This is done by heuristically searching for a run of equidistant heap
33 : // entries (HEs).
34 : // In an LFH user bin, each entry encodes a pointer to its associated
35 : // Heap SubSegment (HSS). These pointers are obfuscated by XORing them with a
36 : // mask comprised of the HE address (shifted down three), the heap handle
37 : // (which is a pointer to the HEAP structure), and the per-process LFHKey.
38 : //
39 : // When an entry is XORed with (HE>>3), it should yield LFHKey ^ HEAP ^ HSS.
40 : // While the value of this is unknown, all HEs in the same bin should yield
41 : // the same value. So the search conceptually picks a starting point (modulo
42 : // 8 or 16, depending on bitness) and a stride (multiple of 8 or 16 depending
43 : // on bitness), then tries to find matches along the stride.
44 : // The entry distance in a found run is picked by simple majority vote of the
45 : // distances between the heap entries found, which gives the method a little
46 : // bit of resilience to corrupt intermediate entries. A single - or a run - of
47 : // corrupt entries in a run of otherwise valid entries, with distance D will
48 : // manifest as a single vote of k*D against multiple votes for D.
49 : //
50 : // Note that a detection can result in false positives if the contents of
51 : // memory are just so. Because of the way heap entries are obfuscated, this is
52 : // fairly unlikely however.
53 m : class LFHEntryDetector {
54 m : public:
55 : // Details on a discovered run of LFH heap entries. Note that a run of
56 : // entries may not be contiguous, as the discovery heuristic has a bit of
57 : // resilience to corrupted entries in a run.
58 m : struct LFHEntryRun {
59 : // The address of the first and last heap entry in a discovered run of
60 : // heap entries.
61 m : Address first_entry;
62 m : Address last_entry;
63 :
64 : // The distance between discovered entries in a run.
65 m : size_t entry_distance_bytes;
66 :
67 : // These reflect the strength of the finding, by reporting the number of
68 : // entry pairs that matched this size, against the total number of entries
69 : // found. If size_votes == entries_found - 1, then all entries found were
70 : // equidistant.
71 m : size_t size_votes;
72 m : size_t entries_found;
73 :
74 : // The subsegment code for the voted size.
75 m : uint64_t subsegment_code;
76 m : };
77 m : using LFHEntryRuns = std::vector<LFHEntryRun>;
78 :
79 m : LFHEntryDetector();
80 :
81 : // Initialize the detector with @p repo, which needs to contain types
82 : // associated with the heap used in the process to analyze.
83 : // @param repo the type repo containing heap types.
84 : // @param bit_source a bit source for the ranges we'll detect.
85 : // @returns true on success, when all necessary types are found.
86 m : bool Init(TypeRepository* repo, BitSource* bit_source);
87 :
88 : // Inspects @p range for LFH entry runs, and returns findings in
89 : // @p found_runs.
90 : // @param range the range to run detcation agains.
91 : // @param found_runs returns the heap entry runs detected.
92 : // @returns true on success.
93 : // @note A success return doesn't imply that one or more entry runs were
94 : // found - check the size of @p found_runs to see whether runs were
95 : // found.
96 m : bool Detect(const AddressRange& range, LFHEntryRuns* found_runs);
97 :
98 : // Convenience decoding function.
99 m : static bool GetDecodedLFHEntrySubsegment(const TypedData& lfh_heap_entry,
100 m : uint64_t* decoded_subseg);
101 :
102 : // Accessor.
103 m : UserDefinedTypePtr entry_type() const { return entry_type_; }
104 :
105 m : private:
106 m : typedef std::set<uint64_t> SubsegmentSet;
107 :
108 : // Scans forward through @p range for a run of entries starting at
109 : // @p range.start().
110 : // @param range the address range to search.
111 : // @param used_subsegments a set of subsegment codes already discovered.
112 : // @param found_run contains the most recently discovered run on success.
113 : // @returns true if a run of two or more entries with a new subsegment
114 : // code is found.
115 m : bool ScanForEntryMatch(const AddressRange& range,
116 m : SubsegmentSet* used_subsegments,
117 m : LFHEntryRun* found_run);
118 :
119 : // Valid from Init().
120 m : BitSource* bit_source_;
121 m : UserDefinedTypePtr entry_type_;
122 :
123 m : DISALLOW_COPY_AND_ASSIGN(LFHEntryDetector);
124 m : };
125 :
126 m : } // namespace refinery
127 :
128 : #endif // SYZYGY_REFINERY_DETECTORS_LFH_ENTRY_DETECTOR_H_
|