1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/block_util.h"
16 :
17 : #include "syzygy/block_graph/block_util.h"
18 : #include "syzygy/core/address.h"
19 :
20 : namespace pe {
21 :
22 : namespace {
23 :
24 : using block_graph::BlockGraph;
25 :
// Size, in bytes, that the reference-validation code in this file compares
// reference sizes against. It is taken from sizeof(core::AbsoluteAddress);
// comments in this file call such references "4-byte", so presumably
// AbsoluteAddress wraps a 32-bit value -- confirm in syzygy/core/address.h.
26 : const size_t kPointerSize = sizeof(core::AbsoluteAddress);
27 :
28 : // Returns true if there is a data label at the given offset,
29 : // false otherwise.
30 : bool HasDataLabel(const BlockGraph::Block* block,
31 E : BlockGraph::Offset offset) {
32 : BlockGraph::Block::LabelMap::const_iterator label_it =
33 E : block->labels().find(offset);
34 E : if (label_it == block->labels().end())
35 E : return false;
36 E : if (!label_it->second.has_attributes(BlockGraph::DATA_LABEL))
37 i : return false;
38 E : return true;
39 E : }
40 :
41 : bool IsValidSelfReferenceCodeToCode(
42 : const BlockGraph::Block* block,
43 E : const BlockGraph::Reference& ref) {
44 : // These references must be direct. They may be 1- or 4-byte PC-relative refs,
45 : // or 4-byte absolute refs.
46 E : if (!ref.IsDirect())
47 E : return false;
48 :
49 E : switch (ref.type()) {
50 : case BlockGraph::PC_RELATIVE_REF: {
51 E : if (ref.size() != 1 && ref.size() != kPointerSize)
52 i : return false;
53 E : break;
54 : }
55 :
56 : case BlockGraph::ABSOLUTE_REF: {
57 E : if (ref.size() != kPointerSize)
58 i : return false;
59 E : break;
60 : }
61 :
62 : default: {
63 E : return false;
64 : }
65 : }
66 :
67 E : return true;
68 E : }
69 :
70 : bool IsValidSelfReferenceCodeToData(
71 : const BlockGraph::Block* block,
72 E : const BlockGraph::Reference& ref) {
73 : // Must be direct 4-byte absolute references to a data label.
74 : if (ref.type() != BlockGraph::ABSOLUTE_REF ||
75 : ref.size() != kPointerSize ||
76 : !ref.IsDirect() ||
77 E : !HasDataLabel(block, ref.offset())) {
78 E : return false;
79 : }
80 E : return true;
81 E : }
82 :
83 : bool IsValidSelfReferenceDataToCode(
84 : const BlockGraph::Block* block,
85 E : const BlockGraph::Reference& ref) {
86 : // Must be 4-byte direct absolute references.
87 : if (ref.type() != BlockGraph::ABSOLUTE_REF || ref.size() != kPointerSize ||
88 E : !ref.IsDirect()) {
89 E : return false;
90 : }
91 E : return true;
92 E : }
93 :
94 : bool IsValidSelfReferenceDataToData(
95 : const BlockGraph::Block* block,
96 E : const BlockGraph::Reference& ref) {
97 : // Must be 4-byte direct absolute references. We see this in 'meta' case
98 : // tables, where there will be one case table that is used to select among a
99 : // handful of case tables, and then the selected case table will be used for
100 : // the second round of logic. This happens a lot in very complex conditional
101 : // code like that generated by gtest and gmock.
102 : if (ref.type() != BlockGraph::ABSOLUTE_REF || ref.size() != kPointerSize ||
103 E : !ref.IsDirect()) {
104 E : return false;
105 : }
106 E : return true;
107 E : }
108 :
109 : bool IsValidExternalReferenceCodeBlockToCode(
110 : const BlockGraph::Block* block,
111 E : const BlockGraph::Reference& ref) {
112 : // Must be direct 4-byte absolute or pc-rel reference to offset 0.
113 : if (ref.type() != BlockGraph::ABSOLUTE_REF &&
114 E : ref.type() != BlockGraph::PC_RELATIVE_REF) {
115 E : return false;
116 : }
117 E : if (ref.size() != kPointerSize || ref.offset() != 0 || !ref.IsDirect())
118 E : return false;
119 E : return true;
120 E : }
121 :
122 : bool IsValidExternalReferenceDataBlockToCode(
123 : const BlockGraph::Block* block,
124 E : const BlockGraph::Reference& ref) {
125 : // Must be direct 4-byte absolute or relative (PE structures) pointer to
126 : // offset 0.
127 : if (ref.type() != BlockGraph::ABSOLUTE_REF &&
128 E : ref.type() != BlockGraph::RELATIVE_REF) {
129 E : return false;
130 : }
131 E : if (ref.size() != kPointerSize || ref.offset() != 0 || !ref.IsDirect())
132 E : return false;
133 E : return true;
134 E : }
135 :
136 : } // namespace
137 :
138 E : bool CodeBlockReferencesAreClConsistent(const BlockGraph::Block* block) {
139 E : DCHECK(block != NULL);
140 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
141 :
142 : // Iterate over the outgoing references from this block.
143 : BlockGraph::Block::ReferenceMap::const_iterator ref_it =
144 E : block->references().begin();
145 E : for (; ref_it != block->references().end(); ++ref_it) {
146 E : switch (ref_it->second.referenced()->type()) {
147 : // References to data are always safe.
148 : case BlockGraph::DATA_BLOCK: {
149 E : break;
150 : }
151 :
152 : // References to code blocks must be direct.
153 : case BlockGraph::CODE_BLOCK: {
154 E : if (!ref_it->second.IsDirect())
155 E : return false;
156 E : break;
157 : }
158 :
159 : default: {
160 : // References to any other type of block are considered unsafe by
161 : // default. Really, this should never happen.
162 i : NOTREACHED() << "Unexpected block type.";
163 : }
164 : }
165 E : }
166 :
167 E : return true;
168 E : }
169 :
170 E : bool CodeBlockReferrersAreClConsistent(const BlockGraph::Block* block) {
171 E : DCHECK(block != NULL);
172 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
173 :
174 : // Code blocks generated by CL.EXE tend to be cleanly split in two, with
175 : // code first and local data (jump and case tables) second. We expect all of
176 : // the data labels to be referenced.
177 E : std::set<BlockGraph::Offset> data_label_offsets;
178 : BlockGraph::Block::LabelMap::const_iterator label_it =
179 E : block->labels().begin();
180 E : for (; label_it != block->labels().end(); ++label_it) {
181 : // Have we already seen at least one data label?
182 E : if (!data_label_offsets.empty()) {
183 : // We only expect to see other data labels thereafter.
184 E : if (!label_it->second.has_attributes(BlockGraph::DATA_LABEL))
185 E : return false;
186 : }
187 :
188 : // Not data? Skip it.
189 E : if (!label_it->second.has_attributes(BlockGraph::DATA_LABEL))
190 E : continue;
191 :
192 : // If we get here it's another data label.
193 E : data_label_offsets.insert(label_it->first);
194 E : }
195 :
196 : // Determine the transition point that divides code from data.
197 E : BlockGraph::Offset start_of_data = block->size();
198 E : if (!data_label_offsets.empty())
199 E : start_of_data = *data_label_offsets.begin();
200 :
201 : // Iterate through the referrers. Since we have to look up back-references
202 : // this is O(n log n).
203 : BlockGraph::Block::ReferrerSet::const_iterator ref_it =
204 E : block->referrers().begin();
205 E : for (; ref_it != block->referrers().end(); ++ref_it) {
206 : // Get the reference associated with this referrer.
207 E : BlockGraph::Reference ref;
208 E : CHECK(ref_it->first->GetReference(ref_it->second, &ref));
209 :
210 E : if (ref_it->first == block) { // Self-reference.
211 E : if (ref_it->second < start_of_data) { // From code
212 E : if (ref.offset() < start_of_data) { // To code.
213 E : if (!IsValidSelfReferenceCodeToCode(block, ref))
214 E : return false;
215 E : } else { // To data.
216 E : if (!IsValidSelfReferenceCodeToData(block, ref))
217 E : return false;
218 : // Mark the data label as having been seen.
219 E : data_label_offsets.erase(ref.offset());
220 : }
221 E : } else { // From data.
222 E : if (ref.offset() < start_of_data) { // To code.
223 E : if (!IsValidSelfReferenceDataToCode(block, ref))
224 E : return false;
225 E : } else { // To data.
226 E : if (!IsValidSelfReferenceDataToData(block, ref))
227 E : return false;
228 : // Mark the data label as having been seen.
229 E : data_label_offsets.erase(ref.offset());
230 : }
231 : }
232 E : } else { // External.
233 E : if (ref_it->first->type() == BlockGraph::CODE_BLOCK) { // From code.
234 E : if (ref.offset() < start_of_data) { // To code.
235 E : if (!IsValidExternalReferenceCodeBlockToCode(block, ref))
236 E : return false;
237 E : } else { // To data.
238 : // No code block should ever have a pointer to data internal to
239 : // a code block.
240 E : return false;
241 : }
242 E : } else { // From data.
243 E : if (ref.offset() < start_of_data) { // To code.
244 E : if (!IsValidExternalReferenceDataBlockToCode(block, ref))
245 E : return false;
246 E : } else { // To data.
247 : // No data block should ever have a pointer to data internal to
248 : // a code block.
249 E : return false;
250 : }
251 : }
252 : }
253 E : }
254 :
255 : // If there are leftover data labels that have not been referenced then we
256 : // are not consistent with CL.EXE compiled code.
257 E : if (!data_label_offsets.empty())
258 E : return false;
259 :
260 E : return true;
261 E : }
262 :
263 : bool CodeBlockIsClConsistent(
264 E : const block_graph::BlockGraph::Block* block) {
265 E : DCHECK(block != NULL);
266 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
267 :
268 E : if (!block_graph::CodeBlockAttributesAreBasicBlockSafe(block))
269 E : return false;
270 E : if (!CodeBlockReferencesAreClConsistent(block))
271 i : return false;
272 E : if (!CodeBlockReferrersAreClConsistent(block))
273 E : return false;
274 :
275 E : return true;
276 E : }
277 :
278 : bool CodeBlockIsBasicBlockDecomposable(
279 E : const block_graph::BlockGraph::Block* block) {
280 E : DCHECK(block != NULL);
281 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, block->type());
282 :
283 : // If the block was built by our toolchain it's inherently safe.
284 E : if (block->attributes() & BlockGraph::BUILT_BY_SYZYGY)
285 E : return true;
286 :
287 : // We don't decompose gap or padding blocks.
288 E : if (block->attributes() & (BlockGraph::GAP_BLOCK | BlockGraph::PADDING_BLOCK))
289 E : return false;
290 :
291 : // We only decompose code blocks produced by a known-safe subset of cl.exe.
292 E : if (!CodeBlockIsClConsistent(block))
293 E : return false;
294 :
295 E : return true;
296 E : }
297 :
298 : } // namespace pe
|