1 : // Copyright 2013 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/pe_transform_policy.h"
16 :
17 : namespace pe {
18 :
19 : namespace {
20 :
21 : using block_graph::BlockGraph;
22 :
23 : const size_t kPointerSize = sizeof(core::AbsoluteAddress);
24 :
25 : // Returns true if there is a data label at the given offset,
26 : // false otherwise.
27 : bool HasDataLabel(const BlockGraph::Block* code_block,
28 E : BlockGraph::Offset offset) {
29 : BlockGraph::Block::LabelMap::const_iterator label_it =
30 E : code_block->labels().find(offset);
31 E : if (label_it == code_block->labels().end())
32 E : return false;
33 E : if (!label_it->second.has_attributes(BlockGraph::DATA_LABEL))
34 i : return false;
35 E : return true;
36 E : }
37 :
38 : bool IsValidSelfReferenceCodeToCode(
39 : const BlockGraph::Block* code_block,
40 E : const BlockGraph::Reference& ref) {
41 : // These references must be direct. They may be 1- or 4-byte PC-relative refs,
42 : // or 4-byte absolute refs.
43 E : if (!ref.IsDirect())
44 E : return false;
45 :
46 E : switch (ref.type()) {
47 : case BlockGraph::PC_RELATIVE_REF: {
48 E : if (ref.size() != 1 && ref.size() != kPointerSize)
49 i : return false;
50 E : break;
51 : }
52 :
53 : case BlockGraph::ABSOLUTE_REF: {
54 E : if (ref.size() != kPointerSize)
55 i : return false;
56 E : break;
57 : }
58 :
59 : default: {
60 E : return false;
61 : }
62 : }
63 :
64 E : return true;
65 E : }
66 :
67 : bool IsValidSelfReferenceCodeToData(
68 : const BlockGraph::Block* code_block,
69 E : const BlockGraph::Reference& ref) {
70 : // Must be direct 4-byte absolute references to a data label.
71 : if (ref.type() != BlockGraph::ABSOLUTE_REF ||
72 : ref.size() != kPointerSize ||
73 : !ref.IsDirect() ||
74 E : !HasDataLabel(code_block, ref.offset())) {
75 E : return false;
76 : }
77 E : return true;
78 E : }
79 :
80 : bool IsValidSelfReferenceDataToCode(
81 : const BlockGraph::Block* code_block,
82 E : const BlockGraph::Reference& ref) {
83 : // Must be 4-byte direct absolute references.
84 : if (ref.type() != BlockGraph::ABSOLUTE_REF || ref.size() != kPointerSize ||
85 E : !ref.IsDirect()) {
86 E : return false;
87 : }
88 E : return true;
89 E : }
90 :
91 : bool IsValidSelfReferenceDataToData(
92 : const BlockGraph::Block* code_block,
93 E : const BlockGraph::Reference& ref) {
94 : // Must be 4-byte direct absolute references. We see this in 'meta' case
95 : // tables, where there will be one case table that is used to select among a
96 : // handful of case tables, and then the selected case table will be used for
97 : // the second round of logic. This happens a lot in very complex conditional
98 : // code like that generated by gtest and gmock.
99 : if (ref.type() != BlockGraph::ABSOLUTE_REF || ref.size() != kPointerSize ||
100 E : !ref.IsDirect()) {
101 E : return false;
102 : }
103 E : return true;
104 E : }
105 :
106 : bool IsValidExternalReferenceCodeBlockToCode(
107 : const BlockGraph::Block* code_block,
108 E : const BlockGraph::Reference& ref) {
109 : // Must be direct 4-byte absolute or pc-rel reference to offset 0.
110 : if (ref.type() != BlockGraph::ABSOLUTE_REF &&
111 E : ref.type() != BlockGraph::PC_RELATIVE_REF) {
112 E : return false;
113 : }
114 E : if (ref.size() != kPointerSize || ref.offset() != 0 || !ref.IsDirect())
115 E : return false;
116 E : return true;
117 E : }
118 :
119 : bool IsValidExternalReferenceDataBlockToCode(
120 : const BlockGraph::Block* code_block,
121 E : const BlockGraph::Reference& ref) {
122 : // Must be direct 4-byte absolute or relative (PE structures) pointer to
123 : // offset 0.
124 : if (ref.type() != BlockGraph::ABSOLUTE_REF &&
125 E : ref.type() != BlockGraph::RELATIVE_REF) {
126 E : return false;
127 : }
128 E : if (ref.size() != kPointerSize || ref.offset() != 0 || !ref.IsDirect())
129 E : return false;
130 E : return true;
131 E : }
132 :
133 : } // namespace
134 :
135 : PETransformPolicy::PETransformPolicy()
136 : : block_result_cache_(new BlockResultCache()),
137 E : allow_inline_assembly_(false) {
138 E : }
139 :
140 : bool PETransformPolicy::BlockIsSafeToBasicBlockDecompose(
141 E : const BlockGraph::Block* block) const {
142 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), block);
143 :
144 E : if (block->type() != BlockGraph::CODE_BLOCK)
145 E : return false;
146 :
147 : // Check attributes directly here, outside of the cache. These are cheap to
148 : // check and can change during decomposition/transformation, so can
149 : // potentially change cached results.
150 E : if (block->attributes() & BlockGraph::BUILT_BY_SYZYGY)
151 i : return true;
152 E : if (!CodeBlockAttributesAreBasicBlockSafe(block, allow_inline_assembly_))
153 E : return false;
154 :
155 : // Look for a cached result. This prevents repeated (expensive) calculations
156 : // and inspections over the block.
157 : BlockResultCache::const_iterator it = block_result_cache_->find(
158 E : block->id());
159 E : if (it != block_result_cache_->end())
160 E : return it->second;
161 :
162 E : bool result = CodeBlockIsSafeToBasicBlockDecompose(block);
163 E : block_result_cache_->insert(std::make_pair(block->id(), result));
164 E : return result;
165 E : }
166 :
167 : bool PETransformPolicy::ReferenceIsSafeToRedirect(
168 : const BlockGraph::Block* referrer,
169 E : const BlockGraph::Reference& reference) const {
170 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), referrer);
171 : // TODO(chrisha): Move IsUnsafeReference here!
172 E : return true;
173 E : }
174 :
175 : bool PETransformPolicy::CodeBlockIsSafeToBasicBlockDecompose(
176 E : const BlockGraph::Block* code_block) const {
177 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
178 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
179 :
180 : // If the code_block was built by our toolchain it's inherently safe.
181 E : if (code_block->attributes() & BlockGraph::BUILT_BY_SYZYGY)
182 E : return true;
183 :
184 E : if (!CodeBlockAttributesAreBasicBlockSafe(code_block, allow_inline_assembly_))
185 E : return false;
186 E : if (!CodeBlockHasPrivateSymbols(code_block))
187 E : return false;
188 E : if (!CodeBlockLayoutIsClConsistent(code_block))
189 i : return false;
190 E : if (!CodeBlockReferencesAreClConsistent(code_block))
191 i : return false;
192 E : if (!CodeBlockReferrersAreClConsistent(code_block))
193 E : return false;
194 :
195 E : return true;
196 E : }
197 :
198 : bool PETransformPolicy::CodeBlockHasPrivateSymbols(
199 E : const BlockGraph::Block* code_block) {
200 E : BlockGraph::Block::LabelMap::const_iterator it = code_block->labels().begin();
201 E : for (; it != code_block->labels().end(); ++it) {
202 E : if (it->second.attributes() & ~BlockGraph::PUBLIC_SYMBOL_LABEL)
203 E : return true;
204 E : }
205 E : return false;
206 E : }
207 :
208 : bool PETransformPolicy::CodeBlockAttributesAreBasicBlockSafe(
209 : const BlockGraph::Block* code_block,
210 E : bool allow_inline_assembly) {
211 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
212 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
213 :
214 : // If the code_block was built by our toolchain it's inherently safe. This
215 : // attribute is used to whitelist a block.
216 E : if (code_block->attributes() & BlockGraph::BUILT_BY_SYZYGY)
217 E : return true;
218 :
219 : // Any of the following attributes make it unsafe to basic-block
220 : // decompose the code code_block.
221 : static const BlockGraph::BlockAttributes kDefaultInvalidAttributes =
222 : BlockGraph::GAP_BLOCK |
223 : BlockGraph::PADDING_BLOCK |
224 : BlockGraph::HAS_INLINE_ASSEMBLY |
225 : BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER |
226 : BlockGraph::HAS_EXCEPTION_HANDLING |
227 : BlockGraph::UNSUPPORTED_INSTRUCTIONS;
228 :
229 E : BlockGraph::BlockAttributes invalid_attributes = kDefaultInvalidAttributes;
230 E : if (allow_inline_assembly)
231 E : invalid_attributes ^= BlockGraph::HAS_INLINE_ASSEMBLY;
232 :
233 E : if (code_block->attributes() & invalid_attributes)
234 E : return false;
235 :
236 E : return true;
237 E : }
238 :
239 : bool PETransformPolicy::CodeBlockLayoutIsClConsistent(
240 E : const BlockGraph::Block* code_block) {
241 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
242 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
243 :
244 : // If there are no labels then this is not a valid block.
245 E : if (code_block->labels().empty())
246 E : return false;
247 :
248 : // TODO(chrisha): Ensure that there is a code label at offset zero. If there
249 : // is none then this is not a valid CL-produced code block. This breaks
250 : // a bunch of code and unittests, as we may insert code that violates
251 : // this principle. Changes to the basic-block builder are required before
252 : // putting this in place.
253 :
254 : // Iterate over all labels in reverse order, looking at the labels. We want
255 : // to make sure there are no invalid labels, and that all data labels are
256 : // at the tail end of the block (no non-data label may come after a data
257 : // label).
258 : BlockGraph::Block::LabelMap::const_reverse_iterator it =
259 E : code_block->labels().rbegin();
260 E : bool saw_non_data_label = false;
261 E : for (; it != code_block->labels().rend(); ++it) {
262 E : const BlockGraph::Label& label = it->second;
263 :
264 : // No labels should be beyond the end of the block.
265 E : if (it->first >= static_cast<BlockGraph::Offset>(code_block->size())) {
266 : // Except for a solo debug-end label, which can come after the block if
267 : // there is no post-amble.
268 E : if (label.attributes() == BlockGraph::DEBUG_END_LABEL)
269 E : continue;
270 E : return false;
271 : }
272 :
273 E : if (label.has_attributes(BlockGraph::DATA_LABEL)) {
274 : // There should never be data labels beyond the end of the block.
275 E : if (it->first >= static_cast<BlockGraph::Offset>(code_block->size()))
276 i : return false;
277 :
278 : // If a non-data label was already encountered, and now there's another
279 : // data label then bail: the block does not respect the 'code first,
280 : // data second' supported layout requirement.
281 E : if (saw_non_data_label)
282 E : return false;
283 E : } else {
284 : // Remember that a non-data label was seen. No further data labels should
285 : // be encountered.
286 E : saw_non_data_label = true;
287 : }
288 E : }
289 :
290 E : return true;
291 E : }
292 :
293 : bool PETransformPolicy::CodeBlockReferencesAreClConsistent(
294 E : const BlockGraph::Block* code_block) {
295 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
296 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
297 :
298 : // Iterate over the outgoing references from this code_block.
299 : BlockGraph::Block::ReferenceMap::const_iterator ref_it =
300 E : code_block->references().begin();
301 E : for (; ref_it != code_block->references().end(); ++ref_it) {
302 : // References to data are always safe so we don't inspect them.
303 E : if (ref_it->second.referenced()->type() == BlockGraph::CODE_BLOCK) {
304 : // References to code blocks must be direct.
305 E : if (!ref_it->second.IsDirect())
306 E : return false;
307 : }
308 E : }
309 :
310 E : return true;
311 E : }
312 :
313 : bool PETransformPolicy::CodeBlockReferrersAreClConsistent(
314 E : const BlockGraph::Block* code_block) {
315 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
316 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
317 :
318 : // We expect all data labels to be referenced internally by the code block.
319 E : std::set<BlockGraph::Offset> data_label_offsets;
320 : BlockGraph::Block::LabelMap::const_iterator label_it =
321 E : code_block->labels().begin();
322 E : for (; label_it != code_block->labels().end(); ++label_it) {
323 E : if (label_it->second.has_attributes(BlockGraph::DATA_LABEL))
324 E : data_label_offsets.insert(label_it->first);
325 E : }
326 :
327 : // Determine the transition point that divides code from data. This is only
328 : // a valid calculation if the layout has been checked and is valid.
329 E : BlockGraph::Offset start_of_data = code_block->size();
330 E : if (!data_label_offsets.empty())
331 E : start_of_data = *data_label_offsets.begin();
332 :
333 : // Iterate through the referrers. Since we have to look up back-references
334 : // this is O(n log n).
335 : BlockGraph::Block::ReferrerSet::const_iterator ref_it =
336 E : code_block->referrers().begin();
337 E : for (; ref_it != code_block->referrers().end(); ++ref_it) {
338 : // Get the reference associated with this referrer.
339 E : BlockGraph::Reference ref;
340 E : CHECK(ref_it->first->GetReference(ref_it->second, &ref));
341 :
342 E : if (ref_it->first == code_block) { // Self-reference.
343 E : if (ref_it->second < start_of_data) { // From code
344 E : if (ref.offset() < start_of_data) { // To code.
345 E : if (!IsValidSelfReferenceCodeToCode(code_block, ref))
346 E : return false;
347 E : } else { // To data.
348 E : if (!IsValidSelfReferenceCodeToData(code_block, ref))
349 E : return false;
350 : // Mark the data label as having been seen.
351 E : data_label_offsets.erase(ref.offset());
352 : }
353 E : } else { // From data.
354 E : if (ref.offset() < start_of_data) { // To code.
355 E : if (!IsValidSelfReferenceDataToCode(code_block, ref))
356 E : return false;
357 E : } else { // To data.
358 E : if (!IsValidSelfReferenceDataToData(code_block, ref))
359 E : return false;
360 : // Mark the data label as having been seen.
361 E : data_label_offsets.erase(ref.offset());
362 : }
363 : }
364 E : } else { // External.
365 E : if (ref_it->first->type() == BlockGraph::CODE_BLOCK) { // From code.
366 E : if (ref.offset() < start_of_data) { // To code.
367 E : if (!IsValidExternalReferenceCodeBlockToCode(code_block, ref))
368 E : return false;
369 E : } else { // To data.
370 : // No code block should ever have a pointer to data internal to
371 : // a code block.
372 E : return false;
373 : }
374 E : } else { // From data.
375 E : if (ref.offset() < start_of_data) { // To code.
376 E : if (!IsValidExternalReferenceDataBlockToCode(code_block, ref))
377 E : return false;
378 E : } else { // To data.
379 : // No data block should ever have a pointer to data internal to
380 : // a code block.
381 E : return false;
382 : }
383 : }
384 : }
385 E : }
386 :
387 : // If there are leftover data labels that have not been referenced then we
388 : // are not consistent with CL.EXE compiled code.
389 E : if (!data_label_offsets.empty())
390 E : return false;
391 :
392 E : return true;
393 E : }
394 :
395 : } // namespace pe
|