1 : // Copyright 2013 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/pe_transform_policy.h"
16 :
17 : namespace pe {
18 :
19 : namespace {
20 :
21 : using block_graph::BlockGraph;
22 :
23 : const size_t kPointerSize = sizeof(core::AbsoluteAddress);
24 :
25 : // Returns true if there is a data label at the given offset,
26 : // false otherwise.
27 : bool HasDataLabel(const BlockGraph::Block* code_block,
28 E : BlockGraph::Offset offset) {
29 : BlockGraph::Block::LabelMap::const_iterator label_it =
30 E : code_block->labels().find(offset);
31 E : if (label_it == code_block->labels().end())
32 E : return false;
33 E : if (!label_it->second.has_attributes(BlockGraph::DATA_LABEL))
34 i : return false;
35 E : return true;
36 E : }
37 :
38 : bool IsValidSelfReferenceCodeToCode(
39 : const BlockGraph::Block* code_block,
40 E : const BlockGraph::Reference& ref) {
41 : // These references must be direct. They may be 1- or 4-byte PC-relative refs,
42 : // or 4-byte absolute refs.
43 E : if (!ref.IsDirect())
44 E : return false;
45 :
46 E : switch (ref.type()) {
47 : case BlockGraph::PC_RELATIVE_REF: {
48 E : if (ref.size() != 1 && ref.size() != kPointerSize)
49 i : return false;
50 E : break;
51 : }
52 :
53 : case BlockGraph::ABSOLUTE_REF: {
54 E : if (ref.size() != kPointerSize)
55 i : return false;
56 E : break;
57 : }
58 :
59 : default: {
60 E : return false;
61 : }
62 : }
63 :
64 E : return true;
65 E : }
66 :
67 : bool IsValidSelfReferenceCodeToData(
68 : const BlockGraph::Block* code_block,
69 E : const BlockGraph::Reference& ref) {
70 : // Must be direct 4-byte absolute references to a data label.
71 : if (ref.type() != BlockGraph::ABSOLUTE_REF ||
72 : ref.size() != kPointerSize ||
73 : !ref.IsDirect() ||
74 E : !HasDataLabel(code_block, ref.offset())) {
75 E : return false;
76 : }
77 E : return true;
78 E : }
79 :
80 : bool IsValidSelfReferenceDataToCode(
81 : const BlockGraph::Block* code_block,
82 E : const BlockGraph::Reference& ref) {
83 : // Must be 4-byte direct absolute references.
84 : if (ref.type() != BlockGraph::ABSOLUTE_REF || ref.size() != kPointerSize ||
85 E : !ref.IsDirect()) {
86 E : return false;
87 : }
88 E : return true;
89 E : }
90 :
91 : bool IsValidSelfReferenceDataToData(
92 : const BlockGraph::Block* code_block,
93 E : const BlockGraph::Reference& ref) {
94 : // Must be 4-byte direct absolute references. We see this in 'meta' case
95 : // tables, where there will be one case table that is used to select among a
96 : // handful of case tables, and then the selected case table will be used for
97 : // the second round of logic. This happens a lot in very complex conditional
98 : // code like that generated by gtest and gmock.
99 : if (ref.type() != BlockGraph::ABSOLUTE_REF || ref.size() != kPointerSize ||
100 E : !ref.IsDirect()) {
101 E : return false;
102 : }
103 E : return true;
104 E : }
105 :
106 : bool IsValidExternalReferenceCodeBlockToCode(
107 : const BlockGraph::Block* code_block,
108 E : const BlockGraph::Reference& ref) {
109 : // Must be direct 4-byte absolute or pc-rel reference to offset 0.
110 : if (ref.type() != BlockGraph::ABSOLUTE_REF &&
111 E : ref.type() != BlockGraph::PC_RELATIVE_REF) {
112 E : return false;
113 : }
114 E : if (ref.size() != kPointerSize || ref.offset() != 0 || !ref.IsDirect())
115 E : return false;
116 E : return true;
117 E : }
118 :
119 : bool IsValidExternalReferenceDataBlockToCode(
120 : const BlockGraph::Block* code_block,
121 E : const BlockGraph::Reference& ref) {
122 : // Must be direct 4-byte absolute or relative (PE structures) pointer to
123 : // offset 0.
124 : if (ref.type() != BlockGraph::ABSOLUTE_REF &&
125 E : ref.type() != BlockGraph::RELATIVE_REF) {
126 E : return false;
127 : }
128 E : if (ref.size() != kPointerSize || ref.offset() != 0 || !ref.IsDirect())
129 E : return false;
130 E : return true;
131 E : }
132 :
133 : } // namespace
134 :
135 : PETransformPolicy::PETransformPolicy()
136 : : block_result_cache_(new BlockResultCache()),
137 E : allow_inline_assembly_(false) {
138 E : }
139 :
140 : bool PETransformPolicy::BlockIsSafeToBasicBlockDecompose(
141 E : const BlockGraph::Block* block) const {
142 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), block);
143 :
144 E : if (block->type() != BlockGraph::CODE_BLOCK)
145 E : return false;
146 :
147 : // Look for a cached result. This prevents repeated (expensive) calculations
148 : // and inspections over the block.
149 : BlockResultCache::const_iterator it = block_result_cache_->find(
150 E : block->id());
151 E : if (it != block_result_cache_->end())
152 E : return it->second;
153 :
154 E : bool result = CodeBlockIsSafeToBasicBlockDecompose(block);
155 E : block_result_cache_->insert(std::make_pair(block->id(), result));
156 E : return result;
157 E : }
158 :
159 : bool PETransformPolicy::ReferenceIsSafeToRedirect(
160 : const BlockGraph::Block* referrer,
161 E : const BlockGraph::Reference& reference) const {
162 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), referrer);
163 : // TODO(chrisha): Move IsUnsafeReference here!
164 E : return true;
165 E : }
166 :
167 : bool PETransformPolicy::CodeBlockIsSafeToBasicBlockDecompose(
168 E : const BlockGraph::Block* code_block) const {
169 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
170 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
171 :
172 : // If the code_block was built by our toolchain it's inherently safe.
173 E : if (code_block->attributes() & BlockGraph::BUILT_BY_SYZYGY)
174 E : return true;
175 :
176 E : if (!CodeBlockAttributesAreBasicBlockSafe(code_block, allow_inline_assembly_))
177 E : return false;
178 E : if (!CodeBlockHasPrivateSymbols(code_block))
179 E : return false;
180 E : if (!CodeBlockLayoutIsClConsistent(code_block))
181 E : return false;
182 E : if (!CodeBlockReferencesAreClConsistent(code_block))
183 i : return false;
184 E : if (!CodeBlockReferrersAreClConsistent(code_block))
185 E : return false;
186 :
187 E : return true;
188 E : }
189 :
190 : bool PETransformPolicy::CodeBlockHasPrivateSymbols(
191 E : const BlockGraph::Block* code_block) {
192 E : BlockGraph::Block::LabelMap::const_iterator it = code_block->labels().begin();
193 E : for (; it != code_block->labels().end(); ++it) {
194 E : if (it->second.attributes() & ~BlockGraph::PUBLIC_SYMBOL_LABEL)
195 E : return true;
196 E : }
197 E : return false;
198 E : }
199 :
200 : bool PETransformPolicy::CodeBlockAttributesAreBasicBlockSafe(
201 : const BlockGraph::Block* code_block,
202 E : bool allow_inline_assembly) {
203 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
204 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
205 :
206 : // If the code_block was built by our toolchain it's inherently safe. This
207 : // attribute is used to whitelist a block.
208 E : if (code_block->attributes() & BlockGraph::BUILT_BY_SYZYGY)
209 E : return true;
210 :
211 : // Any of the following attributes make it unsafe to basic-block
212 : // decompose the code code_block.
213 : static const BlockGraph::BlockAttributes kDefaultInvalidAttributes =
214 : BlockGraph::GAP_BLOCK |
215 : BlockGraph::PADDING_BLOCK |
216 : BlockGraph::HAS_INLINE_ASSEMBLY |
217 : BlockGraph::BUILT_BY_UNSUPPORTED_COMPILER |
218 : // TODO(chrisha): Remove this once we've moved to the new decomposer!
219 : BlockGraph::ERRORED_DISASSEMBLY |
220 : BlockGraph::HAS_EXCEPTION_HANDLING |
221 : // TODO(chrisha): Remove this once we've moved to the new decomposer!
222 : BlockGraph::DISASSEMBLED_PAST_END;
223 :
224 E : BlockGraph::BlockAttributes invalid_attributes = kDefaultInvalidAttributes;
225 E : if (allow_inline_assembly)
226 E : invalid_attributes ^= BlockGraph::HAS_INLINE_ASSEMBLY;
227 :
228 E : if (code_block->attributes() & invalid_attributes)
229 E : return false;
230 :
231 E : return true;
232 E : }
233 :
234 : bool PETransformPolicy::CodeBlockLayoutIsClConsistent(
235 E : const BlockGraph::Block* code_block) {
236 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
237 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
238 :
239 : // If there are no labels then this is not a valid block.
240 E : if (code_block->labels().empty())
241 E : return false;
242 :
243 : // TODO(chrisha): Ensure that there is a code label at offset zero. If there
244 : // is none then this is not a valid CL-produced code block. This breaks
245 : // a bunch of code and unittests, as we may insert code that violates
246 : // this principle. Changes to the basic-block builder are required before
247 : // putting this in place.
248 :
249 : // Iterate over all labels in reverse order, looking at the labels. We want
250 : // to make sure there are no invalid labels, and that all data labels come
251 : // after all code labels.
252 : BlockGraph::Block::LabelMap::const_reverse_iterator it =
253 E : code_block->labels().rbegin();
254 E : bool saw_non_data_label = false;
255 E : for (; it != code_block->labels().rend(); ++it) {
256 E : const BlockGraph::Label& label = it->second;
257 :
258 : // No labels should be beyond the end of the block.
259 E : if (it->first >= static_cast<BlockGraph::Offset>(code_block->size())) {
260 : // Except for a solo debug-end label, which can come after the block if
261 : // there is no post-amble.
262 E : if (label.attributes() == BlockGraph::DEBUG_END_LABEL)
263 E : continue;
264 E : return false;
265 : }
266 :
267 E : if (label.has_attributes(BlockGraph::DATA_LABEL)) {
268 : // There should never be data labels beyond the end of the block.
269 E : if (it->first >= static_cast<BlockGraph::Offset>(code_block->size()))
270 i : return false;
271 :
272 : // If a non-data label was already encountered, and now there's another
273 : // data label then bail: the block does not respect the 'code first,
274 : // data second' supported layout requirement.
275 E : if (saw_non_data_label)
276 E : return false;
277 E : } else {
278 : // Remember that a non-data label was seen. No further data labels should
279 : // be encountered.
280 E : saw_non_data_label = true;
281 : }
282 E : }
283 :
284 E : return true;
285 E : }
286 :
287 : bool PETransformPolicy::CodeBlockReferencesAreClConsistent(
288 E : const BlockGraph::Block* code_block) {
289 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
290 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
291 :
292 : // Iterate over the outgoing references from this code_block.
293 : BlockGraph::Block::ReferenceMap::const_iterator ref_it =
294 E : code_block->references().begin();
295 E : for (; ref_it != code_block->references().end(); ++ref_it) {
296 : // References to data are always safe so we don't inspect them.
297 E : if (ref_it->second.referenced()->type() == BlockGraph::CODE_BLOCK) {
298 : // References to code blocks must be direct.
299 E : if (!ref_it->second.IsDirect())
300 E : return false;
301 : }
302 E : }
303 :
304 E : return true;
305 E : }
306 :
307 : bool PETransformPolicy::CodeBlockReferrersAreClConsistent(
308 E : const BlockGraph::Block* code_block) {
309 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), code_block);
310 E : DCHECK_EQ(BlockGraph::CODE_BLOCK, code_block->type());
311 :
312 : // We expect all data labels to be referenced internally by the code block.
313 E : std::set<BlockGraph::Offset> data_label_offsets;
314 : BlockGraph::Block::LabelMap::const_iterator label_it =
315 E : code_block->labels().begin();
316 E : for (; label_it != code_block->labels().end(); ++label_it) {
317 E : if (label_it->second.has_attributes(BlockGraph::DATA_LABEL))
318 E : data_label_offsets.insert(label_it->first);
319 E : }
320 :
321 : // Determine the transition point that divides code from data. This is only
322 : // a valid calculation if the layout has been checked and is valid.
323 E : BlockGraph::Offset start_of_data = code_block->size();
324 E : if (!data_label_offsets.empty())
325 E : start_of_data = *data_label_offsets.begin();
326 :
327 : // Iterate through the referrers. Since we have to look up back-references
328 : // this is O(n log n).
329 : BlockGraph::Block::ReferrerSet::const_iterator ref_it =
330 E : code_block->referrers().begin();
331 E : for (; ref_it != code_block->referrers().end(); ++ref_it) {
332 : // Get the reference associated with this referrer.
333 E : BlockGraph::Reference ref;
334 E : CHECK(ref_it->first->GetReference(ref_it->second, &ref));
335 :
336 E : if (ref_it->first == code_block) { // Self-reference.
337 E : if (ref_it->second < start_of_data) { // From code
338 E : if (ref.offset() < start_of_data) { // To code.
339 E : if (!IsValidSelfReferenceCodeToCode(code_block, ref))
340 E : return false;
341 E : } else { // To data.
342 E : if (!IsValidSelfReferenceCodeToData(code_block, ref))
343 E : return false;
344 : // Mark the data label as having been seen.
345 E : data_label_offsets.erase(ref.offset());
346 : }
347 E : } else { // From data.
348 E : if (ref.offset() < start_of_data) { // To code.
349 E : if (!IsValidSelfReferenceDataToCode(code_block, ref))
350 E : return false;
351 E : } else { // To data.
352 E : if (!IsValidSelfReferenceDataToData(code_block, ref))
353 E : return false;
354 : // Mark the data label as having been seen.
355 E : data_label_offsets.erase(ref.offset());
356 : }
357 : }
358 E : } else { // External.
359 E : if (ref_it->first->type() == BlockGraph::CODE_BLOCK) { // From code.
360 E : if (ref.offset() < start_of_data) { // To code.
361 E : if (!IsValidExternalReferenceCodeBlockToCode(code_block, ref))
362 E : return false;
363 E : } else { // To data.
364 : // No code block should ever have a pointer to data internal to
365 : // a code block.
366 E : return false;
367 : }
368 E : } else { // From data.
369 E : if (ref.offset() < start_of_data) { // To code.
370 E : if (!IsValidExternalReferenceDataBlockToCode(code_block, ref))
371 E : return false;
372 E : } else { // To data.
373 : // No data block should ever have a pointer to data internal to
374 : // a code block.
375 E : return false;
376 : }
377 : }
378 : }
379 E : }
380 :
381 : // If there are leftover data labels that have not been referenced then we
382 : // are not consistent with CL.EXE compiled code.
383 E : if (!data_label_offsets.empty())
384 E : return false;
385 :
386 E : return true;
387 E : }
388 :
389 : } // namespace pe
|