1 : // Copyright 2013 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // This class implements the functions inlining transformation.
16 : //
17 : // Performing inline expansion on assembly is not an easy task. As the transform
18 : // runs after the standard compiler WPO, it may face custom calling convention
19 : // and strange stack manipulations. Thus, every expansion must be safe.
20 : //
// The pattern based inlining is able to inline many common cases encountered
// with common compilers. This inlining transformation avoids decomposing the
// block, which is much more efficient.
24 : // Example:
25 : // - push ebp
26 : // mov ebp, esp
27 : // pop ebp
28 : // ret
29 : //
30 : // The trivial body inlining is able to inline any trivial accessors.
31 : // Assumptions:
32 : // - No stack manipulations (except local push/pop).
33 : // - No branching instructions (except the last return or jump).
34 : // - No basic blocks reference, data block, jump-table, etc...
35 : // Example:
36 : // - xor eax, eax
37 : // ret
38 :
39 : #include "syzygy/optimize/transforms/inlining_transform.h"
40 :
41 : #include "syzygy/block_graph/basic_block.h"
42 : #include "syzygy/block_graph/basic_block_assembler.h"
43 : #include "syzygy/block_graph/basic_block_decomposer.h"
44 : #include "syzygy/block_graph/block_graph.h"
45 : // TODO(etienneb): liveness analysis internal should be hoisted to an
46 : // instructions helper namespace, and shared between analysis. It is quite
47 : // common to get the information on registers defined or used by an
48 : // instruction, or the memory operand read and written.
49 : #include "syzygy/block_graph/analysis/liveness_analysis_internal.h"
50 : #include "syzygy/optimize/transforms/peephole_transform.h"
51 :
52 : namespace optimize {
53 : namespace transforms {
54 :
55 : namespace {
56 :
57 : using block_graph::BasicBlock;
58 : using block_graph::BasicBlockAssembler;
59 : using block_graph::BasicBlockDecomposer;
60 : using block_graph::BasicBlockReference;
61 : using block_graph::BasicBlockSubGraph;
62 : using block_graph::BasicCodeBlock;
63 : using block_graph::BlockGraph;
64 : using block_graph::Displacement;
65 : using block_graph::Immediate;
66 : using block_graph::Instruction;
67 : using block_graph::Operand;
68 : using block_graph::Successor;
69 : using block_graph::analysis::LivenessAnalysis;
70 :
71 : typedef ApplicationProfile::BlockProfile BlockProfile;
72 : typedef Instruction::BasicBlockReferenceMap BasicBlockReferenceMap;
73 : typedef BasicBlockSubGraph::BBCollection BBCollection;
74 : typedef BasicBlock::Instructions Instructions;
75 :
// The kinds of trivial callee bodies that MatchTrivialBody can recognize.
enum MatchKind {
  // No supported pattern was matched.
  kInvalidMatch,
  // Body ends in a plain 'ret'.
  kReturnMatch,
  // Body ends in 'ret n' (callee pops n bytes of arguments off the stack).
  kReturnConstantMatch,
  // Body ends in a direct tail-jump (successor) to another block.
  kDirectTrampolineMatch,
  // Body ends in an indirect 'jmp [disp32]' through a global (presumably an
  // import-table-style slot — see the operand checks in MatchTrivialBody).
  kIndirectTrampolineMatch,
};
83 :
// Threshold in bytes to consider a block as a candidate for inlining. This
// size must be large enough not to miss good candidates, but small enough to
// avoid the overhead of decomposing and simplifying huge blocks.
const size_t kCodeSizeThreshold = 25;

// Threshold in bytes to inline a callee in a hot block.
// NOTE(review): this constant is not referenced anywhere in this translation
// unit — confirm whether it is still needed or should be removed.
const size_t kHotCodeSizeThreshold = 15;

// Threshold in bytes to inline a callee in a cold block.
const size_t kColdCodeSizeThreshold = 1;
94 :
95 : // These patterns are often produced by the MSVC compiler. They're common enough
96 : // that the inlining transformation matches them by pattern rather than
97 : // disassembling them.
98 :
99 : // ret
100 : const uint8 kEmptyBody1[] = { 0xC3 };
101 :
102 : // push %ebp
103 : // mov %ebp, %esp
104 : // pop %ebp
105 : // ret
106 : const uint8 kEmptyBody2[] = { 0x55, 0x8B, 0xEC, 0x5D, 0xC3 };
107 :
108 : // push %ebp
109 : // mov %ebp, %esp
110 : // mov %eax, [%ebp + 0x4]
111 : // pop %ebp
112 : // ret
113 : const uint8 kGetProgramCounter[] = {
114 : 0x55, 0x8B, 0xEC, 0x8B, 0x45, 0x04, 0x5D, 0xC3 };
115 :
116 : // Match a call instruction to a direct callee (i.e. no indirect calls).
117 E : bool MatchDirectCall(const Instruction& instr, BlockGraph::Block** callee) {
118 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block**>(NULL), callee);
119 :
120 : // Match a call instruction with one reference.
121 E : const _DInst& repr = instr.representation();
122 : if (!instr.IsCall() ||
123 : repr.ops[0].type != O_PC ||
124 E : instr.references().size() != 1) {
125 E : return false;
126 : }
127 :
128 : // The callee must be the beginning of a code block.
129 E : const BasicBlockReference& ref = instr.references().begin()->second;
130 E : BlockGraph::Block* block = ref.block();
131 : if (block == NULL ||
132 : ref.base() != 0 ||
133 : ref.offset() != 0 ||
134 E : block->type() != BlockGraph::CODE_BLOCK) {
135 E : return false;
136 : }
137 :
138 : // Returns the matched callee.
139 E : *callee = block;
140 E : return true;
141 E : }
142 :
143 : bool MatchRawBytes(BlockGraph::Block* callee,
144 : const uint8* bytes,
145 E : size_t length) {
146 : if (callee->size() != length ||
147 E : ::memcmp(callee->data(), bytes, length) != 0) {
148 E : return false;
149 : }
150 :
151 E : return true;
152 E : }
153 :
154 E : bool MatchGetProgramCounter(BlockGraph::Block* callee) {
155 E : size_t length = sizeof(kGetProgramCounter);
156 E : if (MatchRawBytes(callee, kGetProgramCounter, length))
157 i : return true;
158 :
159 E : return false;
160 E : }
161 :
162 E : bool MatchEmptyBody(BlockGraph::Block* callee) {
163 E : size_t length1 = sizeof(kEmptyBody1);
164 E : if (MatchRawBytes(callee, kEmptyBody1, length1))
165 E : return true;
166 :
167 E : size_t length2 = sizeof(kEmptyBody2);
168 E : if (MatchRawBytes(callee, kEmptyBody2, length2))
169 E : return true;
170 :
171 E : return false;
172 E : }
173 :
174 : // Match trivial body in a subgraph. A trivial body is a single basic block
175 : // without control flow, stack manipulation or other unsupported constructs.
176 : // @param subgraph The subgraph to try matching a trivial body.
177 : // @param kind On a match, receives the kind of match was found.
178 : // @param return_constant Receives the number of bytes to pop from the stack
179 : // after the body (when kind is kReturnConstantMatch).
180 : // @param reference Receives a reference to a target continuation (when kind is
181 : // kDirectTrampolineMatch or kIndirectTrampolineMatch).
182 : // @param body On success, receives the trivial body.
183 : // @returns true on success, false otherwise.
bool MatchTrivialBody(const BasicBlockSubGraph& subgraph,
                      MatchKind* kind,
                      size_t* return_constant,
                      BasicBlockReference* reference,
                      BasicCodeBlock** body) {
  DCHECK_NE(reinterpret_cast<MatchKind*>(NULL), kind);
  DCHECK_NE(reinterpret_cast<size_t*>(NULL), return_constant);
  DCHECK_NE(reinterpret_cast<BasicBlockReference*>(NULL), reference);
  DCHECK_NE(reinterpret_cast<BasicCodeBlock**>(NULL), body);

  // Assume no match.
  *kind = kInvalidMatch;

  // Trivial body only has one basic block.
  if (subgraph.basic_blocks().size() != 1)
    return false;
  // Cast yields NULL for non-code basic blocks; those are not trivial bodies.
  BasicCodeBlock* bb = BasicCodeBlock::Cast(*subgraph.basic_blocks().begin());
  if (bb == NULL)
    return false;

  // Current local stack depth, in bytes. Only balanced 32-bit push/pop pairs
  // are allowed, so the depth moves in steps of 4.
  size_t stack_depth = 0;

  // Iterates through each instruction.
  Instructions::iterator inst_iter = bb->instructions().begin();
  for (; inst_iter != bb->instructions().end(); ++inst_iter) {
    const Instruction& instr = *inst_iter;
    const _DInst& repr = instr.representation();

    // Do not allow any references to a basic block. Inlining would need to
    // rewrite such intra-block references, which is not supported here.
    const BasicBlockReferenceMap& references = instr.references();
    BasicBlockReferenceMap::const_iterator ref = references.begin();
    for (; ref != references.end(); ++ref) {
      if (ref->second.referred_type() ==
          BasicBlockReference::REFERRED_TYPE_BASIC_BLOCK) {
        return false;
      }
    }

    // Return instruction is valid.
    if (instr.IsReturn()) {
      // Match return with or without a constant ('ret' vs 'ret n').
      if (repr.ops[0].type == O_NONE) {
        *kind = kReturnMatch;
      } else if (repr.ops[0].type == O_IMM) {
        *kind = kReturnConstantMatch;
        // Number of argument bytes the callee pops off the stack.
        *return_constant = repr.imm.dword;
      } else {
        return false;
      }

      // Move to the next instruction and leave loop. This instruction must be
      // the last one in the basic block.
      ++inst_iter;
      break;
    }

    // Match an indirect jump from a global variable: 'jmp [disp32]' with no
    // index register, referencing a known block.
    BasicBlockReference target_ref;
    if (instr.IsBranch() &&
        instr.references().size() == 1 &&
        instr.FindOperandReference(0, &target_ref) &&
        target_ref.block() != NULL &&
        repr.opcode == I_JMP &&
        repr.ops[0].type == O_DISP &&
        repr.ops[0].size == 32 &&
        repr.ops[0].index == 0) {
      // Match displacement to a block.
      *kind = kIndirectTrampolineMatch;
      *reference = target_ref;

      // Move to the next instruction and leave loop. This instruction must be
      // the last one in the basic block.
      ++inst_iter;
      break;
    }

    // Avoid control flow instructions.
    if (instr.IsControlFlow())
      return false;

    // Avoid unsafe stack manipulation. Only pushes/pops that stay balanced
    // within the body and never touch ebp/esp themselves are allowed.
    if (repr.opcode == I_PUSH &&
        (repr.ops[0].type == O_IMM ||
         repr.ops[0].type == O_IMM1 ||
         repr.ops[0].type == O_IMM2)) {
      // Pushing a constant is valid.
      stack_depth += 4;
    } else if (repr.opcode == I_PUSH &&
               repr.ops[0].type == O_REG &&
               repr.ops[0].index != R_EBP &&
               repr.ops[0].index != R_ESP) {
      // Pushing a register is valid.
      stack_depth += 4;
    } else if (repr.opcode == I_POP &&
               repr.ops[0].type == O_REG &&
               repr.ops[0].index != R_EBP &&
               repr.ops[0].index != R_ESP &&
               stack_depth >= 4) {
      // Popping a register is valid, but only below a matching local push
      // (stack_depth >= 4) so the return address is never popped.
      stack_depth -= 4;
    } else {
      // Any other instruction must neither define nor use esp/ebp, otherwise
      // inlining it into the caller frame would be unsafe.
      LivenessAnalysis::State defs;
      LivenessAnalysis::StateHelper::GetDefsOf(instr, &defs);

      LivenessAnalysis::State uses;
      LivenessAnalysis::StateHelper::GetUsesOf(instr, &uses);

      if (defs.IsLive(core::esp) ||
          defs.IsLive(core::ebp) ||
          uses.IsLive(core::esp) ||
          uses.IsLive(core::ebp)) {
        return false;
      }
    }
  }

  // All instructions must have been checked. (A 'break' above only happens on
  // the terminating return/jump, which must be the last instruction.)
  if (inst_iter != bb->instructions().end())
    return false;

  if (*kind == kInvalidMatch) {
    // Try to match a tail-call to an other block: exactly one unconditional
    // successor.
    if (bb->successors().size() != 1 ||
        bb->successors().front().condition() != Successor::kConditionTrue) {
      return false;
    }

    // Must match a valid reference to a block.
    const Successor& succ = bb->successors().front();
    const BasicBlockReference& ref = succ.reference();
    if (ref.block() == NULL)
      return false;

    // Matched a direct trampoline.
    *kind = kDirectTrampolineMatch;
    *reference = ref;
  } else {
    // The basic block must have a return (to remove the caller address on
    // stack) or be an indirect tail-call to an other block and must not have
    // successors.
    if (!bb->successors().empty())
      return false;
  }

  // Returns the matched body.
  DCHECK_NE(kInvalidMatch, *kind);
  *body = bb;
  return true;
}
334 :
335 : // Copy the body of the callee at a call-site in the caller.
336 : // @param kind The kind of inlining to perform.
337 : // @param return_constant The number of bytes to pop from the stack.
338 : // @param reference The reference to the continuation.
339 : // @param body The trivial body to be inlined.
340 : // @param target The place where to insert the callee body into the caller.
341 : // @param instructions The caller body that receives a copy of the callee body.
342 : // @returns true on success, false otherwise.
bool InlineTrivialBody(MatchKind kind,
                       size_t return_constant,
                       const BasicBlockReference& reference,
                       const BasicCodeBlock* body,
                       Instructions::iterator target,
                       Instructions* instructions) {
  DCHECK_NE(reinterpret_cast<Instructions*>(NULL), instructions);

  Instructions new_body;

  // Iterates through each instruction, copying everything except the
  // terminating branch/return, which is replaced below.
  Instructions::const_iterator inst_iter = body->instructions().begin();
  for (; inst_iter != body->instructions().end(); ++inst_iter) {
    const Instruction& instr = *inst_iter;
    const _DInst& repr = instr.representation();

    if (instr.IsBranch()) {
      // Skip the indirect branch instruction. (MatchTrivialBody only accepts
      // a branch inside the body for the indirect-trampoline pattern.)
      DCHECK_EQ(kind, kIndirectTrampolineMatch);
    } else if (instr.IsReturn()) {
      // Nothing to do here with return instruction (see below).
    } else {
      new_body.push_back(instr);
    }
  }

  // Replacing the return or the tail-call instruction. The assembler inserts
  // the replacement at the call-site, before the inlined body is spliced in.
  BasicBlockAssembler assembler(target, instructions);
  switch (kind) {
    case kReturnMatch:
      // Plain 'ret': the call-site simply disappears.
      break;
    case kReturnConstantMatch:
      // Replace a 'ret 4' instruction by a 'lea %esp, [%esp + 0x4]'.
      // Instruction add cannot be used because flags must be preserved.
      assembler.lea(core::esp,
                    Operand(core::esp, Displacement(return_constant)));
      break;
    case kDirectTrampolineMatch:
      // The callee tail-jumped to another block: call that block directly.
      DCHECK(reference.IsValid());
      assembler.call(
          Immediate(reference.block(), reference.offset(), reference.base()));
      break;
    case kIndirectTrampolineMatch:
      // The callee tail-jumped through a memory slot: call through that slot.
      DCHECK(reference.IsValid());
      assembler.call(Operand(Displacement(reference.block(),
                                          reference.offset(),
                                          reference.base())));
      break;
    default:
      NOTREACHED();
  }

  // Insert the inlined instructions at the call-site.
  instructions->splice(target, new_body);
  return true;
}
399 :
400 : // Decompose a block to a subgraph.
401 : bool DecomposeToBasicBlock(const BlockGraph::Block* block,
402 E : BasicBlockSubGraph* subgraph) {
403 E : DCHECK_NE(reinterpret_cast<BlockGraph::Block*>(NULL), block);
404 E : DCHECK_NE(reinterpret_cast<BasicBlockSubGraph*>(NULL), subgraph);
405 :
406 : // Decompose block to basic blocks.
407 E : BasicBlockDecomposer decomposer(block, subgraph);
408 E : if (!decomposer.Decompose())
409 i : return false;
410 :
411 E : return true;
412 E : }
413 :
414 E : size_t EstimateSubgraphSize(BasicBlockSubGraph* subgraph) {
415 E : DCHECK_NE(reinterpret_cast<BasicBlockSubGraph*>(NULL), subgraph);
416 E : const size_t kMaxBranchSize = 5;
417 E : size_t size = 0;
418 E : BBCollection& basic_blocks = subgraph->basic_blocks();
419 E : BBCollection::iterator it = basic_blocks.begin();
420 E : for (; it != basic_blocks.end(); ++it) {
421 E : BasicCodeBlock* bb = BasicCodeBlock::Cast(*it);
422 E : BasicBlock::Instructions::iterator inst_iter = bb->instructions().begin();
423 :
424 : // Sum of instructions size.
425 E : for (; inst_iter != bb->instructions().end(); ++inst_iter)
426 E : size += inst_iter->size();
427 :
428 : // Sum of successors size.
429 E : if (!bb->successors().empty())
430 E : size += kMaxBranchSize;
431 E : }
432 :
433 E : return size;
434 E : }
435 :
436 : } // namespace
437 :
// Walks every call-site of the caller subgraph and inlines trivial callees,
// either by pattern (empty bodies) or by decomposing small candidate callees
// and matching a trivial body. Always returns true; unsuitable call-sites are
// simply skipped.
bool InliningTransform::TransformBasicBlockSubGraph(
    const TransformPolicyInterface* policy,
    BlockGraph* block_graph,
    BasicBlockSubGraph* subgraph,
    ApplicationProfile* profile,
    SubGraphProfile* subgraph_profile) {
  DCHECK_NE(reinterpret_cast<TransformPolicyInterface*>(NULL), policy);
  DCHECK_NE(reinterpret_cast<BlockGraph*>(NULL), block_graph);
  DCHECK_NE(reinterpret_cast<BasicBlockSubGraph*>(NULL), subgraph);
  DCHECK_NE(reinterpret_cast<ApplicationProfile*>(NULL), profile);
  DCHECK_NE(reinterpret_cast<SubGraphProfile*>(NULL), subgraph_profile);

  const BlockGraph::Block* caller = subgraph->original_block();
  DCHECK_NE(reinterpret_cast<const BlockGraph::Block*>(NULL), caller);

  // Apply the decomposition policy to the caller.
  if (!policy->BlockIsSafeToBasicBlockDecompose(caller))
    return true;

  // Iterates through each basic block.
  BasicBlockSubGraph::BBCollection::iterator bb_iter =
      subgraph->basic_blocks().begin();
  for (; bb_iter != subgraph->basic_blocks().end(); ++bb_iter) {
    BasicCodeBlock* bb = BasicCodeBlock::Cast(*bb_iter);
    if (bb == NULL)
      continue;

    // Iterates through each instruction. |inst_iter| is advanced before any
    // mutation so erasing |call_iter| below never invalidates it.
    BasicBlock::Instructions::iterator inst_iter = bb->instructions().begin();
    while (inst_iter != bb->instructions().end()) {
      const Instruction& instr = *inst_iter;
      BasicBlock::Instructions::iterator call_iter = inst_iter;
      ++inst_iter;

      // Match a direct call-site.
      BlockGraph::Block* callee = NULL;
      if (!MatchDirectCall(instr, &callee))
        continue;

      // Avoid self recursion inlining.
      // Apply the decomposition policy to the callee.
      if (caller == callee ||
          !policy->BlockIsSafeToBasicBlockDecompose(callee)) {
        continue;
      }

      if (MatchEmptyBody(callee)) {
        // Body is empty, remove call-site.
        bb->instructions().erase(call_iter);
        continue;
      }

      if (MatchGetProgramCounter(callee)) {
        // TODO(etienneb): Implement Get Program Counter with a fixup.
        continue;
      }

      // Keep only potential candidates.
      if (callee->size() > kCodeSizeThreshold) {
        continue;
      }

      BasicBlockSubGraph* callee_subgraph;
      size_t return_constant = 0;
      BasicCodeBlock* body = NULL;
      BasicBlockReference target;
      MatchKind match_kind = kInvalidMatch;

      // Look in the subgraph cache for an already decomposed subgraph, so a
      // callee shared by many call-sites is decomposed and simplified once.
      SubGraphCache::iterator look = subgraph_cache_.find(callee);
      if (look != subgraph_cache_.end()) {
        callee_subgraph = &look->second;
      } else {
        // Not in cache, decompose it. On failure the empty cache entry is
        // removed so a later attempt is not poisoned.
        callee_subgraph = &subgraph_cache_[callee];
        if (!DecomposeToBasicBlock(callee, callee_subgraph)) {
          subgraph_cache_.erase(callee);
          continue;
        }

        // Simplify the subgraph. There is no guarantee that the callee has been
        // simplified by the peephole transform. Running one pass should take
        // care of the most frequent patterns.
        PeepholeTransform::SimplifySubgraph(callee_subgraph);
      }

      // Heuristic to determine whether to inline or not the callee subgraph.
      bool candidate_for_inlining = false;
      size_t subgraph_size = EstimateSubgraphSize(callee_subgraph);

      // For a small callee, try to replace callee instructions in-place.
      // This kind of inlining is always a win.
      // NOTE(review): comparing against caller->size() looks inconsistent
      // with the comment above — a "small callee" criterion would presumably
      // compare against the call instruction's size instead. TODO confirm.
      if (subgraph_size < caller->size() + kColdCodeSizeThreshold)
        candidate_for_inlining = true;

      // TODO(etienneb): Add more rules based on profile data.

      if (!candidate_for_inlining)
        continue;

      if (MatchTrivialBody(*callee_subgraph, &match_kind, &return_constant,
                           &target, &body) &&
          InlineTrivialBody(match_kind, return_constant, target, body,
                            call_iter, &bb->instructions())) {
        // Inlining successful, remove call-site.
        bb->instructions().erase(call_iter);
      }
    }
  }

  return true;
}
550 :
551 : } // namespace transforms
552 : } // namespace optimize
|