1 | // Copyright 2014 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #include "src/compiler/backend/move-optimizer.h" |
6 | |
7 | #include "src/register-configuration.h" |
8 | |
9 | namespace v8 { |
10 | namespace internal { |
11 | namespace compiler { |
12 | |
13 | namespace { |
14 | |
15 | struct MoveKey { |
16 | InstructionOperand source; |
17 | InstructionOperand destination; |
18 | }; |
19 | |
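// Orders MoveKeys by canonicalized source, then by destination, providing the
// strict weak ordering required by the ZoneMap/ZoneSet containers below.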
20 | struct MoveKeyCompare { |
21 | bool operator()(const MoveKey& a, const MoveKey& b) const { |
22 | if (a.source.EqualsCanonicalized(b.source)) { |
23 | return a.destination.CompareCanonicalized(b.destination); |
24 | } |
25 | return a.source.CompareCanonicalized(b.source); |
26 | } |
27 | }; |
28 | |
29 | using MoveMap = ZoneMap<MoveKey, unsigned, MoveKeyCompare>; |
30 | |
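// A set of operands backed by a caller-provided buffer. Membership tests are
// FP-aliasing-aware: on platforms where FP registers of different
// representations overlap, ContainsOpOrAlias also checks aliasing registers.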
31 | class OperandSet { |
32 | public: |
33 | explicit OperandSet(ZoneVector<InstructionOperand>* buffer) |
34 | : set_(buffer), fp_reps_(0) { |
35 | buffer->clear(); |
36 | } |
37 | |
38 | void InsertOp(const InstructionOperand& op) { |
39 | set_->push_back(op); |
40 | |
    if (!kSimpleFPAliasing && op.IsFPRegister()) {
      fp_reps_ |=
          RepresentationBit(LocationOperand::cast(op).representation());
    }
43 | } |
44 | |
45 | bool Contains(const InstructionOperand& op) const { |
46 | for (const InstructionOperand& elem : *set_) { |
47 | if (elem.EqualsCanonicalized(op)) return true; |
48 | } |
49 | return false; |
50 | } |
51 | |
52 | bool ContainsOpOrAlias(const InstructionOperand& op) const { |
53 | if (Contains(op)) return true; |
54 | |
55 | if (!kSimpleFPAliasing && op.IsFPRegister()) { |
56 | // Platforms where FP registers have complex aliasing need extra checks. |
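      // Illustration (hypothetical ARM-style aliasing): float32 s2 and s3
      // overlap float64 d1, and d0/d1 overlap simd128 q0, so a set holding
      // d1 must also report s2, s3 and q0 as present.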
57 | const LocationOperand& loc = LocationOperand::cast(op); |
58 | MachineRepresentation rep = loc.representation(); |
      // If we haven't encountered mixed-rep FP registers, skip the extra
      // checks.
60 | if (!HasMixedFPReps(fp_reps_ | RepresentationBit(rep))) return false; |
61 | |
62 | // Check register against aliasing registers of other FP representations. |
63 | MachineRepresentation other_rep1, other_rep2; |
64 | switch (rep) { |
65 | case MachineRepresentation::kFloat32: |
66 | other_rep1 = MachineRepresentation::kFloat64; |
67 | other_rep2 = MachineRepresentation::kSimd128; |
68 | break; |
69 | case MachineRepresentation::kFloat64: |
70 | other_rep1 = MachineRepresentation::kFloat32; |
71 | other_rep2 = MachineRepresentation::kSimd128; |
72 | break; |
73 | case MachineRepresentation::kSimd128: |
74 | other_rep1 = MachineRepresentation::kFloat32; |
75 | other_rep2 = MachineRepresentation::kFloat64; |
76 | break; |
77 | default: |
78 | UNREACHABLE(); |
79 | break; |
80 | } |
81 | const RegisterConfiguration* config = RegisterConfiguration::Default(); |
82 | int base = -1; |
83 | int aliases = |
84 | config->GetAliases(rep, loc.register_code(), other_rep1, &base); |
85 | DCHECK(aliases > 0 || (aliases == 0 && base == -1)); |
86 | while (aliases--) { |
87 | if (Contains(AllocatedOperand(LocationOperand::REGISTER, other_rep1, |
88 | base + aliases))) { |
89 | return true; |
90 | } |
91 | } |
92 | aliases = config->GetAliases(rep, loc.register_code(), other_rep2, &base); |
93 | DCHECK(aliases > 0 || (aliases == 0 && base == -1)); |
94 | while (aliases--) { |
95 | if (Contains(AllocatedOperand(LocationOperand::REGISTER, other_rep2, |
96 | base + aliases))) { |
97 | return true; |
98 | } |
99 | } |
100 | } |
101 | return false; |
102 | } |
103 | |
104 | private: |
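  // True when 'reps' has more than one representation bit set, i.e. it is
  // neither zero nor a power of two. E.g. kFloat64 alone yields a single bit
  // (not mixed); kFloat64 | kSimd128 yields two bits (mixed).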
105 | static bool HasMixedFPReps(int reps) { |
106 | return reps && !base::bits::IsPowerOfTwo(reps); |
107 | } |
108 | |
109 | ZoneVector<InstructionOperand>* set_; |
110 | int fp_reps_; |
111 | }; |
112 | |
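// Returns the first gap position containing a non-redundant move, eliminating
// redundant moves along the way; returns LAST_GAP_POSITION + 1 when no such
// move exists.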
113 | int FindFirstNonEmptySlot(const Instruction* instr) { |
114 | int i = Instruction::FIRST_GAP_POSITION; |
115 | for (; i <= Instruction::LAST_GAP_POSITION; i++) { |
116 | ParallelMove* moves = instr->parallel_moves()[i]; |
117 | if (moves == nullptr) continue; |
118 | for (MoveOperands* move : *moves) { |
119 | if (!move->IsRedundant()) return i; |
120 | move->Eliminate(); |
121 | } |
    moves->clear();  // All moves in this slot were redundant; clear it.
123 | } |
124 | return i; |
125 | } |
126 | |
127 | } // namespace |
128 | |
129 | MoveOptimizer::MoveOptimizer(Zone* local_zone, InstructionSequence* code) |
130 | : local_zone_(local_zone), |
131 | code_(code), |
132 | local_vector_(local_zone), |
133 | operand_buffer1(local_zone), |
134 | operand_buffer2(local_zone) {} |
135 | |
136 | void MoveOptimizer::Run() { |
137 | for (Instruction* instruction : code()->instructions()) { |
138 | CompressGaps(instruction); |
139 | } |
140 | for (InstructionBlock* block : code()->instruction_blocks()) { |
141 | CompressBlock(block); |
142 | } |
143 | for (InstructionBlock* block : code()->instruction_blocks()) { |
144 | if (block->PredecessorCount() <= 1) continue; |
145 | if (!block->IsDeferred()) { |
146 | bool has_only_deferred = true; |
147 | for (RpoNumber& pred_id : block->predecessors()) { |
148 | if (!code()->InstructionBlockAt(pred_id)->IsDeferred()) { |
149 | has_only_deferred = false; |
150 | break; |
151 | } |
152 | } |
      // OptimizeMerge would pull common moves down into this block. If those
      // moves originate in deferred blocks and this block is not deferred,
      // pulling them down would defeat the optimization of confining
      // spills/fills to deferred code, so we skip such blocks.
157 | if (has_only_deferred) continue; |
158 | } |
159 | OptimizeMerge(block); |
160 | } |
161 | for (Instruction* gap : code()->instructions()) { |
162 | FinalizeMoves(gap); |
163 | } |
164 | } |
165 | |
166 | void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) { |
167 | if (instruction->IsCall()) return; |
168 | ParallelMove* moves = instruction->parallel_moves()[0]; |
169 | if (moves == nullptr) return; |
170 | |
171 | DCHECK(instruction->parallel_moves()[1] == nullptr || |
172 | instruction->parallel_moves()[1]->empty()); |
173 | |
174 | OperandSet outputs(&operand_buffer1); |
175 | OperandSet inputs(&operand_buffer2); |
176 | |
177 | // Outputs and temps are treated together as potentially clobbering a |
178 | // destination operand. |
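  // E.g. (hypothetical operands) a gap move "r1 = r0" ahead of an instruction
  // that writes r1 is dead, unless the instruction also reads r1 as an input.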
179 | for (size_t i = 0; i < instruction->OutputCount(); ++i) { |
180 | outputs.InsertOp(*instruction->OutputAt(i)); |
181 | } |
182 | for (size_t i = 0; i < instruction->TempCount(); ++i) { |
183 | outputs.InsertOp(*instruction->TempAt(i)); |
184 | } |
185 | |
186 | // Input operands block elisions. |
187 | for (size_t i = 0; i < instruction->InputCount(); ++i) { |
188 | inputs.InsertOp(*instruction->InputAt(i)); |
189 | } |
190 | |
191 | // Elide moves made redundant by the instruction. |
192 | for (MoveOperands* move : *moves) { |
193 | if (outputs.ContainsOpOrAlias(move->destination()) && |
194 | !inputs.ContainsOpOrAlias(move->destination())) { |
195 | move->Eliminate(); |
196 | } |
197 | } |
198 | |
  // A ret or tail-call instruction makes every assignment before it dead,
  // except for assignments to its inputs.
201 | if (instruction->IsRet() || instruction->IsTailCall()) { |
202 | for (MoveOperands* move : *moves) { |
203 | if (!inputs.ContainsOpOrAlias(move->destination())) { |
204 | move->Eliminate(); |
205 | } |
206 | } |
207 | } |
208 | } |
209 | |
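// Migrates eligible gap moves from the START gap of 'from' down into the
// START gap of 'to', the instruction that follows it.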
210 | void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) { |
211 | if (from->IsCall()) return; |
212 | |
213 | ParallelMove* from_moves = from->parallel_moves()[0]; |
214 | if (from_moves == nullptr || from_moves->empty()) return; |
215 | |
216 | OperandSet dst_cant_be(&operand_buffer1); |
217 | OperandSet src_cant_be(&operand_buffer2); |
218 | |
219 | // If an operand is an input to the instruction, we cannot move assignments |
220 | // where it appears on the LHS. |
221 | for (size_t i = 0; i < from->InputCount(); ++i) { |
222 | dst_cant_be.InsertOp(*from->InputAt(i)); |
223 | } |
  // If an operand is an output of the instruction, we cannot move assignments
  // where it appears on the RHS, because the migrated move would read the
  // value written by the instruction rather than the value the operand held
  // before it. The same holds for temp operands.
228 | // The output can't appear on the LHS because we performed |
229 | // RemoveClobberedDestinations for the "from" instruction. |
230 | for (size_t i = 0; i < from->OutputCount(); ++i) { |
231 | src_cant_be.InsertOp(*from->OutputAt(i)); |
232 | } |
233 | for (size_t i = 0; i < from->TempCount(); ++i) { |
234 | src_cant_be.InsertOp(*from->TempAt(i)); |
235 | } |
236 | for (MoveOperands* move : *from_moves) { |
237 | if (move->IsRedundant()) continue; |
238 | // Assume dest has a value "V". If we have a "dest = y" move, then we can't |
239 | // move "z = dest", because z would become y rather than "V". |
240 | // We assume CompressMoves has happened before this, which means we don't |
241 | // have more than one assignment to dest. |
242 | src_cant_be.InsertOp(move->destination()); |
243 | } |
244 | |
245 | ZoneSet<MoveKey, MoveKeyCompare> move_candidates(local_zone()); |
  // Initially, every move whose destination does not conflict with the
  // instruction's inputs is a candidate for being moved down.
248 | for (MoveOperands* move : *from_moves) { |
249 | if (move->IsRedundant()) continue; |
250 | if (!dst_cant_be.ContainsOpOrAlias(move->destination())) { |
251 | MoveKey key = {move->source(), move->destination()}; |
252 | move_candidates.insert(key); |
253 | } |
254 | } |
255 | if (move_candidates.empty()) return; |
256 | |
257 | // Stabilize the candidate set. |
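  // E.g. (hypothetical operands) if 'from' writes r1, the candidate
  // "r2 = r1" must stay behind, and once it stays, any candidate reading r2
  // must stay behind as well.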
258 | bool changed = false; |
259 | do { |
260 | changed = false; |
261 | for (auto iter = move_candidates.begin(); iter != move_candidates.end();) { |
262 | auto current = iter; |
263 | ++iter; |
264 | InstructionOperand src = current->source; |
265 | if (src_cant_be.ContainsOpOrAlias(src)) { |
266 | src_cant_be.InsertOp(current->destination); |
267 | move_candidates.erase(current); |
268 | changed = true; |
269 | } |
270 | } |
271 | } while (changed); |
272 | |
273 | ParallelMove to_move(local_zone()); |
274 | for (MoveOperands* move : *from_moves) { |
275 | if (move->IsRedundant()) continue; |
276 | MoveKey key = {move->source(), move->destination()}; |
277 | if (move_candidates.find(key) != move_candidates.end()) { |
278 | to_move.AddMove(move->source(), move->destination(), code_zone()); |
279 | move->Eliminate(); |
280 | } |
281 | } |
282 | if (to_move.empty()) return; |
283 | |
284 | ParallelMove* dest = |
285 | to->GetOrCreateParallelMove(Instruction::GapPosition::START, code_zone()); |
286 | |
287 | CompressMoves(&to_move, dest); |
288 | DCHECK(dest->empty()); |
289 | for (MoveOperands* m : to_move) { |
290 | dest->push_back(m); |
291 | } |
292 | } |
293 | |
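// Merges the moves of 'right' into 'left', leaving 'right' empty. Moves in
// 'left' whose destinations are overwritten by 'right' are eliminated, and
// sources in 'right' that were defined by 'left' are forwarded: e.g.
// (hypothetical operands) left = {r1 = r0} and right = {r2 = r1} compress to
// {r1 = r0; r2 = r0}.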
294 | void MoveOptimizer::CompressMoves(ParallelMove* left, MoveOpVector* right) { |
295 | if (right == nullptr) return; |
296 | |
297 | MoveOpVector& eliminated = local_vector(); |
298 | DCHECK(eliminated.empty()); |
299 | |
300 | if (!left->empty()) { |
301 | // Modify the right moves in place and collect moves that will be killed by |
302 | // merging the two gaps. |
303 | for (MoveOperands* move : *right) { |
304 | if (move->IsRedundant()) continue; |
305 | left->PrepareInsertAfter(move, &eliminated); |
306 | } |
307 | // Eliminate dead moves. |
308 | for (MoveOperands* to_eliminate : eliminated) { |
309 | to_eliminate->Eliminate(); |
310 | } |
311 | eliminated.clear(); |
312 | } |
  // Add all potentially modified moves from the right side.
314 | for (MoveOperands* move : *right) { |
315 | if (move->IsRedundant()) continue; |
316 | left->push_back(move); |
317 | } |
318 | // Nuke right. |
319 | right->clear(); |
320 | DCHECK(eliminated.empty()); |
321 | } |
322 | |
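// Ensures all of an instruction's moves live in FIRST_GAP_POSITION: swaps the
// two slots when only the last one is occupied, and otherwise compresses the
// last slot's moves into the first.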
323 | void MoveOptimizer::CompressGaps(Instruction* instruction) { |
324 | int i = FindFirstNonEmptySlot(instruction); |
325 | bool has_moves = i <= Instruction::LAST_GAP_POSITION; |
326 | USE(has_moves); |
327 | |
328 | if (i == Instruction::LAST_GAP_POSITION) { |
329 | std::swap(instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION], |
330 | instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]); |
331 | } else if (i == Instruction::FIRST_GAP_POSITION) { |
332 | CompressMoves( |
333 | instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION], |
334 | instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]); |
335 | } |
336 | // We either have no moves, or, after swapping or compressing, we have |
337 | // all the moves in the first gap position, and none in the second/end gap |
338 | // position. |
339 | ParallelMove* first = |
340 | instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION]; |
341 | ParallelMove* last = |
342 | instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]; |
343 | USE(first); |
344 | USE(last); |
345 | |
346 | DCHECK(!has_moves || |
347 | (first != nullptr && (last == nullptr || last->empty()))); |
348 | } |
349 | |
350 | void MoveOptimizer::CompressBlock(InstructionBlock* block) { |
351 | int first_instr_index = block->first_instruction_index(); |
352 | int last_instr_index = block->last_instruction_index(); |
353 | |
  // Start by removing gap assignments whose destination is clobbered by the
  // output of the instruction that follows the gap, as long as the
  // instruction's inputs do not need them.
356 | Instruction* prev_instr = code()->instructions()[first_instr_index]; |
357 | RemoveClobberedDestinations(prev_instr); |
358 | |
359 | for (int index = first_instr_index + 1; index <= last_instr_index; ++index) { |
360 | Instruction* instr = code()->instructions()[index]; |
361 | // Migrate to the gap of prev_instr eligible moves from instr. |
362 | MigrateMoves(instr, prev_instr); |
363 | // Remove gap assignments clobbered by instr's output. |
364 | RemoveClobberedDestinations(instr); |
365 | prev_instr = instr; |
366 | } |
367 | } |
368 | |
369 | const Instruction* MoveOptimizer::LastInstruction( |
370 | const InstructionBlock* block) const { |
371 | return code()->instructions()[block->last_instruction_index()]; |
372 | } |
373 | |
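// Pushes moves common to the first gap of every predecessor's last
// instruction down into this block's first gap. E.g. (hypothetical operands)
// if that gap in every predecessor contains {r1 = [sp+8]}, the move is
// deleted from each predecessor and inserted once at the head of 'block'.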
374 | void MoveOptimizer::OptimizeMerge(InstructionBlock* block) { |
375 | DCHECK_LT(1, block->PredecessorCount()); |
  // Ensure that the last instruction of each incoming block contains nothing
  // that would prevent moving gap moves across it.
378 | for (RpoNumber& pred_index : block->predecessors()) { |
379 | const InstructionBlock* pred = code()->InstructionBlockAt(pred_index); |
380 | |
    // If the predecessor has more than one successor, we shouldn't attempt to
    // move any of its gap moves down to this block (one of the successors),
    // because the other successors may still need their effects.
384 | if (pred->SuccessorCount() > 1) return; |
385 | |
386 | const Instruction* last_instr = |
387 | code()->instructions()[pred->last_instruction_index()]; |
388 | if (last_instr->IsCall()) return; |
389 | if (last_instr->TempCount() != 0) return; |
390 | if (last_instr->OutputCount() != 0) return; |
391 | for (size_t i = 0; i < last_instr->InputCount(); ++i) { |
392 | const InstructionOperand* op = last_instr->InputAt(i); |
393 | if (!op->IsConstant() && !op->IsImmediate()) return; |
394 | } |
395 | } |
396 | // TODO(dcarney): pass a ZoneStats down for this? |
397 | MoveMap move_map(local_zone()); |
398 | size_t correct_counts = 0; |
399 | // Accumulate set of shared moves. |
400 | for (RpoNumber& pred_index : block->predecessors()) { |
401 | const InstructionBlock* pred = code()->InstructionBlockAt(pred_index); |
402 | const Instruction* instr = LastInstruction(pred); |
403 | if (instr->parallel_moves()[0] == nullptr || |
404 | instr->parallel_moves()[0]->empty()) { |
405 | return; |
406 | } |
407 | for (const MoveOperands* move : *instr->parallel_moves()[0]) { |
408 | if (move->IsRedundant()) continue; |
409 | InstructionOperand src = move->source(); |
410 | InstructionOperand dst = move->destination(); |
411 | MoveKey key = {src, dst}; |
412 | auto res = move_map.insert(std::make_pair(key, 1)); |
413 | if (!res.second) { |
414 | res.first->second++; |
415 | if (res.first->second == block->PredecessorCount()) { |
416 | correct_counts++; |
417 | } |
418 | } |
419 | } |
420 | } |
421 | if (move_map.empty() || correct_counts == 0) return; |
422 | |
423 | // Find insertion point. |
424 | Instruction* instr = code()->instructions()[block->first_instruction_index()]; |
425 | |
426 | if (correct_counts != move_map.size()) { |
    // Moves that are not common to all predecessors cannot be pushed to the
    // common successor.
429 | OperandSet conflicting_srcs(&operand_buffer1); |
430 | for (auto iter = move_map.begin(), end = move_map.end(); iter != end;) { |
431 | auto current = iter; |
432 | ++iter; |
433 | if (current->second != block->PredecessorCount()) { |
434 | InstructionOperand dest = current->first.destination; |
        // Not every move is common to all predecessor gaps, but some may be.
        // Those can still be pushed down; however, the destination of a move
        // staying behind must not appear as the source of any pushed move,
        // because the move staying behind would clobber that destination.
439 | conflicting_srcs.InsertOp(dest); |
440 | move_map.erase(current); |
441 | } |
442 | } |
443 | |
444 | bool changed = false; |
445 | do { |
      // If a common move can't be pushed to the common successor, then its
      // destination also can't appear as the source of any move being pushed.
448 | changed = false; |
449 | for (auto iter = move_map.begin(), end = move_map.end(); iter != end;) { |
450 | auto current = iter; |
451 | ++iter; |
452 | DCHECK_EQ(block->PredecessorCount(), current->second); |
453 | if (conflicting_srcs.ContainsOpOrAlias(current->first.source)) { |
454 | conflicting_srcs.InsertOp(current->first.destination); |
455 | move_map.erase(current); |
456 | changed = true; |
457 | } |
458 | } |
459 | } while (changed); |
460 | } |
461 | |
462 | if (move_map.empty()) return; |
463 | |
464 | DCHECK_NOT_NULL(instr); |
465 | bool gap_initialized = true; |
466 | if (instr->parallel_moves()[0] != nullptr && |
467 | !instr->parallel_moves()[0]->empty()) { |
468 | // Will compress after insertion. |
469 | gap_initialized = false; |
470 | std::swap(instr->parallel_moves()[0], instr->parallel_moves()[1]); |
471 | } |
472 | ParallelMove* moves = instr->GetOrCreateParallelMove( |
473 | static_cast<Instruction::GapPosition>(0), code_zone()); |
474 | // Delete relevant entries in predecessors and move everything to block. |
475 | bool first_iteration = true; |
476 | for (RpoNumber& pred_index : block->predecessors()) { |
477 | const InstructionBlock* pred = code()->InstructionBlockAt(pred_index); |
478 | for (MoveOperands* move : *LastInstruction(pred)->parallel_moves()[0]) { |
479 | if (move->IsRedundant()) continue; |
480 | MoveKey key = {move->source(), move->destination()}; |
481 | auto it = move_map.find(key); |
482 | if (it != move_map.end()) { |
483 | if (first_iteration) { |
484 | moves->AddMove(move->source(), move->destination()); |
485 | } |
486 | move->Eliminate(); |
487 | } |
488 | } |
489 | first_iteration = false; |
490 | } |
491 | // Compress. |
492 | if (!gap_initialized) { |
493 | CompressMoves(instr->parallel_moves()[0], instr->parallel_moves()[1]); |
494 | } |
495 | CompressBlock(block); |
496 | } |
497 | |
498 | namespace { |
499 | |
500 | bool IsSlot(const InstructionOperand& op) { |
501 | return op.IsStackSlot() || op.IsFPStackSlot(); |
502 | } |
503 | |
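// Orders loads by canonicalized source; among loads with the same source,
// register destinations sort before stack-slot destinations, so each group
// starts with its preferred (register) destination when one exists.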
504 | bool LoadCompare(const MoveOperands* a, const MoveOperands* b) { |
505 | if (!a->source().EqualsCanonicalized(b->source())) { |
506 | return a->source().CompareCanonicalized(b->source()); |
507 | } |
508 | if (IsSlot(a->destination()) && !IsSlot(b->destination())) return false; |
509 | if (!IsSlot(a->destination()) && IsSlot(b->destination())) return true; |
510 | return a->destination().CompareCanonicalized(b->destination()); |
511 | } |
512 | |
513 | } // namespace |
514 | |
515 | // Split multiple loads of the same constant or stack slot off into the second |
516 | // slot and keep remaining moves in the first slot. |
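// E.g. (hypothetical operands) {r1 = K; r2 = K; [sp+8] = K} becomes slot 0:
// {r1 = K} and slot 1: {r2 = r1; [sp+8] = r1}, so the constant K is
// materialized once and then copied from the register.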
517 | void MoveOptimizer::FinalizeMoves(Instruction* instr) { |
518 | MoveOpVector& loads = local_vector(); |
519 | DCHECK(loads.empty()); |
520 | |
521 | ParallelMove* parallel_moves = instr->parallel_moves()[0]; |
522 | if (parallel_moves == nullptr) return; |
523 | // Find all the loads. |
524 | for (MoveOperands* move : *parallel_moves) { |
525 | if (move->IsRedundant()) continue; |
526 | if (move->source().IsConstant() || IsSlot(move->source())) { |
527 | loads.push_back(move); |
528 | } |
529 | } |
530 | if (loads.empty()) return; |
531 | // Group the loads by source, moving the preferred destination to the |
532 | // beginning of the group. |
533 | std::sort(loads.begin(), loads.end(), LoadCompare); |
534 | MoveOperands* group_begin = nullptr; |
535 | for (MoveOperands* load : loads) { |
536 | // New group. |
537 | if (group_begin == nullptr || |
538 | !load->source().EqualsCanonicalized(group_begin->source())) { |
539 | group_begin = load; |
540 | continue; |
541 | } |
542 | // Nothing to be gained from splitting here. |
543 | if (IsSlot(group_begin->destination())) continue; |
544 | // Insert new move into slot 1. |
545 | ParallelMove* slot_1 = instr->GetOrCreateParallelMove( |
546 | static_cast<Instruction::GapPosition>(1), code_zone()); |
547 | slot_1->AddMove(group_begin->destination(), load->destination()); |
548 | load->Eliminate(); |
549 | } |
550 | loads.clear(); |
551 | } |
552 | |
553 | } // namespace compiler |
554 | } // namespace internal |
555 | } // namespace v8 |
556 | |