1 | // Copyright 2015 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #include "src/compiler/backend/instruction-scheduler.h" |
6 | |
7 | #include "src/base/adapters.h" |
8 | #include "src/base/utils/random-number-generator.h" |
9 | #include "src/isolate.h" |
10 | |
11 | namespace v8 { |
12 | namespace internal { |
13 | namespace compiler { |
14 | |
15 | void InstructionScheduler::SchedulingQueueBase::AddNode( |
16 | ScheduleGraphNode* node) { |
17 | // We keep the ready list sorted by total latency so that we can quickly find |
18 | // the next best candidate to schedule. |
19 | auto it = nodes_.begin(); |
20 | while ((it != nodes_.end()) && |
21 | ((*it)->total_latency() >= node->total_latency())) { |
22 | ++it; |
23 | } |
24 | nodes_.insert(it, node); |
25 | } |
26 | |
27 | InstructionScheduler::ScheduleGraphNode* |
28 | InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) { |
29 | DCHECK(!IsEmpty()); |
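  // The ready list is kept sorted by decreasing total latency (see AddNode),
  // so the first node whose operands are available lies on the longest
  // remaining (critical) path.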
30 | auto candidate = nodes_.end(); |
31 | for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) { |
32 | // We only consider instructions that have all their operands ready. |
33 | if (cycle >= (*iterator)->start_cycle()) { |
34 | candidate = iterator; |
35 | break; |
36 | } |
37 | } |
38 | |
39 | if (candidate != nodes_.end()) { |
40 | ScheduleGraphNode* result = *candidate; |
41 | nodes_.erase(candidate); |
42 | return result; |
43 | } |
44 | |
45 | return nullptr; |
46 | } |
47 | |
48 | InstructionScheduler::ScheduleGraphNode* |
49 | InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) { |
50 | DCHECK(!IsEmpty()); |
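  // The stress scheduler (--turbo-stress-instruction-scheduling) ignores
  // latencies and picks any ready node, which stresses the dependency
  // tracking of the scheduling graph.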
51 | // Choose a random element from the ready list. |
52 | auto candidate = nodes_.begin(); |
53 | std::advance(candidate, isolate()->random_number_generator()->NextInt( |
54 | static_cast<int>(nodes_.size()))); |
55 | ScheduleGraphNode* result = *candidate; |
56 | nodes_.erase(candidate); |
57 | return result; |
58 | } |
59 | |
60 | InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone, |
61 | Instruction* instr) |
62 | : instr_(instr), |
63 | successors_(zone), |
64 | unscheduled_predecessors_count_(0), |
65 | latency_(GetInstructionLatency(instr)), |
66 | total_latency_(-1), |
67 | start_cycle_(-1) {} |
68 | |
69 | void InstructionScheduler::ScheduleGraphNode::AddSuccessor( |
70 | ScheduleGraphNode* node) { |
71 | successors_.push_back(node); |
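  // "node" gains one more predecessor that has not been scheduled yet; it
  // only becomes ready once this count drops back to zero.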
72 | node->unscheduled_predecessors_count_++; |
73 | } |
74 | |
75 | InstructionScheduler::InstructionScheduler(Zone* zone, |
76 | InstructionSequence* sequence) |
77 | : zone_(zone), |
78 | sequence_(sequence), |
79 | graph_(zone), |
80 | last_side_effect_instr_(nullptr), |
81 | pending_loads_(zone), |
82 | last_live_in_reg_marker_(nullptr), |
83 | last_deopt_or_trap_(nullptr), |
84 | operands_map_(zone) {} |
85 | |
86 | void InstructionScheduler::StartBlock(RpoNumber rpo) { |
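  // The per-block state must be empty here; it is initialized by the
  // constructor and reset at the end of every block in EndBlock().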
87 | DCHECK(graph_.empty()); |
88 | DCHECK_NULL(last_side_effect_instr_); |
89 | DCHECK(pending_loads_.empty()); |
90 | DCHECK_NULL(last_live_in_reg_marker_); |
91 | DCHECK_NULL(last_deopt_or_trap_); |
92 | DCHECK(operands_map_.empty()); |
93 | sequence()->StartBlock(rpo); |
94 | } |
95 | |
96 | void InstructionScheduler::EndBlock(RpoNumber rpo) { |
97 | if (FLAG_turbo_stress_instruction_scheduling) { |
98 | ScheduleBlock<StressSchedulerQueue>(); |
99 | } else { |
100 | ScheduleBlock<CriticalPathFirstQueue>(); |
101 | } |
102 | sequence()->EndBlock(rpo); |
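  // Reset all per-block scheduling state so the next block starts from
  // scratch.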
103 | graph_.clear(); |
104 | last_side_effect_instr_ = nullptr; |
105 | pending_loads_.clear(); |
106 | last_live_in_reg_marker_ = nullptr; |
107 | last_deopt_or_trap_ = nullptr; |
108 | operands_map_.clear(); |
109 | } |
110 | |
111 | void InstructionScheduler::AddTerminator(Instruction* instr) { |
112 | ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr); |
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
115 | for (ScheduleGraphNode* node : graph_) { |
116 | node->AddSuccessor(new_node); |
117 | } |
118 | graph_.push_back(new_node); |
119 | } |
120 | |
121 | void InstructionScheduler::AddInstruction(Instruction* instr) { |
122 | ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr); |
123 | |
124 | // We should not have branches in the middle of a block. |
125 | DCHECK_NE(instr->flags_mode(), kFlags_branch); |
126 | DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison); |
127 | |
128 | if (IsFixedRegisterParameter(instr)) { |
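    // Instructions that mark live-in fixed registers must stay at the top of
    // the block: chain them together to keep their relative order, and make
    // the last marker a predecessor of every other instruction (see the else
    // branch below).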
129 | if (last_live_in_reg_marker_ != nullptr) { |
130 | last_live_in_reg_marker_->AddSuccessor(new_node); |
131 | } |
132 | last_live_in_reg_marker_ = new_node; |
133 | } else { |
134 | if (last_live_in_reg_marker_ != nullptr) { |
135 | last_live_in_reg_marker_->AddSuccessor(new_node); |
136 | } |
137 | |
138 | // Make sure that instructions are not scheduled before the last |
139 | // deoptimization or trap point when they depend on it. |
140 | if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) { |
141 | last_deopt_or_trap_->AddSuccessor(new_node); |
142 | } |
143 | |
144 | // Instructions with side effects and memory operations can't be |
145 | // reordered with respect to each other. |
146 | if (HasSideEffect(instr)) { |
147 | if (last_side_effect_instr_ != nullptr) { |
148 | last_side_effect_instr_->AddSuccessor(new_node); |
149 | } |
150 | for (ScheduleGraphNode* load : pending_loads_) { |
151 | load->AddSuccessor(new_node); |
152 | } |
153 | pending_loads_.clear(); |
154 | last_side_effect_instr_ = new_node; |
155 | } else if (IsLoadOperation(instr)) { |
      // Load operations can't be reordered with respect to instructions that
      // have side effects, but independent loads can be reordered with
      // respect to each other.
158 | if (last_side_effect_instr_ != nullptr) { |
159 | last_side_effect_instr_->AddSuccessor(new_node); |
160 | } |
161 | pending_loads_.push_back(new_node); |
162 | } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) { |
      // Ensure that deopts or traps are not reordered with respect to
      // instructions that have side effects.
165 | if (last_side_effect_instr_ != nullptr) { |
166 | last_side_effect_instr_->AddSuccessor(new_node); |
167 | } |
168 | last_deopt_or_trap_ = new_node; |
169 | } |
170 | |
171 | // Look for operand dependencies. |
172 | for (size_t i = 0; i < instr->InputCount(); ++i) { |
173 | const InstructionOperand* input = instr->InputAt(i); |
174 | if (input->IsUnallocated()) { |
175 | int32_t vreg = UnallocatedOperand::cast(input)->virtual_register(); |
176 | auto it = operands_map_.find(vreg); |
177 | if (it != operands_map_.end()) { |
178 | it->second->AddSuccessor(new_node); |
179 | } |
180 | } |
181 | } |
182 | |
183 | // Record the virtual registers defined by this instruction. |
184 | for (size_t i = 0; i < instr->OutputCount(); ++i) { |
185 | const InstructionOperand* output = instr->OutputAt(i); |
186 | if (output->IsUnallocated()) { |
187 | operands_map_[UnallocatedOperand::cast(output)->virtual_register()] = |
188 | new_node; |
189 | } else if (output->IsConstant()) { |
190 | operands_map_[ConstantOperand::cast(output)->virtual_register()] = |
191 | new_node; |
192 | } |
193 | } |
194 | } |
195 | |
196 | graph_.push_back(new_node); |
197 | } |
198 | |
199 | template <typename QueueType> |
200 | void InstructionScheduler::ScheduleBlock() { |
201 | QueueType ready_list(this); |
202 | |
203 | // Compute total latencies so that we can schedule the critical path first. |
204 | ComputeTotalLatencies(); |
205 | |
  // Add the nodes that don't have unscheduled predecessors to the ready list.
207 | for (ScheduleGraphNode* node : graph_) { |
208 | if (!node->HasUnscheduledPredecessor()) { |
209 | ready_list.AddNode(node); |
210 | } |
211 | } |
212 | |
213 | // Go through the ready list and schedule the instructions. |
214 | int cycle = 0; |
215 | while (!ready_list.IsEmpty()) { |
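    // Fetch the best node that is ready at the current cycle. The queue may
    // return nullptr when no node has all of its operands available yet; in
    // that case the cycle is simply left empty.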
216 | ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle); |
217 | |
218 | if (candidate != nullptr) { |
219 | sequence()->AddInstruction(candidate->instruction()); |
220 | |
221 | for (ScheduleGraphNode* successor : candidate->successors()) { |
222 | successor->DropUnscheduledPredecessor(); |
223 | successor->set_start_cycle( |
224 | std::max(successor->start_cycle(), cycle + candidate->latency())); |
225 | |
226 | if (!successor->HasUnscheduledPredecessor()) { |
227 | ready_list.AddNode(successor); |
228 | } |
229 | } |
230 | } |
231 | |
232 | cycle++; |
233 | } |
234 | } |
235 | |
236 | int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const { |
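  // Classify the scheduling constraints of the architecture-independent
  // opcodes: whether the instruction is a load and whether it has side
  // effects.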
237 | switch (instr->arch_opcode()) { |
238 | case kArchNop: |
239 | case kArchFramePointer: |
240 | case kArchParentFramePointer: |
241 | case kArchStackSlot: // Despite its name this opcode will produce a |
242 | // reference to a frame slot, so it is not affected |
243 | // by the arm64 dual stack issues mentioned below. |
244 | case kArchComment: |
245 | case kArchDeoptimize: |
246 | case kArchJmp: |
247 | case kArchBinarySearchSwitch: |
248 | case kArchLookupSwitch: |
249 | case kArchRet: |
250 | case kArchTableSwitch: |
251 | case kArchThrowTerminator: |
252 | return kNoOpcodeFlags; |
253 | |
    // Conversions and ieee754 math calls are pure computations: they neither
    // access memory nor have other side effects, so they impose no ordering
    // constraints.
    case kArchTruncateDoubleToI:
255 | case kIeee754Float64Acos: |
256 | case kIeee754Float64Acosh: |
257 | case kIeee754Float64Asin: |
258 | case kIeee754Float64Asinh: |
259 | case kIeee754Float64Atan: |
260 | case kIeee754Float64Atanh: |
261 | case kIeee754Float64Atan2: |
262 | case kIeee754Float64Cbrt: |
263 | case kIeee754Float64Cos: |
264 | case kIeee754Float64Cosh: |
265 | case kIeee754Float64Exp: |
266 | case kIeee754Float64Expm1: |
267 | case kIeee754Float64Log: |
268 | case kIeee754Float64Log1p: |
269 | case kIeee754Float64Log10: |
270 | case kIeee754Float64Log2: |
271 | case kIeee754Float64Pow: |
272 | case kIeee754Float64Sin: |
273 | case kIeee754Float64Sinh: |
274 | case kIeee754Float64Tan: |
275 | case kIeee754Float64Tanh: |
276 | return kNoOpcodeFlags; |
277 | |
278 | case kArchStackPointer: |
      // The ArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with instructions that have side
      // effects.
281 | return kIsLoadOperation; |
282 | |
283 | case kArchWordPoisonOnSpeculation: |
284 | // While poisoning operations have no side effect, they must not be |
285 | // reordered relative to branches. |
286 | return kHasSideEffect; |
287 | |
288 | case kArchPrepareCallCFunction: |
289 | case kArchSaveCallerRegisters: |
290 | case kArchRestoreCallerRegisters: |
291 | case kArchPrepareTailCall: |
292 | case kArchCallCFunction: |
293 | case kArchCallCodeObject: |
294 | case kArchCallJSFunction: |
295 | case kArchCallWasmFunction: |
296 | case kArchCallBuiltinPointer: |
297 | case kArchTailCallCodeObjectFromJSFunction: |
298 | case kArchTailCallCodeObject: |
299 | case kArchTailCallAddress: |
300 | case kArchTailCallWasm: |
301 | case kArchDebugAbort: |
302 | case kArchDebugBreak: |
303 | return kHasSideEffect; |
304 | |
305 | case kArchStoreWithWriteBarrier: |
306 | return kHasSideEffect; |
307 | |
308 | case kWord32AtomicLoadInt8: |
309 | case kWord32AtomicLoadUint8: |
310 | case kWord32AtomicLoadInt16: |
311 | case kWord32AtomicLoadUint16: |
312 | case kWord32AtomicLoadWord32: |
313 | return kIsLoadOperation; |
314 | |
315 | case kWord32AtomicStoreWord8: |
316 | case kWord32AtomicStoreWord16: |
317 | case kWord32AtomicStoreWord32: |
318 | return kHasSideEffect; |
319 | |
    // Atomic exchange, compare-exchange and read-modify-write operations
    // both read and write memory, so they are treated as having side effects.
    case kWord32AtomicExchangeInt8:
321 | case kWord32AtomicExchangeUint8: |
322 | case kWord32AtomicExchangeInt16: |
323 | case kWord32AtomicExchangeUint16: |
324 | case kWord32AtomicExchangeWord32: |
325 | case kWord32AtomicCompareExchangeInt8: |
326 | case kWord32AtomicCompareExchangeUint8: |
327 | case kWord32AtomicCompareExchangeInt16: |
328 | case kWord32AtomicCompareExchangeUint16: |
329 | case kWord32AtomicCompareExchangeWord32: |
330 | case kWord32AtomicAddInt8: |
331 | case kWord32AtomicAddUint8: |
332 | case kWord32AtomicAddInt16: |
333 | case kWord32AtomicAddUint16: |
334 | case kWord32AtomicAddWord32: |
335 | case kWord32AtomicSubInt8: |
336 | case kWord32AtomicSubUint8: |
337 | case kWord32AtomicSubInt16: |
338 | case kWord32AtomicSubUint16: |
339 | case kWord32AtomicSubWord32: |
340 | case kWord32AtomicAndInt8: |
341 | case kWord32AtomicAndUint8: |
342 | case kWord32AtomicAndInt16: |
343 | case kWord32AtomicAndUint16: |
344 | case kWord32AtomicAndWord32: |
345 | case kWord32AtomicOrInt8: |
346 | case kWord32AtomicOrUint8: |
347 | case kWord32AtomicOrInt16: |
348 | case kWord32AtomicOrUint16: |
349 | case kWord32AtomicOrWord32: |
350 | case kWord32AtomicXorInt8: |
351 | case kWord32AtomicXorUint8: |
352 | case kWord32AtomicXorInt16: |
353 | case kWord32AtomicXorUint16: |
354 | case kWord32AtomicXorWord32: |
355 | return kHasSideEffect; |
356 | |
      // All remaining opcodes are architecture-specific; let the target
      // backend classify them.
#define CASE(Name) case k##Name:
358 | TARGET_ARCH_OPCODE_LIST(CASE) |
359 | #undef CASE |
360 | return GetTargetInstructionFlags(instr); |
361 | } |
362 | |
363 | UNREACHABLE(); |
364 | } |
365 | |
366 | void InstructionScheduler::ComputeTotalLatencies() { |
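  // Visit the graph in reverse order so that the total latency of every
  // successor is known before its predecessors are processed. A node's total
  // latency is its own latency plus the longest total latency among its
  // successors, i.e. the length of the critical path from the node to the
  // end of the block.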
367 | for (ScheduleGraphNode* node : base::Reversed(graph_)) { |
368 | int max_latency = 0; |
369 | |
370 | for (ScheduleGraphNode* successor : node->successors()) { |
371 | DCHECK_NE(-1, successor->total_latency()); |
372 | if (successor->total_latency() > max_latency) { |
373 | max_latency = successor->total_latency(); |
374 | } |
375 | } |
376 | |
377 | node->set_total_latency(max_latency + node->latency()); |
378 | } |
379 | } |
380 | |
381 | } // namespace compiler |
382 | } // namespace internal |
383 | } // namespace v8 |
384 | |