// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"
#include "src/isolate.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

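// Since the ready list is sorted by decreasing total latency (see AddNode),
// the first node whose operands are available is also the one on the longest
// remaining dependency chain, i.e. the critical path is scheduled first.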
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

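// Stress-testing variant, selected by FLAG_turbo_stress_instruction_scheduling
// in EndBlock: it pops a random ready node instead of the most critical one.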
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

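// A freshly created node has no unscheduled predecessors; total_latency_ and
// start_cycle_ start at -1 and are filled in later, by ComputeTotalLatencies()
// and during scheduling respectively.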
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

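// Record a dependency edge: |node| must be scheduled after this node, so it
// gains one more unscheduled predecessor.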
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

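// Schedule the instructions accumulated for this block, emit them into the
// sequence and reset the per-block state for the next block.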
void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_or_trap_ = nullptr;
  operands_map_.clear();
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

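// Add an instruction to the dependency graph, creating edges from every
// earlier instruction it must not be reordered across: operand definitions,
// side-effecting instructions, loads, deopt/trap points and live-in register
// markers.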
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with instructions that have side
      // effects, but independent loads can be reordered with respect to each
      // other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

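// List scheduling: repeatedly pop the best ready candidate according to the
// queue's heuristic and emit it. The cycle counter advances by one on every
// iteration (whether or not something was scheduled) and, combined with
// instruction latencies, determines when successors become ready.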
template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}

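// Classify architecture-independent opcodes; anything not handled here is
// delegated to the backend via GetTargetInstructionFlags().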
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchLookupSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // ArchStackPointer instruction loads the current stack pointer value and
      // must not be reordered with instructions that have side effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchCallBuiltinPointer:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchDebugAbort:
    case kArchDebugBreak:
      return kHasSideEffect;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

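    // Target-specific opcodes are classified by the backend.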
#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

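// For each node, compute the accumulated latency of the longest dependency
// chain starting at that node. Nodes were appended to graph_ in program order
// and edges only point forward, so a reverse walk sees every successor before
// its predecessors.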
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8