1 | // Copyright 2015 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #include "src/compiler/backend/instruction-scheduler.h" |
6 | |
7 | #include "src/base/adapters.h" |
8 | #include "src/base/utils/random-number-generator.h" |
9 | #include "src/isolate.h" |
10 | |
11 | namespace v8 { |
12 | namespace internal { |
13 | namespace compiler { |
14 | |
15 | void InstructionScheduler::SchedulingQueueBase::AddNode( |
16 | ScheduleGraphNode* node) { |
17 | // We keep the ready list sorted by total latency so that we can quickly find |
18 | // the next best candidate to schedule. |
19 | auto it = nodes_.begin(); |
20 | while ((it != nodes_.end()) && |
21 | ((*it)->total_latency() >= node->total_latency())) { |
22 | ++it; |
23 | } |
24 | nodes_.insert(it, node); |
25 | } |
26 | |
27 | InstructionScheduler::ScheduleGraphNode* |
28 | InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) { |
29 | DCHECK(!IsEmpty()); |
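  // The ready list is kept sorted by decreasing total latency (see AddNode),
  // so the first node whose operands are available lies on the longest
  // remaining (critical) path.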
30 | auto candidate = nodes_.end(); |
31 | for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) { |
32 | // We only consider instructions that have all their operands ready. |
33 | if (cycle >= (*iterator)->start_cycle()) { |
34 | candidate = iterator; |
35 | break; |
36 | } |
37 | } |
38 | |
39 | if (candidate != nodes_.end()) { |
40 | ScheduleGraphNode* result = *candidate; |
41 | nodes_.erase(candidate); |
42 | return result; |
43 | } |
44 | |
45 | return nullptr; |
46 | } |
47 | |
48 | InstructionScheduler::ScheduleGraphNode* |
49 | InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) { |
50 | DCHECK(!IsEmpty()); |
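  // The stress scheduler (--turbo-stress-instruction-scheduling) ignores
  // latencies and picks any ready node, which stresses the dependency
  // tracking of the scheduling graph.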
51 | // Choose a random element from the ready list. |
52 | auto candidate = nodes_.begin(); |
53 | std::advance(candidate, isolate()->random_number_generator()->NextInt( |
54 | static_cast<int>(nodes_.size()))); |
55 | ScheduleGraphNode* result = *candidate; |
56 | nodes_.erase(candidate); |
57 | return result; |
58 | } |
59 | |
60 | InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone, |
61 | Instruction* instr) |
62 | : instr_(instr), |
63 | successors_(zone), |
64 | unscheduled_predecessors_count_(0), |
65 | latency_(GetInstructionLatency(instr)), |
66 | total_latency_(-1), |
67 | start_cycle_(-1) {} |
68 | |
69 | void InstructionScheduler::ScheduleGraphNode::AddSuccessor( |
70 | ScheduleGraphNode* node) { |
71 | successors_.push_back(node); |
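  // "node" gains one more predecessor that has not been scheduled yet; it
  // only becomes ready once this count drops back to zero.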
72 | node->unscheduled_predecessors_count_++; |
73 | } |
74 | |
75 | InstructionScheduler::InstructionScheduler(Zone* zone, |
76 | InstructionSequence* sequence) |
77 | : zone_(zone), |
78 | sequence_(sequence), |
79 | graph_(zone), |
80 | last_side_effect_instr_(nullptr), |
81 | pending_loads_(zone), |
82 | last_live_in_reg_marker_(nullptr), |
83 | last_deopt_or_trap_(nullptr), |
84 | operands_map_(zone) {} |
85 | |
86 | void InstructionScheduler::StartBlock(RpoNumber rpo) { |
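  // The per-block state must be empty here; it is initialized by the
  // constructor and reset at the end of every block in EndBlock().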
87 | DCHECK(graph_.empty()); |
88 | DCHECK_NULL(last_side_effect_instr_); |
89 | DCHECK(pending_loads_.empty()); |
90 | DCHECK_NULL(last_live_in_reg_marker_); |
91 | DCHECK_NULL(last_deopt_or_trap_); |
92 | DCHECK(operands_map_.empty()); |
93 | sequence()->StartBlock(rpo); |
94 | } |
95 | |
96 | void InstructionScheduler::EndBlock(RpoNumber rpo) { |
97 | if (FLAG_turbo_stress_instruction_scheduling) { |
98 | ScheduleBlock<StressSchedulerQueue>(); |
99 | } else { |
100 | ScheduleBlock<CriticalPathFirstQueue>(); |
101 | } |
102 | sequence()->EndBlock(rpo); |
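  // Reset all per-block scheduling state so the next block starts from
  // scratch.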
103 | graph_.clear(); |
104 | last_side_effect_instr_ = nullptr; |
105 | pending_loads_.clear(); |
106 | last_live_in_reg_marker_ = nullptr; |
107 | last_deopt_or_trap_ = nullptr; |
108 | operands_map_.clear(); |
109 | } |
110 | |
111 | void InstructionScheduler::AddTerminator(Instruction* instr) { |
112 | ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr); |
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
115 | for (ScheduleGraphNode* node : graph_) { |
116 | node->AddSuccessor(new_node); |
117 | } |
118 | graph_.push_back(new_node); |
119 | } |
120 | |
121 | void InstructionScheduler::AddInstruction(Instruction* instr) { |
122 | ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr); |
123 | |
124 | // We should not have branches in the middle of a block. |
125 | DCHECK_NE(instr->flags_mode(), kFlags_branch); |
126 | DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison); |
127 | |
128 | if (IsFixedRegisterParameter(instr)) { |
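    // Instructions that mark live-in fixed registers must stay at the top of
    // the block: chain them together to keep their relative order, and make
    // the last marker a predecessor of every other instruction (see the else
    // branch below).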
129 | if (last_live_in_reg_marker_ != nullptr) { |
130 | last_live_in_reg_marker_->AddSuccessor(new_node); |
131 | } |
132 | last_live_in_reg_marker_ = new_node; |
133 | } else { |
134 | if (last_live_in_reg_marker_ != nullptr) { |
135 | last_live_in_reg_marker_->AddSuccessor(new_node); |
136 | } |
137 | |
138 | // Make sure that instructions are not scheduled before the last |
139 | // deoptimization or trap point when they depend on it. |
140 | if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) { |
141 | last_deopt_or_trap_->AddSuccessor(new_node); |
142 | } |
143 | |
144 | // Instructions with side effects and memory operations can't be |
145 | // reordered with respect to each other. |
146 | if (HasSideEffect(instr)) { |
147 | if (last_side_effect_instr_ != nullptr) { |
148 | last_side_effect_instr_->AddSuccessor(new_node); |
149 | } |
150 | for (ScheduleGraphNode* load : pending_loads_) { |
151 | load->AddSuccessor(new_node); |
152 | } |
153 | pending_loads_.clear(); |
154 | last_side_effect_instr_ = new_node; |
155 | } else if (IsLoadOperation(instr)) { |
      // Load operations can't be reordered with respect to instructions that
      // have side effects, but independent loads can be reordered with
      // respect to each other.
158 | if (last_side_effect_instr_ != nullptr) { |
159 | last_side_effect_instr_->AddSuccessor(new_node); |
160 | } |
161 | pending_loads_.push_back(new_node); |
162 | } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) { |
      // Ensure that deopts or traps are not reordered with respect to
      // instructions that have side effects.
165 | if (last_side_effect_instr_ != nullptr) { |
166 | last_side_effect_instr_->AddSuccessor(new_node); |
167 | } |
168 | last_deopt_or_trap_ = new_node; |
169 | } |
170 | |
171 | // Look for operand dependencies. |
172 | for (size_t i = 0; i < instr->InputCount(); ++i) { |
173 | const InstructionOperand* input = instr->InputAt(i); |
174 | if (input->IsUnallocated()) { |
175 | int32_t vreg = UnallocatedOperand::cast(input)->virtual_register(); |
176 | auto it = operands_map_.find(vreg); |
177 | if (it != operands_map_.end()) { |
178 | it->second->AddSuccessor(new_node); |
179 | } |
180 | } |
181 | } |
182 | |
183 | // Record the virtual registers defined by this instruction. |
184 | for (size_t i = 0; i < instr->OutputCount(); ++i) { |
185 | const InstructionOperand* output = instr->OutputAt(i); |
186 | if (output->IsUnallocated()) { |
187 | operands_map_[UnallocatedOperand::cast(output)->virtual_register()] = |
188 | new_node; |
189 | } else if (output->IsConstant()) { |
190 | operands_map_[ConstantOperand::cast(output)->virtual_register()] = |
191 | new_node; |
192 | } |
193 | } |
194 | } |
195 | |
196 | graph_.push_back(new_node); |
197 | } |
198 | |
199 | template <typename QueueType> |
200 | void InstructionScheduler::ScheduleBlock() { |
201 | QueueType ready_list(this); |
202 | |
203 | // Compute total latencies so that we can schedule the critical path first. |
204 | ComputeTotalLatencies(); |
205 | |
  // Add the nodes that don't have unscheduled predecessors to the ready list.
207 | for (ScheduleGraphNode* node : graph_) { |
208 | if (!node->HasUnscheduledPredecessor()) { |
209 | ready_list.AddNode(node); |
210 | } |
211 | } |
212 | |
213 | // Go through the ready list and schedule the instructions. |
214 | int cycle = 0; |
215 | while (!ready_list.IsEmpty()) { |
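    // Fetch the best node that is ready at the current cycle. The queue may
    // return nullptr when no node has all of its operands available yet; in
    // that case the cycle is simply left empty.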
216 | ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle); |
217 | |
218 | if (candidate != nullptr) { |
219 | sequence()->AddInstruction(candidate->instruction()); |
220 | |
221 | for (ScheduleGraphNode* successor : candidate->successors()) { |
222 | successor->DropUnscheduledPredecessor(); |
223 | successor->set_start_cycle( |
224 | std::max(successor->start_cycle(), cycle + candidate->latency())); |
225 | |
226 | if (!successor->HasUnscheduledPredecessor()) { |
227 | ready_list.AddNode(successor); |
228 | } |
229 | } |
230 | } |
231 | |
232 | cycle++; |
233 | } |
234 | } |
235 | |
236 | int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const { |
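  // Classify the scheduling constraints of the architecture-independent
  // opcodes: whether the instruction is a load and whether it has side
  // effects.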
237 | switch (instr->arch_opcode()) { |
238 | case kArchNop: |
239 | case kArchFramePointer: |
240 | case kArchParentFramePointer: |
241 | case kArchStackSlot: // Despite its name this opcode will produce a |
242 | // reference to a frame slot, so it is not affected |
243 | // by the arm64 dual stack issues mentioned below. |
244 | case kArchComment: |
245 | case kArchDeoptimize: |
246 | case kArchJmp: |
247 | case kArchBinarySearchSwitch: |
248 | case kArchLookupSwitch: |
249 | case kArchRet: |
250 | case kArchTableSwitch: |
251 | case kArchThrowTerminator: |
252 | return kNoOpcodeFlags; |
253 | |
    // Conversions and ieee754 math calls are pure computations: they neither
    // access memory nor have other side effects, so they impose no ordering
    // constraints.
    case kArchTruncateDoubleToI:
255 | case kIeee754Float64Acos: |
256 | case kIeee754Float64Acosh: |
257 | case kIeee754Float64Asin: |
258 | case kIeee754Float64Asinh: |
259 | case kIeee754Float64Atan: |
260 | case kIeee754Float64Atanh: |
261 | case kIeee754Float64Atan2: |
262 | case kIeee754Float64Cbrt: |
263 | case kIeee754Float64Cos: |
264 | case kIeee754Float64Cosh: |
265 | case kIeee754Float64Exp: |
266 | case kIeee754Float64Expm1: |
267 | case kIeee754Float64Log: |
268 | case kIeee754Float64Log1p: |
269 | case kIeee754Float64Log10: |
270 | case kIeee754Float64Log2: |
271 | case kIeee754Float64Pow: |
272 | case kIeee754Float64Sin: |
273 | case kIeee754Float64Sinh: |
274 | case kIeee754Float64Tan: |
275 | case kIeee754Float64Tanh: |
276 | return kNoOpcodeFlags; |
277 | |
278 | case kArchStackPointer: |
      // The ArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with instructions that have side
      // effects.
281 | return kIsLoadOperation; |
282 | |
283 | case kArchWordPoisonOnSpeculation: |
284 | // While poisoning operations have no side effect, they must not be |
285 | // reordered relative to branches. |
286 | return kHasSideEffect; |
287 | |
288 | case kArchPrepareCallCFunction: |
289 | case kArchSaveCallerRegisters: |
290 | case kArchRestoreCallerRegisters: |
291 | case kArchPrepareTailCall: |
292 | case kArchCallCFunction: |
293 | case kArchCallCodeObject: |
294 | case kArchCallJSFunction: |
295 | case kArchCallWasmFunction: |
296 | case kArchCallBuiltinPointer: |
297 | case kArchTailCallCodeObjectFromJSFunction: |
298 | case kArchTailCallCodeObject: |
299 | case kArchTailCallAddress: |
300 | case kArchTailCallWasm: |
301 | case kArchDebugAbort: |
302 | case kArchDebugBreak: |
303 | return kHasSideEffect; |
304 | |
305 | case kArchStoreWithWriteBarrier: |
306 | return kHasSideEffect; |
307 | |
308 | case kWord32AtomicLoadInt8: |
309 | case kWord32AtomicLoadUint8: |
310 | case kWord32AtomicLoadInt16: |
311 | case kWord32AtomicLoadUint16: |
312 | case kWord32AtomicLoadWord32: |
313 | return kIsLoadOperation; |
314 | |
315 | case kWord32AtomicStoreWord8: |
316 | case kWord32AtomicStoreWord16: |
317 | case kWord32AtomicStoreWord32: |
318 | return kHasSideEffect; |
319 | |
    // Atomic exchange, compare-exchange and read-modify-write operations
    // both read and write memory, so they are treated as having side effects.
    case kWord32AtomicExchangeInt8:
321 | case kWord32AtomicExchangeUint8: |
322 | case kWord32AtomicExchangeInt16: |
323 | case kWord32AtomicExchangeUint16: |
324 | case kWord32AtomicExchangeWord32: |
325 | case kWord32AtomicCompareExchangeInt8: |
326 | case kWord32AtomicCompareExchangeUint8: |
327 | case kWord32AtomicCompareExchangeInt16: |
328 | case kWord32AtomicCompareExchangeUint16: |
329 | case kWord32AtomicCompareExchangeWord32: |
330 | case kWord32AtomicAddInt8: |
331 | case kWord32AtomicAddUint8: |
332 | case kWord32AtomicAddInt16: |
333 | case kWord32AtomicAddUint16: |
334 | case kWord32AtomicAddWord32: |
335 | case kWord32AtomicSubInt8: |
336 | case kWord32AtomicSubUint8: |
337 | case kWord32AtomicSubInt16: |
338 | case kWord32AtomicSubUint16: |
339 | case kWord32AtomicSubWord32: |
340 | case kWord32AtomicAndInt8: |
341 | case kWord32AtomicAndUint8: |
342 | case kWord32AtomicAndInt16: |
343 | case kWord32AtomicAndUint16: |
344 | case kWord32AtomicAndWord32: |
345 | case kWord32AtomicOrInt8: |
346 | case kWord32AtomicOrUint8: |
347 | case kWord32AtomicOrInt16: |
348 | case kWord32AtomicOrUint16: |
349 | case kWord32AtomicOrWord32: |
350 | case kWord32AtomicXorInt8: |
351 | case kWord32AtomicXorUint8: |
352 | case kWord32AtomicXorInt16: |
353 | case kWord32AtomicXorUint16: |
354 | case kWord32AtomicXorWord32: |
355 | return kHasSideEffect; |
356 | |
      // All remaining opcodes are architecture-specific; let the target
      // backend classify them.
#define CASE(Name) case k##Name:
358 | TARGET_ARCH_OPCODE_LIST(CASE) |
359 | #undef CASE |
360 | return GetTargetInstructionFlags(instr); |
361 | } |
362 | |
363 | UNREACHABLE(); |
364 | } |
365 | |
366 | void InstructionScheduler::ComputeTotalLatencies() { |
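  // Visit the graph in reverse order so that the total latency of every
  // successor is known before its predecessors are processed. A node's total
  // latency is its own latency plus the longest total latency among its
  // successors, i.e. the length of the critical path from the node to the
  // end of the block.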
367 | for (ScheduleGraphNode* node : base::Reversed(graph_)) { |
368 | int max_latency = 0; |
369 | |
370 | for (ScheduleGraphNode* successor : node->successors()) { |
371 | DCHECK_NE(-1, successor->total_latency()); |
372 | if (successor->total_latency() > max_latency) { |
373 | max_latency = successor->total_latency(); |
374 | } |
375 | } |
376 | |
377 | node->set_total_latency(max_latency + node->latency()); |
378 | } |
379 | } |
380 | |
381 | } // namespace compiler |
382 | } // namespace internal |
383 | } // namespace v8 |
384 | |