1 | // Copyright 2015 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #include "src/compiler/js-inlining-heuristic.h" |
6 | |
7 | #include "src/compiler/common-operator.h" |
8 | #include "src/compiler/compiler-source-position-table.h" |
9 | #include "src/compiler/node-matchers.h" |
10 | #include "src/compiler/simplified-operator.h" |
11 | #include "src/objects-inl.h" |
12 | #include "src/optimized-compilation-info.h" |
13 | |
14 | namespace v8 { |
15 | namespace internal { |
16 | namespace compiler { |
17 | |
18 | #define TRACE(...) \ |
19 | do { \ |
20 | if (FLAG_trace_turbo_inlining) PrintF(__VA_ARGS__); \ |
21 | } while (false) |
22 | |
23 | namespace { |
24 | |
25 | bool IsSmallInlineFunction(BytecodeArrayRef bytecode) { |
26 | // Forcibly inline small functions. |
27 | if (bytecode.length() <= FLAG_max_inlined_bytecode_size_small) { |
28 | return true; |
29 | } |
30 | return false; |
31 | } |
32 | |
33 | } // namespace |
34 | |
35 | JSInliningHeuristic::Candidate JSInliningHeuristic::CollectFunctions( |
36 | Node* node, int functions_size) { |
37 | DCHECK_NE(0, functions_size); |
38 | Node* callee = node->InputAt(0); |
39 | Candidate out; |
40 | out.node = node; |
41 | |
42 | HeapObjectMatcher m(callee); |
43 | if (m.HasValue() && m.Ref(broker()).IsJSFunction()) { |
44 | out.functions[0] = m.Ref(broker()).AsJSFunction(); |
45 | JSFunctionRef function = out.functions[0].value(); |
46 | if (function.IsSerializedForCompilation()) { |
47 | out.bytecode[0] = function.shared().GetBytecodeArray(); |
48 | } |
49 | out.num_functions = 1; |
50 | return out; |
51 | } |
52 | if (m.IsPhi()) { |
53 | int const value_input_count = m.node()->op()->ValueInputCount(); |
54 | if (value_input_count > functions_size) { |
55 | out.num_functions = 0; |
56 | return out; |
57 | } |
58 | for (int n = 0; n < value_input_count; ++n) { |
59 | HeapObjectMatcher m(callee->InputAt(n)); |
60 | if (!m.HasValue() || !m.Ref(broker()).IsJSFunction()) { |
61 | out.num_functions = 0; |
62 | return out; |
63 | } |
64 | |
65 | out.functions[n] = m.Ref(broker()).AsJSFunction(); |
66 | JSFunctionRef function = out.functions[n].value(); |
67 | if (function.IsSerializedForCompilation()) { |
68 | out.bytecode[n] = function.shared().GetBytecodeArray(), isolate(); |
69 | } |
70 | } |
71 | out.num_functions = value_input_count; |
72 | return out; |
73 | } |
74 | if (m.IsJSCreateClosure()) { |
75 | CreateClosureParameters const& p = CreateClosureParametersOf(m.op()); |
76 | DCHECK(!out.functions[0].has_value()); |
77 | out.shared_info = SharedFunctionInfoRef(broker(), p.shared_info()); |
78 | SharedFunctionInfoRef shared_info = out.shared_info.value(); |
79 | if (shared_info.HasBytecodeArray()) { |
80 | out.bytecode[0] = shared_info.GetBytecodeArray(); |
81 | } |
82 | out.num_functions = 1; |
83 | return out; |
84 | } |
85 | out.num_functions = 0; |
86 | return out; |
87 | } |
88 | |
89 | Reduction JSInliningHeuristic::Reduce(Node* node) { |
90 | DisallowHeapAccessIf no_heap_acess(FLAG_concurrent_inlining); |
91 | |
92 | if (!IrOpcode::IsInlineeOpcode(node->opcode())) return NoChange(); |
93 | |
94 | // Check if we already saw that {node} before, and if so, just skip it. |
95 | if (seen_.find(node->id()) != seen_.end()) return NoChange(); |
96 | seen_.insert(node->id()); |
97 | |
98 | // Check if the {node} is an appropriate candidate for inlining. |
99 | Candidate candidate = CollectFunctions(node, kMaxCallPolymorphism); |
100 | if (candidate.num_functions == 0) { |
101 | return NoChange(); |
102 | } else if (candidate.num_functions > 1 && !FLAG_polymorphic_inlining) { |
103 | TRACE( |
104 | "Not considering call site #%d:%s, because polymorphic inlining " |
105 | "is disabled\n" , |
106 | node->id(), node->op()->mnemonic()); |
107 | return NoChange(); |
108 | } |
109 | |
110 | bool can_inline = false, force_inline_small = true; |
111 | candidate.total_size = 0; |
112 | Node* frame_state = NodeProperties::GetFrameStateInput(node); |
113 | FrameStateInfo const& frame_info = FrameStateInfoOf(frame_state->op()); |
114 | Handle<SharedFunctionInfo> frame_shared_info; |
115 | for (int i = 0; i < candidate.num_functions; ++i) { |
116 | if (!candidate.bytecode[i].has_value()) { |
117 | // We're already missing critical data which wouldn't allow us to |
118 | // continue the inlining checks. Log a warning and continue. |
119 | if (candidate.functions[i].has_value()) { |
120 | TRACE_BROKER(broker(), |
121 | "Missing bytecode array trying to inline function " |
122 | << candidate.functions[i].value().object().address()); |
123 | } else { |
124 | TRACE_BROKER( |
125 | broker(), |
126 | "Missing bytecode array trying to inline function with SFI " |
127 | << candidate.shared_info.value().object().address()); |
128 | } |
129 | // Those functions that don't have their bytecode serialized probably |
130 | // don't have the SFI either, so we exit the loop early. |
131 | candidate.can_inline_function[i] = false; |
132 | continue; |
133 | } |
134 | |
135 | SharedFunctionInfoRef shared = candidate.functions[i].has_value() |
136 | ? candidate.functions[i].value().shared() |
137 | : candidate.shared_info.value(); |
138 | candidate.can_inline_function[i] = shared.IsInlineable(); |
139 | // Do not allow direct recursion i.e. f() -> f(). We still allow indirect |
140 | // recurion like f() -> g() -> f(). The indirect recursion is helpful in |
141 | // cases where f() is a small dispatch function that calls the appropriate |
142 | // function. In the case of direct recursion, we only have some static |
143 | // information for the first level of inlining and it may not be that useful |
144 | // to just inline one level in recursive calls. In some cases like tail |
145 | // recursion we may benefit from recursive inlining, if we have additional |
146 | // analysis that converts them to iterative implementations. Though it is |
147 | // not obvious if such an anlysis is needed. |
148 | if (frame_info.shared_info().ToHandle(&frame_shared_info) && |
149 | frame_shared_info.equals(shared.object())) { |
150 | TRACE("Not considering call site #%d:%s, because of recursive inlining\n" , |
151 | node->id(), node->op()->mnemonic()); |
152 | candidate.can_inline_function[i] = false; |
153 | } |
154 | // A function reaching this point should always have its bytecode |
155 | // serialized. |
156 | BytecodeArrayRef bytecode = candidate.bytecode[i].value(); |
157 | if (candidate.can_inline_function[i]) { |
158 | can_inline = true; |
159 | candidate.total_size += bytecode.length(); |
160 | } |
161 | // We don't force inline small functions if any of them is not inlineable. |
162 | if (!IsSmallInlineFunction(bytecode)) { |
163 | force_inline_small = false; |
164 | } |
165 | } |
166 | if (!can_inline) return NoChange(); |
167 | |
168 | // Gather feedback on how often this call site has been hit before. |
169 | if (node->opcode() == IrOpcode::kJSCall) { |
170 | CallParameters const p = CallParametersOf(node->op()); |
171 | candidate.frequency = p.frequency(); |
172 | } else { |
173 | ConstructParameters const p = ConstructParametersOf(node->op()); |
174 | candidate.frequency = p.frequency(); |
175 | } |
176 | |
177 | // Handling of special inlining modes right away: |
178 | // - For restricted inlining: stop all handling at this point. |
179 | // - For stressing inlining: immediately handle all functions. |
180 | switch (mode_) { |
181 | case kRestrictedInlining: |
182 | return NoChange(); |
183 | case kStressInlining: |
184 | return InlineCandidate(candidate, false); |
185 | case kGeneralInlining: |
186 | break; |
187 | } |
188 | |
189 | // Don't consider a {candidate} whose frequency is below the |
190 | // threshold, i.e. a call site that is only hit once every N |
191 | // invocations of the caller. |
192 | if (candidate.frequency.IsKnown() && |
193 | candidate.frequency.value() < FLAG_min_inlining_frequency) { |
194 | return NoChange(); |
195 | } |
196 | |
197 | // Forcibly inline small functions here. In the case of polymorphic inlining |
198 | // force_inline_small is set only when all functions are small. |
199 | if (force_inline_small && |
200 | cumulative_count_ < FLAG_max_inlined_bytecode_size_absolute) { |
201 | TRACE("Inlining small function(s) at call site #%d:%s\n" , node->id(), |
202 | node->op()->mnemonic()); |
203 | return InlineCandidate(candidate, true); |
204 | } |
205 | |
206 | // In the general case we remember the candidate for later. |
207 | candidates_.insert(candidate); |
208 | return NoChange(); |
209 | } |
210 | |
211 | void JSInliningHeuristic::Finalize() { |
212 | if (candidates_.empty()) return; // Nothing to do without candidates. |
213 | if (FLAG_trace_turbo_inlining) PrintCandidates(); |
214 | |
215 | // We inline at most one candidate in every iteration of the fixpoint. |
216 | // This is to ensure that we don't consume the full inlining budget |
217 | // on things that aren't called very often. |
218 | // TODO(bmeurer): Use std::priority_queue instead of std::set here. |
219 | while (!candidates_.empty()) { |
220 | auto i = candidates_.begin(); |
221 | Candidate candidate = *i; |
222 | candidates_.erase(i); |
223 | |
224 | // Make sure we have some extra budget left, so that any small functions |
225 | // exposed by this function would be given a chance to inline. |
226 | double size_of_candidate = |
227 | candidate.total_size * FLAG_reserve_inline_budget_scale_factor; |
228 | int total_size = cumulative_count_ + static_cast<int>(size_of_candidate); |
229 | if (total_size > FLAG_max_inlined_bytecode_size_cumulative) { |
230 | // Try if any smaller functions are available to inline. |
231 | continue; |
232 | } |
233 | |
234 | // Make sure we don't try to inline dead candidate nodes. |
235 | if (!candidate.node->IsDead()) { |
236 | Reduction const reduction = InlineCandidate(candidate, false); |
237 | if (reduction.Changed()) return; |
238 | } |
239 | } |
240 | } |
241 | |
242 | namespace { |
243 | |
// A (user node, input index) pair identifying one use edge; used to record
// the occurrences of the callee phi inside frame states.
struct NodeAndIndex {
  Node* node;
  int index;
};
248 | |
249 | bool CollectStateValuesOwnedUses(Node* node, Node* state_values, |
250 | NodeAndIndex* uses_buffer, size_t* use_count, |
251 | size_t max_uses) { |
252 | // Only accumulate states that are not shared with other users. |
253 | if (state_values->UseCount() > 1) return true; |
254 | for (int i = 0; i < state_values->InputCount(); i++) { |
255 | Node* input = state_values->InputAt(i); |
256 | if (input->opcode() == IrOpcode::kStateValues) { |
257 | if (!CollectStateValuesOwnedUses(node, input, uses_buffer, use_count, |
258 | max_uses)) { |
259 | return false; |
260 | } |
261 | } else if (input == node) { |
262 | if (*use_count >= max_uses) return false; |
263 | uses_buffer[*use_count] = {state_values, i}; |
264 | (*use_count)++; |
265 | } |
266 | } |
267 | return true; |
268 | } |
269 | |
270 | } // namespace |
271 | |
272 | Node* JSInliningHeuristic::DuplicateStateValuesAndRename(Node* state_values, |
273 | Node* from, Node* to, |
274 | StateCloneMode mode) { |
275 | // Only rename in states that are not shared with other users. This needs to |
276 | // be in sync with the condition in {CollectStateValuesOwnedUses}. |
277 | if (state_values->UseCount() > 1) return state_values; |
278 | Node* copy = mode == kChangeInPlace ? state_values : nullptr; |
279 | for (int i = 0; i < state_values->InputCount(); i++) { |
280 | Node* input = state_values->InputAt(i); |
281 | Node* processed; |
282 | if (input->opcode() == IrOpcode::kStateValues) { |
283 | processed = DuplicateStateValuesAndRename(input, from, to, mode); |
284 | } else if (input == from) { |
285 | processed = to; |
286 | } else { |
287 | processed = input; |
288 | } |
289 | if (processed != input) { |
290 | if (!copy) { |
291 | copy = graph()->CloneNode(state_values); |
292 | } |
293 | copy->ReplaceInput(i, processed); |
294 | } |
295 | } |
296 | return copy ? copy : state_values; |
297 | } |
298 | |
299 | namespace { |
300 | |
301 | bool CollectFrameStateUniqueUses(Node* node, Node* frame_state, |
302 | NodeAndIndex* uses_buffer, size_t* use_count, |
303 | size_t max_uses) { |
304 | // Only accumulate states that are not shared with other users. |
305 | if (frame_state->UseCount() > 1) return true; |
306 | if (frame_state->InputAt(kFrameStateStackInput) == node) { |
307 | if (*use_count >= max_uses) return false; |
308 | uses_buffer[*use_count] = {frame_state, kFrameStateStackInput}; |
309 | (*use_count)++; |
310 | } |
311 | if (!CollectStateValuesOwnedUses(node, |
312 | frame_state->InputAt(kFrameStateLocalsInput), |
313 | uses_buffer, use_count, max_uses)) { |
314 | return false; |
315 | } |
316 | return true; |
317 | } |
318 | |
319 | } // namespace |
320 | |
// Returns a version of {frame_state} in which references to {from} in the
// stack input and in the locals StateValues tree are replaced by {to}. The
// node is mutated in place or lazily cloned depending on {mode}.
Node* JSInliningHeuristic::DuplicateFrameStateAndRename(Node* frame_state,
                                                        Node* from, Node* to,
                                                        StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectFrameStateUniqueUses}.
  if (frame_state->UseCount() > 1) return frame_state;
  Node* copy = mode == kChangeInPlace ? frame_state : nullptr;
  if (frame_state->InputAt(kFrameStateStackInput) == from) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(kFrameStateStackInput, to);
  }
  Node* locals = frame_state->InputAt(kFrameStateLocalsInput);
  Node* new_locals = DuplicateStateValuesAndRename(locals, from, to, mode);
  if (new_locals != locals) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(kFrameStateLocalsInput, new_locals);
  }
  return copy ? copy : frame_state;
}
344 | |
// Attempts to reuse the control flow that computes a polymorphic {callee}
// (a phi over call targets) as the dispatch for the cloned calls, instead of
// emitting fresh ReferenceEqual checks. Returns true on success, in which
// case {calls}/{if_successes} are filled in; returns false (without touching
// the graph) if the pattern does not match safely.
bool JSInliningHeuristic::TryReuseDispatch(Node* node, Node* callee,
                                           Candidate const& candidate,
                                           Node** if_successes, Node** calls,
                                           Node** inputs, int input_count) {
  // We will try to reuse the control flow branch created for computing
  // the {callee} target of the call. We only reuse the branch if there
  // is no side-effect between the call and the branch, and if the callee is
  // only used as the target (and possibly also in the related frame states).

  int const num_calls = candidate.num_functions;

  DCHECK_EQ(IrOpcode::kPhi, callee->opcode());
  DCHECK_EQ(num_calls, callee->op()->ValueInputCount());

  // We are trying to match the following pattern:
  //
  //         C1          C2
  //          .           .
  //          |           |
  //         Merge(merge) <-----------------+
  //           ^    ^                       |
  //  V1  V2   |    |        E1  E2         |
  //   .  .    |    +----+    .  .          |
  //   |  |    |         |    |  |          |
  //  Phi(callee)      EffectPhi(effect_phi)|
  //    ^                    ^              |
  //    |                    |              |
  //    +----+               |              |
  //    |    |               |              |
  //    |  StateValues       |              |
  //    |      ^             |              |
  //    +----+ |             |              |
  //    |    | |             |              |
  //    |  FrameState        |              |
  //    |      ^             |              |
  //    |      |             |        +---+ |
  //    |      |             |        |   | |
  //    +----+ |        Checkpoint(checkpoint)
  //    |    | |             ^
  //    |  StateValues       |     +-------------+
  //    |    |               |     |
  //    +-----+              |     |
  //    |     |              |     |
  //    |  FrameState        |     |
  //    |      ^             |     |
  //    +------+             |     |
  //           Call(node)----+-----+
  //            |
  //            |
  //            .
  //
  // The {callee} here is a phi that merges the possible call targets, {node}
  // is the actual call that we will try to duplicate and connect to the
  // control that comes into {merge}. There can be a {checkpoint} between
  // the call and the callee phi.
  //
  // The idea is to get rid of the merge, effect phi and phi, then duplicate
  // the call (with all the frame states and such), and connect the duplicated
  // calls and states directly to the inputs of the ex-phi, ex-effect-phi and
  // ex-merge. The tricky part is to make sure that there is no interference
  // from the outside. In particular, there should not be any unaccounted uses
  // of the phi, effect-phi and merge because we will remove them from
  // the graph.
  //
  //  V1              E1   C1  V2                E2   C2
  //   .               .    .   .                 .    .
  //   |               |    |   |                 |    |
  //   +----+          |    |   +----+            |    |
  //   |    |          |    |   |    |            |    |
  //   |  StateValues  |    |   |  StateValues    |    |
  //   |      ^        |    |   |      ^          |    |
  //   +----+ |        |    |   +----+ |          |    |
  //   |    | |        |    |   |    | |          |    |
  //   |  FrameState   |    |   |  FrameState     |    |
  //   |      ^        |    |   |      ^          |    |
  //   +----+ |  Checkpoint |   +----+ |  Checkpoint  |
  //   |    | |        ^    |   |    | |        ^     |
  //   |  StateValues  |    |   |  StateValues  |     |
  //   |    |          |    |   |    |          |     |
  //   +-----+         |    |   +-----+         |     |
  //   |     |         |    |   |     |         |     |
  //   |  FrameState   |    |   |  FrameState   |     |
  //   |      ^        |    |   |      ^        |     |
  //   +------+        |    |   +------+        |     |
  //          Call-----+----+          Call-----+-----+
  //           |                        |
  //           +-------+  +-------------+
  //                   |  |
  //                   Merge
  //                   EffectPhi
  //                   Phi
  //                   |
  //                  ...

  // If there is a control node between the callee computation
  // and the call, bail out.
  Node* merge = NodeProperties::GetControlInput(callee);
  if (NodeProperties::GetControlInput(node) != merge) return false;

  // If there is a non-checkpoint effect node between the callee computation
  // and the call, bail out. We will drop any checkpoint between the call and
  // the callee phi because the callee computation should have its own
  // checkpoint that the call can fall back to.
  Node* checkpoint = nullptr;
  Node* effect = NodeProperties::GetEffectInput(node);
  if (effect->opcode() == IrOpcode::kCheckpoint) {
    checkpoint = effect;
    if (NodeProperties::GetControlInput(checkpoint) != merge) return false;
    effect = NodeProperties::GetEffectInput(effect);
  }
  if (effect->opcode() != IrOpcode::kEffectPhi) return false;
  if (NodeProperties::GetControlInput(effect) != merge) return false;
  Node* effect_phi = effect;

  // The effect phi, the callee, the call and the checkpoint must be the only
  // users of the merge.
  for (Node* merge_use : merge->uses()) {
    if (merge_use != effect_phi && merge_use != callee && merge_use != node &&
        merge_use != checkpoint) {
      return false;
    }
  }

  // The effect phi must be only used by the checkpoint or the call.
  for (Node* effect_phi_use : effect_phi->uses()) {
    if (effect_phi_use != node && effect_phi_use != checkpoint) return false;
  }

  // We must replace the callee phi with the appropriate constant in
  // the entire subgraph reachable by inputs from the call (terminating
  // at phis and merges). Since we do not want to walk (and later duplicate)
  // the subgraph here, we limit the possible uses to this set:
  //
  // 1. In the call (as a target).
  // 2. The checkpoint between the call and the callee computation merge.
  // 3. The lazy deoptimization frame state.
  //
  // This corresponds to the most common pattern, where the function is
  // called with only local variables or constants as arguments.
  //
  // To check the uses, we first collect all the occurrences of callee in 1, 2
  // and 3, and then we check that all uses of callee are in the collected
  // occurrences. If there is an unaccounted use, we do not try to rewire
  // the control flow.
  //
  // Note: With CFG, this would be much easier and more robust - we would just
  // duplicate all the nodes between the merge and the call, replacing all
  // occurrences of the {callee} phi with the appropriate constant.

  // First compute the set of uses that are only reachable from 2 and 3.
  const size_t kMaxUses = 8;
  NodeAndIndex replaceable_uses[kMaxUses];
  size_t replaceable_uses_count = 0;

  // Collect the uses to check case 2.
  Node* checkpoint_state = nullptr;
  if (checkpoint) {
    checkpoint_state = checkpoint->InputAt(0);
    if (!CollectFrameStateUniqueUses(callee, checkpoint_state, replaceable_uses,
                                     &replaceable_uses_count, kMaxUses)) {
      return false;
    }
  }

  // Collect the uses to check case 3.
  Node* frame_state = NodeProperties::GetFrameStateInput(node);
  if (!CollectFrameStateUniqueUses(callee, frame_state, replaceable_uses,
                                   &replaceable_uses_count, kMaxUses)) {
    return false;
  }

  // Bail out if there is a use of {callee} that is not reachable from 1, 2
  // and 3.
  for (Edge edge : callee->use_edges()) {
    // Case 1 (use by the call as a target).
    if (edge.from() == node && edge.index() == 0) continue;
    // Case 2 and 3 - used in checkpoint and/or lazy deopt frame states.
    bool found = false;
    for (size_t i = 0; i < replaceable_uses_count; i++) {
      if (replaceable_uses[i].node == edge.from() &&
          replaceable_uses[i].index == edge.index()) {
        found = true;
        break;
      }
    }
    if (!found) return false;
  }

  // Clone the call and the framestate, including the uniquely reachable
  // state values, making sure that we replace the phi with the constant.
  for (int i = 0; i < num_calls; ++i) {
    // Clone the calls for each branch.
    // We need to specialize the calls to the correct target, effect, and
    // control. We also need to duplicate the checkpoint and the lazy
    // frame state, and change all the uses of the callee to the constant
    // callee.
    Node* target = callee->InputAt(i);
    Node* effect = effect_phi->InputAt(i);
    Node* control = merge->InputAt(i);

    if (checkpoint) {
      // Duplicate the checkpoint. The last clone may mutate the original
      // state in place, since nothing else will use it afterwards.
      Node* new_checkpoint_state = DuplicateFrameStateAndRename(
          checkpoint_state, callee, target,
          (i == num_calls - 1) ? kChangeInPlace : kCloneState);
      effect = graph()->NewNode(checkpoint->op(), new_checkpoint_state, effect,
                                control);
    }

    // Duplicate the call.
    Node* new_lazy_frame_state = DuplicateFrameStateAndRename(
        frame_state, callee, target,
        (i == num_calls - 1) ? kChangeInPlace : kCloneState);
    inputs[0] = target;
    inputs[input_count - 3] = new_lazy_frame_state;
    inputs[input_count - 2] = effect;
    inputs[input_count - 1] = control;
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }

  // Mark the control inputs dead, so that we can kill the merge.
  node->ReplaceInput(input_count - 1, jsgraph()->Dead());
  callee->ReplaceInput(num_calls, jsgraph()->Dead());
  effect_phi->ReplaceInput(num_calls, jsgraph()->Dead());
  if (checkpoint) {
    checkpoint->ReplaceInput(2, jsgraph()->Dead());
  }

  merge->Kill();
  return true;
}
579 | |
// Produces one cloned call per candidate target, either by reusing the
// existing callee-phi dispatch (TryReuseDispatch) or by emitting an explicit
// ReferenceEqual branch chain. Fills {calls} and {if_successes}.
void JSInliningHeuristic::CreateOrReuseDispatch(Node* node, Node* callee,
                                                Candidate const& candidate,
                                                Node** if_successes,
                                                Node** calls, Node** inputs,
                                                int input_count) {
  // Attribute every node created below to the source position of the call.
  SourcePositionTable::Scope position(
      source_positions_, source_positions_->GetSourcePosition(node));
  if (TryReuseDispatch(node, callee, candidate, if_successes, calls, inputs,
                       input_count)) {
    return;
  }

  Node* fallthrough_control = NodeProperties::GetControlInput(node);
  int const num_calls = candidate.num_functions;

  // Create the appropriate control flow to dispatch to the cloned calls.
  for (int i = 0; i < num_calls; ++i) {
    // TODO(2206): Make comparison be based on underlying SharedFunctionInfo
    // instead of the target JSFunction reference directly.
    Node* target = jsgraph()->Constant(candidate.functions[i].value());
    if (i != (num_calls - 1)) {
      // Not the last target: test explicitly and branch on the outcome.
      Node* check =
          graph()->NewNode(simplified()->ReferenceEqual(), callee, target);
      Node* branch =
          graph()->NewNode(common()->Branch(), check, fallthrough_control);
      fallthrough_control = graph()->NewNode(common()->IfFalse(), branch);
      if_successes[i] = graph()->NewNode(common()->IfTrue(), branch);
    } else {
      // Last target: no check needed, take the remaining control edge.
      if_successes[i] = fallthrough_control;
    }

    // Clone the calls for each branch.
    // The first input to the call is the actual target (which we specialize
    // to the known {target}); the last input is the control dependency.
    // We also specialize the new.target of JSConstruct {node}s if it refers
    // to the same node as the {node}'s target input, so that we can later
    // properly inline the JSCreate operations.
    if (node->opcode() == IrOpcode::kJSConstruct && inputs[0] == inputs[1]) {
      inputs[1] = target;
    }
    inputs[0] = target;
    inputs[input_count - 1] = if_successes[i];
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }
}
626 | |
// Inlines the given {candidate}. Monomorphic candidates go straight to the
// inliner; polymorphic ones are first expanded into a per-target dispatch
// subgraph, after which each cloned call is inlined individually.
// {small_function} calls bypass (and do not consume) the cumulative budget.
Reduction JSInliningHeuristic::InlineCandidate(Candidate const& candidate,
                                               bool small_function) {
  int const num_calls = candidate.num_functions;
  Node* const node = candidate.node;
  if (num_calls == 1) {
    // Monomorphic case: delegate directly to the inliner.
    Reduction const reduction = inliner_.ReduceJSCall(node);
    if (reduction.Changed()) {
      cumulative_count_ += candidate.bytecode[0].value().length();
    }
    return reduction;
  }

  // Expand the JSCall/JSConstruct node to a subgraph first if
  // we have multiple known target functions.
  DCHECK_LT(1, num_calls);
  // One extra slot: the merge/effect-phi inputs appended below.
  Node* calls[kMaxCallPolymorphism + 1];
  Node* if_successes[kMaxCallPolymorphism];
  Node* callee = NodeProperties::GetValueInput(node, 0);

  // Setup the inputs for the cloned call nodes.
  int const input_count = node->InputCount();
  Node** inputs = graph()->zone()->NewArray<Node*>(input_count);
  for (int i = 0; i < input_count; ++i) {
    inputs[i] = node->InputAt(i);
  }

  // Create the appropriate control flow to dispatch to the cloned calls.
  CreateOrReuseDispatch(node, callee, candidate, if_successes, calls, inputs,
                        input_count);

  // Check if we have an exception projection for the call {node}.
  Node* if_exception = nullptr;
  if (NodeProperties::IsExceptionalCall(node, &if_exception)) {
    Node* if_exceptions[kMaxCallPolymorphism + 1];
    for (int i = 0; i < num_calls; ++i) {
      if_successes[i] = graph()->NewNode(common()->IfSuccess(), calls[i]);
      // The call node serves as both effect and control input here.
      if_exceptions[i] =
          graph()->NewNode(common()->IfException(), calls[i], calls[i]);
    }

    // Morph the {if_exception} projection into a join.
    Node* exception_control =
        graph()->NewNode(common()->Merge(num_calls), num_calls, if_exceptions);
    if_exceptions[num_calls] = exception_control;
    Node* exception_effect = graph()->NewNode(common()->EffectPhi(num_calls),
                                              num_calls + 1, if_exceptions);
    Node* exception_value = graph()->NewNode(
        common()->Phi(MachineRepresentation::kTagged, num_calls), num_calls + 1,
        if_exceptions);
    ReplaceWithValue(if_exception, exception_value, exception_effect,
                     exception_control);
  }

  // Morph the original call site into a join of the dispatched call sites.
  Node* control =
      graph()->NewNode(common()->Merge(num_calls), num_calls, if_successes);
  calls[num_calls] = control;
  Node* effect =
      graph()->NewNode(common()->EffectPhi(num_calls), num_calls + 1, calls);
  Node* value =
      graph()->NewNode(common()->Phi(MachineRepresentation::kTagged, num_calls),
                       num_calls + 1, calls);
  ReplaceWithValue(node, value, effect, control);

  // Inline the individual, cloned call sites.
  for (int i = 0; i < num_calls; ++i) {
    Node* node = calls[i];
    if (candidate.can_inline_function[i] &&
        (small_function ||
         cumulative_count_ < FLAG_max_inlined_bytecode_size_cumulative)) {
      Reduction const reduction = inliner_.ReduceJSCall(node);
      if (reduction.Changed()) {
        // Killing the call node is not strictly necessary, but it is safer to
        // make sure we do not resurrect the node.
        node->Kill();
        // Small functions don't count towards the budget.
        if (!small_function) {
          cumulative_count_ += candidate.bytecode[i]->length();
        }
      }
    }
  }

  return Replace(value);
}
712 | |
713 | bool JSInliningHeuristic::CandidateCompare::operator()( |
714 | const Candidate& left, const Candidate& right) const { |
715 | if (right.frequency.IsUnknown()) { |
716 | if (left.frequency.IsUnknown()) { |
717 | // If left and right are both unknown then the ordering is indeterminate, |
718 | // which breaks strict weak ordering requirements, so we fall back to the |
719 | // node id as a tie breaker. |
720 | return left.node->id() > right.node->id(); |
721 | } |
722 | return true; |
723 | } else if (left.frequency.IsUnknown()) { |
724 | return false; |
725 | } else if (left.frequency.value() > right.frequency.value()) { |
726 | return true; |
727 | } else if (left.frequency.value() < right.frequency.value()) { |
728 | return false; |
729 | } else { |
730 | return left.node->id() > right.node->id(); |
731 | } |
732 | } |
733 | |
734 | void JSInliningHeuristic::PrintCandidates() { |
735 | StdoutStream os; |
736 | os << "Candidates for inlining (size=" << candidates_.size() << "):\n" ; |
737 | for (const Candidate& candidate : candidates_) { |
738 | os << " #" << candidate.node->id() << ":" |
739 | << candidate.node->op()->mnemonic() |
740 | << ", frequency: " << candidate.frequency << std::endl; |
741 | for (int i = 0; i < candidate.num_functions; ++i) { |
742 | SharedFunctionInfoRef shared = |
743 | candidate.functions[i].has_value() |
744 | ? candidate.functions[i].value().shared() |
745 | : candidate.shared_info.value(); |
746 | PrintF(" - size:%d, name: %s\n" , candidate.bytecode[i].value().length(), |
747 | shared.object()->DebugName()->ToCString().get()); |
748 | } |
749 | } |
750 | } |
751 | |
// Shorthand accessor for the underlying TurboFan graph.
Graph* JSInliningHeuristic::graph() const { return jsgraph()->graph(); }
753 | |
// Shorthand accessor for the common-operator builder.
CommonOperatorBuilder* JSInliningHeuristic::common() const {
  return jsgraph()->common();
}
757 | |
// Shorthand accessor for the simplified-operator builder.
SimplifiedOperatorBuilder* JSInliningHeuristic::simplified() const {
  return jsgraph()->simplified();
}
761 | |
762 | #undef TRACE |
763 | |
764 | } // namespace compiler |
765 | } // namespace internal |
766 | } // namespace v8 |
767 | |