1/*
2 * Copyright (C) 2011-2018 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "DFGCSEPhase.h"
28
29#if ENABLE(DFG_JIT)
30
31#include "DFGAbstractHeap.h"
32#include "DFGBlockMapInlines.h"
33#include "DFGClobberSet.h"
34#include "DFGClobberize.h"
35#include "DFGDominators.h"
36#include "DFGGraph.h"
37#include "DFGPhase.h"
38#include "JSCInlines.h"
39#include <array>
40
41namespace JSC { namespace DFG {
42
// This file contains two CSE implementations: local and global. LocalCSE typically runs when we're
// in DFG mode, i.e. when we want to compile quickly. LocalCSE contains a lot of optimizations for
// compile time. GlobalCSE, on the other hand, is fairly straightforward. It will find more
// optimization opportunities by virtue of being global.
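//
// A rough sketch of how the two phases are meant to be driven (the actual call sites live in the
// compiler driver, outside this file; only performLocalCSE() and performGlobalCSE() at the bottom
// of this file are exposed):
//
//     performLocalCSE(graph);  // block-local CSE over ThreadedCPS/LoadStore form (DFG tier)
//     performGlobalCSE(graph); // dominator-based CSE over SSA form (FTL tier)
//
// The form requirements are enforced by the ASSERTs at the top of each run() method below.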
47
48namespace {
49
50namespace DFGCSEPhaseInternal {
51static const bool verbose = false;
52}
53
54class ImpureDataSlot {
55 WTF_MAKE_NONCOPYABLE(ImpureDataSlot);
56 WTF_MAKE_FAST_ALLOCATED;
57public:
58 ImpureDataSlot(HeapLocation key, LazyNode value, unsigned hash)
59 : key(key), value(value), hash(hash)
60 { }
61
62 HeapLocation key;
63 LazyNode value;
64 unsigned hash;
65};
66
67struct ImpureDataSlotHash : public DefaultHash<std::unique_ptr<ImpureDataSlot>>::Hash {
68 static unsigned hash(const std::unique_ptr<ImpureDataSlot>& key)
69 {
70 return key->hash;
71 }
72
73 static bool equal(const std::unique_ptr<ImpureDataSlot>& a, const std::unique_ptr<ImpureDataSlot>& b)
74 {
        // ImpureDataSlots are unique per table per HeapLocation. This lets us compare the keys
        // by just comparing the pointers of the unique ImpureDataSlots.
77 ASSERT(a != b || a->key == b->key);
78 return a == b;
79 }
80};
81
82struct ImpureDataTranslator {
83 static unsigned hash(const HeapLocation& key)
84 {
85 return key.hash();
86 }
87
88 static bool equal(const std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key)
89 {
90 if (!slot)
91 return false;
92 if (HashTraits<std::unique_ptr<ImpureDataSlot>>::isDeletedValue(slot))
93 return false;
94 return slot->key == key;
95 }
96
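    // WTF's HashSet calls this when add<ImpureDataTranslator>(HeapLocation) does not find the key:
    // it constructs the owning unique_ptr in place in the table's bucket, so the new slot starts
    // out with an empty LazyNode value that the caller then fills in.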
97 static void translate(std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key, unsigned hashCode)
98 {
99 new (NotNull, std::addressof(slot)) std::unique_ptr<ImpureDataSlot>(new ImpureDataSlot {key, LazyNode(), hashCode});
100 }
101};
102
103class ImpureMap {
104 WTF_MAKE_FAST_ALLOCATED;
105 WTF_MAKE_NONCOPYABLE(ImpureMap);
106public:
107 ImpureMap() = default;
108
109 ImpureMap(ImpureMap&& other)
110 {
111 m_abstractHeapStackMap.swap(other.m_abstractHeapStackMap);
112 m_fallbackStackMap.swap(other.m_fallbackStackMap);
113 m_heapMap.swap(other.m_heapMap);
114#if !defined(NDEBUG)
115 m_debugImpureData.swap(other.m_debugImpureData);
116#endif
117 }
118
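    // Returns nullptr if the (location, node) pair was newly added; otherwise returns the
    // pre-existing slot for this location and leaves its value unchanged.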
119 const ImpureDataSlot* add(const HeapLocation& location, const LazyNode& node)
120 {
121 const ImpureDataSlot* result = addImpl(location, node);
122
123#if !defined(NDEBUG)
124 auto addResult = m_debugImpureData.add(location, node);
125 ASSERT(!!result == !addResult.isNewEntry);
126#endif
127 return result;
128 }
129
130 LazyNode get(const HeapLocation& location) const
131 {
132 LazyNode result = getImpl(location);
133#if !defined(NDEBUG)
134 ASSERT(result == m_debugImpureData.get(location));
135#endif
136 return result;
137 }
138
139 void clobber(AbstractHeap heap)
140 {
141 switch (heap.kind()) {
142 case World: {
143 clear();
144 break;
145 }
146 case SideState:
147 break;
148 case Stack: {
149 ASSERT(!heap.payload().isTop());
150 ASSERT(heap.payload().value() == heap.payload().value32());
151 m_abstractHeapStackMap.remove(heap.payload().value32());
152 clobber(m_fallbackStackMap, heap);
153 break;
154 }
155 default:
156 clobber(m_heapMap, heap);
157 break;
158 }
159#if !defined(NDEBUG)
160 m_debugImpureData.removeIf([heap](const HashMap<HeapLocation, LazyNode>::KeyValuePairType& pair) -> bool {
161 return heap.overlaps(pair.key.heap());
162 });
163 ASSERT(m_debugImpureData.size()
164 == (m_heapMap.size()
165 + m_abstractHeapStackMap.size()
166 + m_fallbackStackMap.size()));
167
168 const bool verifyClobber = false;
169 if (verifyClobber) {
170 for (auto& pair : m_debugImpureData)
171 ASSERT(!!get(pair.key));
172 }
173#endif
174 }
175
176 void clear()
177 {
178 m_abstractHeapStackMap.clear();
179 m_fallbackStackMap.clear();
180 m_heapMap.clear();
181#if !defined(NDEBUG)
182 m_debugImpureData.clear();
183#endif
184 }
185
186private:
187 typedef HashSet<std::unique_ptr<ImpureDataSlot>, ImpureDataSlotHash> Map;
188
189 const ImpureDataSlot* addImpl(const HeapLocation& location, const LazyNode& node)
190 {
191 switch (location.heap().kind()) {
192 case World:
193 case SideState:
194 RELEASE_ASSERT_NOT_REACHED();
195 case Stack: {
196 AbstractHeap abstractHeap = location.heap();
197 if (abstractHeap.payload().isTop())
198 return add(m_fallbackStackMap, location, node);
199 ASSERT(abstractHeap.payload().value() == abstractHeap.payload().value32());
200 auto addResult = m_abstractHeapStackMap.add(abstractHeap.payload().value32(), nullptr);
201 if (addResult.isNewEntry) {
202 addResult.iterator->value.reset(new ImpureDataSlot {location, node, 0});
203 return nullptr;
204 }
205 if (addResult.iterator->value->key == location)
206 return addResult.iterator->value.get();
207 return add(m_fallbackStackMap, location, node);
208 }
209 default:
210 return add(m_heapMap, location, node);
211 }
212 return nullptr;
213 }
214
215 LazyNode getImpl(const HeapLocation& location) const
216 {
217 switch (location.heap().kind()) {
218 case World:
219 case SideState:
220 RELEASE_ASSERT_NOT_REACHED();
221 case Stack: {
222 ASSERT(location.heap().payload().value() == location.heap().payload().value32());
223 auto iterator = m_abstractHeapStackMap.find(location.heap().payload().value32());
224 if (iterator != m_abstractHeapStackMap.end()
225 && iterator->value->key == location)
226 return iterator->value->value;
227 return get(m_fallbackStackMap, location);
228 }
229 default:
230 return get(m_heapMap, location);
231 }
232 return LazyNode();
233 }
234
235 static const ImpureDataSlot* add(Map& map, const HeapLocation& location, const LazyNode& node)
236 {
237 auto result = map.add<ImpureDataTranslator>(location);
238 if (result.isNewEntry) {
239 (*result.iterator)->value = node;
240 return nullptr;
241 }
242 return result.iterator->get();
243 }
244
245 static LazyNode get(const Map& map, const HeapLocation& location)
246 {
247 auto iterator = map.find<ImpureDataTranslator>(location);
248 if (iterator != map.end())
249 return (*iterator)->value;
250 return LazyNode();
251 }
252
253 static void clobber(Map& map, AbstractHeap heap)
254 {
255 map.removeIf([heap](const std::unique_ptr<ImpureDataSlot>& slot) -> bool {
256 return heap.overlaps(slot->key.heap());
257 });
258 }
259
    // The majority of impure stack slots are unique per value.
    // This is very useful for fast clobber(): we can just remove the slot addressed by the
    // AbstractHeap in O(1).
    //
    // When there are conflicts, any additional HeapLocation is added to the fallback map.
    // This works well because m_fallbackStackMap remains tiny.
    //
    // One cannot assume a unique ImpureData is in m_abstractHeapStackMap. It may have been
    // a duplicate in the past and now only live in m_fallbackStackMap.
    //
    // Obviously, TOP always goes into m_fallbackStackMap since it does not have a unique value.
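    //
    // Illustrative example (the operand number is made up): a def() over Stack(operand 42) lands
    // in m_abstractHeapStackMap under the integer key 42, so a later write() of Stack(operand 42)
    // is a single hash removal. If a second, different HeapLocation over the same Stack(operand 42)
    // heap is then def()'d, it goes into m_fallbackStackMap, and clobbering that heap falls back to
    // the usual overlap scan over the fallback map.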
271 HashMap<int32_t, std::unique_ptr<ImpureDataSlot>, DefaultHash<int32_t>::Hash, WTF::SignedWithZeroKeyHashTraits<int32_t>> m_abstractHeapStackMap;
272 Map m_fallbackStackMap;
273
274 Map m_heapMap;
275
276#if !defined(NDEBUG)
277 HashMap<HeapLocation, LazyNode> m_debugImpureData;
278#endif
279};
280
281class LocalCSEPhase : public Phase {
282public:
283 LocalCSEPhase(Graph& graph)
284 : Phase(graph, "local common subexpression elimination")
285 , m_smallBlock(graph)
286 , m_largeBlock(graph)
287 {
288 }
289
290 bool run()
291 {
292 ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
293 ASSERT(m_graph.m_form == ThreadedCPS || m_graph.m_form == LoadStore);
294
295 bool changed = false;
296
297 m_graph.clearReplacements();
298
299 for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
300 BasicBlock* block = m_graph.block(blockIndex);
301 if (!block)
302 continue;
303
304 if (block->size() <= SmallMaps::capacity)
305 changed |= m_smallBlock.run(block);
306 else
307 changed |= m_largeBlock.run(block);
308 }
309
310 return changed;
311 }
312
313private:
314 class SmallMaps {
315 public:
316 // This permits SmallMaps to be used for blocks that have up to 100 nodes. In practice,
317 // fewer than half of the nodes in a block have pure defs, and even fewer have impure defs.
318 // Thus, a capacity limit of 100 probably means that somewhere around ~40 things may end up
319 // in one of these "small" list-based maps. That number still seems largeish, except that
320 // the overhead of HashMaps can be quite high currently: clearing them, or even removing
321 // enough things from them, deletes (or resizes) their backing store eagerly. Hence
322 // HashMaps induce a lot of malloc traffic.
323 static const unsigned capacity = 100;
324
325 SmallMaps()
326 : m_pureLength(0)
327 , m_impureLength(0)
328 {
329 }
330
331 void clear()
332 {
333 m_pureLength = 0;
334 m_impureLength = 0;
335 }
336
337 void write(AbstractHeap heap)
338 {
339 if (heap.kind() == SideState)
340 return;
341
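            // Unordered removal: overwrite the overlapping entry with the last live entry, shrink
            // the list, and step i back so the moved-in entry is also checked against this heap.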
342 for (unsigned i = 0; i < m_impureLength; ++i) {
343 if (heap.overlaps(m_impureMap[i].key.heap()))
344 m_impureMap[i--] = m_impureMap[--m_impureLength];
345 }
346 }
347
348 Node* addPure(PureValue value, Node* node)
349 {
350 for (unsigned i = m_pureLength; i--;) {
351 if (m_pureMap[i].key == value)
352 return m_pureMap[i].value;
353 }
354
355 ASSERT(m_pureLength < capacity);
356 m_pureMap[m_pureLength++] = WTF::KeyValuePair<PureValue, Node*>(value, node);
357 return nullptr;
358 }
359
360 LazyNode findReplacement(HeapLocation location)
361 {
362 for (unsigned i = m_impureLength; i--;) {
363 if (m_impureMap[i].key == location)
364 return m_impureMap[i].value;
365 }
366 return nullptr;
367 }
368
369 LazyNode addImpure(HeapLocation location, LazyNode node)
370 {
371 // FIXME: If we are using small maps, we must not def() derived values.
372 // For now the only derived values we def() are constant-based.
373 if (location.index() && !location.index().isNode())
374 return nullptr;
375 if (LazyNode result = findReplacement(location))
376 return result;
377 ASSERT(m_impureLength < capacity);
378 m_impureMap[m_impureLength++] = WTF::KeyValuePair<HeapLocation, LazyNode>(location, node);
379 return nullptr;
380 }
381
382 private:
383 WTF::KeyValuePair<PureValue, Node*> m_pureMap[capacity];
384 WTF::KeyValuePair<HeapLocation, LazyNode> m_impureMap[capacity];
385 unsigned m_pureLength;
386 unsigned m_impureLength;
387 };
388
389 class LargeMaps {
390 public:
391 LargeMaps()
392 {
393 }
394
395 void clear()
396 {
397 m_pureMap.clear();
398 m_impureMap.clear();
399 }
400
401 void write(AbstractHeap heap)
402 {
403 m_impureMap.clobber(heap);
404 }
405
406 Node* addPure(PureValue value, Node* node)
407 {
408 auto result = m_pureMap.add(value, node);
409 if (result.isNewEntry)
410 return nullptr;
411 return result.iterator->value;
412 }
413
414 LazyNode findReplacement(HeapLocation location)
415 {
416 return m_impureMap.get(location);
417 }
418
419 LazyNode addImpure(const HeapLocation& location, const LazyNode& node)
420 {
421 if (const ImpureDataSlot* slot = m_impureMap.add(location, node))
422 return slot->value;
423 return LazyNode();
424 }
425
426 private:
427 HashMap<PureValue, Node*> m_pureMap;
428 ImpureMap m_impureMap;
429 };
430
431 template<typename Maps>
432 class BlockCSE {
433 public:
434 BlockCSE(Graph& graph)
435 : m_graph(graph)
436 , m_insertionSet(graph)
437 {
438 }
439
440 bool run(BasicBlock* block)
441 {
442 m_maps.clear();
443 m_changed = false;
444 m_block = block;
445
446 for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
447 m_node = block->at(nodeIndex);
448 m_graph.performSubstitution(m_node);
449
450 if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) {
451 m_node->replaceWith(m_graph, m_node->child1().node());
452 m_changed = true;
453 } else {
454 // This rule only makes sense for local CSE, since in SSA form we have already
455 // factored the bounds check out of the PutByVal. It's kind of gross, but we
456 // still have reason to believe that PutByValAlias is a good optimization and
457 // that it's better to do it with a single node rather than separating out the
458 // CheckInBounds.
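                    // Illustrative example: given `a[i] = x; a[i] = y;` with both stores in
                    // bounds, the first PutByVal def()s the indexed HeapLocation below, so the
                    // second one finds a replacement here and is downgraded to PutByValAlias.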
459 if (m_node->op() == PutByVal || m_node->op() == PutByValDirect) {
460 HeapLocation heap;
461
462 Node* base = m_graph.varArgChild(m_node, 0).node();
463 Node* index = m_graph.varArgChild(m_node, 1).node();
464 LocationKind indexedPropertyLoc = indexedPropertyLocForResultType(m_node->result());
465
466 ArrayMode mode = m_node->arrayMode();
467 switch (mode.type()) {
468 case Array::Int32:
469 if (!mode.isInBounds())
470 break;
471 heap = HeapLocation(indexedPropertyLoc, IndexedInt32Properties, base, index);
472 break;
473
474 case Array::Double: {
475 if (!mode.isInBounds())
476 break;
477 LocationKind kind = mode.isSaneChain() ? IndexedPropertyDoubleSaneChainLoc : IndexedPropertyDoubleLoc;
478 heap = HeapLocation(kind, IndexedDoubleProperties, base, index);
479 break;
480 }
481
482 case Array::Contiguous:
483 if (!mode.isInBounds())
484 break;
485 heap = HeapLocation(indexedPropertyLoc, IndexedContiguousProperties, base, index);
486 break;
487
488 case Array::Int8Array:
489 case Array::Int16Array:
490 case Array::Int32Array:
491 case Array::Uint8Array:
492 case Array::Uint8ClampedArray:
493 case Array::Uint16Array:
494 case Array::Uint32Array:
495 case Array::Float32Array:
496 case Array::Float64Array:
497 if (!mode.isInBounds())
498 break;
499 heap = HeapLocation(
500 indexedPropertyLoc, TypedArrayProperties, base, index);
501 break;
502
503 default:
504 break;
505 }
506
507 if (!!heap && m_maps.findReplacement(heap))
508 m_node->setOp(PutByValAlias);
509 }
510
511 clobberize(m_graph, m_node, *this);
512 }
513 }
514
515 m_insertionSet.execute(block);
516
517 return m_changed;
518 }
519
520 void read(AbstractHeap) { }
521
522 void write(AbstractHeap heap)
523 {
524 m_maps.write(heap);
525 }
526
527 void def(PureValue value)
528 {
529 Node* match = m_maps.addPure(value, m_node);
530 if (!match)
531 return;
532
533 m_node->replaceWith(m_graph, match);
534 m_changed = true;
535 }
536
537 void def(const HeapLocation& location, const LazyNode& value)
538 {
539 LazyNode match = m_maps.addImpure(location, value);
540 if (!match)
541 return;
542
543 if (m_node->op() == GetLocal) {
                // Usually the CPS rethreading phase does this. But it's OK for us to mess with
                // locals so long as:
                //
                // - We dethread the graph. Any changes we make may invalidate the assumptions of
                //   our CPS form, particularly if this GetLocal is linked to the variablesAtTail.
                //
                // - We don't introduce a Phantom for the child of the GetLocal. This wouldn't be
                //   totally wrong but it would pessimize the code. Just because there is a
                //   GetLocal doesn't mean that the child was live. Simply rerouting all uses of
                //   this GetLocal will preserve the live-at-exit information just fine.
                //
                // We accomplish the latter by just clearing the child; then the Phantom that we
                // introduce won't have children and so it will eventually just be deleted.
557
558 m_node->child1() = Edge();
559 m_graph.dethread();
560 }
561
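            // If this node is itself the value being def()'d and an equivalent value is already
            // available, replace this node with the match, first materializing the match as a node
            // at the head of the block if it was only a constant LazyNode.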
562 if (value.isNode() && value.asNode() == m_node) {
563 match.ensureIsNode(m_insertionSet, m_block, 0)->owner = m_block;
564 ASSERT(match.isNode());
565 m_node->replaceWith(m_graph, match.asNode());
566 m_changed = true;
567 }
568 }
569
570 private:
571 Graph& m_graph;
572
573 bool m_changed;
574 Node* m_node;
575 BasicBlock* m_block;
576
577 Maps m_maps;
578
579 InsertionSet m_insertionSet;
580 };
581
582 BlockCSE<SmallMaps> m_smallBlock;
583 BlockCSE<LargeMaps> m_largeBlock;
584};
585
586class GlobalCSEPhase : public Phase {
587public:
588 GlobalCSEPhase(Graph& graph)
589 : Phase(graph, "global common subexpression elimination")
590 , m_impureDataMap(graph)
591 , m_insertionSet(graph)
592 {
593 }
594
595 bool run()
596 {
597 ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
598 ASSERT(m_graph.m_form == SSA);
599
600 m_graph.initializeNodeOwners();
601 m_graph.ensureSSADominators();
602
603 m_preOrder = m_graph.blocksInPreOrder();
604
        // First figure out what gets clobbered by blocks. Note that this uses the preOrder list
        // for convenience only.
607 for (unsigned i = m_preOrder.size(); i--;) {
608 m_block = m_preOrder[i];
609 m_impureData = &m_impureDataMap[m_block];
610 for (unsigned nodeIndex = m_block->size(); nodeIndex--;)
611 addWrites(m_graph, m_block->at(nodeIndex), m_impureData->writes);
612 }
613
        // Based on my experience doing this before, what follows might have to be made iterative.
        // Right now it doesn't have to be iterative because everything is dominator-based. But when
        // validation is enabled, we check if iterating would find new CSE opportunities.
617
618 bool changed = iterate();
619
620 // FIXME: It should be possible to assert that CSE will not find any new opportunities if you
621 // run it a second time. Unfortunately, we cannot assert this right now. Note that if we did
622 // this, we'd have to first reset all of our state.
623 // https://bugs.webkit.org/show_bug.cgi?id=145853
624
625 return changed;
626 }
627
628 bool iterate()
629 {
630 if (DFGCSEPhaseInternal::verbose)
631 dataLog("Performing iteration.\n");
632
633 m_changed = false;
634 m_graph.clearReplacements();
635
636 for (unsigned i = 0; i < m_preOrder.size(); ++i) {
637 m_block = m_preOrder[i];
638 m_impureData = &m_impureDataMap[m_block];
639 m_writesSoFar.clear();
640
641 if (DFGCSEPhaseInternal::verbose)
642 dataLog("Processing block ", *m_block, ":\n");
643
644 for (unsigned nodeIndex = 0; nodeIndex < m_block->size(); ++nodeIndex) {
645 m_nodeIndex = nodeIndex;
646 m_node = m_block->at(nodeIndex);
647 if (DFGCSEPhaseInternal::verbose)
648 dataLog(" Looking at node ", m_node, ":\n");
649
650 m_graph.performSubstitution(m_node);
651
652 if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) {
653 m_node->replaceWith(m_graph, m_node->child1().node());
654 m_changed = true;
655 } else
656 clobberize(m_graph, m_node, *this);
657 }
658
659 m_insertionSet.execute(m_block);
660
661 m_impureData->didVisit = true;
662 }
663
664 return m_changed;
665 }
666
667 void read(AbstractHeap) { }
668
669 void write(AbstractHeap heap)
670 {
671 m_impureData->availableAtTail.clobber(heap);
672 m_writesSoFar.add(heap);
673 }
674
675 void def(PureValue value)
676 {
677 // With pure values we do not have to worry about the possibility of some control flow path
678 // clobbering the value. So, we just search for all of the like values that have been
679 // computed. We pick one that is in a block that dominates ours. Note that this means that
680 // a PureValue will map to a list of nodes, since there may be many places in the control
681 // flow graph that compute a value but only one of them that dominates us. We may build up
682 // a large list of nodes that compute some value in the case of gnarly control flow. This
683 // is probably OK.
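        //
        // Illustrative example (the block names are made up): if blocks B1 and B2 both compute the
        // same PureValue but neither dominates the other, both nodes end up in the list, and a
        // later block dominated only by B2 can still find the B2 node as its match.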
684
685 auto result = m_pureValues.add(value, Vector<Node*>());
686 if (result.isNewEntry) {
687 result.iterator->value.append(m_node);
688 return;
689 }
690
691 for (unsigned i = result.iterator->value.size(); i--;) {
692 Node* candidate = result.iterator->value[i];
693 if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) {
694 m_node->replaceWith(m_graph, candidate);
695 m_changed = true;
696 return;
697 }
698 }
699
700 result.iterator->value.append(m_node);
701 }
702
703 LazyNode findReplacement(HeapLocation location)
704 {
705 // At this instant, our "availableAtTail" reflects the set of things that are available in
706 // this block so far. We check this map to find block-local CSE opportunities before doing
707 // a global search.
708 LazyNode match = m_impureData->availableAtTail.get(location);
709 if (!!match) {
710 if (DFGCSEPhaseInternal::verbose)
711 dataLog(" Found local match: ", match, "\n");
712 return match;
713 }
714
715 // If it's not available at this point in the block, and at some prior point in the block
716 // we have clobbered this heap location, then there is no point in doing a global search.
717 if (m_writesSoFar.overlaps(location.heap())) {
718 if (DFGCSEPhaseInternal::verbose)
719 dataLog(" Not looking globally because of local clobber: ", m_writesSoFar, "\n");
720 return nullptr;
721 }
722
        // This performs a backward search over the control flow graph to find some possible
724 // non-local def() that matches our heap location. Such a match is only valid if there does
725 // not exist any path from that def() to our block that contains a write() that overlaps
726 // our heap. This algorithm looks for both of these things (the matching def and the
727 // overlapping writes) in one backwards DFS pass.
728 //
729 // This starts by looking at the starting block's predecessors, and then it continues along
730 // their predecessors. As soon as this finds a possible def() - one that defines the heap
731 // location we want while dominating our starting block - it assumes that this one must be
732 // the match. It then lets the DFS over predecessors complete, but it doesn't add the
733 // def()'s predecessors; this ensures that any blocks we visit thereafter are on some path
        // from the def() to us. As soon as the DFS finds a write() that overlaps the location's
735 // heap, it stops, assuming that there is no possible match. Note that the write() case may
736 // trigger before we find a def(), or after. Either way, the write() case causes this
737 // function to immediately return nullptr.
738 //
739 // If the write() is found before we find the def(), then we know that any def() we would
740 // find would have a path to us that trips over the write() and hence becomes invalid. This
741 // is just a direct outcome of us looking for a def() that dominates us. Given a block A
742 // that dominates block B - so that A is the one that would have the def() and B is our
743 // starting block - we know that any other block must either be on the path from A to B, or
744 // it must be on a path from the root to A, but not both. So, if we haven't found A yet but
745 // we already have found a block C that has a write(), then C must be on some path from A
746 // to B, which means that A's def() is invalid for our purposes. Hence, before we find the
747 // def(), stopping on write() is the right thing to do.
748 //
749 // Stopping on write() is also the right thing to do after we find the def(). After we find
750 // the def(), we don't add that block's predecessors to the search worklist. That means
751 // that henceforth the only blocks we will see in the search are blocks on the path from
752 // the def() to us. If any such block has a write() that clobbers our heap then we should
753 // give up.
754 //
755 // Hence this graph search algorithm ends up being deceptively simple: any overlapping
756 // write() causes us to immediately return nullptr, and a matching def() means that we just
        // record it and neglect to visit its predecessors.
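        //
        // In pseudocode (a summary of the search loop below, not additional behavior):
        //
        //     worklist = predecessors of m_block
        //     while worklist is not empty:
        //         block = worklist.takeLast()
        //         if block strictly dominates m_block:
        //             match = block's availableAtTail entry for location
        //             if match: continue            // do not visit this block's predecessors
        //         if block's write set overlaps location.heap(): return nullptr
        //         push block's not-yet-seen predecessors
        //     return match, or nullptr if none was found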
758
759 Vector<BasicBlock*, 8> worklist;
760 Vector<BasicBlock*, 8> seenList;
761 BitVector seen;
762
763 for (unsigned i = m_block->predecessors.size(); i--;) {
764 BasicBlock* predecessor = m_block->predecessors[i];
765 if (!seen.get(predecessor->index)) {
766 worklist.append(predecessor);
767 seen.set(predecessor->index);
768 }
769 }
770
771 while (!worklist.isEmpty()) {
772 BasicBlock* block = worklist.takeLast();
773 seenList.append(block);
774
775 if (DFGCSEPhaseInternal::verbose)
776 dataLog(" Searching in block ", *block, "\n");
777 ImpureBlockData& data = m_impureDataMap[block];
778
779 // We require strict domination because this would only see things in our own block if
780 // they came *after* our position in the block. Clearly, while our block dominates
781 // itself, the things in the block after us don't dominate us.
782 if (m_graph.m_ssaDominators->strictlyDominates(block, m_block)) {
783 if (DFGCSEPhaseInternal::verbose)
784 dataLog(" It strictly dominates.\n");
785 DFG_ASSERT(m_graph, m_node, data.didVisit);
786 DFG_ASSERT(m_graph, m_node, !match);
787 match = data.availableAtTail.get(location);
788 if (DFGCSEPhaseInternal::verbose)
789 dataLog(" Availability: ", match, "\n");
790 if (!!match) {
791 // Don't examine the predecessors of a match. At this point we just want to
792 // establish that other blocks on the path from here to there don't clobber
793 // the location we're interested in.
794 continue;
795 }
796 }
797
798 if (DFGCSEPhaseInternal::verbose)
799 dataLog(" Dealing with write set ", data.writes, "\n");
800 if (data.writes.overlaps(location.heap())) {
801 if (DFGCSEPhaseInternal::verbose)
802 dataLog(" Clobbered.\n");
803 return nullptr;
804 }
805
806 for (unsigned i = block->predecessors.size(); i--;) {
807 BasicBlock* predecessor = block->predecessors[i];
808 if (!seen.get(predecessor->index)) {
809 worklist.append(predecessor);
810 seen.set(predecessor->index);
811 }
812 }
813 }
814
815 if (!match)
816 return nullptr;
817
818 // Cache the results for next time. We cache them both for this block and for all of our
819 // predecessors, since even though we've already visited our predecessors, our predecessors
820 // probably have successors other than us.
821 // FIXME: Consider caching failed searches as well, when match is null. It's not clear that
822 // the reduction in compile time would warrant the increase in complexity, though.
823 // https://bugs.webkit.org/show_bug.cgi?id=134876
824 for (BasicBlock* block : seenList)
825 m_impureDataMap[block].availableAtTail.add(location, match);
826 m_impureData->availableAtTail.add(location, match);
827
828 return match;
829 }
830
831 void def(HeapLocation location, LazyNode value)
832 {
833 if (DFGCSEPhaseInternal::verbose)
834 dataLog(" Got heap location def: ", location, " -> ", value, "\n");
835
836 LazyNode match = findReplacement(location);
837
838 if (DFGCSEPhaseInternal::verbose)
839 dataLog(" Got match: ", match, "\n");
840
841 if (!match) {
842 if (DFGCSEPhaseInternal::verbose)
843 dataLog(" Adding at-tail mapping: ", location, " -> ", value, "\n");
844 auto result = m_impureData->availableAtTail.add(location, value);
845 ASSERT_UNUSED(result, !result);
846 return;
847 }
848
849 if (value.isNode() && value.asNode() == m_node) {
850 if (!match.isNode()) {
851 // We need to properly record the constant in order to use an existing one if applicable.
852 // This ensures that re-running GCSE will not find new optimizations.
853 match.ensureIsNode(m_insertionSet, m_block, m_nodeIndex)->owner = m_block;
854 auto result = m_pureValues.add(PureValue(match.asNode(), match->constant()), Vector<Node*>());
855 bool replaced = false;
856 if (!result.isNewEntry) {
857 for (unsigned i = result.iterator->value.size(); i--;) {
858 Node* candidate = result.iterator->value[i];
859 if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) {
860 ASSERT(candidate);
861 match->replaceWith(m_graph, candidate);
862 match.setNode(candidate);
863 replaced = true;
864 break;
865 }
866 }
867 }
868 if (!replaced)
869 result.iterator->value.append(match.asNode());
870 }
871 ASSERT(match.asNode());
872 m_node->replaceWith(m_graph, match.asNode());
873 m_changed = true;
874 }
875 }
876
877 struct ImpureBlockData {
878 ImpureBlockData()
879 : didVisit(false)
880 {
881 }
882
883 ClobberSet writes;
884 ImpureMap availableAtTail;
885 bool didVisit;
886 };
887
888 Vector<BasicBlock*> m_preOrder;
889
890 PureMultiMap m_pureValues;
891 BlockMap<ImpureBlockData> m_impureDataMap;
892
893 BasicBlock* m_block;
894 Node* m_node;
895 unsigned m_nodeIndex;
896 ImpureBlockData* m_impureData;
897 ClobberSet m_writesSoFar;
898 InsertionSet m_insertionSet;
899
900 bool m_changed;
901};
902
903} // anonymous namespace
904
905bool performLocalCSE(Graph& graph)
906{
907 return runPhase<LocalCSEPhase>(graph);
908}
909
910bool performGlobalCSE(Graph& graph)
911{
912 return runPhase<GlobalCSEPhase>(graph);
913}
914
915} } // namespace JSC::DFG
916
917#endif // ENABLE(DFG_JIT)
918