1/*
2 * Copyright (C) 2017 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#pragma once
27
28#include <wtf/DataLog.h>
29#include <wtf/LockAlgorithm.h>
30
31namespace WTF {
32
33// This is mostly just a word-sized WTF::Lock. It supports basically everything that lock supports. But as
34// a bonus, it atomically counts lock() calls and allows you to perform an optimistic read transaction by
35// comparing the count before and after the transaction. If at the start of the transaction the lock is
36// not held and the count remains the same throughout the transaction, then you know that nobody could
37// have modified your data structure while you ran. You can even use this to optimistically read pointers
38// that could become dangling under concurrent writes, if you just revalidate the count every time you're
39// about to do something dangerous.
40//
41// This is largely inspired by StampedLock from Java:
42// https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/locks/CountingLock.html
43//
44// This is simplified a lot compared to StampedLock. Unlike StampedLock, it uses an exclusive lock as a
45// fallback. There is no way to acquire a CountingLock for read. The only read access is via optimistic
46// read transactions.
47//
48// CountingLock provides two ways of doing optimistic reads:
49//
50// - The easy way, where CountingLock does all of the fencing for you. That fencing is free on x86 but
51// somewhat expensive on ARM.
52// - The hard way, where you do fencing yourself using Dependency. This allows you to be fenceless on both
53// x86 and ARM.
54//
55// The latter is important for us because some GC paths are known to be sensitive to fences on ARM.
56
57class CountingLock {
58 WTF_MAKE_NONCOPYABLE(CountingLock);
59 WTF_MAKE_FAST_ALLOCATED;
60
61 typedef unsigned LockType;
62
63 static constexpr LockType isHeldBit = 1;
64 static constexpr LockType hasParkedBit = 2;
65 static constexpr LockType mask = isHeldBit | hasParkedBit;
66 static constexpr LockType shift = 2;
67 static constexpr LockType countUnit = 4;
68
69 struct LockHooks {
70 static LockType lockHook(LockType value)
71 {
72 return value + countUnit;
73 }
74
75 static LockType unlockHook(LockType value) { return value; }
76 static LockType parkHook(LockType value) { return value; }
77 static LockType handoffHook(LockType value) { return value; }
78 };
79
80 typedef LockAlgorithm<LockType, isHeldBit, hasParkedBit, LockHooks> ExclusiveAlgorithm;
81
82public:
83 CountingLock() = default;
84
85 bool tryLock()
86 {
87 return ExclusiveAlgorithm::tryLock(m_word);
88 }
89
90 void lock()
91 {
92 if (UNLIKELY(!ExclusiveAlgorithm::lockFast(m_word)))
93 lockSlow();
94 }
95
96 void unlock()
97 {
98 if (UNLIKELY(!ExclusiveAlgorithm::unlockFast(m_word)))
99 unlockSlow();
100 }
101
102 bool isHeld() const
103 {
104 return ExclusiveAlgorithm::isLocked(m_word);
105 }
106
107 bool isLocked() const
108 {
109 return isHeld();
110 }
111
112 // The only thing you're allowed to infer from this value is that if it's zero, then you didn't get
113 // a real count.
114 class Count {
115 public:
116 explicit operator bool() const { return !!m_value; }
117
118 bool operator==(const Count& other) const { return m_value == other.m_value; }
119 bool operator!=(const Count& other) const { return m_value != other.m_value; }
120
121 private:
122 friend class CountingLock;
123
124 LockType m_value { 0 };
125 };
126
127 // Example of how to use this:
128 //
129 // int read()
130 // {
131 // if (CountingLock::Count count = m_lock.tryOptimisticRead()) {
132 // int value = m_things;
133 // if (m_lock.validate(count))
134 // return value; // success!
135 // }
136 // auto locker = holdLock(m_lock);
137 // int value = m_things;
138 // return value;
139 // }
140 //
141 // If tryOptimisitcRead() runs when the lock is not held, this thread will run a critical section
142 // without ever writing to memory. However, on ARM, this requires fencing. We use a load-acquire for
143 // tryOptimisticRead(). We have no choice but to use the more expensive `dmb ish` in validate(). If
144 // you want to avoid that, you could try to use tryOptimisticFencelessRead().
145 Count tryOptimisticRead()
146 {
147 LockType currentValue = m_word.load();
148 // FIXME: We could eliminate this check, if we think it's OK to proceed with the optimistic read
149 // path even after knowing that it must fail. That's probably good for perf since we expect
150 // failure to be super rare. We would get rid of this check and instead of calling getCount below,
151 // we would return currentValue ^ mask. If the lock state was empty to begin with, the result
152 // would be a properly blessed count (both low bits set). If the lock state was anything else, we
153 // would get an improperly blessed count that would not possibly succeed in validate. We could
154 // actually do something like "return (currentValue | hasParkedBit) ^ isHeldBit", which would mean
155 // that we allow parked-but-not-held-locks through.
156 // https://bugs.webkit.org/show_bug.cgi?id=180394
157 if (currentValue & isHeldBit)
158 return Count();
159 return getCount(currentValue);
160 }
161
162 bool validate(Count count)
163 {
164 WTF::loadLoadFence();
165 LockType currentValue = m_word.loadRelaxed();
166 return getCount(currentValue) == count;
167 }
168
169 // Example of how to use this:
170 //
171 // int read()
172 // {
173 // return m_lock.doOptimizedRead(
174 // [&] () -> int {
175 // int value = m_things;
176 // return value;
177 // });
178 // }
179 template<typename Func>
180 auto doOptimizedRead(const Func& func)
181 {
182 Count count = tryOptimisticRead();
183 if (count) {
184 auto result = func();
185 if (validate(count))
186 return result;
187 }
188 lock();
189 auto result = func();
190 unlock();
191 return result;
192 }
193
194 // Example of how to use this:
195 //
196 // int read()
197 // {
198 // auto result = m_lock.tryOptimisticFencelessRead();
199 // if (CountingLock::Count count = result.value) {
200 // Dependency fenceBefore = Dependency::fence(result.input);
201 // auto* fencedThis = fenceBefore.consume(this);
202 // int value = fencedThis->m_things;
203 // if (m_lock.fencelessValidate(count, Dependency::fence(value)))
204 // return value; // success!
205 // }
206 // auto locker = holdLock(m_lock);
207 // int value = m_things;
208 // return value;
209 // }
210 //
211 // Use this to create a read transaction using dependency chains only. You have to be careful to
212 // thread the dependency input (the `input` field that the returns) through a Dependency, and then
213 // thread that Dependency into every load (except for loads that are chasing pointers loaded from
214 // loads that already uses that dependency). Then, to validate the read transaction, you have to pass
215 // both the count and another Dependency that is based on whatever loads you used to produce the
216 // output.
217 //
218 // On non-ARM platforms, the Dependency objects don't do anything except for Dependency::fence, which
219 // is a load-load fence. The idiom above does the right thing on both ARM and TSO.
220 //
221 // WARNING: This can be hard to get right. Please only use this for very short critical sections that
222 // are known to be sufficiently perf-critical to justify the risk.
223 InputAndValue<LockType, Count> tryOptimisticFencelessRead()
224 {
225 LockType currentValue = m_word.loadRelaxed();
226 if (currentValue & isHeldBit)
227 return inputAndValue(currentValue, Count());
228 return inputAndValue(currentValue, getCount(currentValue));
229 }
230
231 bool fencelessValidate(Count count, Dependency dependency)
232 {
233 LockType currentValue = dependency.consume(this)->m_word.loadRelaxed();
234 return getCount(currentValue) == count;
235 }
236
237 template<typename OptimisticFunc, typename Func>
238 auto doOptimizedFencelessRead(const OptimisticFunc& optimisticFunc, const Func& func)
239 {
240 auto count = tryOptimisticFencelessRead();
241 if (count.value) {
242 Dependency dependency = Dependency::fence(count.input);
243 auto result = optimisticFunc(dependency, count.value);
244 if (fencelessValidate(count.value, dependency))
245 return result;
246 }
247 lock();
248 auto result = func();
249 unlock();
250 return result;
251 }
252
253private:
254 WTF_EXPORT_PRIVATE void lockSlow();
255 WTF_EXPORT_PRIVATE void unlockSlow();
256
257 Count getCount(LockType value)
258 {
259 Count result;
260 result.m_value = value | mask;
261 return result;
262 }
263
264 Atomic<LockType> m_word { 0 };
265};
266
267} // namespace WTF
268
269using WTF::CountingLock;
270
271