1/*
2 * Copyright (C) 2005-2017 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#pragma once
23
24#include <unicode/utypes.h>
25#include <wtf/text/LChar.h>
26
27namespace WTF {
28
29// Paul Hsieh's SuperFastHash
30// http://www.azillionmonkeys.com/qed/hash.html
31
32// LChar data is interpreted as Latin-1-encoded (zero-extended to 16 bits).
33
34// NOTE: The hash computation here must stay in sync with the create_hash_table script in
35// JavaScriptCore and the CodeGeneratorJS.pm script in WebCore.
36
37// Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash value of zero.
38static constexpr const unsigned stringHashingStartValue = 0x9E3779B9U;
39
40class StringHasher {
41public:
42 static constexpr const unsigned flagCount = 8; // Save 8 bits for StringImpl to use as flags.
43 static constexpr const unsigned maskHash = (1U << (sizeof(unsigned) * 8 - flagCount)) - 1;
44
45 struct DefaultConverter {
46 template<typename CharType>
47 static constexpr UChar convert(CharType character)
48 {
49 return static_cast<std::make_unsigned_t<CharType>>((character));
50 }
51 };
52
53 StringHasher() = default;
54
55 // The hasher hashes two characters at a time, and thus an "aligned" hasher is one
56 // where an even number of characters have been added. Callers that always add
57 // characters two at a time can use the "assuming aligned" functions.
58 void addCharactersAssumingAligned(UChar a, UChar b)
59 {
60 ASSERT(!m_hasPendingCharacter);
61 m_hash = calculateWithTwoCharacters(m_hash, a, b);
62 }
63
64 void addCharacter(UChar character)
65 {
66 if (m_hasPendingCharacter) {
67 m_hasPendingCharacter = false;
68 addCharactersAssumingAligned(m_pendingCharacter, character);
69 return;
70 }
71
72 m_pendingCharacter = character;
73 m_hasPendingCharacter = true;
74 }
75
76 void addCharacters(UChar a, UChar b)
77 {
78 if (m_hasPendingCharacter) {
79#if !ASSERT_DISABLED
80 m_hasPendingCharacter = false;
81#endif
82 addCharactersAssumingAligned(m_pendingCharacter, a);
83 m_pendingCharacter = b;
84#if !ASSERT_DISABLED
85 m_hasPendingCharacter = true;
86#endif
87 return;
88 }
89
90 addCharactersAssumingAligned(a, b);
91 }
92
93 template<typename T, typename Converter> void addCharactersAssumingAligned(const T* data, unsigned length)
94 {
95 ASSERT(!m_hasPendingCharacter);
96
97 bool remainder = length & 1;
98 length >>= 1;
99
100 while (length--) {
101 addCharactersAssumingAligned(Converter::convert(data[0]), Converter::convert(data[1]));
102 data += 2;
103 }
104
105 if (remainder)
106 addCharacter(Converter::convert(*data));
107 }
108
109 template<typename T> void addCharactersAssumingAligned(const T* data, unsigned length)
110 {
111 addCharactersAssumingAligned<T, DefaultConverter>(data, length);
112 }
113
114 template<typename T, typename Converter> void addCharactersAssumingAligned(const T* data)
115 {
116 ASSERT(!m_hasPendingCharacter);
117
118 while (T a = *data++) {
119 T b = *data++;
120 if (!b) {
121 addCharacter(Converter::convert(a));
122 break;
123 }
124 addCharactersAssumingAligned(Converter::convert(a), Converter::convert(b));
125 }
126 }
127
128 template<typename T> void addCharactersAssumingAligned(const T* data)
129 {
130 addCharactersAssumingAligned<T, DefaultConverter>(data);
131 }
132
133 template<typename T, typename Converter> void addCharacters(const T* data, unsigned length)
134 {
135 if (!length)
136 return;
137 if (m_hasPendingCharacter) {
138 m_hasPendingCharacter = false;
139 addCharactersAssumingAligned(m_pendingCharacter, Converter::convert(*data++));
140 --length;
141 }
142 addCharactersAssumingAligned<T, Converter>(data, length);
143 }
144
145 template<typename T> void addCharacters(const T* data, unsigned length)
146 {
147 addCharacters<T, DefaultConverter>(data, length);
148 }
149
150 template<typename T, typename Converter> void addCharacters(const T* data)
151 {
152 if (m_hasPendingCharacter && *data) {
153 m_hasPendingCharacter = false;
154 addCharactersAssumingAligned(m_pendingCharacter, Converter::convert(*data++));
155 }
156 addCharactersAssumingAligned<T, Converter>(data);
157 }
158
159 template<typename T> void addCharacters(const T* data)
160 {
161 addCharacters<T, DefaultConverter>(data);
162 }
163
164 unsigned hashWithTop8BitsMasked() const
165 {
166 return finalizeAndMaskTop8Bits(processPendingCharacter());
167 }
168
169 unsigned hash() const
170 {
171 return finalize(processPendingCharacter());
172 }
173
174 template<typename T, typename Converter> static constexpr unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length)
175 {
176 return finalizeAndMaskTop8Bits(computeHashImpl<T, Converter>(data, length));
177 }
178
179 template<typename T, typename Converter> static constexpr unsigned computeHashAndMaskTop8Bits(const T* data)
180 {
181 return finalizeAndMaskTop8Bits(computeHashImpl<T, Converter>(data));
182 }
183
184 template<typename T> static constexpr unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length)
185 {
186 return computeHashAndMaskTop8Bits<T, DefaultConverter>(data, length);
187 }
188
189 template<typename T> static constexpr unsigned computeHashAndMaskTop8Bits(const T* data)
190 {
191 return computeHashAndMaskTop8Bits<T, DefaultConverter>(data);
192 }
193
194 template<typename T, typename Converter> static constexpr unsigned computeHash(const T* data, unsigned length)
195 {
196 return finalize(computeHashImpl<T, Converter>(data, length));
197 }
198
199 template<typename T, typename Converter> static constexpr unsigned computeHash(const T* data)
200 {
201 return finalize(computeHashImpl<T, Converter>(data));
202 }
203
204 template<typename T> static constexpr unsigned computeHash(const T* data, unsigned length)
205 {
206 return computeHash<T, DefaultConverter>(data, length);
207 }
208
209 template<typename T> static constexpr unsigned computeHash(const T* data)
210 {
211 return computeHash<T, DefaultConverter>(data);
212 }
213
214
215 template<typename T, unsigned charactersCount>
216 static constexpr unsigned computeLiteralHash(const T (&characters)[charactersCount])
217 {
218 return computeHash<T, DefaultConverter>(characters, charactersCount - 1);
219 }
220
221 template<typename T, unsigned charactersCount>
222 static constexpr unsigned computeLiteralHashAndMaskTop8Bits(const T (&characters)[charactersCount])
223 {
224 return computeHashAndMaskTop8Bits<T, DefaultConverter>(characters, charactersCount - 1);
225 }
226
227 static unsigned hashMemory(const void* data, unsigned length)
228 {
229 size_t lengthInUChar = length / sizeof(UChar);
230 StringHasher hasher;
231 hasher.addCharactersAssumingAligned(static_cast<const UChar*>(data), lengthInUChar);
232
233 for (size_t i = 0; i < length % sizeof(UChar); ++i)
234 hasher.addCharacter(static_cast<const char*>(data)[lengthInUChar * sizeof(UChar) + i]);
235
236 return hasher.hash();
237 }
238
239 template<size_t length> static unsigned hashMemory(const void* data)
240 {
241 return hashMemory(data, length);
242 }
243
244private:
245 ALWAYS_INLINE static constexpr unsigned avalancheBits(unsigned hash)
246 {
247 unsigned result = hash;
248
249 result ^= result << 3;
250 result += result >> 5;
251 result ^= result << 2;
252 result += result >> 15;
253 result ^= result << 10;
254
255 return result;
256 }
257
258 static constexpr unsigned finalize(unsigned hash)
259 {
260 return avoidZero(avalancheBits(hash));
261 }
262
263 static constexpr unsigned finalizeAndMaskTop8Bits(unsigned hash)
264 {
265 // Reserving space from the high bits for flags preserves most of the hash's
266 // value, since hash lookup typically masks out the high bits anyway.
267 return avoidZero(avalancheBits(hash) & StringHasher::maskHash);
268 }
269
270 // This avoids ever returning a hash code of 0, since that is used to
271 // signal "hash not computed yet". Setting the high bit maintains
272 // reasonable fidelity to a hash code of 0 because it is likely to yield
273 // exactly 0 when hash lookup masks out the high bits.
274 ALWAYS_INLINE static constexpr unsigned avoidZero(unsigned hash)
275 {
276 if (hash)
277 return hash;
278 return 0x80000000 >> flagCount;
279 }
280
281 static constexpr unsigned calculateWithRemainingLastCharacter(unsigned hash, unsigned character)
282 {
283 unsigned result = hash;
284
285 result += character;
286 result ^= result << 11;
287 result += result >> 17;
288
289 return result;
290 }
291
292 ALWAYS_INLINE static constexpr unsigned calculateWithTwoCharacters(unsigned hash, unsigned firstCharacter, unsigned secondCharacter)
293 {
294 unsigned result = hash;
295
296 result += firstCharacter;
297 result = (result << 16) ^ ((secondCharacter << 11) ^ result);
298 result += result >> 11;
299
300 return result;
301 }
302
303 template<typename T, typename Converter>
304 static constexpr unsigned computeHashImpl(const T* characters, unsigned length)
305 {
306 unsigned result = stringHashingStartValue;
307 bool remainder = length & 1;
308 length >>= 1;
309
310 while (length--) {
311 result = calculateWithTwoCharacters(result, Converter::convert(characters[0]), Converter::convert(characters[1]));
312 characters += 2;
313 }
314
315 if (remainder)
316 return calculateWithRemainingLastCharacter(result, Converter::convert(characters[0]));
317 return result;
318 }
319
320 template<typename T, typename Converter>
321 static constexpr unsigned computeHashImpl(const T* characters)
322 {
323 unsigned result = stringHashingStartValue;
324 while (T a = *characters++) {
325 T b = *characters++;
326 if (!b)
327 return calculateWithRemainingLastCharacter(result, Converter::convert(a));
328 result = calculateWithTwoCharacters(result, Converter::convert(a), Converter::convert(b));
329 }
330 return result;
331 }
332
333 unsigned processPendingCharacter() const
334 {
335 unsigned result = m_hash;
336
337 // Handle end case.
338 if (m_hasPendingCharacter)
339 return calculateWithRemainingLastCharacter(result, m_pendingCharacter);
340 return result;
341 }
342
343 unsigned m_hash { stringHashingStartValue };
344 UChar m_pendingCharacter { 0 };
345 bool m_hasPendingCharacter { false };
346};
347
348} // namespace WTF
349
350using WTF::StringHasher;
351