BloomFilter.h source code [jsc/WebKitBuild/Debug/DerivedSources/ForwardingHeaders/wtf/BloomFilter.h]

1	/*
2	* Copyright (C) 2011, 2015 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15	* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23	* THE POSSIBILITY OF SUCH DAMAGE.
24	*/
25
26	#pragma once
27
28	#include <array>
29	#include <wtf/text/AtomString.h>
30
31	namespace WTF {
32
33	// Bloom filter with k=2. Uses 2^keyBits/8 bytes of memory.
34	// False positive rate is approximately (1-e^(-2n/m))^2, where n is the number of unique
35	// keys and m is the table size (==2^keyBits).
36	// See http://en.wikipedia.org/wiki/Bloom_filter
37	template <unsigned keyBits>
38	class BloomFilter {
39	WTF_MAKE_FAST_ALLOCATED;
40	public:
41	static const size_t tableSize = `1` << keyBits;
42
43	BloomFilter();
44
45	void add(unsigned hash);
46	// For example SHA1::Digest.
47	template <size_t hashSize> void add(const std::array<uint8_t, hashSize>&);
48
49	void add(const BloomFilter&);
50
51	// The filter may give false positives (claim it may contain a key it doesn't)
52	// but never false negatives (claim it doesn't contain a key it does).
53	bool mayContain(unsigned hash) const;
54	template <size_t hashSize> bool mayContain(const std::array<uint8_t, hashSize>&) const;
55
56	void clear();
57
58	void add(const AtomString& string) { add(string.impl()->existingHash()); }
59	void add(const String& string) { add(string.impl()->hash()); }
60	bool mayContain(const AtomString& string) const { return mayContain(string.impl()->existingHash()); }
61	bool mayContain(const String& string) const { return mayContain(string.impl()->hash()); }
62
63	private:
64	static const unsigned bitsPerPosition = `8` * sizeof(unsigned);
65	static const unsigned keyMask = (`1` << keyBits) - `1`;
66	static unsigned arrayIndex(unsigned key) { return key / bitsPerPosition; }
67	static unsigned bitMask(unsigned key) { return `1` << (key % bitsPerPosition); }
68	template <size_t hashSize> static std::pair<unsigned, unsigned> keysFromHash(const std::array<uint8_t, hashSize>&);
69
70	bool isBitSet(unsigned key) const;
71	void setBit(unsigned key);
72
73	std::array<unsigned, tableSize / bitsPerPosition> m_bitArray;
74	};
75
76	template <unsigned keyBits>
77	inline BloomFilter<keyBits>::BloomFilter()
78	: m_bitArray()
79	{
80	}
81
82	template <unsigned keyBits>
83	inline bool BloomFilter<keyBits>::mayContain(unsigned hash) const
84	{
85	// The top and bottom bits of the incoming hash are treated as independent bloom filter hash functions.
86	// This works well as long as the filter size is not much above 2^16.
87	return isBitSet(hash) && isBitSet(hash >> `16`);
88	}
89
90	template <unsigned keyBits>
91	inline void BloomFilter<keyBits>::add(unsigned hash)
92	{
93	setBit(hash);
94	setBit(hash >> `16`);
95	}
96
97	template <unsigned keyBits>
98	template <size_t hashSize>
99	inline std::pair<unsigned, unsigned> BloomFilter<keyBits>::keysFromHash(const std::array<uint8_t, hashSize>& hash)
100	{
101	// We could use larger k value than 2 for long hashes.
102	static_assert(hashSize >= `2` * sizeof(unsigned), "Hash array too short");
103	return {
104	*reinterpret_cast<const unsigned*>(hash.data()),
105	*reinterpret_cast<const unsigned>(hash.data() + sizeof(unsigned*))
106	};
107	}
108
109	template <unsigned keyBits>
110	template <size_t hashSize>
111	inline bool BloomFilter<keyBits>::mayContain(const std::array<uint8_t, hashSize>& hash) const
112	{
113	auto keys = keysFromHash(hash);
114	return isBitSet(keys.first) && isBitSet(keys.second);
115	}
116
117	template <unsigned keyBits>
118	template <size_t hashSize>
119	inline void BloomFilter<keyBits>::add(const std::array<uint8_t, hashSize>& hash)
120	{
121	auto keys = keysFromHash(hash);
122	setBit(keys.first);
123	setBit(keys.second);
124	}
125
126	template <unsigned keyBits>
127	inline void BloomFilter<keyBits>::add(const BloomFilter& other)
128	{
129	for (size_t i = `0`; i < m_bitArray.size(); ++i)
130	m_bitArray[i] \|= other.m_bitArray[i];
131	}
132
133	template <unsigned keyBits>
134	bool BloomFilter<keyBits>::isBitSet(unsigned key) const
135	{
136	unsigned maskedKey = key & keyMask;
137	ASSERT(arrayIndex(maskedKey) < m_bitArray.size());
138	return m_bitArray[arrayIndex(maskedKey)] & bitMask(maskedKey);
139	}
140
141	template <unsigned keyBits>
142	void BloomFilter<keyBits>::setBit(unsigned key)
143	{
144	unsigned maskedKey = key & keyMask;
145	ASSERT(arrayIndex(maskedKey) < m_bitArray.size());
146	m_bitArray[arrayIndex(maskedKey)] \|= bitMask(maskedKey);
147	}
148
149	template <unsigned keyBits>
150	inline void BloomFilter<keyBits>::clear()
151	{
152	m_bitArray.fill(`0`);
153	}
154
155	// Counting bloom filter with 8 bit counters. Uses 2^keyBits bytes of memory. Error rates as above.
156	// See http://en.wikipedia.org/wiki/Bloom_filter#Counting_filters
157	template <unsigned keyBits>
158	class CountingBloomFilter {
159	WTF_MAKE_FAST_ALLOCATED;
160	public:
161	static const size_t tableSize = `1` << keyBits;
162	static unsigned maximumCount() { return std::numeric_limits<uint8_t>::max(); }
163
164	CountingBloomFilter();
165
166	void add(unsigned hash);
167	void remove(unsigned hash);
168
169	// The filter may give false positives (claim it may contain a key it doesn't)
170	// but never false negatives (claim it doesn't contain a key it does).
171	bool mayContain(unsigned hash) const { return firstBucket(hash) && secondBucket(hash); }
172
173	// The filter must be cleared before reuse even if all keys are removed.
174	// Otherwise overflowed keys will stick around.
175	void clear();
176
177	void add(const AtomString& string) { add(string.impl()->existingHash()); }
178	void add(const String& string) { add(string.impl()->hash()); }
179	void remove(const AtomString& string) { remove(string.impl()->existingHash()); }
180	void remove(const String& string) { remove(string.impl()->hash()); }
181
182	bool mayContain(const AtomString& string) const { return mayContain(string.impl()->existingHash()); }
183	bool mayContain(const String& string) const { return mayContain(string.impl()->hash()); }
184
185	#if !ASSERT_DISABLED
186	// Slow.
187	bool likelyEmpty() const;
188	bool isClear() const;
189	#endif
190
191	private:
192	static const unsigned keyMask = (`1` << keyBits) - `1`;
193
194	uint8_t& firstBucket(unsigned hash) { return m_buckets[hash & keyMask]; }
195	uint8_t& secondBucket(unsigned hash) { return m_buckets[(hash >> `16`) & keyMask]; }
196	const uint8_t& firstBucket(unsigned hash) const { return m_buckets[hash & keyMask]; }
197	const uint8_t& secondBucket(unsigned hash) const { return m_buckets[(hash >> `16`) & keyMask]; }
198
199	std::array<uint8_t, tableSize> m_buckets;
200	};
201
202	template <unsigned keyBits>
203	inline CountingBloomFilter<keyBits>::CountingBloomFilter()
204	: m_buckets()
205	{
206	}
207
208	template <unsigned keyBits>
209	inline void CountingBloomFilter<keyBits>::add(unsigned hash)
210	{
211	auto& first = firstBucket(hash);
212	auto& second = secondBucket(hash);
213	if (LIKELY(first < maximumCount()))
214	++first;
215	if (LIKELY(second < maximumCount()))
216	++second;
217	}
218
219	template <unsigned keyBits>
220	inline void CountingBloomFilter<keyBits>::remove(unsigned hash)
221	{
222	auto& first = firstBucket(hash);
223	auto& second = secondBucket(hash);
224	ASSERT(first);
225	ASSERT(second);
226	// In case of an overflow, the bucket sticks in the table until clear().
227	if (LIKELY(first < maximumCount()))
228	--first;
229	if (LIKELY(second < maximumCount()))
230	--second;
231	}
232
233	template <unsigned keyBits>
234	inline void CountingBloomFilter<keyBits>::clear()
235	{
236	m_buckets.fill(`0`);
237	}
238
239	#if !ASSERT_DISABLED
240	template <unsigned keyBits>
241	bool CountingBloomFilter<keyBits>::likelyEmpty() const
242	{
243	for (auto& bucket : m_buckets) {
244	if (bucket && bucket != maximumCount())
245	return false;
246	}
247	return true;
248	}
249
250	template <unsigned keyBits>
251	bool CountingBloomFilter<keyBits>::isClear() const
252	{
253	for (auto& bucket : m_buckets) {
254	if (bucket)
255	return false;
256	}
257	return true;
258	}
259	#endif
260
261	}
262
263	using WTF::BloomFilter;
264	using WTF::CountingBloomFilter;
265

Browse the source code of jsc/WebKitBuild/Debug/DerivedSources/ForwardingHeaders/wtf/BloomFilter.h