1/*
2 * Copyright (C) 2007 David Smith (catfish.man@gmail.com)
3 * Copyright (C) 2007, 2008, 2011-2014 Apple Inc. All rights reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
19 */
20
21#include "config.h"
22#include "SpaceSplitString.h"
23
24#include "HTMLParserIdioms.h"
25#include <wtf/HashMap.h>
26#include <wtf/NeverDestroyed.h>
27#include <wtf/text/AtomStringHash.h>
28
29namespace WebCore {
30
31COMPILE_ASSERT(!(sizeof(SpaceSplitStringData) % sizeof(uintptr_t)), SpaceSplitStringDataTailIsAlignedToWordSize);
32
33template <typename CharacterType, typename TokenProcessor>
34static inline void tokenizeSpaceSplitString(TokenProcessor& tokenProcessor, const CharacterType* characters, unsigned length)
35{
36 for (unsigned start = 0; ; ) {
37 while (start < length && isHTMLSpace(characters[start]))
38 ++start;
39 if (start >= length)
40 break;
41 unsigned end = start + 1;
42 while (end < length && isNotHTMLSpace(characters[end]))
43 ++end;
44
45 if (!tokenProcessor.processToken(characters + start, end - start))
46 return;
47
48 start = end + 1;
49 }
50}
51
52template<typename TokenProcessor>
53static inline void tokenizeSpaceSplitString(TokenProcessor& tokenProcessor, const String& string)
54{
55 ASSERT(!string.isNull());
56
57 const StringImpl& stringImpl = *string.impl();
58 if (stringImpl.is8Bit())
59 tokenizeSpaceSplitString(tokenProcessor, stringImpl.characters8(), stringImpl.length());
60 else
61 tokenizeSpaceSplitString(tokenProcessor, stringImpl.characters16(), stringImpl.length());
62}
63
64bool SpaceSplitStringData::containsAll(SpaceSplitStringData& other)
65{
66 if (this == &other)
67 return true;
68
69 unsigned otherSize = other.m_size;
70 unsigned i = 0;
71 do {
72 if (!contains(other[i]))
73 return false;
74 ++i;
75 } while (i < otherSize);
76 return true;
77}
78
79struct SpaceSplitStringTableKeyTraits : public HashTraits<AtomString>
80{
81 // The number 200 for typicalNumberOfSpaceSplitString was based on the typical number of unique class names
82 // on typical websites on August 2013.
83 static const unsigned typicalNumberOfSpaceSplitString = 200;
84 static const int minimumTableSize = WTF::HashTableCapacityForSize<typicalNumberOfSpaceSplitString>::value;
85};
86
87typedef HashMap<AtomString, SpaceSplitStringData*, AtomStringHash, SpaceSplitStringTableKeyTraits> SpaceSplitStringTable;
88
89static SpaceSplitStringTable& spaceSplitStringTable()
90{
91 static NeverDestroyed<SpaceSplitStringTable> table;
92 return table;
93}
94
95void SpaceSplitString::set(const AtomString& inputString, bool shouldFoldCase)
96{
97 if (inputString.isNull()) {
98 clear();
99 return;
100 }
101 m_data = SpaceSplitStringData::create(shouldFoldCase ? inputString.convertToASCIILowercase() : inputString);
102}
103
104class TokenIsEqualToCStringTokenProcessor {
105public:
106 TokenIsEqualToCStringTokenProcessor(const char* referenceString, unsigned referenceStringLength)
107 : m_referenceString(referenceString)
108 , m_referenceStringLength(referenceStringLength)
109 , m_referenceStringWasFound(false)
110 {
111 }
112
113 template <typename CharacterType>
114 bool processToken(const CharacterType* characters, unsigned length)
115 {
116 if (length == m_referenceStringLength && equal(characters, reinterpret_cast<const LChar*>(m_referenceString), length)) {
117 m_referenceStringWasFound = true;
118 return false;
119 }
120 return true;
121 }
122
123 bool referenceStringWasFound() const { return m_referenceStringWasFound; }
124
125private:
126 const char* m_referenceString;
127 unsigned m_referenceStringLength;
128 bool m_referenceStringWasFound;
129};
130
131bool SpaceSplitString::spaceSplitStringContainsValue(const String& inputString, const char* value, unsigned valueLength, bool shouldFoldCase)
132{
133 if (inputString.isNull())
134 return false;
135
136 TokenIsEqualToCStringTokenProcessor tokenProcessor(value, valueLength);
137 tokenizeSpaceSplitString(tokenProcessor, shouldFoldCase ? inputString.impl()->convertToASCIILowercase() : inputString);
138 return tokenProcessor.referenceStringWasFound();
139}
140
141class TokenCounter {
142 WTF_MAKE_NONCOPYABLE(TokenCounter);
143public:
144 TokenCounter() : m_tokenCount(0) { }
145
146 template<typename CharacterType> bool processToken(const CharacterType*, unsigned)
147 {
148 ++m_tokenCount;
149 return true;
150 }
151
152 unsigned tokenCount() const { return m_tokenCount; }
153
154private:
155 unsigned m_tokenCount;
156};
157
158class TokenAtomStringInitializer {
159 WTF_MAKE_NONCOPYABLE(TokenAtomStringInitializer);
160public:
161 TokenAtomStringInitializer(AtomString* memory) : m_memoryBucket(memory) { }
162
163 template<typename CharacterType> bool processToken(const CharacterType* characters, unsigned length)
164 {
165 new (NotNull, m_memoryBucket) AtomString(characters, length);
166 ++m_memoryBucket;
167 return true;
168 }
169
170 const AtomString* nextMemoryBucket() const { return m_memoryBucket; }
171
172private:
173 AtomString* m_memoryBucket;
174};
175
176inline Ref<SpaceSplitStringData> SpaceSplitStringData::create(const AtomString& keyString, unsigned tokenCount)
177{
178 ASSERT(tokenCount);
179
180 RELEASE_ASSERT(tokenCount < (std::numeric_limits<unsigned>::max() - sizeof(SpaceSplitStringData)) / sizeof(AtomString));
181 unsigned sizeToAllocate = sizeof(SpaceSplitStringData) + tokenCount * sizeof(AtomString);
182 SpaceSplitStringData* spaceSplitStringData = static_cast<SpaceSplitStringData*>(fastMalloc(sizeToAllocate));
183
184 new (NotNull, spaceSplitStringData) SpaceSplitStringData(keyString, tokenCount);
185 AtomString* tokenArrayStart = spaceSplitStringData->tokenArrayStart();
186 TokenAtomStringInitializer tokenInitializer(tokenArrayStart);
187 tokenizeSpaceSplitString(tokenInitializer, keyString);
188 ASSERT(static_cast<unsigned>(tokenInitializer.nextMemoryBucket() - tokenArrayStart) == tokenCount);
189 ASSERT(reinterpret_cast<const char*>(tokenInitializer.nextMemoryBucket()) == reinterpret_cast<const char*>(spaceSplitStringData) + sizeToAllocate);
190
191 return adoptRef(*spaceSplitStringData);
192}
193
194RefPtr<SpaceSplitStringData> SpaceSplitStringData::create(const AtomString& keyString)
195{
196 ASSERT(isMainThread());
197 ASSERT(!keyString.isNull());
198
199 auto addResult = spaceSplitStringTable().add(keyString, nullptr);
200 if (!addResult.isNewEntry)
201 return addResult.iterator->value;
202
203 // Nothing in the cache? Let's create a new SpaceSplitStringData if the input has something useful.
204 // 1) We find the number of strings in the input to know how much size we need to allocate.
205 TokenCounter tokenCounter;
206 tokenizeSpaceSplitString(tokenCounter, keyString);
207 unsigned tokenCount = tokenCounter.tokenCount();
208
209 if (!tokenCount)
210 return nullptr;
211
212 RefPtr<SpaceSplitStringData> spaceSplitStringData = create(keyString, tokenCount);
213 addResult.iterator->value = spaceSplitStringData.get();
214 return spaceSplitStringData;
215}
216
217
218void SpaceSplitStringData::destroy(SpaceSplitStringData* spaceSplitString)
219{
220 ASSERT(isMainThread());
221
222 spaceSplitStringTable().remove(spaceSplitString->m_keyString);
223
224 unsigned i = 0;
225 unsigned size = spaceSplitString->size();
226 const AtomString* data = spaceSplitString->tokenArrayStart();
227 do {
228 data[i].~AtomString();
229 ++i;
230 } while (i < size);
231
232 spaceSplitString->~SpaceSplitStringData();
233
234 fastFree(spaceSplitString);
235}
236
237} // namespace WebCore
238