1/*
2
3Copyright (C) 2014-2019 Apple Inc. All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions
7are met:
81. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
102. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
15EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
25*/
26
27#include "config.h"
28#include <wtf/text/StringView.h>
29
30#include <mutex>
31#include <unicode/ubrk.h>
32#include <unicode/unorm2.h>
33#include <wtf/HashMap.h>
34#include <wtf/Lock.h>
35#include <wtf/NeverDestroyed.h>
36#include <wtf/Optional.h>
37#include <wtf/text/TextBreakIterator.h>
38#include <wtf/unicode/UTF8Conversion.h>
39
40namespace WTF {
41
42using namespace Unicode;
43
44bool StringView::containsIgnoringASCIICase(const StringView& matchString) const
45{
46 return findIgnoringASCIICase(matchString) != notFound;
47}
48
49bool StringView::containsIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
50{
51 return findIgnoringASCIICase(matchString, startOffset) != notFound;
52}
53
54size_t StringView::findIgnoringASCIICase(const StringView& matchString) const
55{
56 return ::WTF::findIgnoringASCIICase(*this, matchString, 0);
57}
58
59size_t StringView::findIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
60{
61 return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset);
62}
63
64bool StringView::startsWith(const StringView& prefix) const
65{
66 return ::WTF::startsWith(*this, prefix);
67}
68
69bool StringView::startsWithIgnoringASCIICase(const StringView& prefix) const
70{
71 return ::WTF::startsWithIgnoringASCIICase(*this, prefix);
72}
73
74bool StringView::endsWith(const StringView& suffix) const
75{
76 return ::WTF::endsWith(*this, suffix);
77}
78
79bool StringView::endsWithIgnoringASCIICase(const StringView& suffix) const
80{
81 return ::WTF::endsWithIgnoringASCIICase(*this, suffix);
82}
83
84Expected<CString, UTF8ConversionError> StringView::tryGetUtf8(ConversionMode mode) const
85{
86 if (isNull())
87 return CString("", 0);
88 if (is8Bit())
89 return StringImpl::utf8ForCharacters(characters8(), length());
90 return StringImpl::utf8ForCharacters(characters16(), length(), mode);
91}
92
93CString StringView::utf8(ConversionMode mode) const
94{
95 auto expectedString = tryGetUtf8(mode);
96 RELEASE_ASSERT(expectedString);
97 return expectedString.value();
98}
99
100size_t StringView::find(StringView matchString, unsigned start) const
101{
102 return findCommon(*this, matchString, start);
103}
104
105void StringView::SplitResult::Iterator::findNextSubstring()
106{
107 for (size_t separatorPosition; (separatorPosition = m_result.m_string.find(m_result.m_separator, m_position)) != notFound; ++m_position) {
108 if (m_result.m_allowEmptyEntries || separatorPosition > m_position) {
109 m_length = separatorPosition - m_position;
110 return;
111 }
112 }
113 m_length = m_result.m_string.length() - m_position;
114 if (!m_length && !m_result.m_allowEmptyEntries)
115 m_isDone = true;
116}
117
118auto StringView::SplitResult::Iterator::operator++() -> Iterator&
119{
120 ASSERT(m_position <= m_result.m_string.length() && !m_isDone);
121 m_position += m_length;
122 if (m_position < m_result.m_string.length()) {
123 ++m_position;
124 findNextSubstring();
125 } else if (!m_isDone)
126 m_isDone = true;
127 return *this;
128}
129
130class StringView::GraphemeClusters::Iterator::Impl {
131 WTF_MAKE_FAST_ALLOCATED;
132public:
133 Impl(const StringView& stringView, Optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index)
134 : m_stringView(stringView)
135 , m_iterator(WTFMove(iterator))
136 , m_index(index)
137 , m_indexEnd(computeIndexEnd())
138 {
139 }
140
141 void operator++()
142 {
143 ASSERT(m_indexEnd > m_index);
144 m_index = m_indexEnd;
145 m_indexEnd = computeIndexEnd();
146 }
147
148 StringView operator*() const
149 {
150 if (m_stringView.is8Bit())
151 return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index);
152 return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index);
153 }
154
155 bool operator==(const Impl& other) const
156 {
157 ASSERT(&m_stringView == &other.m_stringView);
158 auto result = m_index == other.m_index;
159 ASSERT(!result || m_indexEnd == other.m_indexEnd);
160 return result;
161 }
162
163 unsigned computeIndexEnd()
164 {
165 if (!m_iterator)
166 return 0;
167 if (m_index == m_stringView.length())
168 return m_index;
169 return ubrk_following(m_iterator.value(), m_index);
170 }
171
172private:
173 const StringView& m_stringView;
174 Optional<NonSharedCharacterBreakIterator> m_iterator;
175 unsigned m_index;
176 unsigned m_indexEnd;
177};
178
179StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index)
180 : m_impl(std::make_unique<Impl>(stringView, stringView.isNull() ? WTF::nullopt : Optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index))
181{
182}
183
184StringView::GraphemeClusters::Iterator::~Iterator()
185{
186}
187
188StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other)
189 : m_impl(WTFMove(other.m_impl))
190{
191}
192
193auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator&
194{
195 ++(*m_impl);
196 return *this;
197}
198
199StringView StringView::GraphemeClusters::Iterator::operator*() const
200{
201 return **m_impl;
202}
203
204bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const
205{
206 return *m_impl == *(other.m_impl);
207}
208
209bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const
210{
211 return !(*this == other);
212}
213
214enum class ASCIICase { Lower, Upper };
215
216template<ASCIICase type, typename CharacterType>
217String convertASCIICase(const CharacterType* input, unsigned length)
218{
219 if (!input)
220 return { };
221
222 CharacterType* characters;
223 auto result = String::createUninitialized(length, characters);
224 for (unsigned i = 0; i < length; ++i)
225 characters[i] = type == ASCIICase::Lower ? toASCIILower(input[i]) : toASCIIUpper(input[i]);
226 return result;
227}
228
229String StringView::convertToASCIILowercase() const
230{
231 if (m_is8Bit)
232 return convertASCIICase<ASCIICase::Lower>(static_cast<const LChar*>(m_characters), m_length);
233 return convertASCIICase<ASCIICase::Lower>(static_cast<const UChar*>(m_characters), m_length);
234}
235
236String StringView::convertToASCIIUppercase() const
237{
238 if (m_is8Bit)
239 return convertASCIICase<ASCIICase::Upper>(static_cast<const LChar*>(m_characters), m_length);
240 return convertASCIICase<ASCIICase::Upper>(static_cast<const UChar*>(m_characters), m_length);
241}
242
243StringViewWithUnderlyingString normalizedNFC(StringView string)
244{
245 // Latin-1 characters are unaffected by normalization.
246 if (string.is8Bit())
247 return { string, { } };
248
249 UErrorCode status = U_ZERO_ERROR;
250 const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
251 ASSERT(U_SUCCESS(status));
252
253 // No need to normalize if already normalized.
254 UBool checkResult = unorm2_isNormalized(normalizer, string.characters16(), string.length(), &status);
255 if (checkResult)
256 return { string, { } };
257
258 unsigned normalizedLength = unorm2_normalize(normalizer, string.characters16(), string.length(), nullptr, 0, &status);
259 ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
260
261 UChar* characters;
262 String result = String::createUninitialized(normalizedLength, characters);
263
264 status = U_ZERO_ERROR;
265 unorm2_normalize(normalizer, string.characters16(), string.length(), characters, normalizedLength, &status);
266 ASSERT(U_SUCCESS(status));
267
268 StringView view { result };
269 return { view, WTFMove(result) };
270}
271
272String normalizedNFC(const String& string)
273{
274 auto result = normalizedNFC(StringView { string });
275 if (result.underlyingString.isNull())
276 return string;
277 return result.underlyingString;
278}
279
280#if CHECK_STRINGVIEW_LIFETIME
281
282// Manage reference count manually so UnderlyingString does not need to be defined in the header.
283
284struct StringView::UnderlyingString {
285 std::atomic_uint refCount { 1u };
286 bool isValid { true };
287 const StringImpl& string;
288 explicit UnderlyingString(const StringImpl&);
289};
290
291StringView::UnderlyingString::UnderlyingString(const StringImpl& string)
292 : string(string)
293{
294}
295
296static Lock underlyingStringsMutex;
297
298static HashMap<const StringImpl*, StringView::UnderlyingString*>& underlyingStrings()
299{
300 static NeverDestroyed<HashMap<const StringImpl*, StringView::UnderlyingString*>> map;
301 return map;
302}
303
304void StringView::invalidate(const StringImpl& stringToBeDestroyed)
305{
306 UnderlyingString* underlyingString;
307 {
308 std::lock_guard<Lock> lock(underlyingStringsMutex);
309 underlyingString = underlyingStrings().take(&stringToBeDestroyed);
310 if (!underlyingString)
311 return;
312 }
313 ASSERT(underlyingString->isValid);
314 underlyingString->isValid = false;
315}
316
317bool StringView::underlyingStringIsValid() const
318{
319 return !m_underlyingString || m_underlyingString->isValid;
320}
321
322void StringView::adoptUnderlyingString(UnderlyingString* underlyingString)
323{
324 if (m_underlyingString) {
325 std::lock_guard<Lock> lock(underlyingStringsMutex);
326 if (!--m_underlyingString->refCount) {
327 if (m_underlyingString->isValid) {
328 underlyingStrings().remove(&m_underlyingString->string);
329 }
330 delete m_underlyingString;
331 }
332 }
333 m_underlyingString = underlyingString;
334}
335
336void StringView::setUnderlyingString(const StringImpl* string)
337{
338 UnderlyingString* underlyingString;
339 if (!string)
340 underlyingString = nullptr;
341 else {
342 std::lock_guard<Lock> lock(underlyingStringsMutex);
343 auto result = underlyingStrings().add(string, nullptr);
344 if (result.isNewEntry)
345 result.iterator->value = new UnderlyingString(*string);
346 else
347 ++result.iterator->value->refCount;
348 underlyingString = result.iterator->value;
349 }
350 adoptUnderlyingString(underlyingString);
351}
352
353void StringView::setUnderlyingString(const StringView& otherString)
354{
355 UnderlyingString* underlyingString = otherString.m_underlyingString;
356 if (underlyingString)
357 ++underlyingString->refCount;
358 adoptUnderlyingString(underlyingString);
359}
360
361#endif // CHECK_STRINGVIEW_LIFETIME
362
363} // namespace WTF
364