1/*
2 Copyright (C) 2004-2016 Apple Inc. All rights reserved.
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public
6 License as published by the Free Software Foundation; either
7 version 2 of the License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public License
15 along with this library; see the file COPYING.LIB. If not, write to
16 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 Boston, MA 02110-1301, USA.
18*/
19
20#pragma once
21
22#include <wtf/Deque.h>
23#include <wtf/text/WTFString.h>
24
25namespace WebCore {
26
// FIXME: This should not start with "k".
// FIXME: This is a shared tokenizer concept, not a SegmentedString concept, but this is the only common header for now.
// The character value 0, named as the end-of-file marker (its consumers live outside this header).
constexpr LChar kEndOfFileMarker = 0;
30
31class SegmentedString {
32public:
33 SegmentedString() = default;
34 SegmentedString(String&&);
35 SegmentedString(const String&);
36
37 SegmentedString(SegmentedString&&) = delete;
38 SegmentedString(const SegmentedString&) = delete;
39
40 SegmentedString& operator=(SegmentedString&&);
41 SegmentedString& operator=(const SegmentedString&) = default;
42
43 void clear();
44 void close();
45
46 void append(SegmentedString&&);
47 void append(const SegmentedString&);
48
49 void append(String&&);
50 void append(const String&);
51
52 void pushBack(String&&);
53
54 void setExcludeLineNumbers();
55
56 bool isEmpty() const { return !m_currentSubstring.length; }
57 unsigned length() const;
58
59 bool isClosed() const { return m_isClosed; }
60
61 void advance();
62 void advancePastNonNewline(); // Faster than calling advance when we know the current character is not a newline.
63 void advancePastNewline(); // Faster than calling advance when we know the current character is a newline.
64
65 enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters };
66 template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast<length, false>(literal); }
67 template<unsigned length> AdvancePastResult advancePastLettersIgnoringASCIICase(const char (&literal)[length]) { return advancePast<length, true>(literal); }
68
69 unsigned numberOfCharactersConsumed() const;
70
71 String toString() const;
72
73 UChar currentCharacter() const { return m_currentCharacter; }
74
75 OrdinalNumber currentColumn() const;
76 OrdinalNumber currentLine() const;
77
78 // Sets value of line/column variables. Column is specified indirectly by a parameter columnAfterProlog
79 // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
80 void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAfterProlog, int prologLength);
81
82private:
83 struct Substring {
84 Substring() = default;
85 Substring(String&&);
86
87 UChar currentCharacter() const;
88 UChar currentCharacterPreIncrement();
89
90 unsigned numberOfCharactersConsumed() const;
91 void appendTo(StringBuilder&) const;
92
93 String string;
94 unsigned length { 0 };
95 bool is8Bit;
96 union {
97 const LChar* currentCharacter8;
98 const UChar* currentCharacter16;
99 };
100 bool doNotExcludeLineNumbers { true };
101 };
102
103 enum FastPathFlags {
104 NoFastPath = 0,
105 Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
106 Use8BitAdvance = 1 << 1,
107 };
108
109 void appendSubstring(Substring&&);
110
111 void processPossibleNewline();
112 void startNewLine();
113
114 void advanceWithoutUpdatingLineNumber();
115 void advanceWithoutUpdatingLineNumber16();
116 void advanceAndUpdateLineNumber16();
117 void advancePastSingleCharacterSubstringWithoutUpdatingLineNumber();
118 void advancePastSingleCharacterSubstring();
119 void advanceEmpty();
120
121 void updateAdvanceFunctionPointers();
122 void updateAdvanceFunctionPointersForEmptyString();
123 void updateAdvanceFunctionPointersForSingleCharacterSubstring();
124
125 void decrementAndCheckLength();
126
127 template<typename CharacterType> static bool characterMismatch(CharacterType, char, bool lettersIgnoringASCIICase);
128 template<unsigned length, bool lettersIgnoringASCIICase> AdvancePastResult advancePast(const char (&literal)[length]);
129 AdvancePastResult advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase);
130
131 Substring m_currentSubstring;
132 Deque<Substring> m_otherSubstrings;
133
134 bool m_isClosed { false };
135
136 UChar m_currentCharacter { 0 };
137
138 unsigned m_numberOfCharactersConsumedPriorToCurrentSubstring { 0 };
139 unsigned m_numberOfCharactersConsumedPriorToCurrentLine { 0 };
140 int m_currentLine { 0 };
141
142 unsigned char m_fastPathFlags { NoFastPath };
143 void (SegmentedString::*m_advanceWithoutUpdatingLineNumberFunction)() { &SegmentedString::advanceEmpty };
144 void (SegmentedString::*m_advanceAndUpdateLineNumberFunction)() { &SegmentedString::advanceEmpty };
145};
146
147inline SegmentedString::Substring::Substring(String&& passedString)
148 : string(WTFMove(passedString))
149 , length(string.length())
150{
151 if (length) {
152 is8Bit = string.impl()->is8Bit();
153 if (is8Bit)
154 currentCharacter8 = string.impl()->characters8();
155 else
156 currentCharacter16 = string.impl()->characters16();
157 }
158}
159
160inline unsigned SegmentedString::Substring::numberOfCharactersConsumed() const
161{
162 return string.length() - length;
163}
164
165ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacter() const
166{
167 ASSERT(length);
168 return is8Bit ? *currentCharacter8 : *currentCharacter16;
169}
170
171ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacterPreIncrement()
172{
173 ASSERT(length);
174 return is8Bit ? *++currentCharacter8 : *++currentCharacter16;
175}
176
177inline SegmentedString::SegmentedString(String&& string)
178 : m_currentSubstring(WTFMove(string))
179{
180 if (m_currentSubstring.length) {
181 m_currentCharacter = m_currentSubstring.currentCharacter();
182 updateAdvanceFunctionPointers();
183 }
184}
185
186inline SegmentedString::SegmentedString(const String& string)
187 : SegmentedString(String { string })
188{
189}
190
191ALWAYS_INLINE void SegmentedString::decrementAndCheckLength()
192{
193 ASSERT(m_currentSubstring.length > 1);
194 if (UNLIKELY(--m_currentSubstring.length == 1))
195 updateAdvanceFunctionPointersForSingleCharacterSubstring();
196}
197
198ALWAYS_INLINE void SegmentedString::advanceWithoutUpdatingLineNumber()
199{
200 if (LIKELY(m_fastPathFlags & Use8BitAdvance)) {
201 m_currentCharacter = *++m_currentSubstring.currentCharacter8;
202 decrementAndCheckLength();
203 return;
204 }
205
206 (this->*m_advanceWithoutUpdatingLineNumberFunction)();
207}
208
209inline void SegmentedString::startNewLine()
210{
211 ++m_currentLine;
212 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed();
213}
214
215inline void SegmentedString::processPossibleNewline()
216{
217 if (m_currentCharacter == '\n')
218 startNewLine();
219}
220
221inline void SegmentedString::advance()
222{
223 if (LIKELY(m_fastPathFlags & Use8BitAdvance)) {
224 ASSERT(m_currentSubstring.length > 1);
225 bool lastCharacterWasNewline = m_currentCharacter == '\n';
226 m_currentCharacter = *++m_currentSubstring.currentCharacter8;
227 bool haveOneCharacterLeft = --m_currentSubstring.length == 1;
228 if (LIKELY(!(lastCharacterWasNewline | haveOneCharacterLeft)))
229 return;
230 if (lastCharacterWasNewline & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers))
231 startNewLine();
232 if (haveOneCharacterLeft)
233 updateAdvanceFunctionPointersForSingleCharacterSubstring();
234 return;
235 }
236
237 (this->*m_advanceAndUpdateLineNumberFunction)();
238}
239
// Advances by one character without checking it for a newline. The caller must guarantee that
// the current character is not '\n' (asserted), because no line bookkeeping happens here.
ALWAYS_INLINE void SegmentedString::advancePastNonNewline()
{
    ASSERT(m_currentCharacter != '\n');
    advanceWithoutUpdatingLineNumber();
}
245
246inline void SegmentedString::advancePastNewline()
247{
248 ASSERT(m_currentCharacter == '\n');
249 if (m_currentSubstring.length > 1) {
250 if (m_currentSubstring.doNotExcludeLineNumbers)
251 startNewLine();
252 m_currentCharacter = m_currentSubstring.currentCharacterPreIncrement();
253 decrementAndCheckLength();
254 return;
255 }
256
257 (this->*m_advanceAndUpdateLineNumberFunction)();
258}
259
260inline unsigned SegmentedString::numberOfCharactersConsumed() const
261{
262 return m_numberOfCharactersConsumedPriorToCurrentSubstring + m_currentSubstring.numberOfCharactersConsumed();
263}
264
265template<typename CharacterType> ALWAYS_INLINE bool SegmentedString::characterMismatch(CharacterType a, char b, bool lettersIgnoringASCIICase)
266{
267 return lettersIgnoringASCIICase ? !isASCIIAlphaCaselessEqual(a, b) : a != b;
268}
269
270template<unsigned lengthIncludingTerminator, bool lettersIgnoringASCIICase> SegmentedString::AdvancePastResult SegmentedString::advancePast(const char (&literal)[lengthIncludingTerminator])
271{
272 constexpr unsigned length = lengthIncludingTerminator - 1;
273 ASSERT(!literal[length]);
274 ASSERT(!strchr(literal, '\n'));
275 if (length + 1 < m_currentSubstring.length) {
276 if (m_currentSubstring.is8Bit) {
277 for (unsigned i = 0; i < length; ++i) {
278 if (characterMismatch(m_currentSubstring.currentCharacter8[i], literal[i], lettersIgnoringASCIICase))
279 return DidNotMatch;
280 }
281 m_currentSubstring.currentCharacter8 += length;
282 m_currentCharacter = *m_currentSubstring.currentCharacter8;
283 } else {
284 for (unsigned i = 0; i < length; ++i) {
285 if (characterMismatch(m_currentSubstring.currentCharacter16[i], literal[i], lettersIgnoringASCIICase))
286 return DidNotMatch;
287 }
288 m_currentSubstring.currentCharacter16 += length;
289 m_currentCharacter = *m_currentSubstring.currentCharacter16;
290 }
291 m_currentSubstring.length -= length;
292 return DidMatch;
293 }
294 return advancePastSlowCase(literal, lettersIgnoringASCIICase);
295}
296
297inline void SegmentedString::updateAdvanceFunctionPointers()
298{
299 if (m_currentSubstring.length > 1) {
300 if (m_currentSubstring.is8Bit) {
301 m_fastPathFlags = Use8BitAdvance;
302 if (m_currentSubstring.doNotExcludeLineNumbers)
303 m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
304 return;
305 }
306 m_fastPathFlags = NoFastPath;
307 m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16;
308 if (m_currentSubstring.doNotExcludeLineNumbers)
309 m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceAndUpdateLineNumber16;
310 else
311 m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16;
312 return;
313 }
314
315 if (!m_currentSubstring.length) {
316 updateAdvanceFunctionPointersForEmptyString();
317 return;
318 }
319
320 updateAdvanceFunctionPointersForSingleCharacterSubstring();
321}
322
323}
324