1 | /* |
2 | Copyright (C) 2004-2016 Apple Inc. All rights reserved. |
3 | |
4 | This library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Library General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2 of the License, or (at your option) any later version. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Library General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Library General Public License |
15 | along with this library; see the file COPYING.LIB. If not, write to |
16 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
17 | Boston, MA 02110-1301, USA. |
18 | */ |
19 | |
20 | #pragma once |
21 | |
22 | #include <wtf/Deque.h> |
23 | #include <wtf/text/WTFString.h> |
24 | |
25 | namespace WebCore { |
26 | |
27 | // FIXME: This should not start with "k". |
28 | // FIXME: This is a shared tokenizer concept, not a SegmentedString concept, but this is the only common header for now. |
29 | constexpr LChar kEndOfFileMarker = 0; |
30 | |
31 | class SegmentedString { |
32 | public: |
33 | SegmentedString() = default; |
34 | SegmentedString(String&&); |
35 | SegmentedString(const String&); |
36 | |
37 | SegmentedString(SegmentedString&&) = delete; |
38 | SegmentedString(const SegmentedString&) = delete; |
39 | |
40 | SegmentedString& operator=(SegmentedString&&); |
41 | SegmentedString& operator=(const SegmentedString&) = default; |
42 | |
43 | void clear(); |
44 | void close(); |
45 | |
46 | void append(SegmentedString&&); |
47 | void append(const SegmentedString&); |
48 | |
49 | void append(String&&); |
50 | void append(const String&); |
51 | |
52 | void pushBack(String&&); |
53 | |
54 | void setExcludeLineNumbers(); |
55 | |
56 | bool isEmpty() const { return !m_currentSubstring.length; } |
57 | unsigned length() const; |
58 | |
59 | bool isClosed() const { return m_isClosed; } |
60 | |
61 | void advance(); |
62 | void advancePastNonNewline(); // Faster than calling advance when we know the current character is not a newline. |
63 | void advancePastNewline(); // Faster than calling advance when we know the current character is a newline. |
64 | |
65 | enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters }; |
66 | template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast<length, false>(literal); } |
67 | template<unsigned length> AdvancePastResult advancePastLettersIgnoringASCIICase(const char (&literal)[length]) { return advancePast<length, true>(literal); } |
68 | |
69 | unsigned numberOfCharactersConsumed() const; |
70 | |
71 | String toString() const; |
72 | |
73 | UChar currentCharacter() const { return m_currentCharacter; } |
74 | |
75 | OrdinalNumber currentColumn() const; |
76 | OrdinalNumber currentLine() const; |
77 | |
78 | // Sets value of line/column variables. Column is specified indirectly by a parameter columnAfterProlog |
79 | // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed. |
80 | void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAfterProlog, int prologLength); |
81 | |
82 | private: |
83 | struct Substring { |
84 | Substring() = default; |
85 | Substring(String&&); |
86 | |
87 | UChar currentCharacter() const; |
88 | UChar currentCharacterPreIncrement(); |
89 | |
90 | unsigned numberOfCharactersConsumed() const; |
91 | void appendTo(StringBuilder&) const; |
92 | |
93 | String string; |
94 | unsigned length { 0 }; |
95 | bool is8Bit; |
96 | union { |
97 | const LChar* currentCharacter8; |
98 | const UChar* currentCharacter16; |
99 | }; |
100 | bool doNotExcludeLineNumbers { true }; |
101 | }; |
102 | |
103 | enum FastPathFlags { |
104 | NoFastPath = 0, |
105 | Use8BitAdvanceAndUpdateLineNumbers = 1 << 0, |
106 | Use8BitAdvance = 1 << 1, |
107 | }; |
108 | |
109 | void appendSubstring(Substring&&); |
110 | |
111 | void processPossibleNewline(); |
112 | void startNewLine(); |
113 | |
114 | void advanceWithoutUpdatingLineNumber(); |
115 | void advanceWithoutUpdatingLineNumber16(); |
116 | void advanceAndUpdateLineNumber16(); |
117 | void advancePastSingleCharacterSubstringWithoutUpdatingLineNumber(); |
118 | void advancePastSingleCharacterSubstring(); |
119 | void advanceEmpty(); |
120 | |
121 | void updateAdvanceFunctionPointers(); |
122 | void updateAdvanceFunctionPointersForEmptyString(); |
123 | void updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
124 | |
125 | void decrementAndCheckLength(); |
126 | |
127 | template<typename CharacterType> static bool characterMismatch(CharacterType, char, bool lettersIgnoringASCIICase); |
128 | template<unsigned length, bool lettersIgnoringASCIICase> AdvancePastResult advancePast(const char (&literal)[length]); |
129 | AdvancePastResult advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase); |
130 | |
131 | Substring m_currentSubstring; |
132 | Deque<Substring> m_otherSubstrings; |
133 | |
134 | bool m_isClosed { false }; |
135 | |
136 | UChar m_currentCharacter { 0 }; |
137 | |
138 | unsigned m_numberOfCharactersConsumedPriorToCurrentSubstring { 0 }; |
139 | unsigned m_numberOfCharactersConsumedPriorToCurrentLine { 0 }; |
140 | int m_currentLine { 0 }; |
141 | |
142 | unsigned char m_fastPathFlags { NoFastPath }; |
143 | void (SegmentedString::*m_advanceWithoutUpdatingLineNumberFunction)() { &SegmentedString::advanceEmpty }; |
144 | void (SegmentedString::*m_advanceAndUpdateLineNumberFunction)() { &SegmentedString::advanceEmpty }; |
145 | }; |
146 | |
147 | inline SegmentedString::Substring::Substring(String&& passedString) |
148 | : string(WTFMove(passedString)) |
149 | , length(string.length()) |
150 | { |
151 | if (length) { |
152 | is8Bit = string.impl()->is8Bit(); |
153 | if (is8Bit) |
154 | currentCharacter8 = string.impl()->characters8(); |
155 | else |
156 | currentCharacter16 = string.impl()->characters16(); |
157 | } |
158 | } |
159 | |
160 | inline unsigned SegmentedString::Substring::numberOfCharactersConsumed() const |
161 | { |
162 | return string.length() - length; |
163 | } |
164 | |
165 | ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacter() const |
166 | { |
167 | ASSERT(length); |
168 | return is8Bit ? *currentCharacter8 : *currentCharacter16; |
169 | } |
170 | |
171 | ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacterPreIncrement() |
172 | { |
173 | ASSERT(length); |
174 | return is8Bit ? *++currentCharacter8 : *++currentCharacter16; |
175 | } |
176 | |
177 | inline SegmentedString::SegmentedString(String&& string) |
178 | : m_currentSubstring(WTFMove(string)) |
179 | { |
180 | if (m_currentSubstring.length) { |
181 | m_currentCharacter = m_currentSubstring.currentCharacter(); |
182 | updateAdvanceFunctionPointers(); |
183 | } |
184 | } |
185 | |
186 | inline SegmentedString::SegmentedString(const String& string) |
187 | : SegmentedString(String { string }) |
188 | { |
189 | } |
190 | |
191 | ALWAYS_INLINE void SegmentedString::decrementAndCheckLength() |
192 | { |
193 | ASSERT(m_currentSubstring.length > 1); |
194 | if (UNLIKELY(--m_currentSubstring.length == 1)) |
195 | updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
196 | } |
197 | |
198 | ALWAYS_INLINE void SegmentedString::advanceWithoutUpdatingLineNumber() |
199 | { |
200 | if (LIKELY(m_fastPathFlags & Use8BitAdvance)) { |
201 | m_currentCharacter = *++m_currentSubstring.currentCharacter8; |
202 | decrementAndCheckLength(); |
203 | return; |
204 | } |
205 | |
206 | (this->*m_advanceWithoutUpdatingLineNumberFunction)(); |
207 | } |
208 | |
209 | inline void SegmentedString::startNewLine() |
210 | { |
211 | ++m_currentLine; |
212 | m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed(); |
213 | } |
214 | |
215 | inline void SegmentedString::processPossibleNewline() |
216 | { |
217 | if (m_currentCharacter == '\n') |
218 | startNewLine(); |
219 | } |
220 | |
221 | inline void SegmentedString::advance() |
222 | { |
223 | if (LIKELY(m_fastPathFlags & Use8BitAdvance)) { |
224 | ASSERT(m_currentSubstring.length > 1); |
225 | bool lastCharacterWasNewline = m_currentCharacter == '\n'; |
226 | m_currentCharacter = *++m_currentSubstring.currentCharacter8; |
227 | bool haveOneCharacterLeft = --m_currentSubstring.length == 1; |
228 | if (LIKELY(!(lastCharacterWasNewline | haveOneCharacterLeft))) |
229 | return; |
230 | if (lastCharacterWasNewline & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers)) |
231 | startNewLine(); |
232 | if (haveOneCharacterLeft) |
233 | updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
234 | return; |
235 | } |
236 | |
237 | (this->*m_advanceAndUpdateLineNumberFunction)(); |
238 | } |
239 | |
240 | ALWAYS_INLINE void SegmentedString::advancePastNonNewline() |
241 | { |
242 | ASSERT(m_currentCharacter != '\n'); |
243 | advanceWithoutUpdatingLineNumber(); |
244 | } |
245 | |
246 | inline void SegmentedString::advancePastNewline() |
247 | { |
248 | ASSERT(m_currentCharacter == '\n'); |
249 | if (m_currentSubstring.length > 1) { |
250 | if (m_currentSubstring.doNotExcludeLineNumbers) |
251 | startNewLine(); |
252 | m_currentCharacter = m_currentSubstring.currentCharacterPreIncrement(); |
253 | decrementAndCheckLength(); |
254 | return; |
255 | } |
256 | |
257 | (this->*m_advanceAndUpdateLineNumberFunction)(); |
258 | } |
259 | |
260 | inline unsigned SegmentedString::numberOfCharactersConsumed() const |
261 | { |
262 | return m_numberOfCharactersConsumedPriorToCurrentSubstring + m_currentSubstring.numberOfCharactersConsumed(); |
263 | } |
264 | |
265 | template<typename CharacterType> ALWAYS_INLINE bool SegmentedString::characterMismatch(CharacterType a, char b, bool lettersIgnoringASCIICase) |
266 | { |
267 | return lettersIgnoringASCIICase ? !isASCIIAlphaCaselessEqual(a, b) : a != b; |
268 | } |
269 | |
270 | template<unsigned lengthIncludingTerminator, bool lettersIgnoringASCIICase> SegmentedString::AdvancePastResult SegmentedString::advancePast(const char (&literal)[lengthIncludingTerminator]) |
271 | { |
272 | constexpr unsigned length = lengthIncludingTerminator - 1; |
273 | ASSERT(!literal[length]); |
274 | ASSERT(!strchr(literal, '\n')); |
275 | if (length + 1 < m_currentSubstring.length) { |
276 | if (m_currentSubstring.is8Bit) { |
277 | for (unsigned i = 0; i < length; ++i) { |
278 | if (characterMismatch(m_currentSubstring.currentCharacter8[i], literal[i], lettersIgnoringASCIICase)) |
279 | return DidNotMatch; |
280 | } |
281 | m_currentSubstring.currentCharacter8 += length; |
282 | m_currentCharacter = *m_currentSubstring.currentCharacter8; |
283 | } else { |
284 | for (unsigned i = 0; i < length; ++i) { |
285 | if (characterMismatch(m_currentSubstring.currentCharacter16[i], literal[i], lettersIgnoringASCIICase)) |
286 | return DidNotMatch; |
287 | } |
288 | m_currentSubstring.currentCharacter16 += length; |
289 | m_currentCharacter = *m_currentSubstring.currentCharacter16; |
290 | } |
291 | m_currentSubstring.length -= length; |
292 | return DidMatch; |
293 | } |
294 | return advancePastSlowCase(literal, lettersIgnoringASCIICase); |
295 | } |
296 | |
297 | inline void SegmentedString::updateAdvanceFunctionPointers() |
298 | { |
299 | if (m_currentSubstring.length > 1) { |
300 | if (m_currentSubstring.is8Bit) { |
301 | m_fastPathFlags = Use8BitAdvance; |
302 | if (m_currentSubstring.doNotExcludeLineNumbers) |
303 | m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; |
304 | return; |
305 | } |
306 | m_fastPathFlags = NoFastPath; |
307 | m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16; |
308 | if (m_currentSubstring.doNotExcludeLineNumbers) |
309 | m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceAndUpdateLineNumber16; |
310 | else |
311 | m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16; |
312 | return; |
313 | } |
314 | |
315 | if (!m_currentSubstring.length) { |
316 | updateAdvanceFunctionPointersForEmptyString(); |
317 | return; |
318 | } |
319 | |
320 | updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
321 | } |
322 | |
323 | } |
324 | |