Lexer.cpp source code [jsc/Source/JavaScriptCore/parser/Lexer.cpp]

1	/*
2	* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3	* Copyright (C) 2006-2019 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5	* Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6	* Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
7	*
8	* This library is free software; you can redistribute it and/or
9	* modify it under the terms of the GNU Library General Public
10	* License as published by the Free Software Foundation; either
11	* version 2 of the License, or (at your option) any later version.
12	*
13	* This library is distributed in the hope that it will be useful,
14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16	* Library General Public License for more details.
17	*
18	* You should have received a copy of the GNU Library General Public License
19	* along with this library; see the file COPYING.LIB. If not, write to
20	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21	* Boston, MA 02110-1301, USA.
22	*
23	*/
24
25	#include "config.h"
26	#include "Lexer.h"
27
28	#include "BuiltinNames.h"
29	#include "Identifier.h"
30	#include "JSCInlines.h"
31	#include "JSFunctionInlines.h"
32	#include "KeywordLookup.h"
33	#include "Lexer.lut.h"
34	#include "Nodes.h"
35	#include "ParseInt.h"
36	#include "Parser.h"
37	#include <ctype.h>
38	#include <limits.h>
39	#include <string.h>
40	#include <wtf/Assertions.h>
41	#include <wtf/HexNumber.h>
42	#include <wtf/Variant.h>
43	#include <wtf/dtoa.h>
44
45	namespace JSC {
46
47	bool isLexerKeyword(const Identifier& identifier)
48	{
49	return JSC::mainTable.entry(identifier);
50	}
51
// Classification assigned to every Latin-1 code unit by typesOfLatin1Characters.
enum CharacterType {
    // Identifier-related types. These must stay first and in this order:
    // isIdentStart() tests for equality with CharacterIdentifierStart and
    // isIdentPart() tests for "<= CharacterOtherIdentifierPart".
    CharacterIdentifierStart,
    CharacterZero,
    CharacterNumber,

    // For single-byte characters grandfathered into Other_ID_Continue -- namely just U+00B7 MIDDLE DOT.
    // (http://unicode.org/reports/tr31/#Backward_Compatibility)
    CharacterOtherIdentifierPart,

    // Everything below can never be part of an identifier.
    CharacterInvalid,
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterBackQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // Operator characters.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Other types.
    CharacterWhiteSpace,
    CharacterHash,
    CharacterPrivateIdentifierStart
};

// Fail the build, rather than silently misclassify characters, if an enumerator
// is ever inserted ahead of the identifier-related types.
static_assert(CharacterIdentifierStart == 0 && CharacterZero == 1 && CharacterNumber == 2 && CharacterOtherIdentifierPart == 3,
    "identifier-related character types must be the four smallest CharacterType values");
101
102	// 256 Latin-1 codes
103	static constexpr const unsigned short typesOfLatin1Characters[`256`] = {
104	/ 0 - Null / CharacterInvalid,
105	/ 1 - Start of Heading / CharacterInvalid,
106	/ 2 - Start of Text / CharacterInvalid,
107	/ 3 - End of Text / CharacterInvalid,
108	/ 4 - End of Transm. / CharacterInvalid,
109	/ 5 - Enquiry / CharacterInvalid,
110	/ 6 - Acknowledgment / CharacterInvalid,
111	/ 7 - Bell / CharacterInvalid,
112	/ 8 - Back Space / CharacterInvalid,
113	/ 9 - Horizontal Tab / CharacterWhiteSpace,
114	/ 10 - Line Feed / CharacterLineTerminator,
115	/ 11 - Vertical Tab / CharacterWhiteSpace,
116	/ 12 - Form Feed / CharacterWhiteSpace,
117	/ 13 - Carriage Return / CharacterLineTerminator,
118	/ 14 - Shift Out / CharacterInvalid,
119	/ 15 - Shift In / CharacterInvalid,
120	/ 16 - Data Line Escape / CharacterInvalid,
121	/ 17 - Device Control 1 / CharacterInvalid,
122	/ 18 - Device Control 2 / CharacterInvalid,
123	/ 19 - Device Control 3 / CharacterInvalid,
124	/ 20 - Device Control 4 / CharacterInvalid,
125	/ 21 - Negative Ack. / CharacterInvalid,
126	/ 22 - Synchronous Idle / CharacterInvalid,
127	/ 23 - End of Transmit / CharacterInvalid,
128	/ 24 - Cancel / CharacterInvalid,
129	/ 25 - End of Medium / CharacterInvalid,
130	/ 26 - Substitute / CharacterInvalid,
131	/ 27 - Escape / CharacterInvalid,
132	/ 28 - File Separator / CharacterInvalid,
133	/ 29 - Group Separator / CharacterInvalid,
134	/ 30 - Record Separator / CharacterInvalid,
135	/ 31 - Unit Separator / CharacterInvalid,
136	/ 32 - Space / CharacterWhiteSpace,
137	/ 33 - ! / CharacterExclamationMark,
138	/ 34 - " / CharacterQuote,
139	/ 35 - # / CharacterHash,
140	/ 36 - $ / CharacterIdentifierStart,
141	/ 37 - % / CharacterModulo,
142	/ 38 - & / CharacterAnd,
143	/ 39 - ' / CharacterQuote,
144	/ 40 - ( / CharacterOpenParen,
145	/ 41 - ) / CharacterCloseParen,
146	/ 42 - * / CharacterMultiply,
147	/ 43 - + / CharacterAdd,
148	/ 44 - , / CharacterComma,
149	/ 45 - - / CharacterSub,
150	/ 46 - . / CharacterDot,
151	/ 47 - / / CharacterSlash,
152	/ 48 - 0 / CharacterZero,
153	/ 49 - 1 / CharacterNumber,
154	/ 50 - 2 / CharacterNumber,
155	/ 51 - 3 / CharacterNumber,
156	/ 52 - 4 / CharacterNumber,
157	/ 53 - 5 / CharacterNumber,
158	/ 54 - 6 / CharacterNumber,
159	/ 55 - 7 / CharacterNumber,
160	/ 56 - 8 / CharacterNumber,
161	/ 57 - 9 / CharacterNumber,
162	/ 58 - : / CharacterColon,
163	/ 59 - ; / CharacterSemicolon,
164	/ 60 - < / CharacterLess,
165	/ 61 - = / CharacterEqual,
166	/ 62 - > / CharacterGreater,
167	/ 63 - ? / CharacterQuestion,
168	/ 64 - @ / CharacterPrivateIdentifierStart,
169	/ 65 - A / CharacterIdentifierStart,
170	/ 66 - B / CharacterIdentifierStart,
171	/ 67 - C / CharacterIdentifierStart,
172	/ 68 - D / CharacterIdentifierStart,
173	/ 69 - E / CharacterIdentifierStart,
174	/ 70 - F / CharacterIdentifierStart,
175	/ 71 - G / CharacterIdentifierStart,
176	/ 72 - H / CharacterIdentifierStart,
177	/ 73 - I / CharacterIdentifierStart,
178	/ 74 - J / CharacterIdentifierStart,
179	/ 75 - K / CharacterIdentifierStart,
180	/ 76 - L / CharacterIdentifierStart,
181	/ 77 - M / CharacterIdentifierStart,
182	/ 78 - N / CharacterIdentifierStart,
183	/ 79 - O / CharacterIdentifierStart,
184	/ 80 - P / CharacterIdentifierStart,
185	/ 81 - Q / CharacterIdentifierStart,
186	/ 82 - R / CharacterIdentifierStart,
187	/ 83 - S / CharacterIdentifierStart,
188	/ 84 - T / CharacterIdentifierStart,
189	/ 85 - U / CharacterIdentifierStart,
190	/ 86 - V / CharacterIdentifierStart,
191	/ 87 - W / CharacterIdentifierStart,
192	/ 88 - X / CharacterIdentifierStart,
193	/ 89 - Y / CharacterIdentifierStart,
194	/ 90 - Z / CharacterIdentifierStart,
195	/ 91 - [ / CharacterOpenBracket,
196	/ 92 - \ / CharacterBackSlash,
197	/ 93 - ] / CharacterCloseBracket,
198	/ 94 - ^ / CharacterXor,
199	/ 95 - _ / CharacterIdentifierStart,
200	/ 96 - ` / CharacterBackQuote,
201	/ 97 - a / CharacterIdentifierStart,
202	/ 98 - b / CharacterIdentifierStart,
203	/ 99 - c / CharacterIdentifierStart,
204	/ 100 - d / CharacterIdentifierStart,
205	/ 101 - e / CharacterIdentifierStart,
206	/ 102 - f / CharacterIdentifierStart,
207	/ 103 - g / CharacterIdentifierStart,
208	/ 104 - h / CharacterIdentifierStart,
209	/ 105 - i / CharacterIdentifierStart,
210	/ 106 - j / CharacterIdentifierStart,
211	/ 107 - k / CharacterIdentifierStart,
212	/ 108 - l / CharacterIdentifierStart,
213	/ 109 - m / CharacterIdentifierStart,
214	/ 110 - n / CharacterIdentifierStart,
215	/ 111 - o / CharacterIdentifierStart,
216	/ 112 - p / CharacterIdentifierStart,
217	/ 113 - q / CharacterIdentifierStart,
218	/ 114 - r / CharacterIdentifierStart,
219	/ 115 - s / CharacterIdentifierStart,
220	/ 116 - t / CharacterIdentifierStart,
221	/ 117 - u / CharacterIdentifierStart,
222	/ 118 - v / CharacterIdentifierStart,
223	/ 119 - w / CharacterIdentifierStart,
224	/ 120 - x / CharacterIdentifierStart,
225	/ 121 - y / CharacterIdentifierStart,
226	/ 122 - z / CharacterIdentifierStart,
227	/ 123 - { / CharacterOpenBrace,
228	/ 124 - \| / CharacterOr,
229	/ 125 - } / CharacterCloseBrace,
230	/ 126 - ~ / CharacterTilde,
231	/ 127 - Delete / CharacterInvalid,
232	/ 128 - Cc category / CharacterInvalid,
233	/ 129 - Cc category / CharacterInvalid,
234	/ 130 - Cc category / CharacterInvalid,
235	/ 131 - Cc category / CharacterInvalid,
236	/ 132 - Cc category / CharacterInvalid,
237	/ 133 - Cc category / CharacterInvalid,
238	/ 134 - Cc category / CharacterInvalid,
239	/ 135 - Cc category / CharacterInvalid,
240	/ 136 - Cc category / CharacterInvalid,
241	/ 137 - Cc category / CharacterInvalid,
242	/ 138 - Cc category / CharacterInvalid,
243	/ 139 - Cc category / CharacterInvalid,
244	/ 140 - Cc category / CharacterInvalid,
245	/ 141 - Cc category / CharacterInvalid,
246	/ 142 - Cc category / CharacterInvalid,
247	/ 143 - Cc category / CharacterInvalid,
248	/ 144 - Cc category / CharacterInvalid,
249	/ 145 - Cc category / CharacterInvalid,
250	/ 146 - Cc category / CharacterInvalid,
251	/ 147 - Cc category / CharacterInvalid,
252	/ 148 - Cc category / CharacterInvalid,
253	/ 149 - Cc category / CharacterInvalid,
254	/ 150 - Cc category / CharacterInvalid,
255	/ 151 - Cc category / CharacterInvalid,
256	/ 152 - Cc category / CharacterInvalid,
257	/ 153 - Cc category / CharacterInvalid,
258	/ 154 - Cc category / CharacterInvalid,
259	/ 155 - Cc category / CharacterInvalid,
260	/ 156 - Cc category / CharacterInvalid,
261	/ 157 - Cc category / CharacterInvalid,
262	/ 158 - Cc category / CharacterInvalid,
263	/ 159 - Cc category / CharacterInvalid,
264	/ 160 - Zs category (nbsp) / CharacterWhiteSpace,
265	/ 161 - Po category / CharacterInvalid,
266	/ 162 - Sc category / CharacterInvalid,
267	/ 163 - Sc category / CharacterInvalid,
268	/ 164 - Sc category / CharacterInvalid,
269	/ 165 - Sc category / CharacterInvalid,
270	/ 166 - So category / CharacterInvalid,
271	/ 167 - So category / CharacterInvalid,
272	/ 168 - Sk category / CharacterInvalid,
273	/ 169 - So category / CharacterInvalid,
274	/ 170 - Ll category / CharacterIdentifierStart,
275	/ 171 - Pi category / CharacterInvalid,
276	/ 172 - Sm category / CharacterInvalid,
277	/ 173 - Cf category / CharacterInvalid,
278	/ 174 - So category / CharacterInvalid,
279	/ 175 - Sk category / CharacterInvalid,
280	/ 176 - So category / CharacterInvalid,
281	/ 177 - Sm category / CharacterInvalid,
282	/ 178 - No category / CharacterInvalid,
283	/ 179 - No category / CharacterInvalid,
284	/ 180 - Sk category / CharacterInvalid,
285	/ 181 - Ll category / CharacterIdentifierStart,
286	/ 182 - So category / CharacterInvalid,
287	/ 183 - Po category / CharacterOtherIdentifierPart,
288	/ 184 - Sk category / CharacterInvalid,
289	/ 185 - No category / CharacterInvalid,
290	/ 186 - Ll category / CharacterIdentifierStart,
291	/ 187 - Pf category / CharacterInvalid,
292	/ 188 - No category / CharacterInvalid,
293	/ 189 - No category / CharacterInvalid,
294	/ 190 - No category / CharacterInvalid,
295	/ 191 - Po category / CharacterInvalid,
296	/ 192 - Lu category / CharacterIdentifierStart,
297	/ 193 - Lu category / CharacterIdentifierStart,
298	/ 194 - Lu category / CharacterIdentifierStart,
299	/ 195 - Lu category / CharacterIdentifierStart,
300	/ 196 - Lu category / CharacterIdentifierStart,
301	/ 197 - Lu category / CharacterIdentifierStart,
302	/ 198 - Lu category / CharacterIdentifierStart,
303	/ 199 - Lu category / CharacterIdentifierStart,
304	/ 200 - Lu category / CharacterIdentifierStart,
305	/ 201 - Lu category / CharacterIdentifierStart,
306	/ 202 - Lu category / CharacterIdentifierStart,
307	/ 203 - Lu category / CharacterIdentifierStart,
308	/ 204 - Lu category / CharacterIdentifierStart,
309	/ 205 - Lu category / CharacterIdentifierStart,
310	/ 206 - Lu category / CharacterIdentifierStart,
311	/ 207 - Lu category / CharacterIdentifierStart,
312	/ 208 - Lu category / CharacterIdentifierStart,
313	/ 209 - Lu category / CharacterIdentifierStart,
314	/ 210 - Lu category / CharacterIdentifierStart,
315	/ 211 - Lu category / CharacterIdentifierStart,
316	/ 212 - Lu category / CharacterIdentifierStart,
317	/ 213 - Lu category / CharacterIdentifierStart,
318	/ 214 - Lu category / CharacterIdentifierStart,
319	/ 215 - Sm category / CharacterInvalid,
320	/ 216 - Lu category / CharacterIdentifierStart,
321	/ 217 - Lu category / CharacterIdentifierStart,
322	/ 218 - Lu category / CharacterIdentifierStart,
323	/ 219 - Lu category / CharacterIdentifierStart,
324	/ 220 - Lu category / CharacterIdentifierStart,
325	/ 221 - Lu category / CharacterIdentifierStart,
326	/ 222 - Lu category / CharacterIdentifierStart,
327	/ 223 - Ll category / CharacterIdentifierStart,
328	/ 224 - Ll category / CharacterIdentifierStart,
329	/ 225 - Ll category / CharacterIdentifierStart,
330	/ 226 - Ll category / CharacterIdentifierStart,
331	/ 227 - Ll category / CharacterIdentifierStart,
332	/ 228 - Ll category / CharacterIdentifierStart,
333	/ 229 - Ll category / CharacterIdentifierStart,
334	/ 230 - Ll category / CharacterIdentifierStart,
335	/ 231 - Ll category / CharacterIdentifierStart,
336	/ 232 - Ll category / CharacterIdentifierStart,
337	/ 233 - Ll category / CharacterIdentifierStart,
338	/ 234 - Ll category / CharacterIdentifierStart,
339	/ 235 - Ll category / CharacterIdentifierStart,
340	/ 236 - Ll category / CharacterIdentifierStart,
341	/ 237 - Ll category / CharacterIdentifierStart,
342	/ 238 - Ll category / CharacterIdentifierStart,
343	/ 239 - Ll category / CharacterIdentifierStart,
344	/ 240 - Ll category / CharacterIdentifierStart,
345	/ 241 - Ll category / CharacterIdentifierStart,
346	/ 242 - Ll category / CharacterIdentifierStart,
347	/ 243 - Ll category / CharacterIdentifierStart,
348	/ 244 - Ll category / CharacterIdentifierStart,
349	/ 245 - Ll category / CharacterIdentifierStart,
350	/ 246 - Ll category / CharacterIdentifierStart,
351	/ 247 - Sm category / CharacterInvalid,
352	/ 248 - Ll category / CharacterIdentifierStart,
353	/ 249 - Ll category / CharacterIdentifierStart,
354	/ 250 - Ll category / CharacterIdentifierStart,
355	/ 251 - Ll category / CharacterIdentifierStart,
356	/ 252 - Ll category / CharacterIdentifierStart,
357	/ 253 - Ll category / CharacterIdentifierStart,
358	/ 254 - Ll category / CharacterIdentifierStart,
359	/ 255 - Ll category / CharacterIdentifierStart
360	};
361
362	// This table provides the character that results from \X where X is the index in the table beginning
363	// with SPACE. A table value of 0 means that more processing needs to be done.
364	static constexpr const LChar singleCharacterEscapeValuesForASCII[`128`] = {
365	/ 0 - Null / `0`,
366	/ 1 - Start of Heading / `0`,
367	/ 2 - Start of Text / `0`,
368	/ 3 - End of Text / `0`,
369	/ 4 - End of Transm. / `0`,
370	/ 5 - Enquiry / `0`,
371	/ 6 - Acknowledgment / `0`,
372	/ 7 - Bell / `0`,
373	/ 8 - Back Space / `0`,
374	/ 9 - Horizontal Tab / `0`,
375	/ 10 - Line Feed / `0`,
376	/ 11 - Vertical Tab / `0`,
377	/ 12 - Form Feed / `0`,
378	/ 13 - Carriage Return / `0`,
379	/ 14 - Shift Out / `0`,
380	/ 15 - Shift In / `0`,
381	/ 16 - Data Line Escape / `0`,
382	/ 17 - Device Control 1 / `0`,
383	/ 18 - Device Control 2 / `0`,
384	/ 19 - Device Control 3 / `0`,
385	/ 20 - Device Control 4 / `0`,
386	/ 21 - Negative Ack. / `0`,
387	/ 22 - Synchronous Idle / `0`,
388	/ 23 - End of Transmit / `0`,
389	/ 24 - Cancel / `0`,
390	/ 25 - End of Medium / `0`,
391	/ 26 - Substitute / `0`,
392	/ 27 - Escape / `0`,
393	/ 28 - File Separator / `0`,
394	/ 29 - Group Separator / `0`,
395	/ 30 - Record Separator / `0`,
396	/ 31 - Unit Separator / `0`,
397	/ 32 - Space / `' '`,
398	/ 33 - ! / `'!'`,
399	/ 34 - " / `'"'`,
400	/ 35 - # / `'#'`,
401	/ 36 - $ / `'$'`,
402	/ 37 - % / `'%'`,
403	/ 38 - & / `'&'`,
404	/ 39 - ' / `'\''`,
405	/ 40 - ( / `'('`,
406	/ 41 - ) / `')'`,
407	/ 42 - * / `'*'`,
408	/ 43 - + / `'+'`,
409	/ 44 - , / `','`,
410	/ 45 - - / `'-'`,
411	/ 46 - . / `'.'`,
412	/ 47 - / / `'/'`,
413	/ 48 - 0 / `0`,
414	/ 49 - 1 / `0`,
415	/ 50 - 2 / `0`,
416	/ 51 - 3 / `0`,
417	/ 52 - 4 / `0`,
418	/ 53 - 5 / `0`,
419	/ 54 - 6 / `0`,
420	/ 55 - 7 / `0`,
421	/ 56 - 8 / `0`,
422	/ 57 - 9 / `0`,
423	/ 58 - : / `':'`,
424	/ 59 - ; / `';'`,
425	/ 60 - < / `'<'`,
426	/ 61 - = / `'='`,
427	/ 62 - > / `'>'`,
428	/ 63 - ? / `'?'`,
429	/ 64 - @ / `'@'`,
430	/ 65 - A / `'A'`,
431	/ 66 - B / `'B'`,
432	/ 67 - C / `'C'`,
433	/ 68 - D / `'D'`,
434	/ 69 - E / `'E'`,
435	/ 70 - F / `'F'`,
436	/ 71 - G / `'G'`,
437	/ 72 - H / `'H'`,
438	/ 73 - I / `'I'`,
439	/ 74 - J / `'J'`,
440	/ 75 - K / `'K'`,
441	/ 76 - L / `'L'`,
442	/ 77 - M / `'M'`,
443	/ 78 - N / `'N'`,
444	/ 79 - O / `'O'`,
445	/ 80 - P / `'P'`,
446	/ 81 - Q / `'Q'`,
447	/ 82 - R / `'R'`,
448	/ 83 - S / `'S'`,
449	/ 84 - T / `'T'`,
450	/ 85 - U / `'U'`,
451	/ 86 - V / `'V'`,
452	/ 87 - W / `'W'`,
453	/ 88 - X / `'X'`,
454	/ 89 - Y / `'Y'`,
455	/ 90 - Z / `'Z'`,
456	/ 91 - [ / `'['`,
457	/ 92 - \ / `'\\'`,
458	/ 93 - ] / `']'`,
459	/ 94 - ^ / `'^'`,
460	/ 95 - _ / `'_'`,
461	/ 96 - ` / '`',
462	/ 97 - a / `'a'`,
463	/ 98 - b / `0x08`,
464	/ 99 - c / `'c'`,
465	/ 100 - d / `'d'`,
466	/ 101 - e / `'e'`,
467	/ 102 - f / `0x0C`,
468	/ 103 - g / `'g'`,
469	/ 104 - h / `'h'`,
470	/ 105 - i / `'i'`,
471	/ 106 - j / `'j'`,
472	/ 107 - k / `'k'`,
473	/ 108 - l / `'l'`,
474	/ 109 - m / `'m'`,
475	/ 110 - n / `0x0A`,
476	/ 111 - o / `'o'`,
477	/ 112 - p / `'p'`,
478	/ 113 - q / `'q'`,
479	/ 114 - r / `0x0D`,
480	/ 115 - s / `'s'`,
481	/ 116 - t / `0x09`,
482	/ 117 - u / `0`,
483	/ 118 - v / `0x0B`,
484	/ 119 - w / `'w'`,
485	/ 120 - x / `0`,
486	/ 121 - y / `'y'`,
487	/ 122 - z / `'z'`,
488	/ 123 - { / `'{'`,
489	/ 124 - \| / `'\|'`,
490	/ 125 - } / `'}'`,
491	/ 126 - ~ / `'~'`,
492	/ 127 - Delete / `0`
493	};
494
495	template <typename T>
496	Lexer<T>::Lexer(VM& vm, JSParserBuiltinMode builtinMode, JSParserScriptMode scriptMode)
497	: m_isReparsingFunction(false)
498	, m_vm(vm)
499	, m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
500	, m_scriptMode(scriptMode)
501	{
502	}
503
504	static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
505	{
506	if ((doubleValue \|\| !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
507	return INTEGER;
508	return DOUBLE;
509	}
510
511	template <typename T>
512	Lexer<T>::~Lexer()
513	{
514	}
515
516	template <typename T>
517	String Lexer<T>::invalidCharacterMessage() const
518	{
519	switch (m_current) {
520	case `0`:
521	return "Invalid character: '\\0'"_s;
522	case `10`:
523	return "Invalid character: '\\n'"_s;
524	case `11`:
525	return "Invalid character: '\\v'"_s;
526	case `13`:
527	return "Invalid character: '\\r'"_s;
528	case `35`:
529	return "Invalid character: '#'"_s;
530	case `64`:
531	return "Invalid character: '@'"_s;
532	case `96`:
533	return "Invalid character: '`'"_s;
534	default:
535	return makeString("Invalid character '\\u", hex(m_current, `4`, Lowercase), `'\''`);
536	}
537	}
538
539	template <typename T>
540	ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
541	{
542	ASSERT(m_code <= m_codeEnd);
543	return m_code;
544	}
545
546	template <typename T>
547	void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
548	{
549	m_arena = &arena->identifierArena();
550
551	m_lineNumber = source.firstLine().oneBasedInt();
552	m_lastToken = -`1`;
553
554	StringView sourceString = source.provider()->source();
555
556	if (!sourceString.isNull())
557	setCodeStart(sourceString);
558	else
559	m_codeStart = `0`;
560
561	m_source = &source;
562	m_sourceOffset = source.startOffset();
563	m_codeStartPlusOffset = m_codeStart + source.startOffset();
564	m_code = m_codeStartPlusOffset;
565	m_codeEnd = m_codeStart + source.endOffset();
566	m_error = false;
567	m_atLineStart = true;
568	m_lineStart = m_code;
569	m_lexErrorMessage = String ();
570	m_sourceURLDirective = String ();
571	m_sourceMappingURLDirective = String ();
572
573	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
574	m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
575	m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
576
577	if (LIKELY(m_code < m_codeEnd))
578	m_current = *m_code;
579	else
580	m_current = `0`;
581	ASSERT(currentOffset() == source.startOffset());
582	}
583
584	template <typename T>
585	template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
586	{
587	m_code += shiftAmount;
588	ASSERT(currentOffset() >= currentLineStartOffset());
589	m_current = *m_code;
590	}
591
592	template <typename T>
593	ALWAYS_INLINE void Lexer<T>::shift()
594	{
595	// At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
596	m_current = `0`;
597	++m_code;
598	if (LIKELY(m_code < m_codeEnd))
599	m_current = *m_code;
600	}
601
602	template <typename T>
603	ALWAYS_INLINE bool Lexer<T>::atEnd() const
604	{
605	ASSERT(!m_current \|\| m_code < m_codeEnd);
606	return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
607	}
608
609	template <typename T>
610	ALWAYS_INLINE T Lexer<T>::peek(int offset) const
611	{
612	ASSERT(offset > `0` && offset < `5`);
613	const T* code = m_code + offset;
614	return (code < m_codeEnd) ? *code : `0`;
615	}
616
617	struct ParsedUnicodeEscapeValue {
618	ParsedUnicodeEscapeValue(UChar32 value)
619	: m_value(value)
620	{
621	ASSERT(isValid());
622	}
623
624	enum SpecialValueType { Incomplete = -`2`, Invalid = -`1` };
625	ParsedUnicodeEscapeValue(SpecialValueType type)
626	: m_value(type)
627	{
628	}
629
630	bool isValid() const { return m_value >= `0`; }
631	bool isIncomplete() const { return m_value == Incomplete; }
632
633	UChar32 value() const
634	{
635	ASSERT(isValid());
636	return m_value;
637	}
638
639	private:
640	UChar32 m_value;
641	};
642
643	template<typename CharacterType>
644	ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
645	{
646	if (m_current == `'{'`) {
647	shift();
648	UChar32 codePoint = `0`;
649	do {
650	if (!isASCIIHexDigit(m_current))
651	return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
652	codePoint = (codePoint << `4`) \| toASCIIHexValue(m_current);
653	if (codePoint > UCHAR_MAX_VALUE) {
654	// For raw template literal syntax, we consume `NotEscapeSequence`.
655	// Here, we consume NotCodePoint's HexDigits.
656	//
657	// NotEscapeSequence ::
658	// u { [lookahread not one of HexDigit]
659	// u { NotCodePoint
660	// u { CodePoint [lookahead != }]
661	//
662	// NotCodePoint ::
663	// HexDigits but not if MV of HexDigits <= 0x10FFFF
664	//
665	// CodePoint ::
666	// HexDigits but not if MV of HexDigits > 0x10FFFF
667	shift();
668	while (isASCIIHexDigit(m_current))
669	shift();
670
671	return atEnd() ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
672	}
673	shift();
674	} while (m_current != `'}'`);
675	shift();
676	return codePoint;
677	}
678
679	auto character2 = peek(`1`);
680	auto character3 = peek(`2`);
681	auto character4 = peek(`3`);
682	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(character2) \|\| !isASCIIHexDigit(character3) \|\| !isASCIIHexDigit(character4))) {
683	auto result = (m_code + `4`) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
684
685	// For raw template literal syntax, we consume `NotEscapeSequence`.
686	//
687	// NotEscapeSequence ::
688	// u [lookahead not one of HexDigit][lookahead != {]
689	// u HexDigit [lookahead not one of HexDigit]
690	// u HexDigit HexDigit [lookahead not one of HexDigit]
691	// u HexDigit HexDigit HexDigit [lookahead not one of HexDigit]
692	while (isASCIIHexDigit(m_current))
693	shift();
694
695	return result;
696	}
697
698	auto result = convertUnicode(m_current, character2, character3, character4);
699	shift();
700	shift();
701	shift();
702	shift();
703	return result;
704	}
705
706	template <typename T>
707	void Lexer<T>::shiftLineTerminator()
708	{
709	ASSERT(isLineTerminator(m_current));
710
711	m_positionBeforeLastNewline = currentPosition();
712	T prev = m_current;
713	shift();
714
715	if (prev == `'\r'` && m_current == `'\n'`)
716	shift();
717
718	++m_lineNumber;
719	}
720
721	template <typename T>
722	ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
723	{
724	return m_lastToken == CONTINUE \|\| m_lastToken == BREAK \|\| m_lastToken == RETURN \|\| m_lastToken == THROW;
725	}
726
727	template <typename T>
728	ALWAYS_INLINE void Lexer<T>::skipWhitespace()
729	{
730	while (isWhiteSpace(m_current))
731	shift();
732	}
733
734	static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
735	{
736	return u_hasBinaryProperty(c, UCHAR_ID_START);
737	}
738
739	static inline bool isIdentStart(LChar c)
740	{
741	return typesOfLatin1Characters[c] == CharacterIdentifierStart;
742	}
743
744	static inline bool isIdentStart(UChar32 c)
745	{
746	return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
747	}
748
749	static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
750	{
751	return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) \|\| c == `0x200C` \|\| c == `0x200D`;
752	}
753
754	static ALWAYS_INLINE bool isIdentPart(LChar c)
755	{
756	// Character types are divided into two groups depending on whether they can be part of an
757	// identifier or not. Those whose type value is less or equal than CharacterOtherIdentifierPart can be
758	// part of an identifier. (See the CharacterType definition for more details.)
759	return typesOfLatin1Characters[c] <= CharacterOtherIdentifierPart;
760	}
761
762	static ALWAYS_INLINE bool isIdentPart(UChar32 c)
763	{
764	return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
765	}
766
767	static ALWAYS_INLINE bool isIdentPart(UChar c)
768	{
769	return isIdentPart(static_cast<UChar32>(c));
770	}
771
772	template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd)
773	{
774	if (isIdentPart(code[`0`]))
775	return true;
776
777	// Shortest sequence handled below is \u{0}, which is 5 characters.
778	if (!(code[`0`] == `'\\'` && codeEnd - code >= `5` && code[`1`] == `'u'`))
779	return false;
780
781	if (code[`2`] == `'{'`) {
782	UChar32 codePoint = `0`;
783	const CharacterType* pointer;
784	for (pointer = &code[`3`]; pointer < codeEnd; ++pointer) {
785	auto digit = *pointer;
786	if (!isASCIIHexDigit(digit))
787	break;
788	codePoint = (codePoint << `4`) \| toASCIIHexValue(digit);
789	if (codePoint > UCHAR_MAX_VALUE)
790	return false;
791	}
792	return isIdentPart(codePoint) && pointer < codeEnd && *pointer == `'}'`;
793	}
794
795	// Shortest sequence handled below is \uXXXX, which is 6 characters.
796	if (codeEnd - code < `6`)
797	return false;
798
799	auto character1 = code[`2`];
800	auto character2 = code[`3`];
801	auto character3 = code[`4`];
802	auto character4 = code[`5`];
803	return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4)
804	&& isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4));
805	}
806
807	static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
808	{
809	return isIdentPartIncludingEscapeTemplate(code, codeEnd);
810	}
811
812	static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
813	{
814	return isIdentPartIncludingEscapeTemplate(code, codeEnd);
815	}
816
// True for an ASCII decimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIDigitOrSeparator(CharacterType character)
{
    if (isASCIIDigit(character))
        return true;
    return character == '_';
}
822
// True for an ASCII hexadecimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIHexDigitOrSeparator(CharacterType character)
{
    if (isASCIIHexDigit(character))
        return true;
    return character == '_';
}
828
// True for an ASCII binary digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIBinaryDigitOrSeparator(CharacterType character)
{
    if (isASCIIBinaryDigit(character))
        return true;
    return character == '_';
}
834
// True for an ASCII octal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIOctalDigitOrSeparator(CharacterType character)
{
    if (isASCIIOctalDigit(character))
        return true;
    return character == '_';
}
840
841	static inline LChar singleEscape(int c)
842	{
843	if (c < `128`) {
844	ASSERT(static_cast<size_t>(c) < WTF_ARRAY_LENGTH(singleCharacterEscapeValuesForASCII));
845	return singleCharacterEscapeValuesForASCII[c];
846	}
847	return `0`;
848	}
849
850	template <typename T>
851	inline void Lexer<T>::record8(int c)
852	{
853	ASSERT(isLatin1(c));
854	m_buffer8.append(static_cast<LChar>(c));
855	}
856
857	template <typename T>
858	inline void Lexer<T>::append8(const T* p, size_t length)
859	{
860	size_t currentSize = m_buffer8.size();
861	m_buffer8.grow(currentSize + length);
862	LChar* rawBuffer = m_buffer8.data() + currentSize;
863
864	for (size_t i = `0`; i < length; i++) {
865	T c = p[i];
866	ASSERT(isLatin1(c));
867	rawBuffer[i] = c;
868	}
869	}
870
871	template <typename T>
872	inline void Lexer<T>::append16(const LChar* p, size_t length)
873	{
874	size_t currentSize = m_buffer16.size();
875	m_buffer16.grow(currentSize + length);
876	UChar* rawBuffer = m_buffer16.data() + currentSize;
877
878	for (size_t i = `0`; i < length; i++)
879	rawBuffer[i] = p[i];
880	}
881
882	template <typename T>
883	inline void Lexer<T>::record16(T c)
884	{
885	m_buffer16.append(c);
886	}
887
888	template <typename T>
889	inline void Lexer<T>::record16(int c)
890	{
891	ASSERT(c >= `0`);
892	ASSERT(c <= static_cast<int>(USHRT_MAX));
893	m_buffer16.append(static_cast<UChar>(c));
894	}
895
896	template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
897	{
898	ASSERT(codePoint >= `0`);
899	ASSERT(codePoint <= UCHAR_MAX_VALUE);
900	if (U_IS_BMP(codePoint))
901	record16(codePoint);
902	else {
903	UChar codeUnits[`2`] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
904	append16(codeUnits, `2`);
905	}
906	}
907
908	#if !ASSERT_DISABLED
909	bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
910	{
911	if (!ident)
912	return true;
913	/ Just block any use of suspicious identifiers. This is intended to*
914	* be used as a safety net while implementing builtins.
915	*/
916	// FIXME: How can a debug-only assertion be a safety net?
917	if (*ident == vm.propertyNames->builtinNames().callPublicName())
918	return false;
919	if (*ident == vm.propertyNames->builtinNames().applyPublicName())
920	return false;
921	if (*ident == vm.propertyNames->eval)
922	return false;
923	if (*ident == vm.propertyNames->Function)
924	return false;
925	return true;
926	}
927	#endif
928
929	template <>
930	template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
931	{
932	tokenData->escaped = false;
933	const ptrdiff_t remaining = m_codeEnd - m_code;
934	if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) {
935	JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
936	if (keyword != IDENT) {
937	ASSERT((!shouldCreateIdentifier) \|\| tokenData->ident);
938	return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
939	}
940	}
941
942	bool isPrivateName = m_current == `'@'` && m_parsingBuiltinFunction;
943	if (isPrivateName)
944	shift();
945
946	const LChar* identifierStart = currentSourcePtr();
947	unsigned identifierLineStart = currentLineStartOffset();
948
949	while (isIdentPart(m_current))
950	shift();
951
952	if (UNLIKELY(m_current == `'\\'`)) {
953	setOffsetFromSourcePtr(identifierStart, identifierLineStart);
954	return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
955	}
956
957	const Identifier* ident = nullptr;
958
959	if (shouldCreateIdentifier \|\| m_parsingBuiltinFunction) {
960	int identifierLength = currentSourcePtr() - identifierStart;
961	ident = makeIdentifier(identifierStart, identifierLength);
962	if (m_parsingBuiltinFunction) {
963	if (!isSafeBuiltinIdentifier(m_vm, ident) && !isPrivateName) {
964	m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
965	return ERRORTOK;
966	}
967	if (isPrivateName)
968	ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->lookUpPrivateName(*ident));
969	else if (*ident == m_vm.propertyNames->undefinedKeyword)
970	tokenData->ident = &m_vm.propertyNames->undefinedPrivateName;
971	if (!ident)
972	return INVALID_PRIVATE_NAME_ERRORTOK;
973	}
974	tokenData->ident = ident;
975	} else
976	tokenData->ident = nullptr;
977
978	if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) && !isPrivateName) {
979	ASSERT(shouldCreateIdentifier);
980	if (remaining < maxTokenLength) {
981	const HashTableValue* entry = JSC::mainTable.entry(*ident);
982	ASSERT((remaining < maxTokenLength) \|\| !entry);
983	if (!entry)
984	return IDENT;
985	JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
986	return (token != RESERVED_IF_STRICT) \|\| strictMode ? token : IDENT;
987	}
988	return IDENT;
989	}
990
991	return IDENT;
992	}
993
994	template <>
995	template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
996	{
997	tokenData->escaped = false;
998	const ptrdiff_t remaining = m_codeEnd - m_code;
999	if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) {
1000	JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
1001	if (keyword != IDENT) {
1002	ASSERT((!shouldCreateIdentifier) \|\| tokenData->ident);
1003	return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
1004	}
1005	}
1006
1007	bool isPrivateName = m_current == `'@'` && m_parsingBuiltinFunction;
1008	if (isPrivateName)
1009	shift();
1010
1011	const UChar* identifierStart = currentSourcePtr();
1012	int identifierLineStart = currentLineStartOffset();
1013
1014	UChar orAllChars = `0`;
1015
1016	while (isIdentPart(m_current)) {
1017	orAllChars \|= m_current;
1018	shift();
1019	}
1020
1021	if (UNLIKELY(m_current == `'\\'`)) {
1022	ASSERT(!isPrivateName);
1023	setOffsetFromSourcePtr(identifierStart, identifierLineStart);
1024	return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
1025	}
1026
1027	bool isAll8Bit = false;
1028
1029	if (!(orAllChars & ~`0xff`))
1030	isAll8Bit = true;
1031
1032	const Identifier* ident = nullptr;
1033
1034	if (shouldCreateIdentifier \|\| m_parsingBuiltinFunction) {
1035	int identifierLength = currentSourcePtr() - identifierStart;
1036	if (isAll8Bit)
1037	ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
1038	else
1039	ident = makeIdentifier(identifierStart, identifierLength);
1040	if (m_parsingBuiltinFunction) {
1041	if (!isSafeBuiltinIdentifier(m_vm, ident) && !isPrivateName) {
1042	m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
1043	return ERRORTOK;
1044	}
1045	if (isPrivateName)
1046	ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->lookUpPrivateName(*ident));
1047	else if (*ident == m_vm.propertyNames->undefinedKeyword)
1048	tokenData->ident = &m_vm.propertyNames->undefinedPrivateName;
1049	if (!ident)
1050	return INVALID_PRIVATE_NAME_ERRORTOK;
1051	}
1052	tokenData->ident = ident;
1053	} else
1054	tokenData->ident = nullptr;
1055
1056	if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) && !isPrivateName) {
1057	ASSERT(shouldCreateIdentifier);
1058	if (remaining < maxTokenLength) {
1059	const HashTableValue* entry = JSC::mainTable.entry(*ident);
1060	ASSERT((remaining < maxTokenLength) \|\| !entry);
1061	if (!entry)
1062	return IDENT;
1063	JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1064	return (token != RESERVED_IF_STRICT) \|\| strictMode ? token : IDENT;
1065	}
1066	return IDENT;
1067	}
1068
1069	return IDENT;
1070	}
1071
1072	template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
1073	{
1074	tokenData->escaped = true;
1075	auto identifierStart = currentSourcePtr();
1076	bool bufferRequired = false;
1077
1078	while (true) {
1079	if (LIKELY(isIdentPart(m_current))) {
1080	shift();
1081	continue;
1082	}
1083	if (LIKELY(m_current != `'\\'`))
1084	break;
1085
1086	// \uXXXX unicode characters.
1087	bufferRequired = true;
1088	if (identifierStart != currentSourcePtr())
1089	m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1090	shift();
1091	if (UNLIKELY(m_current != `'u'`))
1092	return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
1093	shift();
1094	auto character = parseUnicodeEscape();
1095	if (UNLIKELY(!character.isValid()))
1096	return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1097	if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character.value()) : !isIdentStart(character.value())))
1098	return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1099	if (shouldCreateIdentifier)
1100	recordUnicodeCodePoint(character.value());
1101	identifierStart = currentSourcePtr();
1102	}
1103
1104	int identifierLength;
1105	const Identifier* ident = nullptr;
1106	if (shouldCreateIdentifier) {
1107	if (!bufferRequired) {
1108	identifierLength = currentSourcePtr() - identifierStart;
1109	ident = makeIdentifier(identifierStart, identifierLength);
1110	} else {
1111	if (identifierStart != currentSourcePtr())
1112	m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1113	ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1114	}
1115
1116	tokenData->ident = ident;
1117	} else
1118	tokenData->ident = nullptr;
1119
1120	m_buffer16.shrink(`0`);
1121
1122	if (LIKELY(!lexerFlags.contains(LexerFlags::IgnoreReservedWords))) {
1123	ASSERT(shouldCreateIdentifier);
1124	const HashTableValue* entry = JSC::mainTable.entry(*ident);
1125	if (!entry)
1126	return IDENT;
1127	JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1128	if ((token != RESERVED_IF_STRICT) \|\| strictMode)
1129	return bufferRequired ? UNEXPECTED_ESCAPE_ERRORTOK : token;
1130	}
1131
1132	return IDENT;
1133	}
1134
1135	static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
1136	{
1137	return character < `0xE`;
1138	}
1139
1140	static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
1141	{
1142	return character < `0xE` \|\| !isLatin1(character);
1143	}
1144
1145	template <typename T>
1146	template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1147	{
1148	int startingOffset = currentOffset();
1149	int startingLineStartOffset = currentLineStartOffset();
1150	int startingLineNumber = lineNumber();
1151	T stringQuoteCharacter = m_current;
1152	shift();
1153
1154	const T* stringStart = currentSourcePtr();
1155
1156	while (m_current != stringQuoteCharacter) {
1157	if (UNLIKELY(m_current == `'\\'`)) {
1158	if (stringStart != currentSourcePtr() && shouldBuildStrings)
1159	append8(stringStart, currentSourcePtr() - stringStart);
1160	shift();
1161
1162	LChar escape = singleEscape(m_current);
1163
1164	// Most common escape sequences first.
1165	if (escape) {
1166	if (shouldBuildStrings)
1167	record8(escape);
1168	shift();
1169	} else if (UNLIKELY(isLineTerminator(m_current)))
1170	shiftLineTerminator();
1171	else if (m_current == `'x'`) {
1172	shift();
1173	if (!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(peek(`1`))) {
1174	m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1175	return (atEnd() \|\| (isASCIIHexDigit(m_current) && (m_code + `1` == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1176	}
1177	T prev = m_current;
1178	shift();
1179	if (shouldBuildStrings)
1180	record8(convertHex(prev, m_current));
1181	shift();
1182	} else {
1183	setOffset(startingOffset, startingLineStartOffset);
1184	setLineNumber(startingLineNumber);
1185	m_buffer8.shrink(`0`);
1186	return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1187	}
1188	stringStart = currentSourcePtr();
1189	continue;
1190	}
1191
1192	if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1193	setOffset(startingOffset, startingLineStartOffset);
1194	setLineNumber(startingLineNumber);
1195	m_buffer8.shrink(`0`);
1196	return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1197	}
1198
1199	shift();
1200	}
1201
1202	if (currentSourcePtr() != stringStart && shouldBuildStrings)
1203	append8(stringStart, currentSourcePtr() - stringStart);
1204	if (shouldBuildStrings) {
1205	tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1206	m_buffer8.shrink(`0`);
1207	} else
1208	tokenData->ident = `0`;
1209
1210	return StringParsedSuccessfully;
1211	}
1212
1213	template <typename T>
1214	template <bool shouldBuildStrings, LexerEscapeParseMode escapeParseMode> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(bool strictMode, T stringQuoteCharacter) -> StringParseResult
1215	{
1216	if (m_current == `'x'`) {
1217	shift();
1218	if (!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(peek(`1`))) {
1219	// For raw template literal syntax, we consume `NotEscapeSequence`.
1220	//
1221	// NotEscapeSequence ::
1222	// x [lookahread not one of HexDigit]
1223	// x HexDigit [lookahread not one of HexDigit]
1224	if (isASCIIHexDigit(m_current))
1225	shift();
1226	ASSERT(!isASCIIHexDigit(m_current));
1227
1228	m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1229	return atEnd() ? StringUnterminated : StringCannotBeParsed;
1230	}
1231
1232	T prev = m_current;
1233	shift();
1234	if (shouldBuildStrings)
1235	record16(convertHex(prev, m_current));
1236	shift();
1237
1238	return StringParsedSuccessfully;
1239	}
1240
1241	if (m_current == `'u'`) {
1242	shift();
1243
1244	if (escapeParseMode == LexerEscapeParseMode::String && m_current == stringQuoteCharacter) {
1245	if (shouldBuildStrings)
1246	record16(`'u'`);
1247	return StringParsedSuccessfully;
1248	}
1249
1250	auto character = parseUnicodeEscape();
1251	if (character.isValid()) {
1252	if (shouldBuildStrings)
1253	recordUnicodeCodePoint(character.value());
1254	return StringParsedSuccessfully;
1255	}
1256
1257	m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence"_s;
1258	return atEnd() ? StringUnterminated : StringCannotBeParsed;
1259	}
1260
1261	if (strictMode) {
1262	if (isASCIIDigit(m_current)) {
1263	// The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1264	int character1 = m_current;
1265	shift();
1266	if (character1 != `'0'` \|\| isASCIIDigit(m_current)) {
1267	// For raw template literal syntax, we consume `NotEscapeSequence`.
1268	//
1269	// NotEscapeSequence ::
1270	// 0 DecimalDigit
1271	// DecimalDigit but not 0
1272	if (character1 == `'0'`)
1273	shift();
1274
1275	m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'"_s;
1276	return atEnd() ? StringUnterminated : StringCannotBeParsed;
1277	}
1278	if (shouldBuildStrings)
1279	record16(`0`);
1280	return StringParsedSuccessfully;
1281	}
1282	} else {
1283	if (isASCIIOctalDigit(m_current)) {
1284	// Octal character sequences
1285	T character1 = m_current;
1286	shift();
1287	if (isASCIIOctalDigit(m_current)) {
1288	// Two octal characters
1289	T character2 = m_current;
1290	shift();
1291	if (character1 >= `'0'` && character1 <= `'3'` && isASCIIOctalDigit(m_current)) {
1292	if (shouldBuildStrings)
1293	record16((character1 - `'0'`) * `64` + (character2 - `'0'`) * `8` + m_current - `'0'`);
1294	shift();
1295	} else {
1296	if (shouldBuildStrings)
1297	record16((character1 - `'0'`) * `8` + character2 - `'0'`);
1298	}
1299	} else {
1300	if (shouldBuildStrings)
1301	record16(character1 - `'0'`);
1302	}
1303	return StringParsedSuccessfully;
1304	}
1305	}
1306
1307	if (!atEnd()) {
1308	if (shouldBuildStrings)
1309	record16(m_current);
1310	shift();
1311	return StringParsedSuccessfully;
1312	}
1313
1314	m_lexErrorMessage = "Unterminated string constant"_s;
1315	return StringUnterminated;
1316	}
1317
1318	template <typename T>
1319	template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
1320	{
1321	T stringQuoteCharacter = m_current;
1322	shift();
1323
1324	const T* stringStart = currentSourcePtr();
1325
1326	while (m_current != stringQuoteCharacter) {
1327	if (UNLIKELY(m_current == `'\\'`)) {
1328	if (stringStart != currentSourcePtr() && shouldBuildStrings)
1329	append16(stringStart, currentSourcePtr() - stringStart);
1330	shift();
1331
1332	LChar escape = singleEscape(m_current);
1333
1334	// Most common escape sequences first
1335	if (escape) {
1336	if (shouldBuildStrings)
1337	record16(escape);
1338	shift();
1339	} else if (UNLIKELY(isLineTerminator(m_current)))
1340	shiftLineTerminator();
1341	else {
1342	StringParseResult result = parseComplexEscape<shouldBuildStrings, LexerEscapeParseMode::String>(strictMode, stringQuoteCharacter);
1343	if (result != StringParsedSuccessfully)
1344	return result;
1345	}
1346
1347	stringStart = currentSourcePtr();
1348	continue;
1349	}
1350	// Fast check for characters that require special handling.
1351	// Catches 0, \n, and \r as efficiently as possible, and lets through all common ASCII characters.
1352	static_assert(std::is_unsigned<T>::value, "Lexer expects an unsigned character type");
1353	if (UNLIKELY(m_current < `0xE`)) {
1354	// New-line or end of input is not allowed
1355	if (atEnd() \|\| m_current == `'\r'` \|\| m_current == `'\n'`) {
1356	m_lexErrorMessage = "Unexpected EOF"_s;
1357	return atEnd() ? StringUnterminated : StringCannotBeParsed;
1358	}
1359	// Anything else is just a normal character
1360	}
1361	shift();
1362	}
1363
1364	if (currentSourcePtr() != stringStart && shouldBuildStrings)
1365	append16(stringStart, currentSourcePtr() - stringStart);
1366	if (shouldBuildStrings)
1367	tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1368	else
1369	tokenData->ident = `0`;
1370
1371	m_buffer16.shrink(`0`);
1372	return StringParsedSuccessfully;
1373	}
1374
1375	template <typename T>
1376	typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
1377	{
1378	bool parseCookedFailed = false;
1379	const T* stringStart = currentSourcePtr();
1380	const T* rawStringStart = currentSourcePtr();
1381
1382	while (m_current != '`') {
1383	if (UNLIKELY(m_current == `'\\'`)) {
1384	if (stringStart != currentSourcePtr())
1385	append16(stringStart, currentSourcePtr() - stringStart);
1386	shift();
1387
1388	LChar escape = singleEscape(m_current);
1389
1390	// Most common escape sequences first.
1391	if (escape) {
1392	record16(escape);
1393	shift();
1394	} else if (UNLIKELY(isLineTerminator(m_current))) {
1395	// Normalize <CR>, <CR><LF> to <LF>.
1396	if (m_current == `'\r'`) {
1397	ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
1398
1399	if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
1400	m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1401	m_bufferForRawTemplateString16.append(`'\n'`);
1402	}
1403
1404	shiftLineTerminator();
1405	rawStringStart = currentSourcePtr();
1406	} else
1407	shiftLineTerminator();
1408	} else {
1409	bool strictMode = true;
1410	StringParseResult result = parseComplexEscape<true, LexerEscapeParseMode::Template>(strictMode, '`');
1411	if (result != StringParsedSuccessfully) {
1412	if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings && result == StringCannotBeParsed)
1413	parseCookedFailed = true;
1414	else
1415	return result;
1416	}
1417	}
1418
1419	stringStart = currentSourcePtr();
1420	continue;
1421	}
1422
1423	if (m_current == `'$'` && peek(`1`) == `'{'`)
1424	break;
1425
1426	// Fast check for characters that require special handling.
1427	// Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1428	// as possible, and lets through all common ASCII characters.
1429	if (UNLIKELY(((static_cast<unsigned>(m_current) - `0xE`) & `0x2000`))) {
1430	// End of input is not allowed.
1431	// Unlike String, line terminator is allowed.
1432	if (atEnd()) {
1433	m_lexErrorMessage = "Unexpected EOF"_s;
1434	return StringUnterminated;
1435	}
1436
1437	if (isLineTerminator(m_current)) {
1438	if (m_current == `'\r'`) {
1439	// Normalize <CR>, <CR><LF> to <LF>.
1440	if (stringStart != currentSourcePtr())
1441	append16(stringStart, currentSourcePtr() - stringStart);
1442	if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1443	m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1444
1445	record16(`'\n'`);
1446	if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1447	m_bufferForRawTemplateString16.append(`'\n'`);
1448	shiftLineTerminator();
1449	stringStart = currentSourcePtr();
1450	rawStringStart = currentSourcePtr();
1451	} else
1452	shiftLineTerminator();
1453	continue;
1454	}
1455	// Anything else is just a normal character
1456	}
1457
1458	shift();
1459	}
1460
1461	bool isTail = m_current == '`';
1462
1463	if (currentSourcePtr() != stringStart)
1464	append16(stringStart, currentSourcePtr() - stringStart);
1465	if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1466	m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1467
1468	if (!parseCookedFailed)
1469	tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1470	else
1471	tokenData->cooked = nullptr;
1472
1473	// Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
1474	if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1475	tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
1476	else
1477	tokenData->raw = nullptr;
1478
1479	tokenData->isTail = isTail;
1480
1481	m_buffer16.shrink(`0`);
1482	m_bufferForRawTemplateString16.shrink(`0`);
1483
1484	if (isTail) {
1485	// Skip `
1486	shift();
1487	} else {
1488	// Skip $ and {
1489	shift();
1490	shift();
1491	}
1492
1493	return StringParsedSuccessfully;
1494	}
1495
1496	template <typename T>
1497	ALWAYS_INLINE auto Lexer<T>::parseHex() -> Optional<NumberParseResult>
1498	{
1499	ASSERT(isASCIIHexDigit(m_current));
1500
1501	// Optimization: most hexadecimal values fit into 4 bytes.
1502	uint32_t hexValue = `0`;
1503	int maximumDigits = `7`;
1504
1505	do {
1506	if (m_current == `'_'`) {
1507	if (UNLIKELY(!isASCIIHexDigit(peek(`1`))))
1508	return WTF::nullopt;
1509
1510	shift();
1511	}
1512
1513	hexValue = (hexValue << `4`) + toASCIIHexValue(m_current);
1514	shift();
1515	--maximumDigits;
1516	} while (isASCIIHexDigitOrSeparator(m_current) && maximumDigits >= `0`);
1517
1518	if (LIKELY(maximumDigits >= `0` && m_current != `'n'`))
1519	return NumberParseResult { hexValue };
1520
1521	// No more place in the hexValue buffer.
1522	// The values are shifted out and placed into the m_buffer8 vector.
1523	for (int i = `0`; i < `8`; ++i) {
1524	int digit = hexValue >> `28`;
1525	if (digit < `10`)
1526	record8(digit + `'0'`);
1527	else
1528	record8(digit - `10` + `'a'`);
1529	hexValue <<= `4`;
1530	}
1531
1532	while (isASCIIHexDigitOrSeparator(m_current)) {
1533	if (m_current == `'_'`) {
1534	if (UNLIKELY(!isASCIIHexDigit(peek(`1`))))
1535	return WTF::nullopt;
1536
1537	shift();
1538	}
1539
1540	record8(m_current);
1541	shift();
1542	}
1543
1544	if (UNLIKELY(Options::useBigInt() && m_current == `'n'`))
1545	return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1546
1547	return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), `16`) };
1548	}
1549
1550	template <typename T>
1551	ALWAYS_INLINE auto Lexer<T>::parseBinary() -> Optional<NumberParseResult>
1552	{
1553	ASSERT(isASCIIBinaryDigit(m_current));
1554
1555	// Optimization: most binary values fit into 4 bytes.
1556	uint32_t binaryValue = `0`;
1557	const unsigned maximumDigits = `32`;
1558	int digit = maximumDigits - `1`;
1559	// Temporary buffer for the digits. Makes easier
1560	// to reconstruct the input characters when needed.
1561	LChar digits[maximumDigits];
1562
1563	do {
1564	if (m_current == `'_'`) {
1565	if (UNLIKELY(!isASCIIBinaryDigit(peek(`1`))))
1566	return WTF::nullopt;
1567
1568	shift();
1569	}
1570
1571	binaryValue = (binaryValue << `1`) + (m_current - `'0'`);
1572	digits[digit] = m_current;
1573	shift();
1574	--digit;
1575	} while (isASCIIBinaryDigitOrSeparator(m_current) && digit >= `0`);
1576
1577	if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= `0` && m_current != `'n'`))
1578	return NumberParseResult { binaryValue };
1579
1580	for (int i = maximumDigits - `1`; i > digit; --i)
1581	record8(digits[i]);
1582
1583	while (isASCIIBinaryDigitOrSeparator(m_current)) {
1584	if (m_current == `'_'`) {
1585	if (UNLIKELY(!isASCIIBinaryDigit(peek(`1`))))
1586	return WTF::nullopt;
1587
1588	shift();
1589	}
1590
1591	record8(m_current);
1592	shift();
1593	}
1594
1595	if (UNLIKELY(Options::useBigInt() && m_current == `'n'`))
1596	return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1597
1598	if (isASCIIDigit(m_current))
1599	return WTF::nullopt;
1600
1601	return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), `2`) };
1602	}
1603
1604	template <typename T>
1605	ALWAYS_INLINE auto Lexer<T>::parseOctal() -> Optional<NumberParseResult>
1606	{
1607	ASSERT(isASCIIOctalDigit(m_current));
1608	ASSERT(!m_buffer8.size() \|\| (m_buffer8.size() == `1` && m_buffer8[`0`] == `'0'`));
1609	bool isLegacyLiteral = m_buffer8.size();
1610
1611	// Optimization: most octal values fit into 4 bytes.
1612	uint32_t octalValue = `0`;
1613	const unsigned maximumDigits = `10`;
1614	int digit = maximumDigits - `1`;
1615	// Temporary buffer for the digits. Makes easier
1616	// to reconstruct the input characters when needed.
1617	LChar digits[maximumDigits];
1618
1619	do {
1620	if (m_current == `'_'`) {
1621	if (UNLIKELY(!isASCIIOctalDigit(peek(`1`)) \|\| isLegacyLiteral))
1622	return WTF::nullopt;
1623
1624	shift();
1625	}
1626
1627	octalValue = octalValue * `8` + (m_current - `'0'`);
1628	digits[digit] = m_current;
1629	shift();
1630	--digit;
1631	} while (isASCIIOctalDigitOrSeparator(m_current) && digit >= `0`);
1632
1633	if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= `0` && m_current != `'n'`))
1634	return NumberParseResult { octalValue };
1635
1636	for (int i = maximumDigits - `1`; i > digit; --i)
1637	record8(digits[i]);
1638
1639	while (isASCIIOctalDigitOrSeparator(m_current)) {
1640	if (m_current == `'_'`) {
1641	if (UNLIKELY(!isASCIIOctalDigit(peek(`1`)) \|\| isLegacyLiteral))
1642	return WTF::nullopt;
1643
1644	shift();
1645	}
1646
1647	record8(m_current);
1648	shift();
1649	}
1650
1651	if (UNLIKELY(Options::useBigInt() && m_current == `'n'`) && !isLegacyLiteral)
1652	return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1653
1654	if (isASCIIDigit(m_current))
1655	return WTF::nullopt;
1656
1657	return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), `8`) };
1658	}
1659
1660	template <typename T>
1661	ALWAYS_INLINE auto Lexer<T>::parseDecimal() -> Optional<NumberParseResult>
1662	{
1663	ASSERT(isASCIIDigit(m_current) \|\| m_buffer8.size());
1664	bool isLegacyLiteral = m_buffer8.size() && isASCIIDigitOrSeparator(m_current);
1665
1666	// Optimization: most decimal values fit into 4 bytes.
1667	uint32_t decimalValue = `0`;
1668
1669	// Since parseOctal may be executed before parseDecimal,
1670	// the m_buffer8 may hold ascii digits.
1671	if (!m_buffer8.size()) {
1672	const unsigned maximumDigits = `10`;
1673	int digit = maximumDigits - `1`;
1674	// Temporary buffer for the digits. Makes easier
1675	// to reconstruct the input characters when needed.
1676	LChar digits[maximumDigits];
1677
1678	do {
1679	if (m_current == `'_'`) {
1680	if (UNLIKELY(!isASCIIDigit(peek(`1`)) \|\| isLegacyLiteral))
1681	return WTF::nullopt;
1682
1683	shift();
1684	}
1685
1686	decimalValue = decimalValue * `10` + (m_current - `'0'`);
1687	digits[digit] = m_current;
1688	shift();
1689	--digit;
1690	} while (isASCIIDigitOrSeparator(m_current) && digit >= `0`);
1691
1692	if (digit >= `0` && m_current != `'.'` && !isASCIIAlphaCaselessEqual(m_current, `'e'`) && m_current != `'n'`)
1693	return NumberParseResult { decimalValue };
1694
1695	for (int i = maximumDigits - `1`; i > digit; --i)
1696	record8(digits[i]);
1697	}
1698
1699	while (isASCIIDigitOrSeparator(m_current)) {
1700	if (m_current == `'_'`) {
1701	if (UNLIKELY(!isASCIIDigit(peek(`1`)) \|\| isLegacyLiteral))
1702	return WTF::nullopt;
1703
1704	shift();
1705	}
1706
1707	record8(m_current);
1708	shift();
1709	}
1710
1711	if (UNLIKELY(Options::useBigInt() && m_current == `'n'` && !isLegacyLiteral))
1712	return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1713
1714	return WTF::nullopt;
1715	}
1716
1717	template <typename T>
1718	ALWAYS_INLINE bool Lexer<T>::parseNumberAfterDecimalPoint()
1719	{
1720	ASSERT(isASCIIDigit(m_current));
1721	record8(`'.'`);
1722
1723	do {
1724	if (m_current == `'_'`) {
1725	if (UNLIKELY(!isASCIIDigit(peek(`1`))))
1726	return false;
1727
1728	shift();
1729	}
1730
1731	record8(m_current);
1732	shift();
1733	} while (isASCIIDigitOrSeparator(m_current));
1734
1735	return true;
1736	}
1737
1738	template <typename T>
1739	ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1740	{
1741	record8(`'e'`);
1742	shift();
1743	if (m_current == `'+'` \|\| m_current == `'-'`) {
1744	record8(m_current);
1745	shift();
1746	}
1747
1748	if (!isASCIIDigit(m_current))
1749	return false;
1750
1751	do {
1752	if (m_current == `'_'`) {
1753	if (UNLIKELY(!isASCIIDigit(peek(`1`))))
1754	return false;
1755
1756	shift();
1757	}
1758
1759	record8(m_current);
1760	shift();
1761	} while (isASCIIDigitOrSeparator(m_current));
1762
1763	return true;
1764	}
1765
1766	template <typename T>
1767	ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1768	{
1769	while (true) {
1770	while (UNLIKELY(m_current == `'*'`)) {
1771	shift();
1772	if (m_current == `'/'`) {
1773	shift();
1774	return true;
1775	}
1776	}
1777
1778	if (atEnd())
1779	return false;
1780
1781	if (isLineTerminator(m_current)) {
1782	shiftLineTerminator();
1783	m_hasLineTerminatorBeforeToken = true;
1784	} else
1785	shift();
1786	}
1787	}
1788
1789	template <typename T>
1790	ALWAYS_INLINE void Lexer<T>::parseCommentDirective()
1791	{
1792	// sourceURL and sourceMappingURL directives.
1793	if (!consume("source"))
1794	return;
1795
1796	if (consume("URL=")) {
1797	m_sourceURLDirective = parseCommentDirectiveValue();
1798	return;
1799	}
1800
1801	if (consume("MappingURL=")) {
1802	m_sourceMappingURLDirective = parseCommentDirectiveValue();
1803	return;
1804	}
1805	}
1806
1807	template <typename T>
1808	ALWAYS_INLINE String Lexer<T>::parseCommentDirectiveValue()
1809	{
1810	skipWhitespace();
1811	const T* stringStart = currentSourcePtr();
1812	while (!isWhiteSpace(m_current) && !isLineTerminator(m_current) && m_current != `'"'` && m_current != `'\''` && !atEnd())
1813	shift();
1814	const T* stringEnd = currentSourcePtr();
1815	skipWhitespace();
1816
1817	if (!isLineTerminator(m_current) && !atEnd())
1818	return String ();
1819
1820	append8(stringStart, stringEnd - stringStart);
1821	String result = String(m_buffer8.data(), m_buffer8.size());
1822	m_buffer8.shrink(`0`);
1823	return result;
1824	}
1825
1826	template <typename T>
1827	template <unsigned length>
1828	ALWAYS_INLINE bool Lexer<T>::consume(const char (&input)[length])
1829	{
1830	unsigned lengthToCheck = length - `1`; // Ignore the ending NULL byte in the string literal.
1831
1832	unsigned i = `0`;
1833	for (; i < lengthToCheck && m_current == input[i]; i++)
1834	shift();
1835
1836	return i == lengthToCheck;
1837	}
1838
1839	template <typename T>
1840	bool Lexer<T>::nextTokenIsColon()
1841	{
1842	const T* code = m_code;
1843	while (code < m_codeEnd && (isWhiteSpace(code) \|\| isLineTerminator(code)))
1844	code++;
1845
1846	return code < m_codeEnd && *code == `':'`;
1847	}
1848
1849	template <typename T>
1850	void Lexer<T>::fillTokenInfo(JSToken* tokenRecord, JSTokenType token, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition)
1851	{
1852	JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1853	tokenLocation->line = lineNumber;
1854	tokenLocation->endOffset = endOffset;
1855	tokenLocation->lineStartOffset = lineStartOffset;
1856	ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
1857	tokenRecord->m_endPosition = endPosition;
1858	m_lastToken = token;
1859	}
1860
1861	template <typename T>
1862	JSTokenType Lexer<T>::lexWithoutClearingLineTerminator(JSToken* tokenRecord, OptionSet<LexerFlags> lexerFlags, bool strictMode)
1863	{
1864	JSTokenData* tokenData = &tokenRecord->m_data;
1865	JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1866	m_lastTokenLocation = JSTokenLocation (tokenRecord->m_location);
1867
1868	ASSERT(!m_error);
1869	ASSERT(m_buffer8.isEmpty());
1870	ASSERT(m_buffer16.isEmpty());
1871
1872	JSTokenType token = ERRORTOK;
1873
1874	start:
1875	skipWhitespace();
1876
1877	tokenLocation->startOffset = currentOffset();
1878	ASSERT(currentOffset() >= currentLineStartOffset());
1879	tokenRecord->m_startPosition = currentPosition();
1880
1881	if (atEnd()) {
1882	token = EOFTOK;
1883	goto returnToken;
1884	}
1885
1886	CharacterType type;
1887	if (LIKELY(isLatin1(m_current)))
1888	type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1889	else if (isNonLatin1IdentStart(m_current))
1890	type = CharacterIdentifierStart;
1891	else if (isLineTerminator(m_current))
1892	type = CharacterLineTerminator;
1893	else
1894	type = CharacterInvalid;
1895
1896	switch (type) {
1897	case CharacterGreater:
1898	shift();
1899	if (m_current == `'>'`) {
1900	shift();
1901	if (m_current == `'>'`) {
1902	shift();
1903	if (m_current == `'='`) {
1904	shift();
1905	token = URSHIFTEQUAL;
1906	break;
1907	}
1908	token = URSHIFT;
1909	break;
1910	}
1911	if (m_current == `'='`) {
1912	shift();
1913	token = RSHIFTEQUAL;
1914	break;
1915	}
1916	token = RSHIFT;
1917	break;
1918	}
1919	if (m_current == `'='`) {
1920	shift();
1921	token = GE;
1922	break;
1923	}
1924	token = GT;
1925	break;
1926	case CharacterEqual: {
1927	if (peek(`1`) == `'>'`) {
1928	token = ARROWFUNCTION;
1929	tokenData->line = lineNumber();
1930	tokenData->offset = currentOffset();
1931	tokenData->lineStartOffset = currentLineStartOffset();
1932	ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1933	shift();
1934	shift();
1935	break;
1936	}
1937
1938	shift();
1939	if (m_current == `'='`) {
1940	shift();
1941	if (m_current == `'='`) {
1942	shift();
1943	token = STREQ;
1944	break;
1945	}
1946	token = EQEQ;
1947	break;
1948	}
1949	token = EQUAL;
1950	break;
1951	}
1952	case CharacterLess:
1953	shift();
1954	if (m_current == `'!'` && peek(`1`) == `'-'` && peek(`2`) == `'-'`) {
1955	if (m_scriptMode == JSParserScriptMode::Classic) {
1956	// <!-- marks the beginning of a line comment (for www usage)
1957	goto inSingleLineComment;
1958	}
1959	}
1960	if (m_current == `'<'`) {
1961	shift();
1962	if (m_current == `'='`) {
1963	shift();
1964	token = LSHIFTEQUAL;
1965	break;
1966	}
1967	token = LSHIFT;
1968	break;
1969	}
1970	if (m_current == `'='`) {
1971	shift();
1972	token = LE;
1973	break;
1974	}
1975	token = LT;
1976	break;
1977	case CharacterExclamationMark:
1978	shift();
1979	if (m_current == `'='`) {
1980	shift();
1981	if (m_current == `'='`) {
1982	shift();
1983	token = STRNEQ;
1984	break;
1985	}
1986	token = NE;
1987	break;
1988	}
1989	token = EXCLAMATION;
1990	break;
1991	case CharacterAdd:
1992	shift();
1993	if (m_current == `'+'`) {
1994	shift();
1995	token = (!m_hasLineTerminatorBeforeToken) ? PLUSPLUS : AUTOPLUSPLUS;
1996	break;
1997	}
1998	if (m_current == `'='`) {
1999	shift();
2000	token = PLUSEQUAL;
2001	break;
2002	}
2003	token = PLUS;
2004	break;
2005	case CharacterSub:
2006	shift();
2007	if (m_current == `'-'`) {
2008	shift();
2009	if ((m_atLineStart \|\| m_hasLineTerminatorBeforeToken) && m_current == `'>'`) {
2010	if (m_scriptMode == JSParserScriptMode::Classic) {
2011	shift();
2012	goto inSingleLineComment;
2013	}
2014	}
2015	token = (!m_hasLineTerminatorBeforeToken) ? MINUSMINUS : AUTOMINUSMINUS;
2016	break;
2017	}
2018	if (m_current == `'='`) {
2019	shift();
2020	token = MINUSEQUAL;
2021	break;
2022	}
2023	token = MINUS;
2024	break;
2025	case CharacterMultiply:
2026	shift();
2027	if (m_current == `'='`) {
2028	shift();
2029	token = MULTEQUAL;
2030	break;
2031	}
2032	if (m_current == `'*'`) {
2033	shift();
2034	if (m_current == `'='`) {
2035	shift();
2036	token = POWEQUAL;
2037	break;
2038	}
2039	token = POW;
2040	break;
2041	}
2042	token = TIMES;
2043	break;
2044	case CharacterSlash:
2045	shift();
2046	if (m_current == `'/'`) {
2047	shift();
2048	goto inSingleLineCommentCheckForDirectives;
2049	}
2050	if (m_current == `'*'`) {
2051	shift();
2052	if (parseMultilineComment())
2053	goto start;
2054	m_lexErrorMessage = "Multiline comment was not closed properly"_s;
2055	token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
2056	goto returnError;
2057	}
2058	if (m_current == `'='`) {
2059	shift();
2060	token = DIVEQUAL;
2061	break;
2062	}
2063	token = DIVIDE;
2064	break;
2065	case CharacterAnd:
2066	shift();
2067	if (m_current == `'&'`) {
2068	shift();
2069	token = AND;
2070	break;
2071	}
2072	if (m_current == `'='`) {
2073	shift();
2074	token = ANDEQUAL;
2075	break;
2076	}
2077	token = BITAND;
2078	break;
2079	case CharacterXor:
2080	shift();
2081	if (m_current == `'='`) {
2082	shift();
2083	token = XOREQUAL;
2084	break;
2085	}
2086	token = BITXOR;
2087	break;
2088	case CharacterModulo:
2089	shift();
2090	if (m_current == `'='`) {
2091	shift();
2092	token = MODEQUAL;
2093	break;
2094	}
2095	token = MOD;
2096	break;
2097	case CharacterOr:
2098	shift();
2099	if (m_current == `'='`) {
2100	shift();
2101	token = OREQUAL;
2102	break;
2103	}
2104	if (m_current == `'\|'`) {
2105	shift();
2106	token = OR;
2107	break;
2108	}
2109	token = BITOR;
2110	break;
2111	case CharacterOpenParen:
2112	token = OPENPAREN;
2113	tokenData->line = lineNumber();
2114	tokenData->offset = currentOffset();
2115	tokenData->lineStartOffset = currentLineStartOffset();
2116	shift();
2117	break;
2118	case CharacterCloseParen:
2119	token = CLOSEPAREN;
2120	shift();
2121	break;
2122	case CharacterOpenBracket:
2123	token = OPENBRACKET;
2124	shift();
2125	break;
2126	case CharacterCloseBracket:
2127	token = CLOSEBRACKET;
2128	shift();
2129	break;
2130	case CharacterComma:
2131	token = COMMA;
2132	shift();
2133	break;
2134	case CharacterColon:
2135	token = COLON;
2136	shift();
2137	break;
2138	case CharacterQuestion:
2139	shift();
2140	if (m_current == `'?'`) {
2141	shift();
2142	token = COALESCE;
2143	break;
2144	}
2145	if (m_current == `'.'` && !isASCIIDigit(peek(`1`))) {
2146	shift();
2147	token = QUESTIONDOT;
2148	break;
2149	}
2150	token = QUESTION;
2151	break;
2152	case CharacterTilde:
2153	token = TILDE;
2154	shift();
2155	break;
2156	case CharacterSemicolon:
2157	shift();
2158	token = SEMICOLON;
2159	break;
2160	case CharacterBackQuote:
2161	shift();
2162	token = BACKQUOTE;
2163	break;
2164	case CharacterOpenBrace:
2165	tokenData->line = lineNumber();
2166	tokenData->offset = currentOffset();
2167	tokenData->lineStartOffset = currentLineStartOffset();
2168	ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2169	shift();
2170	token = OPENBRACE;
2171	break;
2172	case CharacterCloseBrace:
2173	tokenData->line = lineNumber();
2174	tokenData->offset = currentOffset();
2175	tokenData->lineStartOffset = currentLineStartOffset();
2176	ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2177	shift();
2178	token = CLOSEBRACE;
2179	break;
2180	case CharacterDot:
2181	shift();
2182	if (!isASCIIDigit(m_current)) {
2183	if (UNLIKELY((m_current == `'.'`) && (peek(`1`) == `'.'`))) {
2184	shift();
2185	shift();
2186	token = DOTDOTDOT;
2187	break;
2188	}
2189	token = DOT;
2190	break;
2191	}
2192	if (UNLIKELY(!parseNumberAfterDecimalPoint())) {
2193	m_lexErrorMessage = "Non-number found after decimal point"_s;
2194	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2195	goto returnError;
2196	}
2197	token = DOUBLE;
2198	if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, `'e'`) && !parseNumberAfterExponentIndicator())) {
2199	m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2200	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2201	goto returnError;
2202	}
2203	size_t parsedLength;
2204	tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2205	if (token == INTEGER)
2206	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2207
2208	if (UNLIKELY(isIdentStart(m_current))) {
2209	m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2210	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2211	goto returnError;
2212	}
2213	m_buffer8.shrink(`0`);
2214	break;
2215	case CharacterZero:
2216	shift();
2217	if (isASCIIAlphaCaselessEqual(m_current, `'x'`)) {
2218	if (UNLIKELY(!isASCIIHexDigit(peek(`1`)))) {
2219	m_lexErrorMessage = "No hexadecimal digits after '0x'"_s;
2220	token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2221	goto returnError;
2222	}
2223
2224	// Shift out the 'x' prefix.
2225	shift();
2226
2227	auto parseNumberResult = parseHex();
2228	if (!parseNumberResult)
2229	tokenData->doubleValue = `0`;
2230	else if (WTF::holds_alternative<double>(*parseNumberResult))
2231	tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2232	else {
2233	token = BIGINT;
2234	shift();
2235	tokenData->bigIntString = WTF::get<const Identifier>(parseNumberResult);
2236	tokenData->radix = `16`;
2237	}
2238
2239	if (UNLIKELY(isIdentStart(m_current))) {
2240	m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s;
2241	token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2242	goto returnError;
2243	}
2244	if (LIKELY(token != BIGINT))
2245	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2246	m_buffer8.shrink(`0`);
2247	break;
2248	}
2249	if (isASCIIAlphaCaselessEqual(m_current, `'b'`)) {
2250	if (UNLIKELY(!isASCIIBinaryDigit(peek(`1`)))) {
2251	m_lexErrorMessage = "No binary digits after '0b'"_s;
2252	token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2253	goto returnError;
2254	}
2255
2256	// Shift out the 'b' prefix.
2257	shift();
2258
2259	auto parseNumberResult = parseBinary();
2260	if (!parseNumberResult)
2261	tokenData->doubleValue = `0`;
2262	else if (WTF::holds_alternative<double>(*parseNumberResult))
2263	tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2264	else {
2265	token = BIGINT;
2266	shift();
2267	tokenData->bigIntString = WTF::get<const Identifier>(parseNumberResult);
2268	tokenData->radix = `2`;
2269	}
2270
2271	if (UNLIKELY(isIdentStart(m_current))) {
2272	m_lexErrorMessage = "No space between binary literal and identifier"_s;
2273	token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2274	goto returnError;
2275	}
2276	if (LIKELY(token != BIGINT))
2277	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2278	m_buffer8.shrink(`0`);
2279	break;
2280	}
2281
2282	if (isASCIIAlphaCaselessEqual(m_current, `'o'`)) {
2283	if (UNLIKELY(!isASCIIOctalDigit(peek(`1`)))) {
2284	m_lexErrorMessage = "No octal digits after '0o'"_s;
2285	token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2286	goto returnError;
2287	}
2288
2289	// Shift out the 'o' prefix.
2290	shift();
2291
2292	auto parseNumberResult = parseOctal();
2293	if (!parseNumberResult)
2294	tokenData->doubleValue = `0`;
2295	else if (WTF::holds_alternative<double>(*parseNumberResult))
2296	tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2297	else {
2298	token = BIGINT;
2299	shift();
2300	tokenData->bigIntString = WTF::get<const Identifier>(parseNumberResult);
2301	tokenData->radix = `8`;
2302	}
2303
2304	if (UNLIKELY(isIdentStart(m_current))) {
2305	m_lexErrorMessage = "No space between octal literal and identifier"_s;
2306	token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2307	goto returnError;
2308	}
2309	if (LIKELY(token != BIGINT))
2310	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2311	m_buffer8.shrink(`0`);
2312	break;
2313	}
2314
2315	if (UNLIKELY(m_current == `'_'`)) {
2316	m_lexErrorMessage = "Numeric literals may not begin with 0_"_s;
2317	token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2318	goto returnError;
2319	}
2320
2321	record8(`'0'`);
2322	if (UNLIKELY(strictMode && isASCIIDigit(m_current))) {
2323	m_lexErrorMessage = "Decimal integer literals with a leading zero are forbidden in strict mode"_s;
2324	token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2325	goto returnError;
2326	}
2327	if (isASCIIOctalDigit(m_current)) {
2328	auto parseNumberResult = parseOctal();
2329	if (parseNumberResult && WTF::holds_alternative<double>(*parseNumberResult)) {
2330	tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2331	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2332	}
2333	}
2334	FALLTHROUGH;
2335	case CharacterNumber:
2336	if (LIKELY(token != INTEGER && token != DOUBLE)) {
2337	auto parseNumberResult = parseDecimal();
2338	if (parseNumberResult) {
2339	if (WTF::holds_alternative<double>(*parseNumberResult)) {
2340	tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2341	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2342	} else {
2343	token = BIGINT;
2344	shift();
2345	tokenData->bigIntString = WTF::get<const Identifier>(parseNumberResult);
2346	tokenData->radix = `10`;
2347	}
2348	} else {
2349	token = INTEGER;
2350	if (m_current == `'.'`) {
2351	shift();
2352	if (UNLIKELY(isASCIIDigit(m_current) && !parseNumberAfterDecimalPoint())) {
2353	m_lexErrorMessage = "Non-number found after decimal point"_s;
2354	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2355	goto returnError;
2356	}
2357	token = DOUBLE;
2358	}
2359	if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, `'e'`) && !parseNumberAfterExponentIndicator())) {
2360	m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2361	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2362	goto returnError;
2363	}
2364	size_t parsedLength;
2365	tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2366	if (token == INTEGER)
2367	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2368	}
2369	}
2370
2371	if (UNLIKELY(isIdentStart(m_current))) {
2372	m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2373	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2374	goto returnError;
2375	}
2376	m_buffer8.shrink(`0`);
2377	break;
2378	case CharacterQuote: {
2379	StringParseResult result = StringCannotBeParsed;
2380	if (lexerFlags.contains(LexerFlags::DontBuildStrings))
2381	result = parseString<false>(tokenData, strictMode);
2382	else
2383	result = parseString<true>(tokenData, strictMode);
2384
2385	if (UNLIKELY(result != StringParsedSuccessfully)) {
2386	token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
2387	goto returnError;
2388	}
2389	shift();
2390	token = STRING;
2391	break;
2392	}
2393	case CharacterIdentifierStart:
2394	ASSERT(isIdentStart(m_current));
2395	FALLTHROUGH;
2396	case CharacterBackSlash:
2397	parseIdent:
2398	if (lexerFlags.contains(LexerFlags::DontBuildKeywords))
2399	token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
2400	else
2401	token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
2402	break;
2403	case CharacterLineTerminator:
2404	ASSERT(isLineTerminator(m_current));
2405	shiftLineTerminator();
2406	m_atLineStart = true;
2407	m_hasLineTerminatorBeforeToken = true;
2408	m_lineStart = m_code;
2409	goto start;
2410	case CharacterHash:
2411	// Hashbang is only permitted at the start of the source text.
2412	if (peek(`1`) == `'!'` && !currentOffset()) {
2413	shift();
2414	shift();
2415	goto inSingleLineComment;
2416	}
2417	goto invalidCharacter;
2418	case CharacterPrivateIdentifierStart:
2419	if (m_parsingBuiltinFunction)
2420	goto parseIdent;
2421	goto invalidCharacter;
2422	case CharacterOtherIdentifierPart:
2423	case CharacterInvalid:
2424	goto invalidCharacter;
2425	default:
2426	RELEASE_ASSERT_NOT_REACHED();
2427	m_lexErrorMessage = "Internal Error"_s;
2428	token = ERRORTOK;
2429	goto returnError;
2430	}
2431
2432	m_atLineStart = false;
2433	goto returnToken;
2434
2435	inSingleLineCommentCheckForDirectives:
2436	// Script comment directives like "//# sourceURL=test.js".
2437	if (UNLIKELY((m_current == `'#'` \|\| m_current == `'@'`) && isWhiteSpace(peek(`1`)))) {
2438	shift();
2439	shift();
2440	parseCommentDirective();
2441	}
2442	// Fall through to complete single line comment parsing.
2443
2444	inSingleLineComment:
2445	{
2446	auto lineNumber = m_lineNumber;
2447	auto endOffset = currentOffset();
2448	auto lineStartOffset = currentLineStartOffset();
2449	auto endPosition = currentPosition();
2450
2451	while (!isLineTerminator(m_current)) {
2452	if (atEnd()) {
2453	token = EOFTOK;
2454	fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2455	return token;
2456	}
2457	shift();
2458	}
2459	shiftLineTerminator();
2460	m_atLineStart = true;
2461	m_hasLineTerminatorBeforeToken = true;
2462	m_lineStart = m_code;
2463	if (!lastTokenWasRestrKeyword())
2464	goto start;
2465
2466	token = SEMICOLON;
2467	fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2468	return token;
2469	}
2470
2471	returnToken:
2472	fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2473	return token;
2474
2475	invalidCharacter:
2476	m_lexErrorMessage = invalidCharacterMessage();
2477	token = ERRORTOK;
2478	// Falls through to return error.
2479
2480	returnError:
2481	m_error = true;
2482	fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2483	RELEASE_ASSERT(token & ErrorTokenFlag);
2484	return token;
2485	}
2486
2487	template <typename T>
2488	static inline void orCharacter(UChar&, UChar);
2489
2490	template <>
2491	inline void orCharacter<LChar>(UChar&, UChar) { }
2492
2493	template <>
2494	inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
2495	{
2496	orAccumulator \|= character;
2497	}
2498
2499	template <typename T>
2500	JSTokenType Lexer<T>::scanRegExp(JSToken* tokenRecord, UChar patternPrefix)
2501	{
2502	JSTokenData* tokenData = &tokenRecord->m_data;
2503	ASSERT(m_buffer16.isEmpty());
2504
2505	bool lastWasEscape = false;
2506	bool inBrackets = false;
2507	UChar charactersOredTogether = `0`;
2508
2509	if (patternPrefix) {
2510	ASSERT(!isLineTerminator(patternPrefix));
2511	ASSERT(patternPrefix != `'/'`);
2512	ASSERT(patternPrefix != `'['`);
2513	record16(patternPrefix);
2514	}
2515
2516	while (true) {
2517	if (isLineTerminator(m_current) \|\| atEnd()) {
2518	m_buffer16.shrink(`0`);
2519	JSTokenType token = UNTERMINATED_REGEXP_LITERAL_ERRORTOK;
2520	fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2521	m_error = true;
2522	m_lexErrorMessage = makeString("Unterminated regular expression literal '", getToken(*tokenRecord), "'");
2523	return token;
2524	}
2525
2526	T prev = m_current;
2527
2528	shift();
2529
2530	if (prev == `'/'` && !lastWasEscape && !inBrackets)
2531	break;
2532
2533	record16(prev);
2534	orCharacter<T>(charactersOredTogether, prev);
2535
2536	if (lastWasEscape) {
2537	lastWasEscape = false;
2538	continue;
2539	}
2540
2541	switch (prev) {
2542	case `'['`:
2543	inBrackets = true;
2544	break;
2545	case `']'`:
2546	inBrackets = false;
2547	break;
2548	case `'\\'`:
2549	lastWasEscape = true;
2550	break;
2551	}
2552	}
2553
2554	tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2555
2556	m_buffer16.shrink(`0`);
2557	charactersOredTogether = `0`;
2558
2559	while (isIdentPart(m_current)) {
2560	record16(m_current);
2561	orCharacter<T>(charactersOredTogether, m_current);
2562	shift();
2563	}
2564
2565	tokenData->flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2566	m_buffer16.shrink(`0`);
2567
2568	// Since RegExp always ends with /, m_atLineStart always becomes false.
2569	m_atLineStart = false;
2570
2571	JSTokenType token = REGEXP;
2572	fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2573	return token;
2574	}
2575
2576	template <typename T>
2577	JSTokenType Lexer<T>::scanTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
2578	{
2579	JSTokenData* tokenData = &tokenRecord->m_data;
2580	ASSERT(!m_error);
2581	ASSERT(m_buffer16.isEmpty());
2582
2583	// Leading backquote ` (for template head) or closing brace } (for template trailing) are already shifted in the previous token scan.
2584	// So in this re-scan phase, shift() is not needed here.
2585	StringParseResult result = parseTemplateLiteral(tokenData, rawStringsBuildMode);
2586	JSTokenType token = ERRORTOK;
2587	if (UNLIKELY(result != StringParsedSuccessfully)) {
2588	token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2589	m_error = true;
2590	} else
2591	token = TEMPLATE;
2592
2593	// Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2594	m_atLineStart = false;
2595	fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2596	return token;
2597	}
2598
2599	template <typename T>
2600	void Lexer<T>::clear()
2601	{
2602	m_arena = `0`;
2603
2604	Vector<LChar> newBuffer8;
2605	m_buffer8.swap(newBuffer8);
2606
2607	Vector<UChar> newBuffer16;
2608	m_buffer16.swap(newBuffer16);
2609
2610	Vector<UChar> newBufferForRawTemplateString16;
2611	m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
2612
2613	m_isReparsingFunction = false;
2614	}
2615
2616	// Explicitly instantiate the two flavors of Lexer we need (Lexer<LChar> and Lexer<UChar>) here, so that the template member definitions can stay in this file instead of putting most of it in Lexer.h.
2617	template class Lexer<LChar>;
2618	template class Lexer<UChar>;
2619
2620	} // namespace JSC
2621

Browse the source code of jsc/Source/JavaScriptCore/parser/Lexer.cpp