LiteralParser.cpp source code [jsc/Source/JavaScriptCore/runtime/LiteralParser.cpp]

1	/*
2	* Copyright (C) 2009-2019 Apple Inc. All rights reserved.
3	* Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	* 2. Redistributions in binary form must reproduce the above copyright
11	* notice, this list of conditions and the following disclaimer in the
12	* documentation and/or other materials provided with the distribution.
13	*
14	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25	*/
26
#include "config.h"
#include "LiteralParser.h"

#include "ButterflyInlines.h"
#include "CodeBlock.h"
#include "JSArray.h"
#include "JSString.h"
#include "Lexer.h"
#include "ObjectConstructor.h"
#include "JSCInlines.h"
#include "StrongInlines.h"
#include <limits>
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>
#include <wtf/text/StringConcatenate.h>
41
42	namespace JSC {
43
// Returns true if |c| is one of the four insignificant white space characters
// of the JSON grammar: space, horizontal tab, line feed or carriage return.
template <typename CharType>
static ALWAYS_INLINE bool isJSONWhiteSpace(const CharType& c)
{
    // The JSON RFC 4627 defines a list of allowed characters to be considered
    // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
    return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
}
51
52	template <typename CharType>
53	bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo)
54	{
55	VM& vm = m_globalObject->vm();
56	auto scope = DECLARE_THROW_SCOPE(vm);
57	if (m_lexer.next() != TokIdentifier)
58	return false;
59	do {
60	Vector<JSONPPathEntry> path;
61	// Unguarded next to start off the lexer
62	Identifier name = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
63	JSONPPathEntry entry;
64	if (name == vm.propertyNames->varKeyword) {
65	if (m_lexer.next() != TokIdentifier)
66	return false;
67	entry.m_type = JSONPPathEntryTypeDeclareVar;
68	entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
69	path.append(entry);
70	} else {
71	entry.m_type = JSONPPathEntryTypeDot;
72	entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
73	path.append(entry);
74	}
75	if (isLexerKeyword(entry.m_pathEntryName))
76	return false;
77	TokenType tokenType = m_lexer.next();
78	if (entry.m_type == JSONPPathEntryTypeDeclareVar && tokenType != TokAssign)
79	return false;
80	while (tokenType != TokAssign) {
81	switch (tokenType) {
82	case TokLBracket: {
83	entry.m_type = JSONPPathEntryTypeLookup;
84	if (m_lexer.next() != TokNumber)
85	return false;
86	double doubleIndex = m_lexer.currentToken()->numberToken;
87	int index = (int)doubleIndex;
88	if (index != doubleIndex \|\| index < `0`)
89	return false;
90	entry.m_pathIndex = index;
91	if (m_lexer.next() != TokRBracket)
92	return false;
93	break;
94	}
95	case TokDot: {
96	entry.m_type = JSONPPathEntryTypeDot;
97	if (m_lexer.next() != TokIdentifier)
98	return false;
99	entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
100	break;
101	}
102	case TokLParen: {
103	if (path.last().m_type != JSONPPathEntryTypeDot \|\| needsFullSourceInfo)
104	return false;
105	path.last().m_type = JSONPPathEntryTypeCall;
106	entry = path.last();
107	goto startJSON;
108	}
109	default:
110	return false;
111	}
112	path.append(entry);
113	tokenType = m_lexer.next();
114	}
115	startJSON:
116	m_lexer.next();
117	results.append(JSONPData ());
118	JSValue startParseExpressionValue = parse(StartParseExpression);
119	RETURN_IF_EXCEPTION(scope, false);
120	results.last().m_value.set(vm, startParseExpressionValue);
121	if (!results.last().m_value)
122	return false;
123	results.last().m_path.swap(path);
124	if (entry.m_type == JSONPPathEntryTypeCall) {
125	if (m_lexer.currentToken()->type != TokRParen)
126	return false;
127	m_lexer.next();
128	}
129	if (m_lexer.currentToken()->type != TokSemi)
130	break;
131	m_lexer.next();
132	} while (m_lexer.currentToken()->type == TokIdentifier);
133	return m_lexer.currentToken()->type == TokEnd;
134	}
135
136	template <typename CharType>
137	ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const LChar* characters, size_t length)
138	{
139	VM& vm = m_globalObject->vm();
140	if (!length)
141	return vm.propertyNames->emptyIdentifier;
142	if (characters[`0`] >= MaximumCachableCharacter)
143	return Identifier::fromString(vm, characters, length);
144
145	if (length == `1`) {
146	if (!m_shortIdentifiers[characters[`0`]].isNull())
147	return m_shortIdentifiers[characters[`0`]];
148	m_shortIdentifiers[characters[`0`]] = Identifier::fromString(vm, characters, length);
149	return m_shortIdentifiers[characters[`0`]];
150	}
151	if (!m_recentIdentifiers[characters[`0`]].isNull() && Identifier::equal(m_recentIdentifiers[characters[`0`]].impl(), characters, length))
152	return m_recentIdentifiers[characters[`0`]];
153	m_recentIdentifiers[characters[`0`]] = Identifier::fromString(vm, characters, length);
154	return m_recentIdentifiers[characters[`0`]];
155	}
156
157	template <typename CharType>
158	ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const UChar* characters, size_t length)
159	{
160	VM& vm = m_globalObject->vm();
161	if (!length)
162	return vm.propertyNames->emptyIdentifier;
163	if (characters[`0`] >= MaximumCachableCharacter)
164	return Identifier::fromString(vm, characters, length);
165
166	if (length == `1`) {
167	if (!m_shortIdentifiers[characters[`0`]].isNull())
168	return m_shortIdentifiers[characters[`0`]];
169	m_shortIdentifiers[characters[`0`]] = Identifier::fromString(vm, characters, length);
170	return m_shortIdentifiers[characters[`0`]];
171	}
172	if (!m_recentIdentifiers[characters[`0`]].isNull() && Identifier::equal(m_recentIdentifiers[characters[`0`]].impl(), characters, length))
173	return m_recentIdentifiers[characters[`0`]];
174	m_recentIdentifiers[characters[`0`]] = Identifier::fromString(vm, characters, length);
175	return m_recentIdentifiers[characters[`0`]];
176	}
177
178	// 256 Latin-1 codes
179	static constexpr const TokenType TokenTypesOfLatin1Characters[`256`] = {
180	/ 0 - Null / TokError,
181	/ 1 - Start of Heading / TokError,
182	/ 2 - Start of Text / TokError,
183	/ 3 - End of Text / TokError,
184	/ 4 - End of Transm. / TokError,
185	/ 5 - Enquiry / TokError,
186	/ 6 - Acknowledgment / TokError,
187	/ 7 - Bell / TokError,
188	/ 8 - Back Space / TokError,
189	/ 9 - Horizontal Tab / TokError,
190	/ 10 - Line Feed / TokError,
191	/ 11 - Vertical Tab / TokError,
192	/ 12 - Form Feed / TokError,
193	/ 13 - Carriage Return / TokError,
194	/ 14 - Shift Out / TokError,
195	/ 15 - Shift In / TokError,
196	/ 16 - Data Line Escape / TokError,
197	/ 17 - Device Control 1 / TokError,
198	/ 18 - Device Control 2 / TokError,
199	/ 19 - Device Control 3 / TokError,
200	/ 20 - Device Control 4 / TokError,
201	/ 21 - Negative Ack. / TokError,
202	/ 22 - Synchronous Idle / TokError,
203	/ 23 - End of Transmit / TokError,
204	/ 24 - Cancel / TokError,
205	/ 25 - End of Medium / TokError,
206	/ 26 - Substitute / TokError,
207	/ 27 - Escape / TokError,
208	/ 28 - File Separator / TokError,
209	/ 29 - Group Separator / TokError,
210	/ 30 - Record Separator / TokError,
211	/ 31 - Unit Separator / TokError,
212	/ 32 - Space / TokError,
213	/ 33 - ! / TokError,
214	/ 34 - " / TokString,
215	/ 35 - # / TokError,
216	/ 36 - $ / TokIdentifier,
217	/ 37 - % / TokError,
218	/ 38 - & / TokError,
219	/ 39 - ' / TokString,
220	/ 40 - ( / TokLParen,
221	/ 41 - ) / TokRParen,
222	/ 42 - * / TokError,
223	/ 43 - + / TokError,
224	/ 44 - , / TokComma,
225	/ 45 - - / TokNumber,
226	/ 46 - . / TokDot,
227	/ 47 - / / TokError,
228	/ 48 - 0 / TokNumber,
229	/ 49 - 1 / TokNumber,
230	/ 50 - 2 / TokNumber,
231	/ 51 - 3 / TokNumber,
232	/ 52 - 4 / TokNumber,
233	/ 53 - 5 / TokNumber,
234	/ 54 - 6 / TokNumber,
235	/ 55 - 7 / TokNumber,
236	/ 56 - 8 / TokNumber,
237	/ 57 - 9 / TokNumber,
238	/ 58 - : / TokColon,
239	/ 59 - ; / TokSemi,
240	/ 60 - < / TokError,
241	/ 61 - = / TokAssign,
242	/ 62 - > / TokError,
243	/ 63 - ? / TokError,
244	/ 64 - @ / TokError,
245	/ 65 - A / TokIdentifier,
246	/ 66 - B / TokIdentifier,
247	/ 67 - C / TokIdentifier,
248	/ 68 - D / TokIdentifier,
249	/ 69 - E / TokIdentifier,
250	/ 70 - F / TokIdentifier,
251	/ 71 - G / TokIdentifier,
252	/ 72 - H / TokIdentifier,
253	/ 73 - I / TokIdentifier,
254	/ 74 - J / TokIdentifier,
255	/ 75 - K / TokIdentifier,
256	/ 76 - L / TokIdentifier,
257	/ 77 - M / TokIdentifier,
258	/ 78 - N / TokIdentifier,
259	/ 79 - O / TokIdentifier,
260	/ 80 - P / TokIdentifier,
261	/ 81 - Q / TokIdentifier,
262	/ 82 - R / TokIdentifier,
263	/ 83 - S / TokIdentifier,
264	/ 84 - T / TokIdentifier,
265	/ 85 - U / TokIdentifier,
266	/ 86 - V / TokIdentifier,
267	/ 87 - W / TokIdentifier,
268	/ 88 - X / TokIdentifier,
269	/ 89 - Y / TokIdentifier,
270	/ 90 - Z / TokIdentifier,
271	/ 91 - [ / TokLBracket,
272	/ 92 - \ / TokError,
273	/ 93 - ] / TokRBracket,
274	/ 94 - ^ / TokError,
275	/ 95 - _ / TokIdentifier,
276	/ 96 - ` / TokError,
277	/ 97 - a / TokIdentifier,
278	/ 98 - b / TokIdentifier,
279	/ 99 - c / TokIdentifier,
280	/ 100 - d / TokIdentifier,
281	/ 101 - e / TokIdentifier,
282	/ 102 - f / TokIdentifier,
283	/ 103 - g / TokIdentifier,
284	/ 104 - h / TokIdentifier,
285	/ 105 - i / TokIdentifier,
286	/ 106 - j / TokIdentifier,
287	/ 107 - k / TokIdentifier,
288	/ 108 - l / TokIdentifier,
289	/ 109 - m / TokIdentifier,
290	/ 110 - n / TokIdentifier,
291	/ 111 - o / TokIdentifier,
292	/ 112 - p / TokIdentifier,
293	/ 113 - q / TokIdentifier,
294	/ 114 - r / TokIdentifier,
295	/ 115 - s / TokIdentifier,
296	/ 116 - t / TokIdentifier,
297	/ 117 - u / TokIdentifier,
298	/ 118 - v / TokIdentifier,
299	/ 119 - w / TokIdentifier,
300	/ 120 - x / TokIdentifier,
301	/ 121 - y / TokIdentifier,
302	/ 122 - z / TokIdentifier,
303	/ 123 - { / TokLBrace,
304	/ 124 - \| / TokError,
305	/ 125 - } / TokRBrace,
306	/ 126 - ~ / TokError,
307	/ 127 - Delete / TokError,
308	/ 128 - Cc category / TokError,
309	/ 129 - Cc category / TokError,
310	/ 130 - Cc category / TokError,
311	/ 131 - Cc category / TokError,
312	/ 132 - Cc category / TokError,
313	/ 133 - Cc category / TokError,
314	/ 134 - Cc category / TokError,
315	/ 135 - Cc category / TokError,
316	/ 136 - Cc category / TokError,
317	/ 137 - Cc category / TokError,
318	/ 138 - Cc category / TokError,
319	/ 139 - Cc category / TokError,
320	/ 140 - Cc category / TokError,
321	/ 141 - Cc category / TokError,
322	/ 142 - Cc category / TokError,
323	/ 143 - Cc category / TokError,
324	/ 144 - Cc category / TokError,
325	/ 145 - Cc category / TokError,
326	/ 146 - Cc category / TokError,
327	/ 147 - Cc category / TokError,
328	/ 148 - Cc category / TokError,
329	/ 149 - Cc category / TokError,
330	/ 150 - Cc category / TokError,
331	/ 151 - Cc category / TokError,
332	/ 152 - Cc category / TokError,
333	/ 153 - Cc category / TokError,
334	/ 154 - Cc category / TokError,
335	/ 155 - Cc category / TokError,
336	/ 156 - Cc category / TokError,
337	/ 157 - Cc category / TokError,
338	/ 158 - Cc category / TokError,
339	/ 159 - Cc category / TokError,
340	/ 160 - Zs category (nbsp) / TokError,
341	/ 161 - Po category / TokError,
342	/ 162 - Sc category / TokError,
343	/ 163 - Sc category / TokError,
344	/ 164 - Sc category / TokError,
345	/ 165 - Sc category / TokError,
346	/ 166 - So category / TokError,
347	/ 167 - So category / TokError,
348	/ 168 - Sk category / TokError,
349	/ 169 - So category / TokError,
350	/ 170 - Ll category / TokError,
351	/ 171 - Pi category / TokError,
352	/ 172 - Sm category / TokError,
353	/ 173 - Cf category / TokError,
354	/ 174 - So category / TokError,
355	/ 175 - Sk category / TokError,
356	/ 176 - So category / TokError,
357	/ 177 - Sm category / TokError,
358	/ 178 - No category / TokError,
359	/ 179 - No category / TokError,
360	/ 180 - Sk category / TokError,
361	/ 181 - Ll category / TokError,
362	/ 182 - So category / TokError,
363	/ 183 - Po category / TokError,
364	/ 184 - Sk category / TokError,
365	/ 185 - No category / TokError,
366	/ 186 - Ll category / TokError,
367	/ 187 - Pf category / TokError,
368	/ 188 - No category / TokError,
369	/ 189 - No category / TokError,
370	/ 190 - No category / TokError,
371	/ 191 - Po category / TokError,
372	/ 192 - Lu category / TokError,
373	/ 193 - Lu category / TokError,
374	/ 194 - Lu category / TokError,
375	/ 195 - Lu category / TokError,
376	/ 196 - Lu category / TokError,
377	/ 197 - Lu category / TokError,
378	/ 198 - Lu category / TokError,
379	/ 199 - Lu category / TokError,
380	/ 200 - Lu category / TokError,
381	/ 201 - Lu category / TokError,
382	/ 202 - Lu category / TokError,
383	/ 203 - Lu category / TokError,
384	/ 204 - Lu category / TokError,
385	/ 205 - Lu category / TokError,
386	/ 206 - Lu category / TokError,
387	/ 207 - Lu category / TokError,
388	/ 208 - Lu category / TokError,
389	/ 209 - Lu category / TokError,
390	/ 210 - Lu category / TokError,
391	/ 211 - Lu category / TokError,
392	/ 212 - Lu category / TokError,
393	/ 213 - Lu category / TokError,
394	/ 214 - Lu category / TokError,
395	/ 215 - Sm category / TokError,
396	/ 216 - Lu category / TokError,
397	/ 217 - Lu category / TokError,
398	/ 218 - Lu category / TokError,
399	/ 219 - Lu category / TokError,
400	/ 220 - Lu category / TokError,
401	/ 221 - Lu category / TokError,
402	/ 222 - Lu category / TokError,
403	/ 223 - Ll category / TokError,
404	/ 224 - Ll category / TokError,
405	/ 225 - Ll category / TokError,
406	/ 226 - Ll category / TokError,
407	/ 227 - Ll category / TokError,
408	/ 228 - Ll category / TokError,
409	/ 229 - Ll category / TokError,
410	/ 230 - Ll category / TokError,
411	/ 231 - Ll category / TokError,
412	/ 232 - Ll category / TokError,
413	/ 233 - Ll category / TokError,
414	/ 234 - Ll category / TokError,
415	/ 235 - Ll category / TokError,
416	/ 236 - Ll category / TokError,
417	/ 237 - Ll category / TokError,
418	/ 238 - Ll category / TokError,
419	/ 239 - Ll category / TokError,
420	/ 240 - Ll category / TokError,
421	/ 241 - Ll category / TokError,
422	/ 242 - Ll category / TokError,
423	/ 243 - Ll category / TokError,
424	/ 244 - Ll category / TokError,
425	/ 245 - Ll category / TokError,
426	/ 246 - Ll category / TokError,
427	/ 247 - Sm category / TokError,
428	/ 248 - Ll category / TokError,
429	/ 249 - Ll category / TokError,
430	/ 250 - Ll category / TokError,
431	/ 251 - Ll category / TokError,
432	/ 252 - Ll category / TokError,
433	/ 253 - Ll category / TokError,
434	/ 254 - Ll category / TokError,
435	/ 255 - Ll category / TokError
436	};
437
438	template <typename CharType>
439	ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token)
440	{
441	#if !ASSERT_DISABLED
442	m_currentTokenID++;
443	#endif
444
445	while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
446	++m_ptr;
447
448	ASSERT(m_ptr <= m_end);
449	if (m_ptr == m_end) {
450	token.type = TokEnd;
451	token.start = token.end = m_ptr;
452	return TokEnd;
453	}
454	ASSERT(m_ptr < m_end);
455	token.type = TokError;
456	token.start = m_ptr;
457	CharType character = *m_ptr;
458	if (LIKELY(isLatin1(character))) {
459	TokenType tokenType = TokenTypesOfLatin1Characters[character];
460	switch (tokenType) {
461	case TokString:
462	if (character == `'\''` && m_mode == StrictJSON) {
463	m_lexErrorMessage = "Single quotes (\') are not allowed in JSON"_s;
464	return TokError;
465	}
466	return lexString(token, character);
467
468	case TokIdentifier: {
469	switch (character) {
470	case `'t'`:
471	if (m_end - m_ptr >= `4` && m_ptr[`1`] == `'r'` && m_ptr[`2`] == `'u'` && m_ptr[`3`] == `'e'`) {
472	m_ptr += `4`;
473	token.type = TokTrue;
474	token.end = m_ptr;
475	return TokTrue;
476	}
477	break;
478	case `'f'`:
479	if (m_end - m_ptr >= `5` && m_ptr[`1`] == `'a'` && m_ptr[`2`] == `'l'` && m_ptr[`3`] == `'s'` && m_ptr[`4`] == `'e'`) {
480	m_ptr += `5`;
481	token.type = TokFalse;
482	token.end = m_ptr;
483	return TokFalse;
484	}
485	break;
486	case `'n'`:
487	if (m_end - m_ptr >= `4` && m_ptr[`1`] == `'u'` && m_ptr[`2`] == `'l'` && m_ptr[`3`] == `'l'`) {
488	m_ptr += `4`;
489	token.type = TokNull;
490	token.end = m_ptr;
491	return TokNull;
492	}
493	break;
494	}
495	return lexIdentifier(token);
496	}
497
498	case TokNumber:
499	return lexNumber(token);
500
501	case TokError:
502	break;
503
504	default:
505	ASSERT(tokenType == TokLBracket
506	\|\| tokenType == TokRBracket
507	\|\| tokenType == TokLBrace
508	\|\| tokenType == TokRBrace
509	\|\| tokenType == TokColon
510	\|\| tokenType == TokLParen
511	\|\| tokenType == TokRParen
512	\|\| tokenType == TokComma
513	\|\| tokenType == TokDot
514	\|\| tokenType == TokAssign
515	\|\| tokenType == TokSemi);
516	token.type = tokenType;
517	token.end = ++m_ptr;
518	return tokenType;
519	}
520	}
521	m_lexErrorMessage = makeString("Unrecognized token '", StringView { m_ptr, `1` }, `'\''`);
522	return TokError;
523	}
524
525	template <>
526	ALWAYS_INLINE TokenType LiteralParser<LChar>::Lexer::lexIdentifier(LiteralParserToken<LChar>& token)
527	{
528	while (m_ptr < m_end && (isASCIIAlphanumeric(m_ptr) \|\| m_ptr == `'_'` \|\| *m_ptr == `'$'`))
529	m_ptr++;
530	token.stringIs8Bit = `1`;
531	token.stringToken8 = token.start;
532	token.stringLength = m_ptr - token.start;
533	token.type = TokIdentifier;
534	token.end = m_ptr;
535	return TokIdentifier;
536	}
537
538	template <>
539	ALWAYS_INLINE TokenType LiteralParser<UChar>::Lexer::lexIdentifier(LiteralParserToken<UChar>& token)
540	{
541	while (m_ptr < m_end && (isASCIIAlphanumeric(m_ptr) \|\| m_ptr == `'_'` \|\| m_ptr == `'$'` \|\| m_ptr == `0x200C` \|\| *m_ptr == `0x200D`))
542	m_ptr++;
543	token.stringIs8Bit = `0`;
544	token.stringToken16 = token.start;
545	token.stringLength = m_ptr - token.start;
546	token.type = TokIdentifier;
547	token.end = m_ptr;
548	return TokIdentifier;
549	}
550
551	template <typename CharType>
552	TokenType LiteralParser<CharType>::Lexer::next()
553	{
554	TokenType result = lex(m_currentToken);
555	ASSERT(m_currentToken.type == result);
556	return result;
557	}
558
559	template <>
560	ALWAYS_INLINE void setParserTokenString<LChar>(LiteralParserToken<LChar>& token, const LChar* string)
561	{
562	token.stringIs8Bit = `1`;
563	token.stringToken8 = string;
564	}
565
566	template <>
567	ALWAYS_INLINE void setParserTokenString<UChar>(LiteralParserToken<UChar>& token, const UChar* string)
568	{
569	token.stringIs8Bit = `0`;
570	token.stringToken16 = string;
571	}
572
// Selects which rule set the isSafeStringCharacter() overloads apply when deciding
// whether a string character can be scanned over without special handling.
enum class SafeStringCharacterSet { Strict, NonStrict };
574
575	template <SafeStringCharacterSet set>
576	static ALWAYS_INLINE bool isSafeStringCharacter(LChar c, LChar terminator)
577	{
578	return (c >= `' '` && c != `'\\'` && c != terminator) \|\| (c == `'\t'` && set != SafeStringCharacterSet::Strict);
579	}
580
581	template <SafeStringCharacterSet set>
582	static ALWAYS_INLINE bool isSafeStringCharacter(UChar c, UChar terminator)
583	{
584	return (c >= `' '` && (set == SafeStringCharacterSet::Strict \|\| isLatin1(c)) && c != `'\\'` && c != terminator) \|\| (c == `'\t'` && set != SafeStringCharacterSet::Strict);
585	}
586
587	template <typename CharType>
588	ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lexString(LiteralParserToken<CharType>& token, CharType terminator)
589	{
590	++m_ptr;
591	const CharType* runStart = m_ptr;
592
593	if (m_mode == StrictJSON) {
594	while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
595	++m_ptr;
596	} else {
597	while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
598	++m_ptr;
599	}
600
601	if (LIKELY(m_ptr < m_end && *m_ptr == terminator)) {
602	setParserTokenString<CharType>(token, runStart);
603	token.stringLength = m_ptr - runStart;
604	token.type = TokString;
605	token.end = ++m_ptr;
606	return TokString;
607	}
608	return lexStringSlow(token, runStart, terminator);
609	}
610
611	template <typename CharType>
612	TokenType LiteralParser<CharType>::Lexer::lexStringSlow(LiteralParserToken<CharType>& token, const CharType* runStart, CharType terminator)
613	{
614	m_builder.clear();
615	goto slowPathBegin;
616	do {
617	runStart = m_ptr;
618	if (m_mode == StrictJSON) {
619	while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
620	++m_ptr;
621	} else {
622	while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
623	++m_ptr;
624	}
625
626	if (!m_builder.isEmpty())
627	m_builder.appendCharacters(runStart, m_ptr - runStart);
628
629	slowPathBegin:
630	if ((m_mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == `'\\'`) {
631	if (m_builder.isEmpty() && runStart < m_ptr)
632	m_builder.appendCharacters(runStart, m_ptr - runStart);
633	++m_ptr;
634	if (m_ptr >= m_end) {
635	m_lexErrorMessage = "Unterminated string"_s;
636	return TokError;
637	}
638	switch (*m_ptr) {
639	case `'"'`:
640	m_builder.append(`'"'`);
641	m_ptr++;
642	break;
643	case `'\\'`:
644	m_builder.append(`'\\'`);
645	m_ptr++;
646	break;
647	case `'/'`:
648	m_builder.append(`'/'`);
649	m_ptr++;
650	break;
651	case `'b'`:
652	m_builder.append(`'\b'`);
653	m_ptr++;
654	break;
655	case `'f'`:
656	m_builder.append(`'\f'`);
657	m_ptr++;
658	break;
659	case `'n'`:
660	m_builder.append(`'\n'`);
661	m_ptr++;
662	break;
663	case `'r'`:
664	m_builder.append(`'\r'`);
665	m_ptr++;
666	break;
667	case `'t'`:
668	m_builder.append(`'\t'`);
669	m_ptr++;
670	break;
671
672	case `'u'`:
673	if ((m_end - m_ptr) < `5`) {
674	m_lexErrorMessage = "\\u must be followed by 4 hex digits"_s;
675	return TokError;
676	} // uNNNN == 5 characters
677	for (int i = `1`; i < `5`; i++) {
678	if (!isASCIIHexDigit(m_ptr[i])) {
679	m_lexErrorMessage = makeString("\"\\", StringView { m_ptr, `5` }, "\" is not a valid unicode escape");
680	return TokError;
681	}
682	}
683	m_builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[`1`], m_ptr[`2`], m_ptr[`3`], m_ptr[`4`]));
684	m_ptr += `5`;
685	break;
686
687	default:
688	if (*m_ptr == `'\''` && m_mode != StrictJSON) {
689	m_builder.append(`'\''`);
690	m_ptr++;
691	break;
692	}
693	m_lexErrorMessage = makeString("Invalid escape character ", StringView { m_ptr, `1` });
694	return TokError;
695	}
696	}
697	} while ((m_mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator);
698
699	if (m_ptr >= m_end \|\| *m_ptr != terminator) {
700	m_lexErrorMessage = "Unterminated string"_s;
701	return TokError;
702	}
703
704	if (m_builder.isEmpty()) {
705	setParserTokenString<CharType>(token, runStart);
706	token.stringLength = m_ptr - runStart;
707	} else {
708	if (m_builder.is8Bit()) {
709	token.stringIs8Bit = `1`;
710	token.stringToken8 = m_builder.characters8();
711	} else {
712	token.stringIs8Bit = `0`;
713	token.stringToken16 = m_builder.characters16();
714	}
715	token.stringLength = m_builder.length();
716	}
717	token.type = TokString;
718	token.end = ++m_ptr;
719	return TokString;
720	}
721
722	template <typename CharType>
723	TokenType LiteralParser<CharType>::Lexer::lexNumber(LiteralParserToken<CharType>& token)
724	{
725	// ES5 and json.org define numbers as
726	// number
727	// int
728	// int frac? exp?
729	//
730	// int
731	// -? 0
732	// -? digit1-9 digits?
733	//
734	// digits
735	// digit digits?
736	//
737	// -?(0 \| [1-9][0-9]) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?*
738
739	if (m_ptr < m_end && m_ptr == `'-'`) // -?*
740	++m_ptr;
741
742	// (0 \| [1-9][0-9])*
743	if (m_ptr < m_end && m_ptr == `'0'`) // 0*
744	++m_ptr;
745	else if (m_ptr < m_end && m_ptr >= `'1'` && m_ptr <= `'9'`) { // [1-9]
746	++m_ptr;
747	// [0-9]*
748	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
749	++m_ptr;
750	} else {
751	m_lexErrorMessage = "Invalid number"_s;
752	return TokError;
753	}
754
755	// ('.' [0-9]+)?
756	const int NumberOfDigitsForSafeInt32 = `9`; // The numbers from -99999999 to 999999999 are always in range of Int32.
757	if (m_ptr < m_end && *m_ptr == `'.'`) {
758	++m_ptr;
759	// [0-9]+
760	if (m_ptr >= m_end \|\| !isASCIIDigit(*m_ptr)) {
761	m_lexErrorMessage = "Invalid digits after decimal point"_s;
762	return TokError;
763	}
764
765	++m_ptr;
766	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
767	++m_ptr;
768	} else if (m_ptr < m_end && (m_ptr != `'e'` && m_ptr != `'E'`) && (m_ptr - token.start) <= NumberOfDigitsForSafeInt32) {
769	int32_t result = `0`;
770	token.type = TokNumber;
771	token.end = m_ptr;
772	const CharType* digit = token.start;
773	bool negative = false;
774	if (*digit == `'-'`) {
775	negative = true;
776	digit++;
777	}
778
779	ASSERT((m_ptr - digit) <= NumberOfDigitsForSafeInt32);
780	while (digit < m_ptr)
781	result = result * `10` + (*digit++) - `'0'`;
782
783	if (!negative)
784	token.numberToken = result;
785	else {
786	if (!result)
787	token.numberToken = -`0.0`;
788	else
789	token.numberToken = -result;
790	}
791	return TokNumber;
792	}
793
794	// ([eE][+-]? [0-9]+)?
795	if (m_ptr < m_end && (m_ptr == `'e'` \|\| m_ptr == `'E'`)) { // [eE]
796	++m_ptr;
797
798	// [-+]?
799	if (m_ptr < m_end && (m_ptr == `'-'` \|\| m_ptr == `'+'`))
800	++m_ptr;
801
802	// [0-9]+
803	if (m_ptr >= m_end \|\| !isASCIIDigit(*m_ptr)) {
804	m_lexErrorMessage = "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number"_s;
805	return TokError;
806	}
807
808	++m_ptr;
809	while (m_ptr < m_end && isASCIIDigit(*m_ptr))
810	++m_ptr;
811	}
812
813	token.type = TokNumber;
814	token.end = m_ptr;
815	size_t parsedLength;
816	token.numberToken = parseDouble(token.start, token.end - token.start, parsedLength);
817	return TokNumber;
818	}
819
820	template <typename CharType>
821	JSValue LiteralParser<CharType>::parse(ParserState initialState)
822	{
823	VM& vm = m_globalObject->vm();
824	auto scope = DECLARE_THROW_SCOPE(vm);
825	ParserState state = initialState;
826	MarkedArgumentBuffer objectStack;
827	JSValue lastValue;
828	Vector<ParserState, `16`, UnsafeVectorOverflow> stateStack;
829	Vector<Identifier, `16`, UnsafeVectorOverflow> identifierStack;
830	HashSet<JSObject*> visitedUnderscoreProto;
831	while (`1`) {
832	switch(state) {
833	startParseArray:
834	case StartParseArray: {
835	JSArray* array = constructEmptyArray(m_globalObject, `0`);
836	RETURN_IF_EXCEPTION(scope, JSValue ());
837	objectStack.appendWithCrashOnOverflow(array);
838	}
839	doParseArrayStartExpression:
840	FALLTHROUGH;
841	case DoParseArrayStartExpression: {
842	TokenType lastToken = m_lexer.currentToken()->type;
843	if (m_lexer.next() == TokRBracket) {
844	if (lastToken == TokComma) {
845	m_parseErrorMessage = "Unexpected comma at the end of array expression"_s;
846	return JSValue ();
847	}
848	m_lexer.next();
849	lastValue = objectStack.takeLast();
850	break;
851	}
852
853	stateStack.append(DoParseArrayEndExpression);
854	goto startParseExpression;
855	}
856	case DoParseArrayEndExpression: {
857	JSArray* array = asArray(objectStack.last());
858	array->putDirectIndex(m_globalObject, array->length(), lastValue);
859	RETURN_IF_EXCEPTION(scope, JSValue ());
860
861	if (m_lexer.currentToken()->type == TokComma)
862	goto doParseArrayStartExpression;
863
864	if (m_lexer.currentToken()->type != TokRBracket) {
865	m_parseErrorMessage = "Expected ']'"_s;
866	return JSValue ();
867	}
868
869	m_lexer.next();
870	lastValue = objectStack.takeLast();
871	break;
872	}
873	startParseObject:
874	case StartParseObject: {
875	JSObject* object = constructEmptyObject(m_globalObject);
876	objectStack.appendWithCrashOnOverflow(object);
877
878	TokenType type = m_lexer.next();
879	if (type == TokString \|\| (m_mode != StrictJSON && type == TokIdentifier)) {
880	typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
881	if (identifierToken->stringIs8Bit)
882	identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
883	else
884	identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));
885
886	// Check for colon
887	if (m_lexer.next() != TokColon) {
888	m_parseErrorMessage = "Expected ':' before value in object property definition"_s;
889	return JSValue ();
890	}
891
892	m_lexer.next();
893	stateStack.append(DoParseObjectEndExpression);
894	goto startParseExpression;
895	}
896	if (type != TokRBrace) {
897	m_parseErrorMessage = "Expected '}'"_s;
898	return JSValue ();
899	}
900	m_lexer.next();
901	lastValue = objectStack.takeLast();
902	break;
903	}
904	doParseObjectStartExpression:
905	case DoParseObjectStartExpression: {
906	TokenType type = m_lexer.next();
907	if (type != TokString && (m_mode == StrictJSON \|\| type != TokIdentifier)) {
908	m_parseErrorMessage = "Property name must be a string literal"_s;
909	return JSValue ();
910	}
911	typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
912	if (identifierToken->stringIs8Bit)
913	identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
914	else
915	identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));
916
917	// Check for colon
918	if (m_lexer.next() != TokColon) {
919	m_parseErrorMessage = "Expected ':'"_s;
920	return JSValue ();
921	}
922
923	m_lexer.next();
924	stateStack.append(DoParseObjectEndExpression);
925	goto startParseExpression;
926	}
927	case DoParseObjectEndExpression:
928	{
929	JSObject* object = asObject(objectStack.last());
930	Identifier ident = identifierStack.takeLast();
931	if (m_mode != StrictJSON && ident == vm.propertyNames->underscoreProto) {
932	if (!visitedUnderscoreProto.add(object).isNewEntry) {
933	m_parseErrorMessage = "Attempted to redefine __proto__ property"_s;
934	return JSValue ();
935	}
936	PutPropertySlot slot(object, m_nullOrCodeBlock ? m_nullOrCodeBlock->isStrictMode() : false);
937	objectStack.last().put(m_globalObject, ident, lastValue, slot);
938	} else {
939	if (Optional<uint32_t> index = parseIndex(ident))
940	object->putDirectIndex(m_globalObject, index.value(), lastValue);
941	else
942	object->putDirect(vm, ident, lastValue);
943	}
944	RETURN_IF_EXCEPTION(scope, JSValue ());
945	if (m_lexer.currentToken()->type == TokComma)
946	goto doParseObjectStartExpression;
947	if (m_lexer.currentToken()->type != TokRBrace) {
948	m_parseErrorMessage = "Expected '}'"_s;
949	return JSValue ();
950	}
951	m_lexer.next();
952	lastValue = objectStack.takeLast();
953	break;
954	}
955	startParseExpression:
956	case StartParseExpression: {
957	switch (m_lexer.currentToken()->type) {
958	case TokLBracket:
959	goto startParseArray;
960	case TokLBrace:
961	goto startParseObject;
962	case TokString: {
963	typename Lexer::LiteralParserTokenPtr stringToken = m_lexer.currentToken();
964	if (stringToken->stringIs8Bit)
965	lastValue = jsString(vm, makeIdentifier(stringToken->stringToken8, stringToken->stringLength).string());
966	else
967	lastValue = jsString(vm, makeIdentifier(stringToken->stringToken16, stringToken->stringLength).string());
968	m_lexer.next();
969	break;
970	}
971	case TokNumber: {
972	typename Lexer::LiteralParserTokenPtr numberToken = m_lexer.currentToken();
973	lastValue = jsNumber(numberToken->numberToken);
974	m_lexer.next();
975	break;
976	}
977	case TokNull:
978	m_lexer.next();
979	lastValue = jsNull();
980	break;
981
982	case TokTrue:
983	m_lexer.next();
984	lastValue = jsBoolean(true);
985	break;
986
987	case TokFalse:
988	m_lexer.next();
989	lastValue = jsBoolean(false);
990	break;
991	case TokRBracket:
992	m_parseErrorMessage = "Unexpected token ']'"_s;
993	return JSValue ();
994	case TokRBrace:
995	m_parseErrorMessage = "Unexpected token '}'"_s;
996	return JSValue ();
997	case TokIdentifier: {
998	typename Lexer::LiteralParserTokenPtr token = m_lexer.currentToken();
999	if (token->stringIs8Bit)
1000	m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken8, token->stringLength }, `'"'`);
1001	else
1002	m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken16, token->stringLength }, `'"'`);
1003	return JSValue ();
1004	}
1005	case TokColon:
1006	m_parseErrorMessage = "Unexpected token ':'"_s;
1007	return JSValue ();
1008	case TokLParen:
1009	m_parseErrorMessage = "Unexpected token '('"_s;
1010	return JSValue ();
1011	case TokRParen:
1012	m_parseErrorMessage = "Unexpected token ')'"_s;
1013	return JSValue ();
1014	case TokComma:
1015	m_parseErrorMessage = "Unexpected token ','"_s;
1016	return JSValue ();
1017	case TokDot:
1018	m_parseErrorMessage = "Unexpected token '.'"_s;
1019	return JSValue ();
1020	case TokAssign:
1021	m_parseErrorMessage = "Unexpected token '='"_s;
1022	return JSValue ();
1023	case TokSemi:
1024	m_parseErrorMessage = "Unexpected token ';'"_s;
1025	return JSValue ();
1026	case TokEnd:
1027	m_parseErrorMessage = "Unexpected EOF"_s;
1028	return JSValue ();
1029	case TokError:
1030	default:
1031	// Error
1032	m_parseErrorMessage = "Could not parse value expression"_s;
1033	return JSValue ();
1034	}
1035	break;
1036	}
1037	case StartParseStatement: {
1038	switch (m_lexer.currentToken()->type) {
1039	case TokLBracket:
1040	case TokNumber:
1041	case TokString:
1042	goto startParseExpression;
1043
1044	case TokLParen: {
1045	m_lexer.next();
1046	stateStack.append(StartParseStatementEndStatement);
1047	goto startParseExpression;
1048	}
1049	case TokRBracket:
1050	m_parseErrorMessage = "Unexpected token ']'"_s;
1051	return JSValue ();
1052	case TokLBrace:
1053	m_parseErrorMessage = "Unexpected token '{'"_s;
1054	return JSValue ();
1055	case TokRBrace:
1056	m_parseErrorMessage = "Unexpected token '}'"_s;
1057	return JSValue ();
1058	case TokIdentifier:
1059	m_parseErrorMessage = "Unexpected identifier"_s;
1060	return JSValue ();
1061	case TokColon:
1062	m_parseErrorMessage = "Unexpected token ':'"_s;
1063	return JSValue ();
1064	case TokRParen:
1065	m_parseErrorMessage = "Unexpected token ')'"_s;
1066	return JSValue ();
1067	case TokComma:
1068	m_parseErrorMessage = "Unexpected token ','"_s;
1069	return JSValue ();
1070	case TokTrue:
1071	m_parseErrorMessage = "Unexpected token 'true'"_s;
1072	return JSValue ();
1073	case TokFalse:
1074	m_parseErrorMessage = "Unexpected token 'false'"_s;
1075	return JSValue ();
1076	case TokNull:
1077	m_parseErrorMessage = "Unexpected token 'null'"_s;
1078	return JSValue ();
1079	case TokEnd:
1080	m_parseErrorMessage = "Unexpected EOF"_s;
1081	return JSValue ();
1082	case TokDot:
1083	m_parseErrorMessage = "Unexpected token '.'"_s;
1084	return JSValue ();
1085	case TokAssign:
1086	m_parseErrorMessage = "Unexpected token '='"_s;
1087	return JSValue ();
1088	case TokSemi:
1089	m_parseErrorMessage = "Unexpected token ';'"_s;
1090	return JSValue ();
1091	case TokError:
1092	default:
1093	m_parseErrorMessage = "Could not parse statement"_s;
1094	return JSValue ();
1095	}
1096	}
1097	case StartParseStatementEndStatement: {
1098	ASSERT(stateStack.isEmpty());
1099	if (m_lexer.currentToken()->type != TokRParen)
1100	return JSValue ();
1101	if (m_lexer.next() == TokEnd)
1102	return lastValue;
1103	m_parseErrorMessage = "Unexpected content at end of JSON literal"_s;
1104	return JSValue ();
1105	}
1106	default:
1107	RELEASE_ASSERT_NOT_REACHED();
1108	}
1109	if (stateStack.isEmpty())
1110	return lastValue;
1111	state = stateStack.takeLast();
1112	continue;
1113	}
1114	}
1115
1116	// Explicitly instantiate the two flavors of LiteralParser we need (LChar and UChar) here, instead of putting most of this file in LiteralParser.h.
1117	template class LiteralParser<LChar>;
1118	template class LiteralParser<UChar>;
1119
1120	}
1121

Browse the source code of jsc/Source/JavaScriptCore/runtime/LiteralParser.cpp