1// Copyright 2016 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/json-parser.h"
6
7#include "src/char-predicates-inl.h"
8#include "src/conversions.h"
9#include "src/debug/debug.h"
10#include "src/field-type.h"
11#include "src/hash-seed-inl.h"
12#include "src/heap/heap-inl.h" // For string_table().
13#include "src/message-template.h"
14#include "src/objects-inl.h"
15#include "src/objects/hash-table-inl.h"
16#include "src/property-descriptor.h"
17#include "src/string-hasher.h"
18#include "src/transitions.h"
19
20namespace v8 {
21namespace internal {
22
23namespace {
24
25// A vector-like data structure that uses a larger vector for allocation, and
26// provides limited utility access. The original vector must not be used for the
27// duration, and it may even be reallocated. This allows vector storage to be
28// reused for the properties of sibling objects.
29template <typename Container>
30class VectorSegment {
31 public:
32 using value_type = typename Container::value_type;
33
34 explicit VectorSegment(Container* container)
35 : container_(*container), begin_(container->size()) {}
36 ~VectorSegment() { container_.resize(begin_); }
37
38 Vector<const value_type> GetVector() const {
39 return VectorOf(container_) + begin_;
40 }
41
42 template <typename T>
43 void push_back(T&& value) {
44 container_.push_back(std::forward<T>(value));
45 }
46
47 private:
48 Container& container_;
49 const typename Container::size_type begin_;
50};
51
52} // namespace
53
54MaybeHandle<Object> JsonParseInternalizer::Internalize(Isolate* isolate,
55 Handle<Object> object,
56 Handle<Object> reviver) {
57 DCHECK(reviver->IsCallable());
58 JsonParseInternalizer internalizer(isolate,
59 Handle<JSReceiver>::cast(reviver));
60 Handle<JSObject> holder =
61 isolate->factory()->NewJSObject(isolate->object_function());
62 Handle<String> name = isolate->factory()->empty_string();
63 JSObject::AddProperty(isolate, holder, name, object, NONE);
64 return internalizer.InternalizeJsonProperty(holder, name);
65}
66
67MaybeHandle<Object> JsonParseInternalizer::InternalizeJsonProperty(
68 Handle<JSReceiver> holder, Handle<String> name) {
69 HandleScope outer_scope(isolate_);
70 Handle<Object> value;
71 ASSIGN_RETURN_ON_EXCEPTION(
72 isolate_, value, Object::GetPropertyOrElement(isolate_, holder, name),
73 Object);
74 if (value->IsJSReceiver()) {
75 Handle<JSReceiver> object = Handle<JSReceiver>::cast(value);
76 Maybe<bool> is_array = Object::IsArray(object);
77 if (is_array.IsNothing()) return MaybeHandle<Object>();
78 if (is_array.FromJust()) {
79 Handle<Object> length_object;
80 ASSIGN_RETURN_ON_EXCEPTION(
81 isolate_, length_object,
82 Object::GetLengthFromArrayLike(isolate_, object), Object);
83 double length = length_object->Number();
84 for (double i = 0; i < length; i++) {
85 HandleScope inner_scope(isolate_);
86 Handle<Object> index = isolate_->factory()->NewNumber(i);
87 Handle<String> name = isolate_->factory()->NumberToString(index);
88 if (!RecurseAndApply(object, name)) return MaybeHandle<Object>();
89 }
90 } else {
91 Handle<FixedArray> contents;
92 ASSIGN_RETURN_ON_EXCEPTION(
93 isolate_, contents,
94 KeyAccumulator::GetKeys(object, KeyCollectionMode::kOwnOnly,
95 ENUMERABLE_STRINGS,
96 GetKeysConversion::kConvertToString),
97 Object);
98 for (int i = 0; i < contents->length(); i++) {
99 HandleScope inner_scope(isolate_);
100 Handle<String> name(String::cast(contents->get(i)), isolate_);
101 if (!RecurseAndApply(object, name)) return MaybeHandle<Object>();
102 }
103 }
104 }
105 Handle<Object> argv[] = {name, value};
106 Handle<Object> result;
107 ASSIGN_RETURN_ON_EXCEPTION(
108 isolate_, result, Execution::Call(isolate_, reviver_, holder, 2, argv),
109 Object);
110 return outer_scope.CloseAndEscape(result);
111}
112
113bool JsonParseInternalizer::RecurseAndApply(Handle<JSReceiver> holder,
114 Handle<String> name) {
115 STACK_CHECK(isolate_, false);
116
117 Handle<Object> result;
118 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
119 isolate_, result, InternalizeJsonProperty(holder, name), false);
120 Maybe<bool> change_result = Nothing<bool>();
121 if (result->IsUndefined(isolate_)) {
122 change_result = JSReceiver::DeletePropertyOrElement(holder, name,
123 LanguageMode::kSloppy);
124 } else {
125 PropertyDescriptor desc;
126 desc.set_value(result);
127 desc.set_configurable(true);
128 desc.set_enumerable(true);
129 desc.set_writable(true);
130 change_result = JSReceiver::DefineOwnProperty(isolate_, holder, name, &desc,
131 Just(kDontThrow));
132 }
133 MAYBE_RETURN(change_result, false);
134 return true;
135}
136
137template <bool seq_one_byte>
138JsonParser<seq_one_byte>::JsonParser(Isolate* isolate, Handle<String> source)
139 : source_(source),
140 source_length_(source->length()),
141 isolate_(isolate),
142 zone_(isolate_->allocator(), ZONE_NAME),
143 object_constructor_(isolate_->native_context()->object_function(),
144 isolate_),
145 position_(-1),
146 properties_(&zone_) {
147 source_ = String::Flatten(isolate, source_);
148 allocation_ = (source_length_ >= kPretenureTreshold) ? AllocationType::kOld
149 : AllocationType::kYoung;
150
151 // Optimized fast case where we only have Latin1 characters.
152 if (seq_one_byte) {
153 seq_source_ = Handle<SeqOneByteString>::cast(source_);
154 }
155}
156
157template <bool seq_one_byte>
158MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() {
159 // Advance to the first character (possibly EOS)
160 AdvanceSkipWhitespace();
161 Handle<Object> result = ParseJsonValue();
162 if (result.is_null() || c0_ != kEndOfString) {
163 // Some exception (for example stack overflow) is already pending.
164 if (isolate_->has_pending_exception()) return Handle<Object>::null();
165
166 // Parse failed. Current character is the unexpected token.
167 Factory* factory = this->factory();
168 MessageTemplate message;
169 Handle<Object> arg1 = Handle<Smi>(Smi::FromInt(position_), isolate());
170 Handle<Object> arg2;
171
172 switch (c0_) {
173 case kEndOfString:
174 message = MessageTemplate::kJsonParseUnexpectedEOS;
175 break;
176 case '-':
177 case '0':
178 case '1':
179 case '2':
180 case '3':
181 case '4':
182 case '5':
183 case '6':
184 case '7':
185 case '8':
186 case '9':
187 message = MessageTemplate::kJsonParseUnexpectedTokenNumber;
188 break;
189 case '"':
190 message = MessageTemplate::kJsonParseUnexpectedTokenString;
191 break;
192 default:
193 message = MessageTemplate::kJsonParseUnexpectedToken;
194 arg2 = arg1;
195 arg1 = factory->LookupSingleCharacterStringFromCode(c0_);
196 break;
197 }
198
199 Handle<Script> script(factory->NewScript(source_));
200 if (isolate()->NeedsSourcePositionsForProfiling()) {
201 Script::InitLineEnds(script);
202 }
203 // We should sent compile error event because we compile JSON object in
204 // separated source file.
205 isolate()->debug()->OnCompileError(script);
206 MessageLocation location(script, position_, position_ + 1);
207 Handle<Object> error = factory->NewSyntaxError(message, arg1, arg2);
208 return isolate()->template Throw<Object>(error, &location);
209 }
210 return result;
211}
212
213MaybeHandle<Object> InternalizeJsonProperty(Handle<JSObject> holder,
214 Handle<String> key);
215
216template <bool seq_one_byte>
217void JsonParser<seq_one_byte>::Advance() {
218 position_++;
219 if (position_ >= source_length_) {
220 c0_ = kEndOfString;
221 } else if (seq_one_byte) {
222 c0_ = seq_source_->SeqOneByteStringGet(position_);
223 } else {
224 c0_ = source_->Get(position_);
225 }
226}
227
228template <bool seq_one_byte>
229void JsonParser<seq_one_byte>::AdvanceSkipWhitespace() {
230 do {
231 Advance();
232 } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r');
233}
234
235template <bool seq_one_byte>
236void JsonParser<seq_one_byte>::SkipWhitespace() {
237 while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') {
238 Advance();
239 }
240}
241
242template <bool seq_one_byte>
243uc32 JsonParser<seq_one_byte>::AdvanceGetChar() {
244 Advance();
245 return c0_;
246}
247
248template <bool seq_one_byte>
249bool JsonParser<seq_one_byte>::MatchSkipWhiteSpace(uc32 c) {
250 if (c0_ == c) {
251 AdvanceSkipWhitespace();
252 return true;
253 }
254 return false;
255}
256
257template <bool seq_one_byte>
258bool JsonParser<seq_one_byte>::ParseJsonString(Handle<String> expected) {
259 int length = expected->length();
260 if (source_->length() - position_ - 1 > length) {
261 DisallowHeapAllocation no_gc;
262 String::FlatContent content = expected->GetFlatContent(no_gc);
263 if (content.IsOneByte()) {
264 DCHECK_EQ('"', c0_);
265 const uint8_t* input_chars = seq_source_->GetChars(no_gc) + position_ + 1;
266 const uint8_t* expected_chars = content.ToOneByteVector().start();
267 for (int i = 0; i < length; i++) {
268 uint8_t c0 = input_chars[i];
269 if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') {
270 return false;
271 }
272 }
273 if (input_chars[length] == '"') {
274 position_ = position_ + length + 1;
275 AdvanceSkipWhitespace();
276 return true;
277 }
278 }
279 }
280 return false;
281}
282
283// Parse any JSON value.
284template <bool seq_one_byte>
285Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() {
286 StackLimitCheck stack_check(isolate_);
287 if (stack_check.HasOverflowed()) {
288 isolate_->StackOverflow();
289 return Handle<Object>::null();
290 }
291
292 if (stack_check.InterruptRequested() &&
293 isolate_->stack_guard()->HandleInterrupts()->IsException(isolate_)) {
294 return Handle<Object>::null();
295 }
296
297 if (c0_ == '"') return ParseJsonString();
298 if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
299 if (c0_ == '{') return ParseJsonObject();
300 if (c0_ == '[') return ParseJsonArray();
301 if (c0_ == 'f') {
302 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
303 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
304 AdvanceSkipWhitespace();
305 return factory()->false_value();
306 }
307 return ReportUnexpectedCharacter();
308 }
309 if (c0_ == 't') {
310 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
311 AdvanceGetChar() == 'e') {
312 AdvanceSkipWhitespace();
313 return factory()->true_value();
314 }
315 return ReportUnexpectedCharacter();
316 }
317 if (c0_ == 'n') {
318 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
319 AdvanceGetChar() == 'l') {
320 AdvanceSkipWhitespace();
321 return factory()->null_value();
322 }
323 return ReportUnexpectedCharacter();
324 }
325 return ReportUnexpectedCharacter();
326}
327
328template <bool seq_one_byte>
329ParseElementResult JsonParser<seq_one_byte>::ParseElement(
330 Handle<JSObject> json_object) {
331 uint32_t index = 0;
332 // Maybe an array index, try to parse it.
333 if (c0_ == '0') {
334 // With a leading zero, the string has to be "0" only to be an index.
335 Advance();
336 } else {
337 do {
338 int d = c0_ - '0';
339 if (index > 429496729U - ((d + 3) >> 3)) break;
340 index = (index * 10) + d;
341 Advance();
342 } while (IsDecimalDigit(c0_));
343 }
344
345 if (c0_ == '"') {
346 // Successfully parsed index, parse and store element.
347 AdvanceSkipWhitespace();
348
349 if (c0_ == ':') {
350 AdvanceSkipWhitespace();
351 Handle<Object> value = ParseJsonValue();
352 if (!value.is_null()) {
353 JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE)
354 .Assert();
355 return kElementFound;
356 } else {
357 return kNullHandle;
358 }
359 }
360 }
361 return kElementNotFound;
362}
363
364// Parse a JSON object. Position must be right at '{'.
365template <bool seq_one_byte>
366Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() {
367 HandleScope scope(isolate());
368 Handle<JSObject> json_object =
369 factory()->NewJSObject(object_constructor(), allocation_);
370 Handle<Map> map(json_object->map(), isolate());
371 int descriptor = 0;
372 VectorSegment<ZoneVector<Handle<Object>>> properties(&properties_);
373 DCHECK_EQ(c0_, '{');
374
375 bool transitioning = true;
376
377 AdvanceSkipWhitespace();
378 if (c0_ != '}') {
379 do {
380 if (c0_ != '"') return ReportUnexpectedCharacter();
381
382 int start_position = position_;
383 Advance();
384
385 if (IsDecimalDigit(c0_)) {
386 ParseElementResult element_result = ParseElement(json_object);
387 if (element_result == kNullHandle) return Handle<Object>::null();
388 if (element_result == kElementFound) continue;
389 }
390 // Not an index, fallback to the slow path.
391
392 position_ = start_position;
393#ifdef DEBUG
394 c0_ = '"';
395#endif
396
397 Handle<String> key;
398 Handle<Object> value;
399
400 // Try to follow existing transitions as long as possible. Once we stop
401 // transitioning, no transition can be found anymore.
402 DCHECK(transitioning);
403 // First check whether there is a single expected transition. If so, try
404 // to parse it first.
405 bool follow_expected = false;
406 Handle<Map> target;
407 if (seq_one_byte) {
408 DisallowHeapAllocation no_gc;
409 TransitionsAccessor transitions(isolate(), *map, &no_gc);
410 key = transitions.ExpectedTransitionKey();
411 follow_expected = !key.is_null() && ParseJsonString(key);
412 // If the expected transition hits, follow it.
413 if (follow_expected) {
414 target = transitions.ExpectedTransitionTarget();
415 }
416 }
417 if (!follow_expected) {
418 // If the expected transition failed, parse an internalized string and
419 // try to find a matching transition.
420 key = ParseJsonString();
421 if (key.is_null()) return ReportUnexpectedCharacter();
422
423 // If a transition was found, follow it and continue.
424 transitioning = TransitionsAccessor(isolate(), map)
425 .FindTransitionToField(key)
426 .ToHandle(&target);
427 }
428 if (c0_ != ':') return ReportUnexpectedCharacter();
429
430 AdvanceSkipWhitespace();
431 value = ParseJsonValue();
432 if (value.is_null()) return ReportUnexpectedCharacter();
433
434 if (transitioning) {
435 PropertyDetails details =
436 target->instance_descriptors()->GetDetails(descriptor);
437 Representation expected_representation = details.representation();
438
439 if (value->FitsRepresentation(expected_representation)) {
440 if (expected_representation.IsHeapObject() &&
441 !target->instance_descriptors()
442 ->GetFieldType(descriptor)
443 ->NowContains(value)) {
444 Handle<FieldType> value_type(
445 value->OptimalType(isolate(), expected_representation));
446 Map::GeneralizeField(isolate(), target, descriptor,
447 details.constness(), expected_representation,
448 value_type);
449 }
450 DCHECK(target->instance_descriptors()
451 ->GetFieldType(descriptor)
452 ->NowContains(value));
453 properties.push_back(value);
454 map = target;
455 descriptor++;
456 continue;
457 } else {
458 transitioning = false;
459 }
460 }
461
462 DCHECK(!transitioning);
463
464 // Commit the intermediate state to the object and stop transitioning.
465 CommitStateToJsonObject(json_object, map, properties.GetVector());
466
467 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value)
468 .Check();
469 } while (transitioning && MatchSkipWhiteSpace(','));
470
471 // If we transitioned until the very end, transition the map now.
472 if (transitioning) {
473 CommitStateToJsonObject(json_object, map, properties.GetVector());
474 } else {
475 while (MatchSkipWhiteSpace(',')) {
476 HandleScope local_scope(isolate());
477 if (c0_ != '"') return ReportUnexpectedCharacter();
478
479 int start_position = position_;
480 Advance();
481
482 if (IsDecimalDigit(c0_)) {
483 ParseElementResult element_result = ParseElement(json_object);
484 if (element_result == kNullHandle) return Handle<Object>::null();
485 if (element_result == kElementFound) continue;
486 }
487 // Not an index, fallback to the slow path.
488
489 position_ = start_position;
490#ifdef DEBUG
491 c0_ = '"';
492#endif
493
494 Handle<String> key;
495 Handle<Object> value;
496
497 key = ParseJsonString();
498 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
499
500 AdvanceSkipWhitespace();
501 value = ParseJsonValue();
502 if (value.is_null()) return ReportUnexpectedCharacter();
503
504 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key,
505 value)
506 .Check();
507 }
508 }
509
510 if (c0_ != '}') {
511 return ReportUnexpectedCharacter();
512 }
513 }
514 AdvanceSkipWhitespace();
515 return scope.CloseAndEscape(json_object);
516}
517
518template <bool seq_one_byte>
519void JsonParser<seq_one_byte>::CommitStateToJsonObject(
520 Handle<JSObject> json_object, Handle<Map> map,
521 Vector<const Handle<Object>> properties) {
522 JSObject::AllocateStorageForMap(json_object, map);
523 DCHECK(!json_object->map()->is_dictionary_map());
524
525 DisallowHeapAllocation no_gc;
526 DescriptorArray descriptors = json_object->map()->instance_descriptors();
527 for (int i = 0; i < properties.length(); i++) {
528 Handle<Object> value = properties[i];
529 // Initializing store.
530 json_object->WriteToField(i, descriptors->GetDetails(i), *value);
531 }
532}
533
534class ElementKindLattice {
535 private:
536 enum {
537 SMI_ELEMENTS,
538 NUMBER_ELEMENTS,
539 OBJECT_ELEMENTS,
540 };
541
542 public:
543 ElementKindLattice() : value_(SMI_ELEMENTS) {}
544
545 void Update(Handle<Object> o) {
546 if (o->IsSmi()) {
547 return;
548 } else if (o->IsHeapNumber()) {
549 if (value_ < NUMBER_ELEMENTS) value_ = NUMBER_ELEMENTS;
550 } else {
551 DCHECK(!o->IsNumber());
552 value_ = OBJECT_ELEMENTS;
553 }
554 }
555
556 ElementsKind GetElementsKind() const {
557 switch (value_) {
558 case SMI_ELEMENTS:
559 return PACKED_SMI_ELEMENTS;
560 case NUMBER_ELEMENTS:
561 return PACKED_DOUBLE_ELEMENTS;
562 case OBJECT_ELEMENTS:
563 return PACKED_ELEMENTS;
564 default:
565 UNREACHABLE();
566 return PACKED_ELEMENTS;
567 }
568 }
569
570 private:
571 int value_;
572};
573
574// Parse a JSON array. Position must be right at '['.
575template <bool seq_one_byte>
576Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() {
577 HandleScope scope(isolate());
578 ZoneVector<Handle<Object>> elements(zone());
579 DCHECK_EQ(c0_, '[');
580
581 ElementKindLattice lattice;
582
583 AdvanceSkipWhitespace();
584 if (c0_ != ']') {
585 do {
586 Handle<Object> element = ParseJsonValue();
587 if (element.is_null()) return ReportUnexpectedCharacter();
588 elements.push_back(element);
589 lattice.Update(element);
590 } while (MatchSkipWhiteSpace(','));
591 if (c0_ != ']') {
592 return ReportUnexpectedCharacter();
593 }
594 }
595 AdvanceSkipWhitespace();
596
597 // Allocate a fixed array with all the elements.
598
599 Handle<Object> json_array;
600 const ElementsKind kind = lattice.GetElementsKind();
601 int elements_size = static_cast<int>(elements.size());
602
603 switch (kind) {
604 case PACKED_ELEMENTS:
605 case PACKED_SMI_ELEMENTS: {
606 Handle<FixedArray> elems =
607 factory()->NewFixedArray(elements_size, allocation_);
608 for (int i = 0; i < elements_size; i++) elems->set(i, *elements[i]);
609 json_array = factory()->NewJSArrayWithElements(elems, kind, allocation_);
610 break;
611 }
612 case PACKED_DOUBLE_ELEMENTS: {
613 Handle<FixedDoubleArray> elems = Handle<FixedDoubleArray>::cast(
614 factory()->NewFixedDoubleArray(elements_size, allocation_));
615 for (int i = 0; i < elements_size; i++) {
616 elems->set(i, elements[i]->Number());
617 }
618 json_array = factory()->NewJSArrayWithElements(elems, kind, allocation_);
619 break;
620 }
621 default:
622 UNREACHABLE();
623 }
624
625 return scope.CloseAndEscape(json_array);
626}
627
628template <bool seq_one_byte>
629Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() {
630 bool negative = false;
631 int beg_pos = position_;
632 if (c0_ == '-') {
633 Advance();
634 negative = true;
635 }
636 if (c0_ == '0') {
637 Advance();
638 // Prefix zero is only allowed if it's the only digit before
639 // a decimal point or exponent.
640 if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
641 } else {
642 uint32_t i = 0;
643 int digits = 0;
644 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
645 do {
646 // This can overflow. That's OK, the "digits < 10" check below
647 // will discard overflown results.
648 i = i * 10 + c0_ - '0';
649 digits++;
650 Advance();
651 } while (IsDecimalDigit(c0_));
652 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
653 SkipWhitespace();
654 return Handle<Smi>(Smi::FromInt((negative ? -static_cast<int>(i) : i)),
655 isolate());
656 }
657 }
658 if (c0_ == '.') {
659 Advance();
660 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
661 do {
662 Advance();
663 } while (IsDecimalDigit(c0_));
664 }
665 if (AsciiAlphaToLower(c0_) == 'e') {
666 Advance();
667 if (c0_ == '-' || c0_ == '+') Advance();
668 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
669 do {
670 Advance();
671 } while (IsDecimalDigit(c0_));
672 }
673 int length = position_ - beg_pos;
674 double number;
675 if (seq_one_byte) {
676 DisallowHeapAllocation no_gc;
677 Vector<const uint8_t> chars(seq_source_->GetChars(no_gc) + beg_pos, length);
678 number = StringToDouble(chars,
679 NO_FLAGS, // Hex, octal or trailing junk.
680 std::numeric_limits<double>::quiet_NaN());
681 } else {
682 Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
683 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
684 Vector<const uint8_t> result =
685 Vector<const uint8_t>(buffer.start(), length);
686 number = StringToDouble(result,
687 NO_FLAGS, // Hex, octal or trailing junk.
688 0.0);
689 buffer.Dispose();
690 }
691 SkipWhitespace();
692 return factory()->NewNumber(number, allocation_);
693}
694
695template <typename StringType>
696inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
697
698template <>
699inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
700 seq_str->SeqTwoByteStringSet(i, c);
701}
702
703template <>
704inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) {
705 seq_str->SeqOneByteStringSet(i, c);
706}
707
708template <typename StringType>
709inline Handle<StringType> NewRawString(Factory* factory, int length,
710 AllocationType allocation);
711
712template <>
713inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length,
714 AllocationType allocation) {
715 return factory->NewRawTwoByteString(length, allocation).ToHandleChecked();
716}
717
718template <>
719inline Handle<SeqOneByteString> NewRawString(Factory* factory, int length,
720 AllocationType allocation) {
721 return factory->NewRawOneByteString(length, allocation).ToHandleChecked();
722}
723
724// Scans the rest of a JSON string starting from position_ and writes
725// prefix[start..end] along with the scanned characters into a
726// sequential string of type StringType.
727template <bool seq_one_byte>
728template <typename StringType, typename SinkChar>
729Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString(
730 Handle<String> prefix, int start, int end) {
731 int count = end - start;
732 int max_length = count + source_length_ - position_;
733 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
734 Handle<StringType> seq_string =
735 NewRawString<StringType>(factory(), length, allocation_);
736
737 {
738 DisallowHeapAllocation no_gc;
739 // Copy prefix into seq_str.
740 SinkChar* dest = seq_string->GetChars(no_gc);
741 String::WriteToFlat(*prefix, dest, start, end);
742 }
743
744 while (c0_ != '"') {
745 // Check for control character (0x00-0x1F) or unterminated string (<0).
746 if (c0_ < 0x20) return Handle<String>::null();
747 if (count >= length) {
748 // We need to create a longer sequential string for the result.
749 return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count);
750 }
751 if (c0_ != '\\') {
752 // If the sink can contain UC16 characters, or source_ contains only
753 // Latin1 characters, there's no need to test whether we can store the
754 // character. Otherwise check whether the UC16 source character can fit
755 // in the Latin1 sink.
756 if (sizeof(SinkChar) == kUC16Size || seq_one_byte ||
757 c0_ <= String::kMaxOneByteCharCode) {
758 SeqStringSet(seq_string, count++, c0_);
759 Advance();
760 } else {
761 // StringType is SeqOneByteString and we just read a non-Latin1 char.
762 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count);
763 }
764 } else {
765 Advance(); // Advance past the \.
766 switch (c0_) {
767 case '"':
768 case '\\':
769 case '/':
770 SeqStringSet(seq_string, count++, c0_);
771 break;
772 case 'b':
773 SeqStringSet(seq_string, count++, '\x08');
774 break;
775 case 'f':
776 SeqStringSet(seq_string, count++, '\x0C');
777 break;
778 case 'n':
779 SeqStringSet(seq_string, count++, '\x0A');
780 break;
781 case 'r':
782 SeqStringSet(seq_string, count++, '\x0D');
783 break;
784 case 't':
785 SeqStringSet(seq_string, count++, '\x09');
786 break;
787 case 'u': {
788 uc32 value = 0;
789 for (int i = 0; i < 4; i++) {
790 Advance();
791 int digit = HexValue(c0_);
792 if (digit < 0) {
793 return Handle<String>::null();
794 }
795 value = value * 16 + digit;
796 }
797 if (sizeof(SinkChar) == kUC16Size ||
798 value <= String::kMaxOneByteCharCode) {
799 SeqStringSet(seq_string, count++, value);
800 break;
801 } else {
802 // StringType is SeqOneByteString and we just read a non-Latin1
803 // char.
804 position_ -= 6; // Rewind position_ to \ in \uxxxx.
805 Advance();
806 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0,
807 count);
808 }
809 }
810 default:
811 return Handle<String>::null();
812 }
813 Advance();
814 }
815 }
816
817 DCHECK_EQ('"', c0_);
818 // Advance past the last '"'.
819 AdvanceSkipWhitespace();
820
821 // Shrink seq_string length to count and return.
822 return SeqString::Truncate(seq_string, count);
823}
824
825template <bool seq_one_byte>
826Handle<String> JsonParser<seq_one_byte>::ScanJsonString() {
827 DCHECK_EQ('"', c0_);
828 Advance();
829 if (c0_ == '"') {
830 AdvanceSkipWhitespace();
831 return factory()->empty_string();
832 }
833
834 if (seq_one_byte) {
835 // Fast path for existing internalized strings. If the the string being
836 // parsed is not a known internalized string, contains backslashes or
837 // unexpectedly reaches the end of string, return with an empty handle.
838
839 // We intentionally use local variables instead of fields, compute hash
840 // while we are iterating a string and manually inline StringTable lookup
841 // here.
842
843 int position = position_;
844 uc32 c0 = c0_;
845 uint32_t running_hash = static_cast<uint32_t>(HashSeed(isolate()));
846 uint32_t index = 0;
847 bool is_array_index = true;
848
849 do {
850 if (c0 == '\\') {
851 c0_ = c0;
852 int beg_pos = position_;
853 position_ = position;
854 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, beg_pos,
855 position_);
856 }
857 if (c0 < 0x20) {
858 c0_ = c0;
859 position_ = position;
860 return Handle<String>::null();
861 }
862 if (is_array_index) {
863 // With leading zero, the string has to be "0" to be a valid index.
864 if (!IsDecimalDigit(c0) || (position > position_ && index == 0)) {
865 is_array_index = false;
866 } else {
867 int d = c0 - '0';
868 is_array_index = index <= 429496729U - ((d + 3) >> 3);
869 index = (index * 10) + d;
870 }
871 }
872 running_hash = StringHasher::AddCharacterCore(running_hash,
873 static_cast<uint16_t>(c0));
874 position++;
875 if (position >= source_length_) {
876 c0_ = kEndOfString;
877 position_ = position;
878 return Handle<String>::null();
879 }
880 c0 = seq_source_->SeqOneByteStringGet(position);
881 } while (c0 != '"');
882 int length = position - position_;
883 uint32_t hash;
884 if (is_array_index) {
885 hash =
886 StringHasher::MakeArrayIndexHash(index, length) >> String::kHashShift;
887 } else if (length <= String::kMaxHashCalcLength) {
888 hash = StringHasher::GetHashCore(running_hash);
889 } else {
890 hash = static_cast<uint32_t>(length);
891 }
892 StringTable string_table = isolate()->heap()->string_table();
893 uint32_t capacity = string_table->Capacity();
894 uint32_t entry = StringTable::FirstProbe(hash, capacity);
895 uint32_t count = 1;
896 Handle<String> result;
897 while (true) {
898 Object element = string_table->KeyAt(entry);
899 if (element->IsUndefined(isolate())) {
900 // Lookup failure.
901 result =
902 factory()->InternalizeOneByteString(seq_source_, position_, length);
903 break;
904 }
905 if (!element->IsTheHole(isolate())) {
906 DisallowHeapAllocation no_gc;
907 Vector<const uint8_t> string_vector(
908 seq_source_->GetChars(no_gc) + position_, length);
909 if (String::cast(element)->IsOneByteEqualTo(string_vector)) {
910 result = Handle<String>(String::cast(element), isolate());
911 DCHECK_EQ(result->Hash(),
912 (hash << String::kHashShift) >> String::kHashShift);
913 break;
914 }
915 }
916 entry = StringTable::NextProbe(entry, count++, capacity);
917 }
918 position_ = position;
919 // Advance past the last '"'.
920 AdvanceSkipWhitespace();
921 return result;
922 }
923
924 int beg_pos = position_;
925 // Fast case for Latin1 only without escape characters.
926 do {
927 // Check for control character (0x00-0x1F) or unterminated string (<0).
928 if (c0_ < 0x20) return Handle<String>::null();
929 if (c0_ != '\\') {
930 if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) {
931 Advance();
932 } else {
933 return SlowScanJsonString<SeqTwoByteString, uc16>(source_, beg_pos,
934 position_);
935 }
936 } else {
937 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, beg_pos,
938 position_);
939 }
940 } while (c0_ != '"');
941 int length = position_ - beg_pos;
942 Handle<String> result =
943 factory()->NewRawOneByteString(length, allocation_).ToHandleChecked();
944 DisallowHeapAllocation no_gc;
945 uint8_t* dest = SeqOneByteString::cast(*result)->GetChars(no_gc);
946 String::WriteToFlat(*source_, dest, beg_pos, position_);
947
948 DCHECK_EQ('"', c0_);
949 // Advance past the last '"'.
950 AdvanceSkipWhitespace();
951 return result;
952}
953
954// Explicit instantiation.
955template class JsonParser<true>;
956template class JsonParser<false>;
957
958} // namespace internal
959} // namespace v8
960