1/*
2 * Copyright (C) 2010 Google Inc. All rights reserved.
3 * Copyright (C) 2014 University of Washington. All rights reserved.
4 * Copyright (C) 2017 Apple Inc. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
16 * * Neither the name of Google Inc. nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include "config.h"
34#include <wtf/JSONValues.h>
35
36#include <wtf/text/StringBuilder.h>
37
38namespace WTF {
39namespace JSONImpl {
40
41namespace {
42
// Upper bound on the nesting depth that buildValue() accepts before giving up.
constexpr int stackLimit = 1000;

// Lexical token kinds produced by parseToken().
enum class Token {
    ObjectBegin, // '{'
    ObjectEnd, // '}'
    ArrayBegin, // '['
    ArrayEnd, // ']'
    String, // double-quoted string literal
    Number, // numeric literal
    BoolTrue, // the literal "true"
    BoolFalse, // the literal "false"
    Null, // the literal "null"
    ListSeparator, // ','
    ObjectPairSeparator, // ':'
    Invalid, // end of input or anything unrecognized
};

// Spellings of the three keyword literals matched by parseConstToken().
constexpr const char* nullString = "null";
constexpr const char* trueString = "true";
constexpr const char* falseString = "false";
63
64bool parseConstToken(const UChar* start, const UChar* end, const UChar** tokenEnd, const char* token)
65{
66 while (start < end && *token != '\0' && *start++ == *token++) { }
67
68 if (*token != '\0')
69 return false;
70
71 *tokenEnd = start;
72 return true;
73}
74
75bool readInt(const UChar* start, const UChar* end, const UChar** tokenEnd, bool canHaveLeadingZeros)
76{
77 if (start == end)
78 return false;
79
80 bool haveLeadingZero = '0' == *start;
81 int length = 0;
82 while (start < end && '0' <= *start && *start <= '9') {
83 ++start;
84 ++length;
85 }
86
87 if (!length)
88 return false;
89
90 if (!canHaveLeadingZeros && length > 1 && haveLeadingZero)
91 return false;
92
93 *tokenEnd = start;
94 return true;
95}
96
97bool parseNumberToken(const UChar* start, const UChar* end, const UChar** tokenEnd)
98{
99 // We just grab the number here. We validate the size in DecodeNumber.
100 // According to RFC 4627, a valid number is: [minus] int [frac] [exp]
101 if (start == end)
102 return false;
103
104 UChar c = *start;
105 if ('-' == c)
106 ++start;
107
108 if (!readInt(start, end, &start, false))
109 return false;
110
111 if (start == end) {
112 *tokenEnd = start;
113 return true;
114 }
115
116 // Optional fraction part.
117 c = *start;
118 if ('.' == c) {
119 ++start;
120 if (!readInt(start, end, &start, true))
121 return false;
122 if (start == end) {
123 *tokenEnd = start;
124 return true;
125 }
126 c = *start;
127 }
128
129 // Optional exponent part.
130 if ('e' == c || 'E' == c) {
131 ++start;
132 if (start == end)
133 return false;
134 c = *start;
135 if ('-' == c || '+' == c) {
136 ++start;
137 if (start == end)
138 return false;
139 }
140 if (!readInt(start, end, &start, true))
141 return false;
142 }
143
144 *tokenEnd = start;
145 return true;
146}
147
148bool readHexDigits(const UChar* start, const UChar* end, const UChar** tokenEnd, int digits)
149{
150 if (end - start < digits)
151 return false;
152
153 for (int i = 0; i < digits; ++i) {
154 if (!isASCIIHexDigit(*start++))
155 return false;
156 }
157
158 *tokenEnd = start;
159 return true;
160}
161
162bool parseStringToken(const UChar* start, const UChar* end, const UChar** tokenEnd)
163{
164 while (start < end) {
165 UChar c = *start++;
166 if ('\\' == c) {
167 c = *start++;
168 // Make sure the escaped char is valid.
169 switch (c) {
170 case 'x':
171 if (!readHexDigits(start, end, &start, 2))
172 return false;
173 break;
174 case 'u':
175 if (!readHexDigits(start, end, &start, 4))
176 return false;
177 break;
178 case '\\':
179 case '/':
180 case 'b':
181 case 'f':
182 case 'n':
183 case 'r':
184 case 't':
185 case 'v':
186 case '"':
187 break;
188 default:
189 return false;
190 }
191 } else if ('"' == c) {
192 *tokenEnd = start;
193 return true;
194 }
195 }
196
197 return false;
198}
199
200Token parseToken(const UChar* start, const UChar* end, const UChar** tokenStart, const UChar** tokenEnd)
201{
202 while (start < end && isSpaceOrNewline(*start))
203 ++start;
204
205 if (start == end)
206 return Token::Invalid;
207
208 *tokenStart = start;
209
210 switch (*start) {
211 case 'n':
212 if (parseConstToken(start, end, tokenEnd, nullString))
213 return Token::Null;
214 break;
215 case 't':
216 if (parseConstToken(start, end, tokenEnd, trueString))
217 return Token::BoolTrue;
218 break;
219 case 'f':
220 if (parseConstToken(start, end, tokenEnd, falseString))
221 return Token::BoolFalse;
222 break;
223 case '[':
224 *tokenEnd = start + 1;
225 return Token::ArrayBegin;
226 case ']':
227 *tokenEnd = start + 1;
228 return Token::ArrayEnd;
229 case ',':
230 *tokenEnd = start + 1;
231 return Token::ListSeparator;
232 case '{':
233 *tokenEnd = start + 1;
234 return Token::ObjectBegin;
235 case '}':
236 *tokenEnd = start + 1;
237 return Token::ObjectEnd;
238 case ':':
239 *tokenEnd = start + 1;
240 return Token::ObjectPairSeparator;
241 case '0':
242 case '1':
243 case '2':
244 case '3':
245 case '4':
246 case '5':
247 case '6':
248 case '7':
249 case '8':
250 case '9':
251 case '-':
252 if (parseNumberToken(start, end, tokenEnd))
253 return Token::Number;
254 break;
255 case '"':
256 if (parseStringToken(start + 1, end, tokenEnd))
257 return Token::String;
258 break;
259 }
260
261 return Token::Invalid;
262}
263
264bool decodeString(const UChar* start, const UChar* end, StringBuilder& output)
265{
266 while (start < end) {
267 UChar c = *start++;
268 if ('\\' != c) {
269 output.append(c);
270 continue;
271 }
272 c = *start++;
273 switch (c) {
274 case '"':
275 case '/':
276 case '\\':
277 break;
278 case 'b':
279 c = '\b';
280 break;
281 case 'f':
282 c = '\f';
283 break;
284 case 'n':
285 c = '\n';
286 break;
287 case 'r':
288 c = '\r';
289 break;
290 case 't':
291 c = '\t';
292 break;
293 case 'v':
294 c = '\v';
295 break;
296 case 'x':
297 c = toASCIIHexValue(start[0], start[1]);
298 start += 2;
299 break;
300 case 'u':
301 c = toASCIIHexValue(start[0], start[1]) << 8 | toASCIIHexValue(start[2], start[3]);
302 start += 4;
303 break;
304 default:
305 return false;
306 }
307 output.append(c);
308 }
309
310 return true;
311}
312
313bool decodeString(const UChar* start, const UChar* end, String& output)
314{
315 if (start == end) {
316 output = emptyString();
317 return true;
318 }
319
320 if (start > end)
321 return false;
322
323 StringBuilder buffer;
324 buffer.reserveCapacity(end - start);
325 if (!decodeString(start, end, buffer))
326 return false;
327
328 output = buffer.toString();
329 return true;
330}
331
332RefPtr<JSON::Value> buildValue(const UChar* start, const UChar* end, const UChar** valueTokenEnd, int depth)
333{
334 if (depth > stackLimit)
335 return nullptr;
336
337 RefPtr<JSON::Value> result;
338 const UChar* tokenStart;
339 const UChar* tokenEnd;
340 Token token = parseToken(start, end, &tokenStart, &tokenEnd);
341 switch (token) {
342 case Token::Invalid:
343 return nullptr;
344 case Token::Null:
345 result = JSON::Value::null();
346 break;
347 case Token::BoolTrue:
348 result = JSON::Value::create(true);
349 break;
350 case Token::BoolFalse:
351 result = JSON::Value::create(false);
352 break;
353 case Token::Number: {
354 bool ok;
355 double value = charactersToDouble(tokenStart, tokenEnd - tokenStart, &ok);
356 if (!ok)
357 return nullptr;
358 result = JSON::Value::create(value);
359 break;
360 }
361 case Token::String: {
362 String value;
363 bool ok = decodeString(tokenStart + 1, tokenEnd - 1, value);
364 if (!ok)
365 return nullptr;
366 result = JSON::Value::create(value);
367 break;
368 }
369 case Token::ArrayBegin: {
370 Ref<JSON::Array> array = JSON::Array::create();
371 start = tokenEnd;
372 token = parseToken(start, end, &tokenStart, &tokenEnd);
373 while (token != Token::ArrayEnd) {
374 RefPtr<JSON::Value> arrayNode = buildValue(start, end, &tokenEnd, depth + 1);
375 if (!arrayNode)
376 return nullptr;
377 array->pushValue(WTFMove(arrayNode));
378
379 // After a list value, we expect a comma or the end of the list.
380 start = tokenEnd;
381 token = parseToken(start, end, &tokenStart, &tokenEnd);
382 if (token == Token::ListSeparator) {
383 start = tokenEnd;
384 token = parseToken(start, end, &tokenStart, &tokenEnd);
385 if (token == Token::ArrayEnd)
386 return nullptr;
387 } else if (token != Token::ArrayEnd) {
388 // Unexpected value after list value. Bail out.
389 return nullptr;
390 }
391 }
392 if (token != Token::ArrayEnd)
393 return nullptr;
394 result = WTFMove(array);
395 break;
396 }
397 case Token::ObjectBegin: {
398 Ref<JSON::Object> object = JSON::Object::create();
399 start = tokenEnd;
400 token = parseToken(start, end, &tokenStart, &tokenEnd);
401 while (token != Token::ObjectEnd) {
402 if (token != Token::String)
403 return nullptr;
404 String key;
405 if (!decodeString(tokenStart + 1, tokenEnd - 1, key))
406 return nullptr;
407 start = tokenEnd;
408
409 token = parseToken(start, end, &tokenStart, &tokenEnd);
410 if (token != Token::ObjectPairSeparator)
411 return nullptr;
412 start = tokenEnd;
413
414 RefPtr<JSON::Value> value = buildValue(start, end, &tokenEnd, depth + 1);
415 if (!value)
416 return nullptr;
417 object->setValue(key, WTFMove(value));
418 start = tokenEnd;
419
420 // After a key/value pair, we expect a comma or the end of the
421 // object.
422 token = parseToken(start, end, &tokenStart, &tokenEnd);
423 if (token == Token::ListSeparator) {
424 start = tokenEnd;
425 token = parseToken(start, end, &tokenStart, &tokenEnd);
426 if (token == Token::ObjectEnd)
427 return nullptr;
428 } else if (token != Token::ObjectEnd) {
429 // Unexpected value after last object value. Bail out.
430 return nullptr;
431 }
432 }
433 if (token != Token::ObjectEnd)
434 return nullptr;
435 result = WTFMove(object);
436 break;
437 }
438
439 default:
440 // We got a token that's not a value.
441 return nullptr;
442 }
443 *valueTokenEnd = tokenEnd;
444 return result;
445}
446
447inline void appendDoubleQuotedString(StringBuilder& builder, StringView string)
448{
449 builder.append('"');
450 for (UChar codeUnit : string.codeUnits()) {
451 switch (codeUnit) {
452 case '\b':
453 builder.appendLiteral("\\b");
454 continue;
455 case '\f':
456 builder.appendLiteral("\\f");
457 continue;
458 case '\n':
459 builder.appendLiteral("\\n");
460 continue;
461 case '\r':
462 builder.appendLiteral("\\r");
463 continue;
464 case '\t':
465 builder.appendLiteral("\\t");
466 continue;
467 case '\\':
468 builder.appendLiteral("\\\\");
469 continue;
470 case '"':
471 builder.appendLiteral("\\\"");
472 continue;
473 }
474 // We escape < and > to prevent script execution.
475 if (codeUnit >= 32 && codeUnit < 127 && codeUnit != '<' && codeUnit != '>') {
476 builder.append(codeUnit);
477 continue;
478 }
479 // We could encode characters >= 127 as UTF-8 instead of \u escape sequences.
480 // We could handle surrogates here if callers wanted that; for now we just
481 // write them out as a \u sequence, so a surrogate pair appears as two of them.
482 builder.appendLiteral("\\u");
483 builder.append(upperNibbleToASCIIHexDigit(codeUnit >> 8));
484 builder.append(lowerNibbleToASCIIHexDigit(codeUnit >> 8));
485 builder.append(upperNibbleToASCIIHexDigit(codeUnit));
486 builder.append(lowerNibbleToASCIIHexDigit(codeUnit));
487 }
488 builder.append('"');
489}
490
491} // anonymous namespace
492
493Ref<Value> Value::null()
494{
495 return adoptRef(*new Value);
496}
497
498Ref<Value> Value::create(bool value)
499{
500 return adoptRef(*new Value(value));
501}
502
503Ref<Value> Value::create(int value)
504{
505 return adoptRef(*new Value(value));
506}
507
508Ref<Value> Value::create(double value)
509{
510 return adoptRef(*new Value(value));
511}
512
513Ref<Value> Value::create(const String& value)
514{
515 return adoptRef(*new Value(value));
516}
517
518Ref<Value> Value::create(const char* value)
519{
520 return adoptRef(*new Value(value));
521}
522
523bool Value::asValue(RefPtr<Value>& value)
524{
525 value = this;
526 return true;
527}
528
529bool Value::asObject(RefPtr<Object>&)
530{
531 return false;
532}
533
534bool Value::asArray(RefPtr<Array>&)
535{
536 return false;
537}
538
539bool Value::parseJSON(const String& jsonInput, RefPtr<Value>& output)
540{
541 // FIXME: This whole file should just use StringView instead of UChar/length and avoid upconverting.
542 auto characters = StringView(jsonInput).upconvertedCharacters();
543 const UChar* start = characters;
544 const UChar* end = start + jsonInput.length();
545 const UChar* tokenEnd;
546 auto result = buildValue(start, end, &tokenEnd, 0);
547 if (!result)
548 return false;
549
550 for (const UChar* valueEnd = tokenEnd; valueEnd < end; ++valueEnd) {
551 if (!isSpaceOrNewline(*valueEnd))
552 return false;
553 }
554
555 output = WTFMove(result);
556 return true;
557}
558
559String Value::toJSONString() const
560{
561 StringBuilder result;
562 result.reserveCapacity(512);
563 writeJSON(result);
564 return result.toString();
565}
566
567bool Value::asBoolean(bool& output) const
568{
569 if (type() != Type::Boolean)
570 return false;
571
572 output = m_value.boolean;
573 return true;
574}
575
576bool Value::asDouble(double& output) const
577{
578 if (type() != Type::Double)
579 return false;
580
581 output = m_value.number;
582 return true;
583}
584
585bool Value::asDouble(float& output) const
586{
587 if (type() != Type::Double)
588 return false;
589
590 output = static_cast<float>(m_value.number);
591 return true;
592}
593
594bool Value::asInteger(int& output) const
595{
596 if (type() != Type::Integer && type() != Type::Double)
597 return false;
598
599 output = static_cast<int>(m_value.number);
600 return true;
601}
602
603bool Value::asInteger(unsigned& output) const
604{
605 if (type() != Type::Integer && type() != Type::Double)
606 return false;
607
608 output = static_cast<unsigned>(m_value.number);
609 return true;
610}
611
612bool Value::asInteger(long& output) const
613{
614 if (type() != Type::Integer && type() != Type::Double)
615 return false;
616
617 output = static_cast<long>(m_value.number);
618 return true;
619}
620
621bool Value::asInteger(long long& output) const
622{
623 if (type() != Type::Integer && type() != Type::Double)
624 return false;
625
626 output = static_cast<long long>(m_value.number);
627 return true;
628}
629
630bool Value::asInteger(unsigned long& output) const
631{
632 if (type() != Type::Integer && type() != Type::Double)
633 return false;
634
635 output = static_cast<unsigned long>(m_value.number);
636 return true;
637}
638
639bool Value::asInteger(unsigned long long& output) const
640{
641 if (type() != Type::Integer && type() != Type::Double)
642 return false;
643
644 output = static_cast<unsigned long long>(m_value.number);
645 return true;
646}
647
648bool Value::asString(String& output) const
649{
650 if (type() != Type::String)
651 return false;
652
653 output = m_value.string;
654 return true;
655}
656
657void Value::writeJSON(StringBuilder& output) const
658{
659 switch (m_type) {
660 case Type::Null:
661 output.appendLiteral("null");
662 break;
663 case Type::Boolean:
664 if (m_value.boolean)
665 output.appendLiteral("true");
666 else
667 output.appendLiteral("false");
668 break;
669 case Type::String:
670 appendDoubleQuotedString(output, m_value.string);
671 break;
672 case Type::Double:
673 case Type::Integer: {
674 if (!std::isfinite(m_value.number))
675 output.appendLiteral("null");
676 else
677 output.appendECMAScriptNumber(m_value.number);
678 break;
679 }
680 default:
681 ASSERT_NOT_REACHED();
682 }
683}
684
685size_t Value::memoryCost() const
686{
687 size_t memoryCost = sizeof(this);
688 if (m_type == Type::String && m_value.string)
689 memoryCost += m_value.string->sizeInBytes();
690 return memoryCost;
691}
692
693ObjectBase::~ObjectBase()
694{
695}
696
697bool ObjectBase::asObject(RefPtr<Object>& output)
698{
699 COMPILE_ASSERT(sizeof(Object) == sizeof(ObjectBase), cannot_cast);
700
701 output = static_cast<Object*>(this);
702 return true;
703}
704
705Object* ObjectBase::openAccessors()
706{
707 COMPILE_ASSERT(sizeof(Object) == sizeof(ObjectBase), cannot_cast);
708
709 return static_cast<Object*>(this);
710}
711
712size_t ObjectBase::memoryCost() const
713{
714 size_t memoryCost = Value::memoryCost();
715 for (const auto& entry : m_map) {
716 memoryCost += entry.key.sizeInBytes();
717 if (entry.value)
718 memoryCost += entry.value->memoryCost();
719 }
720 return memoryCost;
721}
722
723bool ObjectBase::getBoolean(const String& name, bool& output) const
724{
725 RefPtr<Value> value;
726 if (!getValue(name, value))
727 return false;
728
729 return value->asBoolean(output);
730}
731
732bool ObjectBase::getString(const String& name, String& output) const
733{
734 RefPtr<Value> value;
735 if (!getValue(name, value))
736 return false;
737
738 return value->asString(output);
739}
740
741bool ObjectBase::getObject(const String& name, RefPtr<Object>& output) const
742{
743 RefPtr<Value> value;
744 if (!getValue(name, value))
745 return false;
746
747 return value->asObject(output);
748}
749
750bool ObjectBase::getArray(const String& name, RefPtr<Array>& output) const
751{
752 RefPtr<Value> value;
753 if (!getValue(name, value))
754 return false;
755
756 return value->asArray(output);
757}
758
759bool ObjectBase::getValue(const String& name, RefPtr<Value>& output) const
760{
761 Dictionary::const_iterator findResult = m_map.find(name);
762 if (findResult == m_map.end())
763 return false;
764
765 output = findResult->value;
766 return true;
767}
768
769void ObjectBase::remove(const String& name)
770{
771 m_map.remove(name);
772 m_order.removeFirst(name);
773}
774
775void ObjectBase::writeJSON(StringBuilder& output) const
776{
777 output.append('{');
778 for (size_t i = 0; i < m_order.size(); ++i) {
779 auto findResult = m_map.find(m_order[i]);
780 ASSERT(findResult != m_map.end());
781 if (i)
782 output.append(',');
783 appendDoubleQuotedString(output, findResult->key);
784 output.append(':');
785 findResult->value->writeJSON(output);
786 }
787 output.append('}');
788}
789
790ObjectBase::ObjectBase()
791 : Value(Type::Object)
792 , m_map()
793 , m_order()
794{
795}
796
797ArrayBase::~ArrayBase()
798{
799}
800
801bool ArrayBase::asArray(RefPtr<Array>& output)
802{
803 COMPILE_ASSERT(sizeof(ArrayBase) == sizeof(Array), cannot_cast);
804 output = static_cast<Array*>(this);
805 return true;
806}
807
808void ArrayBase::writeJSON(StringBuilder& output) const
809{
810 output.append('[');
811 for (Vector<RefPtr<Value>>::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
812 if (it != m_map.begin())
813 output.append(',');
814 (*it)->writeJSON(output);
815 }
816 output.append(']');
817}
818
819ArrayBase::ArrayBase()
820 : Value(Type::Array)
821 , m_map()
822{
823}
824
825RefPtr<Value> ArrayBase::get(size_t index) const
826{
827 RELEASE_ASSERT_WITH_SECURITY_IMPLICATION(index < m_map.size());
828 return m_map[index];
829}
830
831Ref<Object> Object::create()
832{
833 return adoptRef(*new Object);
834}
835
836Ref<Array> Array::create()
837{
838 return adoptRef(*new Array);
839}
840
841size_t ArrayBase::memoryCost() const
842{
843 size_t memoryCost = Value::memoryCost();
844 for (const auto& item : m_map) {
845 if (item)
846 memoryCost += item->memoryCost();
847 }
848 return memoryCost;
849}
850
851} // namespace JSONImpl
852} // namespace WTF
853