1 | /* |
2 | * Copyright (C) 2016 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
20 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
21 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
22 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
23 | */ |
24 | |
25 | #include "config.h" |
26 | #include "TextDecoder.h" |
27 | |
28 | #include "HTMLParserIdioms.h" |
29 | #include <wtf/Optional.h> |
30 | |
31 | namespace WebCore { |
32 | |
33 | ExceptionOr<Ref<TextDecoder>> TextDecoder::create(const String& label, Options options) |
34 | { |
35 | String strippedLabel = stripLeadingAndTrailingHTMLSpaces(label); |
36 | const UChar nullCharacter = '\0'; |
37 | if (strippedLabel.contains(nullCharacter)) |
38 | return Exception { RangeError }; |
39 | auto decoder = adoptRef(*new TextDecoder(strippedLabel.utf8().data(), options)); |
40 | if (!decoder->m_textEncoding.isValid() || !strcmp(decoder->m_textEncoding.name(), "replacement" )) |
41 | return Exception { RangeError }; |
42 | return decoder; |
43 | } |
44 | |
45 | TextDecoder::TextDecoder(const char* label, Options options) |
46 | : m_textEncoding(label) |
47 | , m_options(options) |
48 | { |
49 | } |
50 | |
51 | void TextDecoder::ignoreBOMIfNecessary(const uint8_t*& data, size_t& length) |
52 | { |
53 | const uint8_t utf8BOMBytes[3] = {0xEF, 0xBB, 0xBF}; |
54 | const uint8_t utf16BEBOMBytes[2] = {0xFE, 0xFF}; |
55 | const uint8_t utf16LEBOMBytes[2] = {0xFF, 0xFE}; |
56 | |
57 | if (m_textEncoding == UTF8Encoding() |
58 | && length >= sizeof(utf8BOMBytes) |
59 | && data[0] == utf8BOMBytes[0] |
60 | && data[1] == utf8BOMBytes[1] |
61 | && data[2] == utf8BOMBytes[2]) { |
62 | data += sizeof(utf8BOMBytes); |
63 | length -= sizeof(utf8BOMBytes); |
64 | } else if (m_textEncoding == UTF16BigEndianEncoding() |
65 | && length >= sizeof(utf16BEBOMBytes) |
66 | && data[0] == utf16BEBOMBytes[0] |
67 | && data[1] == utf16BEBOMBytes[1]) { |
68 | data += sizeof(utf16BEBOMBytes); |
69 | length -= sizeof(utf16BEBOMBytes); |
70 | } else if (m_textEncoding == UTF16LittleEndianEncoding() |
71 | && length >= sizeof(utf16LEBOMBytes) |
72 | && data[0] == utf16LEBOMBytes[0] |
73 | && data[1] == utf16LEBOMBytes[1]) { |
74 | data += sizeof(utf16LEBOMBytes); |
75 | length -= sizeof(utf16LEBOMBytes); |
76 | } |
77 | } |
78 | |
79 | String TextDecoder::prependBOMIfNecessary(const String& decoded) |
80 | { |
81 | if (m_hasDecoded || !m_options.ignoreBOM) |
82 | return decoded; |
83 | const UChar utf16BEBOM[2] = {0xFEFF, '\0'}; |
84 | |
85 | // FIXME: Make TextCodec::decode take a flag for prepending BOM so we don't need to do this extra allocation and copy. |
86 | return makeString(utf16BEBOM, decoded); |
87 | } |
88 | |
89 | static size_t codeUnitByteSize(const TextEncoding& encoding) |
90 | { |
91 | return encoding.isByteBasedEncoding() ? 1 : 2; |
92 | } |
93 | |
94 | ExceptionOr<String> TextDecoder::decode(Optional<BufferSource::VariantType> input, DecodeOptions options) |
95 | { |
96 | Optional<BufferSource> inputBuffer; |
97 | const uint8_t* data = nullptr; |
98 | size_t length = 0; |
99 | if (input) { |
100 | inputBuffer = BufferSource(WTFMove(input.value())); |
101 | data = inputBuffer->data(); |
102 | length = inputBuffer->length(); |
103 | } |
104 | |
105 | ignoreBOMIfNecessary(data, length); |
106 | |
107 | if (m_buffer.size()) { |
108 | m_buffer.append(data, length); |
109 | data = m_buffer.data(); |
110 | length = m_buffer.size(); |
111 | } |
112 | |
113 | const bool stopOnError = true; |
114 | bool sawError = false; |
115 | if (length % codeUnitByteSize(m_textEncoding)) |
116 | sawError = true; |
117 | const char* charData = reinterpret_cast<const char*>(data); |
118 | String result; |
119 | if (!sawError) |
120 | result = prependBOMIfNecessary(m_textEncoding.decode(charData, length, stopOnError, sawError)); |
121 | |
122 | if (sawError) { |
123 | if (options.stream) { |
124 | result = String(); |
125 | if (!m_buffer.size()) |
126 | m_buffer.append(data, length); |
127 | } else { |
128 | if (m_options.fatal) |
129 | return Exception { TypeError }; |
130 | result = prependBOMIfNecessary(m_textEncoding.decode(charData, length)); |
131 | } |
132 | } else |
133 | m_buffer.clear(); |
134 | |
135 | m_hasDecoded = true; |
136 | return result; |
137 | } |
138 | |
139 | String TextDecoder::encoding() const |
140 | { |
141 | return String(m_textEncoding.name()).convertToASCIILowercase(); |
142 | } |
143 | |
144 | } |
145 | |