1/*
2 * Copyright (C) 2016 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
20 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 */
24
25#include "config.h"
26#include "TextDecoder.h"
27
28#include "HTMLParserIdioms.h"
29#include <wtf/Optional.h>
30
31namespace WebCore {
32
33ExceptionOr<Ref<TextDecoder>> TextDecoder::create(const String& label, Options options)
34{
35 String strippedLabel = stripLeadingAndTrailingHTMLSpaces(label);
36 const UChar nullCharacter = '\0';
37 if (strippedLabel.contains(nullCharacter))
38 return Exception { RangeError };
39 auto decoder = adoptRef(*new TextDecoder(strippedLabel.utf8().data(), options));
40 if (!decoder->m_textEncoding.isValid() || !strcmp(decoder->m_textEncoding.name(), "replacement"))
41 return Exception { RangeError };
42 return decoder;
43}
44
45TextDecoder::TextDecoder(const char* label, Options options)
46 : m_textEncoding(label)
47 , m_options(options)
48{
49}
50
51void TextDecoder::ignoreBOMIfNecessary(const uint8_t*& data, size_t& length)
52{
53 const uint8_t utf8BOMBytes[3] = {0xEF, 0xBB, 0xBF};
54 const uint8_t utf16BEBOMBytes[2] = {0xFE, 0xFF};
55 const uint8_t utf16LEBOMBytes[2] = {0xFF, 0xFE};
56
57 if (m_textEncoding == UTF8Encoding()
58 && length >= sizeof(utf8BOMBytes)
59 && data[0] == utf8BOMBytes[0]
60 && data[1] == utf8BOMBytes[1]
61 && data[2] == utf8BOMBytes[2]) {
62 data += sizeof(utf8BOMBytes);
63 length -= sizeof(utf8BOMBytes);
64 } else if (m_textEncoding == UTF16BigEndianEncoding()
65 && length >= sizeof(utf16BEBOMBytes)
66 && data[0] == utf16BEBOMBytes[0]
67 && data[1] == utf16BEBOMBytes[1]) {
68 data += sizeof(utf16BEBOMBytes);
69 length -= sizeof(utf16BEBOMBytes);
70 } else if (m_textEncoding == UTF16LittleEndianEncoding()
71 && length >= sizeof(utf16LEBOMBytes)
72 && data[0] == utf16LEBOMBytes[0]
73 && data[1] == utf16LEBOMBytes[1]) {
74 data += sizeof(utf16LEBOMBytes);
75 length -= sizeof(utf16LEBOMBytes);
76 }
77}
78
79String TextDecoder::prependBOMIfNecessary(const String& decoded)
80{
81 if (m_hasDecoded || !m_options.ignoreBOM)
82 return decoded;
83 const UChar utf16BEBOM[2] = {0xFEFF, '\0'};
84
85 // FIXME: Make TextCodec::decode take a flag for prepending BOM so we don't need to do this extra allocation and copy.
86 return makeString(utf16BEBOM, decoded);
87}
88
89static size_t codeUnitByteSize(const TextEncoding& encoding)
90{
91 return encoding.isByteBasedEncoding() ? 1 : 2;
92}
93
94ExceptionOr<String> TextDecoder::decode(Optional<BufferSource::VariantType> input, DecodeOptions options)
95{
96 Optional<BufferSource> inputBuffer;
97 const uint8_t* data = nullptr;
98 size_t length = 0;
99 if (input) {
100 inputBuffer = BufferSource(WTFMove(input.value()));
101 data = inputBuffer->data();
102 length = inputBuffer->length();
103 }
104
105 ignoreBOMIfNecessary(data, length);
106
107 if (m_buffer.size()) {
108 m_buffer.append(data, length);
109 data = m_buffer.data();
110 length = m_buffer.size();
111 }
112
113 const bool stopOnError = true;
114 bool sawError = false;
115 if (length % codeUnitByteSize(m_textEncoding))
116 sawError = true;
117 const char* charData = reinterpret_cast<const char*>(data);
118 String result;
119 if (!sawError)
120 result = prependBOMIfNecessary(m_textEncoding.decode(charData, length, stopOnError, sawError));
121
122 if (sawError) {
123 if (options.stream) {
124 result = String();
125 if (!m_buffer.size())
126 m_buffer.append(data, length);
127 } else {
128 if (m_options.fatal)
129 return Exception { TypeError };
130 result = prependBOMIfNecessary(m_textEncoding.decode(charData, length));
131 }
132 } else
133 m_buffer.clear();
134
135 m_hasDecoded = true;
136 return result;
137}
138
139String TextDecoder::encoding() const
140{
141 return String(m_textEncoding.name()).convertToASCIILowercase();
142}
143
144}
145