/*
    Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
    Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
    Copyright (C) 2006-2017 Apple Inc. All rights reserved.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB. If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.

*/
22
23#pragma once
24
25#include "TextEncoding.h"
26#include <wtf/RefCounted.h>
27
28namespace WebCore {
29
// Forward declarations. Within this header both types appear only as the
// element type of std::unique_ptr members of TextResourceDecoder, so their
// full definitions are not pulled in here.
class HTMLMetaCharsetParser;
class TextCodec;
32
33class TextResourceDecoder : public RefCounted<TextResourceDecoder> {
34public:
35 enum EncodingSource {
36 DefaultEncoding,
37 AutoDetectedEncoding,
38 EncodingFromXMLHeader,
39 EncodingFromMetaTag,
40 EncodingFromCSSCharset,
41 EncodingFromHTTPHeader,
42 UserChosenEncoding,
43 EncodingFromParentFrame
44 };
45
46 WEBCORE_EXPORT static Ref<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = { }, bool usesEncodingDetector = false);
47 WEBCORE_EXPORT ~TextResourceDecoder();
48
49 void setEncoding(const TextEncoding&, EncodingSource);
50 const TextEncoding& encoding() const { return m_encoding; }
51 const TextEncoding* encodingForURLParsing();
52
53 bool hasEqualEncodingForCharset(const String& charset) const;
54
55 WEBCORE_EXPORT String decode(const char* data, size_t length);
56 WEBCORE_EXPORT String flush();
57
58 WEBCORE_EXPORT String decodeAndFlush(const char* data, size_t length);
59
60 void setHintEncoding(const TextResourceDecoder* parentFrameDecoder);
61
62 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
63 bool sawError() const { return m_sawError; }
64
65private:
66 TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding, bool usesEncodingDetector);
67
68 enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM.
69 static ContentType determineContentType(const String& mimeType);
70 static const TextEncoding& defaultEncoding(ContentType, const TextEncoding& defaultEncoding);
71
72 size_t checkForBOM(const char*, size_t);
73 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
74 bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer);
75 bool checkForMetaCharset(const char*, size_t);
76 void detectJapaneseEncoding(const char*, size_t);
77 bool shouldAutoDetect() const;
78
79 ContentType m_contentType;
80 TextEncoding m_encoding;
81 std::unique_ptr<TextCodec> m_codec;
82 std::unique_ptr<HTMLMetaCharsetParser> m_charsetParser;
83 EncodingSource m_source { DefaultEncoding };
84 const char* m_parentFrameAutoDetectedEncoding { nullptr };
85 Vector<char> m_buffer;
86 bool m_checkedForBOM { false };
87 bool m_checkedForCSSCharset { false };
88 bool m_checkedForHeadCharset { false };
89 bool m_useLenientXMLDecoding { false }; // Don't stop on XML decoding errors.
90 bool m_sawError { false };
91 bool m_usesEncodingDetector { false };
92};
93
94inline void TextResourceDecoder::setHintEncoding(const TextResourceDecoder* parentFrameDecoder)
95{
96 if (parentFrameDecoder && parentFrameDecoder->m_source == AutoDetectedEncoding)
97 m_parentFrameAutoDetectedEncoding = parentFrameDecoder->encoding().name();
98}
99
100} // namespace WebCore
101