1/*
2 * Copyright (C) 2009-2017 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "UserContentURLPattern.h"
28
29#include <wtf/NeverDestroyed.h>
30#include <wtf/StdLibExtras.h>
31#include <wtf/URL.h>
32
33namespace WebCore {
34
35bool UserContentURLPattern::matchesPatterns(const URL& url, const Vector<String>& whitelist, const Vector<String>& blacklist)
36{
37 // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist.
38 // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
39 bool matchesWhitelist = whitelist.isEmpty();
40 if (!matchesWhitelist) {
41 for (auto& entry : whitelist) {
42 UserContentURLPattern contentPattern(entry);
43 if (contentPattern.matches(url)) {
44 matchesWhitelist = true;
45 break;
46 }
47 }
48 }
49
50 bool matchesBlacklist = false;
51 if (!blacklist.isEmpty()) {
52 for (auto& entry : blacklist) {
53 UserContentURLPattern contentPattern(entry);
54 if (contentPattern.matches(url)) {
55 matchesBlacklist = true;
56 break;
57 }
58 }
59 }
60
61 return matchesWhitelist && !matchesBlacklist;
62}
63
64bool UserContentURLPattern::parse(const String& pattern)
65{
66 static NeverDestroyed<const String> schemeSeparator(MAKE_STATIC_STRING_IMPL("://"));
67
68 size_t schemeEndPos = pattern.find(schemeSeparator);
69 if (schemeEndPos == notFound)
70 return false;
71
72 m_scheme = pattern.left(schemeEndPos);
73
74 unsigned hostStartPos = schemeEndPos + schemeSeparator.get().length();
75 if (hostStartPos >= pattern.length())
76 return false;
77
78 int pathStartPos = 0;
79
80 if (equalLettersIgnoringASCIICase(m_scheme, "file"))
81 pathStartPos = hostStartPos;
82 else {
83 size_t hostEndPos = pattern.find('/', hostStartPos);
84 if (hostEndPos == notFound)
85 return false;
86
87 m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
88 m_matchSubdomains = false;
89
90 if (m_host == "*") {
91 // The pattern can be just '*', which means match all domains.
92 m_host = emptyString();
93 m_matchSubdomains = true;
94 } else if (m_host.startsWith("*.")) {
95 // The first component can be '*', which means to match all subdomains.
96 m_host = m_host.substring(2); // Length of "*."
97 m_matchSubdomains = true;
98 }
99
100 // No other '*' can occur in the host.
101 if (m_host.find('*') != notFound)
102 return false;
103
104 pathStartPos = hostEndPos;
105 }
106
107 m_path = pattern.right(pattern.length() - pathStartPos);
108
109 return true;
110}
111
112bool UserContentURLPattern::matches(const URL& test) const
113{
114 if (m_invalid)
115 return false;
116
117 if (!equalIgnoringASCIICase(test.protocol(), m_scheme))
118 return false;
119
120 if (!equalLettersIgnoringASCIICase(m_scheme, "file") && !matchesHost(test))
121 return false;
122
123 return matchesPath(test);
124}
125
126bool UserContentURLPattern::matchesHost(const URL& test) const
127{
128 auto host = test.host();
129 if (equalIgnoringASCIICase(host, m_host))
130 return true;
131
132 if (!m_matchSubdomains)
133 return false;
134
135 // If we're matching subdomains, and we have no host, that means the pattern
136 // was <scheme>://*/<whatever>, so we match anything.
137 if (!m_host.length())
138 return true;
139
140 // Check if the domain is a subdomain of our host.
141 if (!host.endsWithIgnoringASCIICase(m_host))
142 return false;
143
144 ASSERT(host.length() > m_host.length());
145
146 // Check that the character before the suffix is a period.
147 return host[host.length() - m_host.length() - 1] == '.';
148}
149
150struct MatchTester
151{
152 const String m_pattern;
153 unsigned m_patternIndex;
154
155 const String m_test;
156 unsigned m_testIndex;
157
158 MatchTester(const String& pattern, const String& test)
159 : m_pattern(pattern)
160 , m_patternIndex(0)
161 , m_test(test)
162 , m_testIndex(0)
163 {
164 }
165
166 bool testStringFinished() const { return m_testIndex >= m_test.length(); }
167 bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }
168
169 void eatWildcard()
170 {
171 while (!patternStringFinished()) {
172 if (m_pattern[m_patternIndex] != '*')
173 return;
174 m_patternIndex++;
175 }
176 }
177
178 void eatSameChars()
179 {
180 while (!patternStringFinished() && !testStringFinished()) {
181 if (m_pattern[m_patternIndex] == '*')
182 return;
183 if (m_pattern[m_patternIndex] != m_test[m_testIndex])
184 return;
185 m_patternIndex++;
186 m_testIndex++;
187 }
188 }
189
190 bool test()
191 {
192 // Eat all the matching chars.
193 eatSameChars();
194
195 // If the string is finished, then the pattern must be empty too, or contains
196 // only wildcards.
197 if (testStringFinished()) {
198 eatWildcard();
199 if (patternStringFinished())
200 return true;
201 return false;
202 }
203
204 // Pattern is empty but not string, this is not a match.
205 if (patternStringFinished())
206 return false;
207
208 // If we don't encounter a *, then we're hosed.
209 if (m_pattern[m_patternIndex] != '*')
210 return false;
211
212 while (!testStringFinished()) {
213 MatchTester nextMatch(*this);
214 nextMatch.m_patternIndex++;
215 if (nextMatch.test())
216 return true;
217 m_testIndex++;
218 }
219
220 // We reached the end of the string. Let's see if the pattern contains only
221 // wildcards.
222 eatWildcard();
223 return patternStringFinished();
224 }
225};
226
227bool UserContentURLPattern::matchesPath(const URL& test) const
228{
229 MatchTester match(m_path, test.path());
230 return match.test();
231}
232
233} // namespace WebCore
234