1 | /* |
2 | * Copyright (C) 2009-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #include "config.h" |
27 | #include "UserContentURLPattern.h" |
28 | |
29 | #include <wtf/NeverDestroyed.h> |
30 | #include <wtf/StdLibExtras.h> |
31 | #include <wtf/URL.h> |
32 | |
33 | namespace WebCore { |
34 | |
35 | bool UserContentURLPattern::matchesPatterns(const URL& url, const Vector<String>& whitelist, const Vector<String>& blacklist) |
36 | { |
37 | // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist. |
38 | // If there is no whitelist at all, then all URLs are assumed to be in the whitelist. |
39 | bool matchesWhitelist = whitelist.isEmpty(); |
40 | if (!matchesWhitelist) { |
41 | for (auto& entry : whitelist) { |
42 | UserContentURLPattern contentPattern(entry); |
43 | if (contentPattern.matches(url)) { |
44 | matchesWhitelist = true; |
45 | break; |
46 | } |
47 | } |
48 | } |
49 | |
50 | bool matchesBlacklist = false; |
51 | if (!blacklist.isEmpty()) { |
52 | for (auto& entry : blacklist) { |
53 | UserContentURLPattern contentPattern(entry); |
54 | if (contentPattern.matches(url)) { |
55 | matchesBlacklist = true; |
56 | break; |
57 | } |
58 | } |
59 | } |
60 | |
61 | return matchesWhitelist && !matchesBlacklist; |
62 | } |
63 | |
64 | bool UserContentURLPattern::parse(const String& pattern) |
65 | { |
66 | static NeverDestroyed<const String> schemeSeparator(MAKE_STATIC_STRING_IMPL("://" )); |
67 | |
68 | size_t schemeEndPos = pattern.find(schemeSeparator); |
69 | if (schemeEndPos == notFound) |
70 | return false; |
71 | |
72 | m_scheme = pattern.left(schemeEndPos); |
73 | |
74 | unsigned hostStartPos = schemeEndPos + schemeSeparator.get().length(); |
75 | if (hostStartPos >= pattern.length()) |
76 | return false; |
77 | |
78 | int pathStartPos = 0; |
79 | |
80 | if (equalLettersIgnoringASCIICase(m_scheme, "file" )) |
81 | pathStartPos = hostStartPos; |
82 | else { |
83 | size_t hostEndPos = pattern.find('/', hostStartPos); |
84 | if (hostEndPos == notFound) |
85 | return false; |
86 | |
87 | m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos); |
88 | m_matchSubdomains = false; |
89 | |
90 | if (m_host == "*" ) { |
91 | // The pattern can be just '*', which means match all domains. |
92 | m_host = emptyString(); |
93 | m_matchSubdomains = true; |
94 | } else if (m_host.startsWith("*." )) { |
95 | // The first component can be '*', which means to match all subdomains. |
96 | m_host = m_host.substring(2); // Length of "*." |
97 | m_matchSubdomains = true; |
98 | } |
99 | |
100 | // No other '*' can occur in the host. |
101 | if (m_host.find('*') != notFound) |
102 | return false; |
103 | |
104 | pathStartPos = hostEndPos; |
105 | } |
106 | |
107 | m_path = pattern.right(pattern.length() - pathStartPos); |
108 | |
109 | return true; |
110 | } |
111 | |
112 | bool UserContentURLPattern::matches(const URL& test) const |
113 | { |
114 | if (m_invalid) |
115 | return false; |
116 | |
117 | if (!equalIgnoringASCIICase(test.protocol(), m_scheme)) |
118 | return false; |
119 | |
120 | if (!equalLettersIgnoringASCIICase(m_scheme, "file" ) && !matchesHost(test)) |
121 | return false; |
122 | |
123 | return matchesPath(test); |
124 | } |
125 | |
126 | bool UserContentURLPattern::matchesHost(const URL& test) const |
127 | { |
128 | auto host = test.host(); |
129 | if (equalIgnoringASCIICase(host, m_host)) |
130 | return true; |
131 | |
132 | if (!m_matchSubdomains) |
133 | return false; |
134 | |
135 | // If we're matching subdomains, and we have no host, that means the pattern |
136 | // was <scheme>://*/<whatever>, so we match anything. |
137 | if (!m_host.length()) |
138 | return true; |
139 | |
140 | // Check if the domain is a subdomain of our host. |
141 | if (!host.endsWithIgnoringASCIICase(m_host)) |
142 | return false; |
143 | |
144 | ASSERT(host.length() > m_host.length()); |
145 | |
146 | // Check that the character before the suffix is a period. |
147 | return host[host.length() - m_host.length() - 1] == '.'; |
148 | } |
149 | |
150 | struct MatchTester |
151 | { |
152 | const String m_pattern; |
153 | unsigned m_patternIndex; |
154 | |
155 | const String m_test; |
156 | unsigned m_testIndex; |
157 | |
158 | MatchTester(const String& pattern, const String& test) |
159 | : m_pattern(pattern) |
160 | , m_patternIndex(0) |
161 | , m_test(test) |
162 | , m_testIndex(0) |
163 | { |
164 | } |
165 | |
166 | bool testStringFinished() const { return m_testIndex >= m_test.length(); } |
167 | bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); } |
168 | |
169 | void eatWildcard() |
170 | { |
171 | while (!patternStringFinished()) { |
172 | if (m_pattern[m_patternIndex] != '*') |
173 | return; |
174 | m_patternIndex++; |
175 | } |
176 | } |
177 | |
178 | void eatSameChars() |
179 | { |
180 | while (!patternStringFinished() && !testStringFinished()) { |
181 | if (m_pattern[m_patternIndex] == '*') |
182 | return; |
183 | if (m_pattern[m_patternIndex] != m_test[m_testIndex]) |
184 | return; |
185 | m_patternIndex++; |
186 | m_testIndex++; |
187 | } |
188 | } |
189 | |
190 | bool test() |
191 | { |
192 | // Eat all the matching chars. |
193 | eatSameChars(); |
194 | |
195 | // If the string is finished, then the pattern must be empty too, or contains |
196 | // only wildcards. |
197 | if (testStringFinished()) { |
198 | eatWildcard(); |
199 | if (patternStringFinished()) |
200 | return true; |
201 | return false; |
202 | } |
203 | |
204 | // Pattern is empty but not string, this is not a match. |
205 | if (patternStringFinished()) |
206 | return false; |
207 | |
208 | // If we don't encounter a *, then we're hosed. |
209 | if (m_pattern[m_patternIndex] != '*') |
210 | return false; |
211 | |
212 | while (!testStringFinished()) { |
213 | MatchTester nextMatch(*this); |
214 | nextMatch.m_patternIndex++; |
215 | if (nextMatch.test()) |
216 | return true; |
217 | m_testIndex++; |
218 | } |
219 | |
220 | // We reached the end of the string. Let's see if the pattern contains only |
221 | // wildcards. |
222 | eatWildcard(); |
223 | return patternStringFinished(); |
224 | } |
225 | }; |
226 | |
227 | bool UserContentURLPattern::matchesPath(const URL& test) const |
228 | { |
229 | MatchTester match(m_path, test.path()); |
230 | return match.test(); |
231 | } |
232 | |
233 | } // namespace WebCore |
234 | |