1/*
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008, 2016 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 */
22
23#pragma once
24
25#include "RegExp.h"
26#include "JSCInlines.h"
27#include "Yarr.h"
28#include "YarrInterpreter.h"
29#include "YarrJIT.h"
30
31#define REGEXP_FUNC_TEST_DATA_GEN 0
32
33#if REGEXP_FUNC_TEST_DATA_GEN
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#endif
38
39namespace JSC {
40
41#if REGEXP_FUNC_TEST_DATA_GEN
42class RegExpFunctionalTestCollector {
43 // This class is not thread safe.
44protected:
45 static const char* const s_fileName;
46
47public:
48 static RegExpFunctionalTestCollector* get();
49
50 ~RegExpFunctionalTestCollector();
51
52 void outputOneTest(RegExp*, String, int, int*, int);
53 void clearRegExp(RegExp* regExp)
54 {
55 if (regExp == m_lastRegExp)
56 m_lastRegExp = 0;
57 }
58
59private:
60 RegExpFunctionalTestCollector();
61
62 void outputEscapedString(const String&, bool escapeSlash = false);
63
64 static RegExpFunctionalTestCollector* s_instance;
65 FILE* m_file;
66 RegExp* m_lastRegExp;
67};
68#endif // REGEXP_FUNC_TEST_DATA_GEN
69
70ALWAYS_INLINE bool RegExp::hasCodeFor(Yarr::YarrCharSize charSize)
71{
72 if (hasCode()) {
73#if ENABLE(YARR_JIT)
74 if (m_state != JITCode)
75 return true;
76 ASSERT(m_regExpJITCode);
77 if ((charSize == Yarr::Char8) && (m_regExpJITCode->has8BitCode()))
78 return true;
79 if ((charSize == Yarr::Char16) && (m_regExpJITCode->has16BitCode()))
80 return true;
81#else
82 UNUSED_PARAM(charSize);
83 return true;
84#endif
85 }
86 return false;
87}
88
89class PatternContextBufferHolder {
90 WTF_FORBID_HEAP_ALLOCATION;
91public:
92 PatternContextBufferHolder(VM& vm, bool needBuffer)
93 : m_vm(vm)
94 {
95#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
96 if (needBuffer)
97 m_buffer = m_vm.acquireRegExpPatternContexBuffer();
98#else
99 UNUSED_PARAM(needBuffer);
100#endif
101 }
102
103 ~PatternContextBufferHolder()
104 {
105#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
106 if (buffer())
107 m_vm.releaseRegExpPatternContexBuffer();
108#else
109 UNUSED_PARAM(m_vm);
110#endif
111 }
112
113 void* buffer() { return m_buffer; }
114 unsigned size() { return buffer() ? VM::patternContextBufferSize : 0; }
115
116private:
117 VM& m_vm;
118 void* m_buffer { nullptr };
119};
120
121ALWAYS_INLINE void RegExp::compileIfNecessary(VM& vm, Yarr::YarrCharSize charSize)
122{
123 if (hasCodeFor(charSize))
124 return;
125
126 if (m_state == ParseError)
127 return;
128
129 compile(&vm, charSize);
130}
131
132template<typename VectorType>
133ALWAYS_INLINE int RegExp::matchInline(VM& vm, const String& s, unsigned startOffset, VectorType& ovector)
134{
135#if ENABLE(REGEXP_TRACING)
136 m_rtMatchCallCount++;
137 m_rtMatchTotalSubjectStringLen += (double)(s.length() - startOffset);
138#endif
139
140 compileIfNecessary(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
141
142 auto throwError = [&] {
143 auto throwScope = DECLARE_THROW_SCOPE(vm);
144 ExecState* exec = vm.topCallFrame;
145 throwScope.throwException(exec, errorToThrow(exec));
146 if (!hasHardError(m_constructionErrorCode))
147 reset();
148 return -1;
149 };
150
151 if (m_state == ParseError)
152 return throwError();
153
154 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
155 ovector.resize(offsetVectorSize);
156 int* offsetVector = ovector.data();
157
158 int result;
159#if ENABLE(YARR_JIT)
160 if (m_state == JITCode) {
161 {
162 ASSERT(m_regExpJITCode);
163 PatternContextBufferHolder patternContextBufferHolder(vm, m_regExpJITCode->usesPatternContextBuffer());
164
165 if (s.is8Bit())
166 result = m_regExpJITCode->execute(s.characters8(), startOffset, s.length(), offsetVector, patternContextBufferHolder.buffer(), patternContextBufferHolder.size()).start;
167 else
168 result = m_regExpJITCode->execute(s.characters16(), startOffset, s.length(), offsetVector, patternContextBufferHolder.buffer(), patternContextBufferHolder.size()).start;
169 }
170
171 if (result == Yarr::JSRegExpJITCodeFailure) {
172 // JIT'ed code couldn't handle expression, so punt back to the interpreter.
173 byteCodeCompileIfNecessary(&vm);
174 if (m_state == ParseError)
175 return throwError();
176 result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
177 }
178
179#if ENABLE(YARR_JIT_DEBUG)
180 if (m_state == JITCode) {
181 byteCodeCompileIfNecessary(&vm);
182 if (m_state == ParseError)
183 return throwError();
184 matchCompareWithInterpreter(s, startOffset, offsetVector, result);
185 }
186#endif
187 } else
188#endif
189 result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
190
191 // FIXME: The YARR engine should handle unsigned or size_t length matches.
192 // The YARR Interpreter is "unsigned" clean, while the YARR JIT hasn't been addressed.
193 // The offset vector handling needs to change as well.
194 // Right now we convert a match where the offsets overflowed into match failure.
195 // There are two places in WebCore that call the interpreter directly that need to
196 // have their offsets changed to int as well. They are yarr/RegularExpression.cpp
197 // and inspector/ContentSearchUtilities.cpp
198 if (s.length() > INT_MAX) {
199 bool overflowed = false;
200
201 if (result < -1)
202 overflowed = true;
203
204 for (unsigned i = 0; i <= m_numSubpatterns; i++) {
205 if ((offsetVector[i*2] < -1) || ((offsetVector[i*2] >= 0) && (offsetVector[i*2+1] < -1))) {
206 overflowed = true;
207 offsetVector[i*2] = -1;
208 offsetVector[i*2+1] = -1;
209 }
210 }
211
212 if (overflowed)
213 result = -1;
214 }
215
216 ASSERT(result >= -1);
217
218#if REGEXP_FUNC_TEST_DATA_GEN
219 RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
220#endif
221
222#if ENABLE(REGEXP_TRACING)
223 if (result != -1)
224 m_rtMatchFoundCount++;
225#endif
226
227 return result;
228}
229
230ALWAYS_INLINE bool RegExp::hasMatchOnlyCodeFor(Yarr::YarrCharSize charSize)
231{
232 if (hasCode()) {
233#if ENABLE(YARR_JIT)
234 if (m_state != JITCode)
235 return true;
236 ASSERT(m_regExpJITCode);
237 if ((charSize == Yarr::Char8) && (m_regExpJITCode->has8BitCodeMatchOnly()))
238 return true;
239 if ((charSize == Yarr::Char16) && (m_regExpJITCode->has16BitCodeMatchOnly()))
240 return true;
241#else
242 UNUSED_PARAM(charSize);
243 return true;
244#endif
245 }
246
247 return false;
248}
249
250ALWAYS_INLINE void RegExp::compileIfNecessaryMatchOnly(VM& vm, Yarr::YarrCharSize charSize)
251{
252 if (hasMatchOnlyCodeFor(charSize))
253 return;
254
255 if (m_state == ParseError)
256 return;
257
258 compileMatchOnly(&vm, charSize);
259}
260
261ALWAYS_INLINE MatchResult RegExp::matchInline(VM& vm, const String& s, unsigned startOffset)
262{
263#if ENABLE(REGEXP_TRACING)
264 m_rtMatchOnlyCallCount++;
265 m_rtMatchOnlyTotalSubjectStringLen += (double)(s.length() - startOffset);
266#endif
267
268 compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
269
270 auto throwError = [&] {
271 auto throwScope = DECLARE_THROW_SCOPE(vm);
272 ExecState* exec = vm.topCallFrame;
273 throwScope.throwException(exec, errorToThrow(exec));
274 if (!hasHardError(m_constructionErrorCode))
275 reset();
276 return MatchResult::failed();
277 };
278
279 if (m_state == ParseError)
280 return throwError();
281
282#if ENABLE(YARR_JIT)
283 MatchResult result;
284
285 if (m_state == JITCode) {
286 {
287 ASSERT(m_regExpJITCode);
288 PatternContextBufferHolder patternContextBufferHolder(vm, m_regExpJITCode->usesPatternContextBuffer());
289 if (s.is8Bit())
290 result = m_regExpJITCode->execute(s.characters8(), startOffset, s.length(), patternContextBufferHolder.buffer(), patternContextBufferHolder.size());
291 else
292 result = m_regExpJITCode->execute(s.characters16(), startOffset, s.length(), patternContextBufferHolder.buffer(), patternContextBufferHolder.size());
293 }
294
295#if ENABLE(REGEXP_TRACING)
296 if (!result)
297 m_rtMatchOnlyFoundCount++;
298#endif
299 if (result.start != static_cast<size_t>(Yarr::JSRegExpJITCodeFailure))
300 return result;
301
302 // JIT'ed code couldn't handle expression, so punt back to the interpreter.
303 byteCodeCompileIfNecessary(&vm);
304 if (m_state == ParseError)
305 return throwError();
306 }
307#endif
308
309 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
310 int* offsetVector;
311 Vector<int, 32> nonReturnedOvector;
312 nonReturnedOvector.grow(offsetVectorSize);
313 offsetVector = nonReturnedOvector.data();
314 int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
315#if REGEXP_FUNC_TEST_DATA_GEN
316 RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
317#endif
318
319 if (r >= 0) {
320#if ENABLE(REGEXP_TRACING)
321 m_rtMatchOnlyFoundCount++;
322#endif
323 return MatchResult(r, reinterpret_cast<unsigned*>(offsetVector)[1]);
324 }
325
326 return MatchResult::failed();
327}
328
329} // namespace JSC
330