1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2007, 2008, 2009, 2016 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 */
21
22#pragma once
23
24#include "ConcurrentJSLock.h"
25#include "MatchResult.h"
26#include "RegExpKey.h"
27#include "Structure.h"
28#include "Yarr.h"
29#include <wtf/Forward.h>
30#include <wtf/text/WTFString.h>
31
32#if ENABLE(YARR_JIT)
33#include "YarrJIT.h"
34#endif
35
36namespace JSC {
37
38struct RegExpRepresentation;
39class VM;
40
41class RegExp final : public JSCell {
42 friend class CachedRegExp;
43
44public:
45 typedef JSCell Base;
46 static const unsigned StructureFlags = Base::StructureFlags | StructureIsImmortal;
47
48 JS_EXPORT_PRIVATE static RegExp* create(VM&, const String& pattern, OptionSet<Yarr::Flags>);
49 static const bool needsDestruction = true;
50 static void destroy(JSCell*);
51 static size_t estimatedSize(JSCell*, VM&);
52 JS_EXPORT_PRIVATE static void dumpToStream(const JSCell*, PrintStream&);
53
54 bool global() const { return m_flags.contains(Yarr::Flags::Global); }
55 bool ignoreCase() const { return m_flags.contains(Yarr::Flags::IgnoreCase); }
56 bool multiline() const { return m_flags.contains(Yarr::Flags::Multiline); }
57 bool sticky() const { return m_flags.contains(Yarr::Flags::Sticky); }
58 bool globalOrSticky() const { return global() || sticky(); }
59 bool unicode() const { return m_flags.contains(Yarr::Flags::Unicode); }
60 bool dotAll() const { return m_flags.contains(Yarr::Flags::DotAll); }
61
62 const String& pattern() const { return m_patternString; }
63
64 bool isValid() const { return !Yarr::hasError(m_constructionErrorCode); }
65 const char* errorMessage() const { return Yarr::errorMessage(m_constructionErrorCode); }
66 JSObject* errorToThrow(ExecState* exec) { return Yarr::errorToThrow(exec, m_constructionErrorCode); }
67 void reset()
68 {
69 m_state = NotCompiled;
70 m_constructionErrorCode = Yarr::ErrorCode::NoError;
71 }
72
73 JS_EXPORT_PRIVATE int match(VM&, const String&, unsigned startOffset, Vector<int>& ovector);
74
75 // Returns false if we couldn't run the regular expression for any reason.
76 bool matchConcurrently(VM&, const String&, unsigned startOffset, int& position, Vector<int>& ovector);
77
78 JS_EXPORT_PRIVATE MatchResult match(VM&, const String&, unsigned startOffset);
79
80 bool matchConcurrently(VM&, const String&, unsigned startOffset, MatchResult&);
81
82 // Call these versions of the match functions if you're desperate for performance.
83 template<typename VectorType>
84 int matchInline(VM&, const String&, unsigned startOffset, VectorType& ovector);
85 MatchResult matchInline(VM&, const String&, unsigned startOffset);
86
87 unsigned numSubpatterns() const { return m_numSubpatterns; }
88
89 bool hasNamedCaptures()
90 {
91 return m_rareData && !m_rareData->m_captureGroupNames.isEmpty();
92 }
93
94 String getCaptureGroupName(unsigned i)
95 {
96 if (!i || !m_rareData || m_rareData->m_captureGroupNames.size() <= i)
97 return String();
98 ASSERT(m_rareData);
99 return m_rareData->m_captureGroupNames[i];
100 }
101
102 unsigned subpatternForName(String groupName)
103 {
104 if (!m_rareData)
105 return 0;
106 auto it = m_rareData->m_namedGroupToParenIndex.find(groupName);
107 if (it == m_rareData->m_namedGroupToParenIndex.end())
108 return 0;
109 return it->value;
110 }
111
112 bool hasCode()
113 {
114 return m_state == JITCode || m_state == ByteCode;
115 }
116
117 bool hasCodeFor(Yarr::YarrCharSize);
118 bool hasMatchOnlyCodeFor(Yarr::YarrCharSize);
119
120 void deleteCode();
121
122#if ENABLE(REGEXP_TRACING)
123 void printTraceData();
124#endif
125
126 static Structure* createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
127 {
128 return Structure::create(vm, globalObject, prototype, TypeInfo(CellType, StructureFlags), info());
129 }
130
131 DECLARE_INFO;
132
133 RegExpKey key() { return RegExpKey(m_flags, m_patternString); }
134
135protected:
136 void finishCreation(VM&);
137
138private:
139 friend class RegExpCache;
140 RegExp(VM&, const String&, OptionSet<Yarr::Flags>);
141
142 static RegExp* createWithoutCaching(VM&, const String&, OptionSet<Yarr::Flags>);
143
144 enum RegExpState : uint8_t {
145 ParseError,
146 JITCode,
147 ByteCode,
148 NotCompiled
149 };
150
151 void byteCodeCompileIfNecessary(VM*);
152
153 void compile(VM*, Yarr::YarrCharSize);
154 void compileIfNecessary(VM&, Yarr::YarrCharSize);
155
156 void compileMatchOnly(VM*, Yarr::YarrCharSize);
157 void compileIfNecessaryMatchOnly(VM&, Yarr::YarrCharSize);
158
159#if ENABLE(YARR_JIT_DEBUG)
160 void matchCompareWithInterpreter(const String&, int startOffset, int* offsetVector, int jitResult);
161#endif
162
163#if ENABLE(YARR_JIT)
164 Yarr::YarrCodeBlock& ensureRegExpJITCode()
165 {
166 if (!m_regExpJITCode)
167 m_regExpJITCode = std::make_unique<Yarr::YarrCodeBlock>();
168 return *m_regExpJITCode.get();
169 }
170#endif
171
172 struct RareData {
173 WTF_MAKE_STRUCT_FAST_ALLOCATED;
174 Vector<String> m_captureGroupNames;
175 HashMap<String, unsigned> m_namedGroupToParenIndex;
176 };
177
178 String m_patternString;
179 RegExpState m_state { NotCompiled };
180 OptionSet<Yarr::Flags> m_flags;
181 Yarr::ErrorCode m_constructionErrorCode { Yarr::ErrorCode::NoError };
182 unsigned m_numSubpatterns { 0 };
183 std::unique_ptr<Yarr::BytecodePattern> m_regExpBytecode;
184#if ENABLE(YARR_JIT)
185 std::unique_ptr<Yarr::YarrCodeBlock> m_regExpJITCode;
186#endif
187 std::unique_ptr<RareData> m_rareData;
188#if ENABLE(REGEXP_TRACING)
189 double m_rtMatchOnlyTotalSubjectStringLen { 0.0 };
190 double m_rtMatchTotalSubjectStringLen { 0.0 };
191 unsigned m_rtMatchOnlyCallCount { 0 };
192 unsigned m_rtMatchOnlyFoundCount { 0 };
193 unsigned m_rtMatchCallCount { 0 };
194 unsigned m_rtMatchFoundCount { 0 };
195#endif
196};
197
198} // namespace JSC
199