1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <functional>
6
7#include "src/arguments-inl.h"
8#include "src/conversions-inl.h"
9#include "src/counters.h"
10#include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
11#include "src/isolate-inl.h"
12#include "src/message-template.h"
13#include "src/objects/js-array-inl.h"
14#include "src/regexp/jsregexp-inl.h"
15#include "src/regexp/regexp-utils.h"
16#include "src/runtime/runtime-utils.h"
17#include "src/string-builder-inl.h"
18#include "src/string-search.h"
19#include "src/zone/zone-chunk-list.h"
20
21namespace v8 {
22namespace internal {
23
24namespace {
25
26// Returns -1 for failure.
27uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
28 bool has_named_captures) {
29 const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
30 const uint32_t kAdditionalArgsWithNamedCaptures = 3;
31 if (num_captures > Code::kMaxArguments) return -1;
32 uint32_t argc = has_named_captures
33 ? num_captures + kAdditionalArgsWithNamedCaptures
34 : num_captures + kAdditionalArgsWithoutNamedCaptures;
35 STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
36 kAdditionalArgsWithNamedCaptures);
37 return (argc > Code::kMaxArguments) ? -1 : argc;
38}
39
40// Looks up the capture of the given name. Returns the (1-based) numbered
41// capture index or -1 on failure.
42int LookupNamedCapture(const std::function<bool(String)>& name_matches,
43 FixedArray capture_name_map) {
44 // TODO(jgruber): Sort capture_name_map and do binary search via
45 // internalized strings.
46
47 int maybe_capture_index = -1;
48 const int named_capture_count = capture_name_map->length() >> 1;
49 for (int j = 0; j < named_capture_count; j++) {
50 // The format of {capture_name_map} is documented at
51 // JSRegExp::kIrregexpCaptureNameMapIndex.
52 const int name_ix = j * 2;
53 const int index_ix = j * 2 + 1;
54
55 String capture_name = String::cast(capture_name_map->get(name_ix));
56 if (!name_matches(capture_name)) continue;
57
58 maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix));
59 break;
60 }
61
62 return maybe_capture_index;
63}
64
65} // namespace
66
67class CompiledReplacement {
68 public:
69 explicit CompiledReplacement(Zone* zone)
70 : parts_(zone), replacement_substrings_(zone) {}
71
72 // Return whether the replacement is simple.
73 bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
74 Handle<String> replacement, int capture_count,
75 int subject_length);
76
77 // Use Apply only if Compile returned false.
78 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
79 int32_t* match);
80
81 // Number of distinct parts of the replacement pattern.
82 int parts() { return static_cast<int>(parts_.size()); }
83
84 private:
85 enum PartType {
86 SUBJECT_PREFIX = 1,
87 SUBJECT_SUFFIX,
88 SUBJECT_CAPTURE,
89 REPLACEMENT_SUBSTRING,
90 REPLACEMENT_STRING,
91 EMPTY_REPLACEMENT,
92 NUMBER_OF_PART_TYPES
93 };
94
95 struct ReplacementPart {
96 static inline ReplacementPart SubjectMatch() {
97 return ReplacementPart(SUBJECT_CAPTURE, 0);
98 }
99 static inline ReplacementPart SubjectCapture(int capture_index) {
100 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
101 }
102 static inline ReplacementPart SubjectPrefix() {
103 return ReplacementPart(SUBJECT_PREFIX, 0);
104 }
105 static inline ReplacementPart SubjectSuffix(int subject_length) {
106 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
107 }
108 static inline ReplacementPart ReplacementString() {
109 return ReplacementPart(REPLACEMENT_STRING, 0);
110 }
111 static inline ReplacementPart EmptyReplacement() {
112 return ReplacementPart(EMPTY_REPLACEMENT, 0);
113 }
114 static inline ReplacementPart ReplacementSubString(int from, int to) {
115 DCHECK_LE(0, from);
116 DCHECK_GT(to, from);
117 return ReplacementPart(-from, to);
118 }
119
120 // If tag <= 0 then it is the negation of a start index of a substring of
121 // the replacement pattern, otherwise it's a value from PartType.
122 ReplacementPart(int tag, int data) : tag(tag), data(data) {
123 // Must be non-positive or a PartType value.
124 DCHECK(tag < NUMBER_OF_PART_TYPES);
125 }
126 // Either a value of PartType or a non-positive number that is
127 // the negation of an index into the replacement string.
128 int tag;
129 // The data value's interpretation depends on the value of tag:
130 // tag == SUBJECT_PREFIX ||
131 // tag == SUBJECT_SUFFIX: data is unused.
132 // tag == SUBJECT_CAPTURE: data is the number of the capture.
133 // tag == REPLACEMENT_SUBSTRING ||
134 // tag == REPLACEMENT_STRING: data is index into array of substrings
135 // of the replacement string.
136 // tag == EMPTY_REPLACEMENT: data is unused.
137 // tag <= 0: Temporary representation of the substring of the replacement
138 // string ranging over -tag .. data.
139 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
140 // substring objects.
141 int data;
142 };
143
144 template <typename Char>
145 bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
146 Vector<Char> characters,
147 FixedArray capture_name_map, int capture_count,
148 int subject_length) {
149 // Equivalent to String::GetSubstitution, except that this method converts
150 // the replacement string into an internal representation that avoids
151 // repeated parsing when used repeatedly.
152 int length = characters.length();
153 int last = 0;
154 for (int i = 0; i < length; i++) {
155 Char c = characters[i];
156 if (c == '$') {
157 int next_index = i + 1;
158 if (next_index == length) { // No next character!
159 break;
160 }
161 Char c2 = characters[next_index];
162 switch (c2) {
163 case '$':
164 if (i > last) {
165 // There is a substring before. Include the first "$".
166 parts->push_back(
167 ReplacementPart::ReplacementSubString(last, next_index));
168 last = next_index + 1; // Continue after the second "$".
169 } else {
170 // Let the next substring start with the second "$".
171 last = next_index;
172 }
173 i = next_index;
174 break;
175 case '`':
176 if (i > last) {
177 parts->push_back(ReplacementPart::ReplacementSubString(last, i));
178 }
179 parts->push_back(ReplacementPart::SubjectPrefix());
180 i = next_index;
181 last = i + 1;
182 break;
183 case '\'':
184 if (i > last) {
185 parts->push_back(ReplacementPart::ReplacementSubString(last, i));
186 }
187 parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
188 i = next_index;
189 last = i + 1;
190 break;
191 case '&':
192 if (i > last) {
193 parts->push_back(ReplacementPart::ReplacementSubString(last, i));
194 }
195 parts->push_back(ReplacementPart::SubjectMatch());
196 i = next_index;
197 last = i + 1;
198 break;
199 case '0':
200 case '1':
201 case '2':
202 case '3':
203 case '4':
204 case '5':
205 case '6':
206 case '7':
207 case '8':
208 case '9': {
209 int capture_ref = c2 - '0';
210 if (capture_ref > capture_count) {
211 i = next_index;
212 continue;
213 }
214 int second_digit_index = next_index + 1;
215 if (second_digit_index < length) {
216 // Peek ahead to see if we have two digits.
217 Char c3 = characters[second_digit_index];
218 if ('0' <= c3 && c3 <= '9') { // Double digits.
219 int double_digit_ref = capture_ref * 10 + c3 - '0';
220 if (double_digit_ref <= capture_count) {
221 next_index = second_digit_index;
222 capture_ref = double_digit_ref;
223 }
224 }
225 }
226 if (capture_ref > 0) {
227 if (i > last) {
228 parts->push_back(
229 ReplacementPart::ReplacementSubString(last, i));
230 }
231 DCHECK(capture_ref <= capture_count);
232 parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
233 last = next_index + 1;
234 }
235 i = next_index;
236 break;
237 }
238 case '<': {
239 if (capture_name_map.is_null()) {
240 i = next_index;
241 break;
242 }
243
244 // Scan until the next '>', and let the enclosed substring be the
245 // groupName.
246
247 const int name_start_index = next_index + 1;
248 int closing_bracket_index = -1;
249 for (int j = name_start_index; j < length; j++) {
250 if (characters[j] == '>') {
251 closing_bracket_index = j;
252 break;
253 }
254 }
255
256 // If no closing bracket is found, '$<' is treated as a string
257 // literal.
258 if (closing_bracket_index == -1) {
259 i = next_index;
260 break;
261 }
262
263 Vector<Char> requested_name =
264 characters.SubVector(name_start_index, closing_bracket_index);
265
266 // Let capture be ? Get(namedCaptures, groupName).
267
268 const int capture_index = LookupNamedCapture(
269 [=](String capture_name) {
270 return capture_name->IsEqualTo(requested_name);
271 },
272 capture_name_map);
273
274 // If capture is undefined or does not exist, replace the text
275 // through the following '>' with the empty string.
276 // Otherwise, replace the text through the following '>' with
277 // ? ToString(capture).
278
279 DCHECK(capture_index == -1 ||
280 (1 <= capture_index && capture_index <= capture_count));
281
282 if (i > last) {
283 parts->push_back(ReplacementPart::ReplacementSubString(last, i));
284 }
285 parts->push_back(
286 (capture_index == -1)
287 ? ReplacementPart::EmptyReplacement()
288 : ReplacementPart::SubjectCapture(capture_index));
289 last = closing_bracket_index + 1;
290 i = closing_bracket_index;
291 break;
292 }
293 default:
294 i = next_index;
295 break;
296 }
297 }
298 }
299 if (length > last) {
300 if (last == 0) {
301 // Replacement is simple. Do not use Apply to do the replacement.
302 return true;
303 } else {
304 parts->push_back(ReplacementPart::ReplacementSubString(last, length));
305 }
306 }
307 return false;
308 }
309
310 ZoneChunkList<ReplacementPart> parts_;
311 ZoneVector<Handle<String>> replacement_substrings_;
312};
313
314bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
315 Handle<String> replacement, int capture_count,
316 int subject_length) {
317 {
318 DisallowHeapAllocation no_gc;
319 String::FlatContent content = replacement->GetFlatContent(no_gc);
320 DCHECK(content.IsFlat());
321
322 FixedArray capture_name_map;
323 if (capture_count > 0) {
324 DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
325 Object maybe_capture_name_map = regexp->CaptureNameMap();
326 if (maybe_capture_name_map->IsFixedArray()) {
327 capture_name_map = FixedArray::cast(maybe_capture_name_map);
328 }
329 }
330
331 bool simple;
332 if (content.IsOneByte()) {
333 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
334 capture_name_map, capture_count,
335 subject_length);
336 } else {
337 DCHECK(content.IsTwoByte());
338 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
339 capture_name_map, capture_count,
340 subject_length);
341 }
342 if (simple) return true;
343 }
344
345 // Find substrings of replacement string and create them as String objects.
346 int substring_index = 0;
347 for (ReplacementPart& part : parts_) {
348 int tag = part.tag;
349 if (tag <= 0) { // A replacement string slice.
350 int from = -tag;
351 int to = part.data;
352 replacement_substrings_.push_back(
353 isolate->factory()->NewSubString(replacement, from, to));
354 part.tag = REPLACEMENT_SUBSTRING;
355 part.data = substring_index;
356 substring_index++;
357 } else if (tag == REPLACEMENT_STRING) {
358 replacement_substrings_.push_back(replacement);
359 part.data = substring_index;
360 substring_index++;
361 }
362 }
363 return false;
364}
365
366
367void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
368 int match_from, int match_to, int32_t* match) {
369 DCHECK_LT(0, parts_.size());
370 for (ReplacementPart& part : parts_) {
371 switch (part.tag) {
372 case SUBJECT_PREFIX:
373 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
374 break;
375 case SUBJECT_SUFFIX: {
376 int subject_length = part.data;
377 if (match_to < subject_length) {
378 builder->AddSubjectSlice(match_to, subject_length);
379 }
380 break;
381 }
382 case SUBJECT_CAPTURE: {
383 int capture = part.data;
384 int from = match[capture * 2];
385 int to = match[capture * 2 + 1];
386 if (from >= 0 && to > from) {
387 builder->AddSubjectSlice(from, to);
388 }
389 break;
390 }
391 case REPLACEMENT_SUBSTRING:
392 case REPLACEMENT_STRING:
393 builder->AddString(replacement_substrings_[part.data]);
394 break;
395 case EMPTY_REPLACEMENT:
396 break;
397 default:
398 UNREACHABLE();
399 }
400 }
401}
402
403void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
404 std::vector<int>* indices, unsigned int limit) {
405 DCHECK_LT(0, limit);
406 // Collect indices of pattern in subject using memchr.
407 // Stop after finding at most limit values.
408 const uint8_t* subject_start = subject.start();
409 const uint8_t* subject_end = subject_start + subject.length();
410 const uint8_t* pos = subject_start;
411 while (limit > 0) {
412 pos = reinterpret_cast<const uint8_t*>(
413 memchr(pos, pattern, subject_end - pos));
414 if (pos == nullptr) return;
415 indices->push_back(static_cast<int>(pos - subject_start));
416 pos++;
417 limit--;
418 }
419}
420
421void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
422 std::vector<int>* indices, unsigned int limit) {
423 DCHECK_LT(0, limit);
424 const uc16* subject_start = subject.start();
425 const uc16* subject_end = subject_start + subject.length();
426 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
427 if (*pos == pattern) {
428 indices->push_back(static_cast<int>(pos - subject_start));
429 limit--;
430 }
431 }
432}
433
434template <typename SubjectChar, typename PatternChar>
435void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
436 Vector<const PatternChar> pattern,
437 std::vector<int>* indices, unsigned int limit) {
438 DCHECK_LT(0, limit);
439 // Collect indices of pattern in subject.
440 // Stop after finding at most limit values.
441 int pattern_length = pattern.length();
442 int index = 0;
443 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
444 while (limit > 0) {
445 index = search.Search(subject, index);
446 if (index < 0) return;
447 indices->push_back(index);
448 index += pattern_length;
449 limit--;
450 }
451}
452
453void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
454 std::vector<int>* indices, unsigned int limit) {
455 {
456 DisallowHeapAllocation no_gc;
457 String::FlatContent subject_content = subject->GetFlatContent(no_gc);
458 String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
459 DCHECK(subject_content.IsFlat());
460 DCHECK(pattern_content.IsFlat());
461 if (subject_content.IsOneByte()) {
462 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
463 if (pattern_content.IsOneByte()) {
464 Vector<const uint8_t> pattern_vector =
465 pattern_content.ToOneByteVector();
466 if (pattern_vector.length() == 1) {
467 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
468 limit);
469 } else {
470 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
471 limit);
472 }
473 } else {
474 FindStringIndices(isolate, subject_vector,
475 pattern_content.ToUC16Vector(), indices, limit);
476 }
477 } else {
478 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
479 if (pattern_content.IsOneByte()) {
480 Vector<const uint8_t> pattern_vector =
481 pattern_content.ToOneByteVector();
482 if (pattern_vector.length() == 1) {
483 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
484 limit);
485 } else {
486 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487 limit);
488 }
489 } else {
490 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
491 if (pattern_vector.length() == 1) {
492 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
493 limit);
494 } else {
495 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
496 limit);
497 }
498 }
499 }
500 }
501}
502
503namespace {
504std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
505 std::vector<int>* list = isolate->regexp_indices();
506 list->clear();
507 return list;
508}
509
510void TruncateRegexpIndicesList(Isolate* isolate) {
511 // Same size as smallest zone segment, preserving behavior from the
512 // runtime zone.
513 static const int kMaxRegexpIndicesListCapacity = 8 * KB;
514 std::vector<int>* indicies = isolate->regexp_indices();
515 if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
516 // Throw away backing storage.
517 indicies->clear();
518 indicies->shrink_to_fit();
519 }
520}
521} // namespace
522
523template <typename ResultSeqString>
524V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
525 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
526 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
527 DCHECK(subject->IsFlat());
528 DCHECK(replacement->IsFlat());
529
530 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
531
532 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
533 String pattern =
534 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
535 int subject_len = subject->length();
536 int pattern_len = pattern->length();
537 int replacement_len = replacement->length();
538
539 FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
540
541 if (indices->empty()) return *subject;
542
543 // Detect integer overflow.
544 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
545 static_cast<int64_t>(pattern_len)) *
546 static_cast<int64_t>(indices->size()) +
547 static_cast<int64_t>(subject_len);
548 int result_len;
549 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
550 STATIC_ASSERT(String::kMaxLength < kMaxInt);
551 result_len = kMaxInt; // Provoke exception.
552 } else {
553 result_len = static_cast<int>(result_len_64);
554 }
555 if (result_len == 0) {
556 return ReadOnlyRoots(isolate).empty_string();
557 }
558
559 int subject_pos = 0;
560 int result_pos = 0;
561
562 MaybeHandle<SeqString> maybe_res;
563 if (ResultSeqString::kHasOneByteEncoding) {
564 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
565 } else {
566 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
567 }
568 Handle<SeqString> untyped_res;
569 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
570 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
571
572 DisallowHeapAllocation no_gc;
573 for (int index : *indices) {
574 // Copy non-matched subject content.
575 if (subject_pos < index) {
576 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
577 subject_pos, index);
578 result_pos += index - subject_pos;
579 }
580
581 // Replace match.
582 if (replacement_len > 0) {
583 String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
584 replacement_len);
585 result_pos += replacement_len;
586 }
587
588 subject_pos = index + pattern_len;
589 }
590 // Add remaining subject content at the end.
591 if (subject_pos < subject_len) {
592 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
593 subject_pos, subject_len);
594 }
595
596 int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
597 RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, 0,
598 match_indices);
599
600 TruncateRegexpIndicesList(isolate);
601
602 return *result;
603}
604
605V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
606 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
607 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
608 DCHECK(subject->IsFlat());
609 DCHECK(replacement->IsFlat());
610
611 int capture_count = regexp->CaptureCount();
612 int subject_length = subject->length();
613
614 JSRegExp::Type typeTag = regexp->TypeTag();
615 if (typeTag == JSRegExp::IRREGEXP) {
616 // Ensure the RegExp is compiled so we can access the capture-name map.
617 if (RegExpImpl::IrregexpPrepare(isolate, regexp, subject) == -1) {
618 DCHECK(isolate->has_pending_exception());
619 return ReadOnlyRoots(isolate).exception();
620 }
621 }
622
623 // CompiledReplacement uses zone allocation.
624 Zone zone(isolate->allocator(), ZONE_NAME);
625 CompiledReplacement compiled_replacement(&zone);
626 const bool simple_replace = compiled_replacement.Compile(
627 isolate, regexp, replacement, capture_count, subject_length);
628
629 // Shortcut for simple non-regexp global replacements
630 if (typeTag == JSRegExp::ATOM && simple_replace) {
631 if (subject->IsOneByteRepresentation() &&
632 replacement->IsOneByteRepresentation()) {
633 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
634 isolate, subject, regexp, replacement, last_match_info);
635 } else {
636 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
637 isolate, subject, regexp, replacement, last_match_info);
638 }
639 }
640
641 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
642 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
643
644 int32_t* current_match = global_cache.FetchNext();
645 if (current_match == nullptr) {
646 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
647 return *subject;
648 }
649
650 // Guessing the number of parts that the final result string is built
651 // from. Global regexps can match any number of times, so we guess
652 // conservatively.
653 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
654 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
655
656 int prev = 0;
657
658 do {
659 int start = current_match[0];
660 int end = current_match[1];
661
662 if (prev < start) {
663 builder.AddSubjectSlice(prev, start);
664 }
665
666 if (simple_replace) {
667 builder.AddString(replacement);
668 } else {
669 compiled_replacement.Apply(&builder, start, end, current_match);
670 }
671 prev = end;
672
673 current_match = global_cache.FetchNext();
674 } while (current_match != nullptr);
675
676 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
677
678 if (prev < subject_length) {
679 builder.AddSubjectSlice(prev, subject_length);
680 }
681
682 RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
683 global_cache.LastSuccessfulMatch());
684
685 RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
686}
687
688template <typename ResultSeqString>
689V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
690 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
691 Handle<RegExpMatchInfo> last_match_info) {
692 DCHECK(subject->IsFlat());
693
694 // Shortcut for simple non-regexp global replacements
695 if (regexp->TypeTag() == JSRegExp::ATOM) {
696 Handle<String> empty_string = isolate->factory()->empty_string();
697 if (subject->IsOneByteRepresentation()) {
698 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
699 isolate, subject, regexp, empty_string, last_match_info);
700 } else {
701 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
702 isolate, subject, regexp, empty_string, last_match_info);
703 }
704 }
705
706 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
707 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
708
709 int32_t* current_match = global_cache.FetchNext();
710 if (current_match == nullptr) {
711 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
712 return *subject;
713 }
714
715 int start = current_match[0];
716 int end = current_match[1];
717 int capture_count = regexp->CaptureCount();
718 int subject_length = subject->length();
719
720 int new_length = subject_length - (end - start);
721 if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
722
723 Handle<ResultSeqString> answer;
724 if (ResultSeqString::kHasOneByteEncoding) {
725 answer = Handle<ResultSeqString>::cast(
726 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
727 } else {
728 answer = Handle<ResultSeqString>::cast(
729 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
730 }
731
732 int prev = 0;
733 int position = 0;
734
735 DisallowHeapAllocation no_gc;
736 do {
737 start = current_match[0];
738 end = current_match[1];
739 if (prev < start) {
740 // Add substring subject[prev;start] to answer string.
741 String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
742 start);
743 position += start - prev;
744 }
745 prev = end;
746
747 current_match = global_cache.FetchNext();
748 } while (current_match != nullptr);
749
750 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
751
752 RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
753 global_cache.LastSuccessfulMatch());
754
755 if (prev < subject_length) {
756 // Add substring subject[prev;length] to answer string.
757 String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
758 subject_length);
759 position += subject_length - prev;
760 }
761
762 if (position == 0) return ReadOnlyRoots(isolate).empty_string();
763
764 // Shorten string and fill
765 int string_size = ResultSeqString::SizeFor(position);
766 int allocated_string_size = ResultSeqString::SizeFor(new_length);
767 int delta = allocated_string_size - string_size;
768
769 answer->set_length(position);
770 if (delta == 0) return *answer;
771
772 Address end_of_string = answer->address() + string_size;
773 Heap* heap = isolate->heap();
774
775 // The trimming is performed on a newly allocated object, which is on a
776 // freshly allocated page or on an already swept page. Hence, the sweeper
777 // thread can not get confused with the filler creation. No synchronization
778 // needed.
779 // TODO(hpayer): We should shrink the large object page if the size
780 // of the object changed significantly.
781 if (!heap->IsLargeObject(*answer)) {
782 heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
783 }
784 return *answer;
785}
786
787RUNTIME_FUNCTION(Runtime_StringSplit) {
788 HandleScope handle_scope(isolate);
789 DCHECK_EQ(3, args.length());
790 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
791 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
792 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
793 CHECK_LT(0, limit);
794
795 int subject_length = subject->length();
796 int pattern_length = pattern->length();
797 CHECK_LT(0, pattern_length);
798
799 if (limit == 0xFFFFFFFFu) {
800 FixedArray last_match_cache_unused;
801 Handle<Object> cached_answer(
802 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
803 &last_match_cache_unused,
804 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
805 isolate);
806 if (*cached_answer != Smi::kZero) {
807 // The cache FixedArray is a COW-array and can therefore be reused.
808 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
809 Handle<FixedArray>::cast(cached_answer));
810 return *result;
811 }
812 }
813
814 // The limit can be very large (0xFFFFFFFFu), but since the pattern
815 // isn't empty, we can never create more parts than ~half the length
816 // of the subject.
817
818 subject = String::Flatten(isolate, subject);
819 pattern = String::Flatten(isolate, pattern);
820
821 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
822
823 FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
824
825 if (static_cast<uint32_t>(indices->size()) < limit) {
826 indices->push_back(subject_length);
827 }
828
829 // The list indices now contains the end of each part to create.
830
831 // Create JSArray of substrings separated by separator.
832 int part_count = static_cast<int>(indices->size());
833
834 Handle<JSArray> result =
835 isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
836 INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
837
838 DCHECK(result->HasObjectElements());
839
840 Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
841
842 if (part_count == 1 && indices->at(0) == subject_length) {
843 elements->set(0, *subject);
844 } else {
845 int part_start = 0;
846 FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
847 int part_end = indices->at(i);
848 Handle<String> substring =
849 isolate->factory()->NewProperSubString(subject, part_start, part_end);
850 elements->set(i, *substring);
851 part_start = part_end + pattern_length;
852 });
853 }
854
855 if (limit == 0xFFFFFFFFu) {
856 if (result->HasObjectElements()) {
857 RegExpResultsCache::Enter(isolate, subject, pattern, elements,
858 isolate->factory()->empty_fixed_array(),
859 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
860 }
861 }
862
863 TruncateRegexpIndicesList(isolate);
864
865 return *result;
866}
867
868RUNTIME_FUNCTION(Runtime_RegExpExec) {
869 HandleScope scope(isolate);
870 DCHECK_EQ(4, args.length());
871 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
872 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
873 CONVERT_INT32_ARG_CHECKED(index, 2);
874 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
875 // Due to the way the JS calls are constructed this must be less than the
876 // length of a string, i.e. it is always a Smi. We check anyway for security.
877 CHECK_LE(0, index);
878 CHECK_GE(subject->length(), index);
879 isolate->counters()->regexp_entry_runtime()->Increment();
880 RETURN_RESULT_OR_FAILURE(isolate, RegExpImpl::Exec(isolate, regexp, subject,
881 index, last_match_info));
882}
883
884namespace {
885
886class MatchInfoBackedMatch : public String::Match {
887 public:
888 MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
889 Handle<String> subject,
890 Handle<RegExpMatchInfo> match_info)
891 : isolate_(isolate), match_info_(match_info) {
892 subject_ = String::Flatten(isolate, subject);
893
894 if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
895 Object o = regexp->CaptureNameMap();
896 has_named_captures_ = o->IsFixedArray();
897 if (has_named_captures_) {
898 capture_name_map_ = handle(FixedArray::cast(o), isolate);
899 }
900 } else {
901 has_named_captures_ = false;
902 }
903 }
904
905 Handle<String> GetMatch() override {
906 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
907 }
908
909 Handle<String> GetPrefix() override {
910 const int match_start = match_info_->Capture(0);
911 return isolate_->factory()->NewSubString(subject_, 0, match_start);
912 }
913
914 Handle<String> GetSuffix() override {
915 const int match_end = match_info_->Capture(1);
916 return isolate_->factory()->NewSubString(subject_, match_end,
917 subject_->length());
918 }
919
920 bool HasNamedCaptures() override { return has_named_captures_; }
921
922 int CaptureCount() override {
923 return match_info_->NumberOfCaptureRegisters() / 2;
924 }
925
926 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
927 Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
928 isolate_, match_info_, i, capture_exists);
929 return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
930 : isolate_->factory()->empty_string();
931 }
932
933 MaybeHandle<String> GetNamedCapture(Handle<String> name,
934 CaptureState* state) override {
935 DCHECK(has_named_captures_);
936 const int capture_index = LookupNamedCapture(
937 [=](String capture_name) { return capture_name->Equals(*name); },
938 *capture_name_map_);
939
940 if (capture_index == -1) {
941 *state = INVALID;
942 return name; // Arbitrary string handle.
943 }
944
945 DCHECK(1 <= capture_index && capture_index <= CaptureCount());
946
947 bool capture_exists;
948 Handle<String> capture_value;
949 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
950 GetCapture(capture_index, &capture_exists),
951 String);
952
953 if (!capture_exists) {
954 *state = UNMATCHED;
955 return isolate_->factory()->empty_string();
956 } else {
957 *state = MATCHED;
958 return capture_value;
959 }
960 }
961
962 private:
963 Isolate* isolate_;
964 Handle<String> subject_;
965 Handle<RegExpMatchInfo> match_info_;
966
967 bool has_named_captures_;
968 Handle<FixedArray> capture_name_map_;
969};
970
971class VectorBackedMatch : public String::Match {
972 public:
973 VectorBackedMatch(Isolate* isolate, Handle<String> subject,
974 Handle<String> match, int match_position,
975 ZoneVector<Handle<Object>>* captures,
976 Handle<Object> groups_obj)
977 : isolate_(isolate),
978 match_(match),
979 match_position_(match_position),
980 captures_(captures) {
981 subject_ = String::Flatten(isolate, subject);
982
983 DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
984 has_named_captures_ = !groups_obj->IsUndefined(isolate);
985 if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
986 }
987
988 Handle<String> GetMatch() override { return match_; }
989
990 Handle<String> GetPrefix() override {
991 return isolate_->factory()->NewSubString(subject_, 0, match_position_);
992 }
993
994 Handle<String> GetSuffix() override {
995 const int match_end_position = match_position_ + match_->length();
996 return isolate_->factory()->NewSubString(subject_, match_end_position,
997 subject_->length());
998 }
999
1000 bool HasNamedCaptures() override { return has_named_captures_; }
1001
1002 int CaptureCount() override { return static_cast<int>(captures_->size()); }
1003
1004 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1005 Handle<Object> capture_obj = captures_->at(i);
1006 if (capture_obj->IsUndefined(isolate_)) {
1007 *capture_exists = false;
1008 return isolate_->factory()->empty_string();
1009 }
1010 *capture_exists = true;
1011 return Object::ToString(isolate_, capture_obj);
1012 }
1013
1014 MaybeHandle<String> GetNamedCapture(Handle<String> name,
1015 CaptureState* state) override {
1016 DCHECK(has_named_captures_);
1017
1018 Maybe<bool> maybe_capture_exists =
1019 JSReceiver::HasProperty(groups_obj_, name);
1020 if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1021
1022 if (!maybe_capture_exists.FromJust()) {
1023 *state = INVALID;
1024 return name; // Arbitrary string handle.
1025 }
1026
1027 Handle<Object> capture_obj;
1028 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1029 Object::GetProperty(isolate_, groups_obj_, name),
1030 String);
1031 if (capture_obj->IsUndefined(isolate_)) {
1032 *state = UNMATCHED;
1033 return isolate_->factory()->empty_string();
1034 } else {
1035 *state = MATCHED;
1036 return Object::ToString(isolate_, capture_obj);
1037 }
1038 }
1039
1040 private:
1041 Isolate* isolate_;
1042 Handle<String> subject_;
1043 Handle<String> match_;
1044 const int match_position_;
1045 ZoneVector<Handle<Object>>* captures_;
1046
1047 bool has_named_captures_;
1048 Handle<JSReceiver> groups_obj_;
1049};
1050
1051// Create the groups object (see also the RegExp result creation in
1052// RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
1053Handle<JSObject> ConstructNamedCaptureGroupsObject(
1054 Isolate* isolate, Handle<FixedArray> capture_map,
1055 const std::function<Object(int)>& f_get_capture) {
1056 Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1057
1058 const int capture_count = capture_map->length() >> 1;
1059 for (int i = 0; i < capture_count; i++) {
1060 const int name_ix = i * 2;
1061 const int index_ix = i * 2 + 1;
1062
1063 Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1064 isolate);
1065 const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1066 DCHECK(1 <= capture_ix && capture_ix <= capture_count);
1067
1068 Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1069 DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1070
1071 JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1072 }
1073
1074 return groups;
1075}
1076
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info. See comment on that function.
//
// Runs the global {regexp} over the flat {subject} and collects the outcome
// in {result_array}. The collected entries are the subject slices between
// matches (added via ReplacementStringBuilder::AddSubjectSlice) and, for each
// match, either the matched string (has_capture == false) or a JSArray holding
// [match, capture 1..n, match start index, subject, groups object (only if the
// regexp has named captures)].
//
// Returns {result_array} if there was at least one match, null if there was
// none, and the exception sentinel if the global cache reports an exception.
// {last_match_array} is updated with the last successful match.
template <bool has_capture>
static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
                                   Handle<JSRegExp> regexp,
                                   Handle<RegExpMatchInfo> last_match_array,
                                   Handle<JSArray> result_array) {
  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
  DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
  DCHECK(subject->IsFlat());

  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  // Only subjects longer than this consult and populate the results cache.
  static const int kMinLengthToCache = 0x1000;

  if (subject_length > kMinLengthToCache) {
    FixedArray last_match_cache;
    Object cached_answer = RegExpResultsCache::Lookup(
        isolate->heap(), *subject, regexp->data(), &last_match_cache,
        RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    if (cached_answer->IsFixedArray()) {
      // Cache hit: copy the cached match registers into a native buffer first,
      // then hand out a copy of the cached result and restore the last match
      // info from the buffer.
      int capture_registers = (capture_count + 1) * 2;
      int32_t* last_match = NewArray<int32_t>(capture_registers);
      for (int i = 0; i < capture_registers; i++) {
        last_match[i] = Smi::ToInt(last_match_cache->get(i));
      }
      Handle<FixedArray> cached_fixed_array =
          Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
      // The cache FixedArray is a COW-array and we need to return a copy.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              cached_fixed_array, isolate->factory()->fixed_array_map());
      JSArray::SetContent(result_array, copied_fixed_array);
      RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
                                   capture_count, last_match);
      DeleteArray(last_match);
      return *result_array;
    }
  }

  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  // Ensured in Runtime_RegExpExecMultiple.
  DCHECK(result_array->HasObjectElements());
  Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
                                     isolate);
  // Start from a backing store of at least 16 slots.
  if (result_elements->length() < 16) {
    result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
  }

  FixedArrayBuilder builder(result_elements);

  // Position to search from.
  int match_start = -1;
  int match_end = 0;
  bool first = true;

  // Two smis before and after the match, for very long strings.
  static const int kMaxBuilderEntriesPerRegExpMatch = 5;

  while (true) {
    // A null pointer from FetchNext ends the loop (no further match, or an
    // exception, which is checked after the loop).
    int32_t* current_match = global_cache.FetchNext();
    if (current_match == nullptr) break;
    match_start = current_match[0];
    builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
    // Record the subject range between the previous match and this one.
    if (match_end < match_start) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                match_start);
    }
    match_end = current_match[1];
    {
      // Avoid accumulating new handles inside loop.
      HandleScope temp_scope(isolate);
      Handle<String> match;
      // The first match is created with NewSubString, every later one with
      // NewProperSubString.
      if (!first) {
        match = isolate->factory()->NewProperSubString(subject, match_start,
                                                       match_end);
      } else {
        match =
            isolate->factory()->NewSubString(subject, match_start, match_end);
        first = false;
      }

      if (has_capture) {
        // Arguments array to replace function is match, captures, index and
        // subject, i.e., 3 + capture count in total. If the RegExp contains
        // named captures, they are also passed as the last argument.

        Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
        const bool has_named_captures = maybe_capture_map->IsFixedArray();

        const int argc =
            has_named_captures ? 4 + capture_count : 3 + capture_count;

        Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
        int cursor = 0;

        elements->set(cursor++, *match);
        for (int i = 1; i <= capture_count; i++) {
          // Registers 2*i and 2*i+1 hold the start and end of capture i; a
          // negative start means the capture is stored as undefined.
          int start = current_match[i * 2];
          if (start >= 0) {
            int end = current_match[i * 2 + 1];
            DCHECK(start <= end);
            Handle<String> substring =
                isolate->factory()->NewSubString(subject, start, end);
            elements->set(cursor++, *substring);
          } else {
            DCHECK_GT(0, current_match[i * 2 + 1]);
            elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
          }
        }

        elements->set(cursor++, Smi::FromInt(match_start));
        elements->set(cursor++, *subject);

        if (has_named_captures) {
          // Capture i lives at elements[i] (slot 0 is the match), so the
          // capture index can be used directly as the element index.
          Handle<FixedArray> capture_map =
              Handle<FixedArray>::cast(maybe_capture_map);
          Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
              isolate, capture_map, [=](int ix) { return elements->get(ix); });
          elements->set(cursor++, *groups);
        }

        DCHECK_EQ(cursor, argc);
        builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
      } else {
        builder.Add(*match);
      }
    }
  }

  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  if (match_start >= 0) {
    // Finished matching, with at least one match.
    // Record the subject range after the last match, if any.
    if (match_end < subject_length) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                subject_length);
    }

    RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
                                 capture_count,
                                 global_cache.LastSuccessfulMatch());

    if (subject_length > kMinLengthToCache) {
      // Store the last successful match into the array for caching.
      // TODO(yangguo): do not expose last match to JS and simplify caching.
      int capture_registers = (capture_count + 1) * 2;
      Handle<FixedArray> last_match_cache =
          isolate->factory()->NewFixedArray(capture_registers);
      int32_t* last_match = global_cache.LastSuccessfulMatch();
      for (int i = 0; i < capture_registers; i++) {
        last_match_cache->set(i, Smi::FromInt(last_match[i]));
      }
      Handle<FixedArray> result_fixed_array =
          FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
      // Cache the result and copy the FixedArray into a COW array.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              result_fixed_array, isolate->factory()->fixed_array_map());
      RegExpResultsCache::Enter(
          isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
          last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    }
    return *builder.ToJSArray(result_array);
  } else {
    return ReadOnlyRoots(isolate).null_value();  // No matches at all.
  }
}
1248
1249// Legacy implementation of RegExp.prototype[Symbol.replace] which
1250// doesn't properly call the underlying exec method.
1251V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1252 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1253 Handle<String> replace) {
1254 // Functional fast-paths are dispatched directly by replace builtin.
1255 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1256
1257 Factory* factory = isolate->factory();
1258
1259 const int flags = regexp->GetFlags();
1260 const bool global = (flags & JSRegExp::kGlobal) != 0;
1261 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1262
1263 replace = String::Flatten(isolate, replace);
1264
1265 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1266
1267 if (!global) {
1268 // Non-global regexp search, string replace.
1269
1270 uint32_t last_index = 0;
1271 if (sticky) {
1272 Handle<Object> last_index_obj(regexp->last_index(), isolate);
1273 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1274 Object::ToLength(isolate, last_index_obj),
1275 String);
1276 last_index = PositiveNumberToUint32(*last_index_obj);
1277 }
1278
1279 Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1280 isolate);
1281
1282 // A lastIndex exceeding the string length always returns null (signalling
1283 // failure) in RegExpBuiltinExec, thus we can skip the call.
1284 if (last_index <= static_cast<uint32_t>(string->length())) {
1285 ASSIGN_RETURN_ON_EXCEPTION(isolate, match_indices_obj,
1286 RegExpImpl::Exec(isolate, regexp, string,
1287 last_index, last_match_info),
1288 String);
1289 }
1290
1291 if (match_indices_obj->IsNull(isolate)) {
1292 if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1293 return string;
1294 }
1295
1296 auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1297
1298 const int start_index = match_indices->Capture(0);
1299 const int end_index = match_indices->Capture(1);
1300
1301 if (sticky) {
1302 regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1303 }
1304
1305 IncrementalStringBuilder builder(isolate);
1306 builder.AppendString(factory->NewSubString(string, 0, start_index));
1307
1308 if (replace->length() > 0) {
1309 MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1310 Handle<String> replacement;
1311 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1312 String::GetSubstitution(isolate, &m, replace),
1313 String);
1314 builder.AppendString(replacement);
1315 }
1316
1317 builder.AppendString(
1318 factory->NewSubString(string, end_index, string->length()));
1319 return builder.Finish();
1320 } else {
1321 // Global regexp search, string replace.
1322 DCHECK(global);
1323 RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1324 String);
1325
1326 if (replace->length() == 0) {
1327 if (string->IsOneByteRepresentation()) {
1328 Object result =
1329 StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1330 isolate, string, regexp, last_match_info);
1331 return handle(String::cast(result), isolate);
1332 } else {
1333 Object result =
1334 StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1335 isolate, string, regexp, last_match_info);
1336 return handle(String::cast(result), isolate);
1337 }
1338 }
1339
1340 Object result = StringReplaceGlobalRegExpWithString(
1341 isolate, string, regexp, replace, last_match_info);
1342 if (result->IsString()) {
1343 return handle(String::cast(result), isolate);
1344 } else {
1345 return MaybeHandle<String>();
1346 }
1347 }
1348
1349 UNREACHABLE();
1350}
1351
1352} // namespace
1353
1354// This is only called for StringReplaceGlobalRegExpWithFunction.
1355RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1356 HandleScope handles(isolate);
1357 DCHECK_EQ(4, args.length());
1358
1359 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1360 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1361 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1362 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1363
1364 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1365 CHECK(result_array->HasObjectElements());
1366
1367 subject = String::Flatten(isolate, subject);
1368 CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1369
1370 Object result;
1371 if (regexp->CaptureCount() == 0) {
1372 result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1373 last_match_info, result_array);
1374 } else {
1375 result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1376 last_match_info, result_array);
1377 }
1378 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1379 return result;
1380}
1381
// Replaces the first match of a non-global, unmodified {regexp} in {subject}
// with the stringified result of calling {replace_obj}.
//
// Arguments: subject string, regexp, callable replacer. The replacer receives
// the match, all captures, the match index, the subject and, if the regexp has
// named captures, the groups object. Returns the subject unchanged if there is
// no match. Sticky regexps read lastIndex before the search and update it
// afterwards.
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);

  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
  DCHECK(replace_obj->map()->is_callable());

  Factory* factory = isolate->factory();
  Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();

  const int flags = regexp->GetFlags();
  DCHECK_EQ(flags & JSRegExp::kGlobal, 0);

  // TODO(jgruber): This should be an easy port to CSA with massive payback.

  // Only sticky regexps start the search at lastIndex; all others start at 0.
  const bool sticky = (flags & JSRegExp::kSticky) != 0;
  uint32_t last_index = 0;
  if (sticky) {
    Handle<Object> last_index_obj(regexp->last_index(), isolate);
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
    last_index = PositiveNumberToUint32(*last_index_obj);
  }

  // Stays null (meaning "no match") unless the exec call below runs and
  // succeeds.
  Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
                                   isolate);

  // A lastIndex exceeding the string length always returns null (signalling
  // failure) in RegExpBuiltinExec, thus we can skip the call.
  if (last_index <= static_cast<uint32_t>(subject->length())) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, match_indices_obj,
        RegExpImpl::Exec(isolate, regexp, subject, last_index,
                         last_match_info));
  }

  // No match: return the subject unchanged (sticky resets lastIndex to 0).
  if (match_indices_obj->IsNull(isolate)) {
    if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
    return *subject;
  }

  Handle<RegExpMatchInfo> match_indices =
      Handle<RegExpMatchInfo>::cast(match_indices_obj);

  // Start and end offsets of the whole match.
  const int index = match_indices->Capture(0);
  const int end_of_match = match_indices->Capture(1);

  if (sticky) {
    regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
  }

  // The result starts with the part of the subject before the match.
  IncrementalStringBuilder builder(isolate);
  builder.AppendString(factory->NewSubString(subject, 0, index));

  // Compute the parameter list consisting of the match, captures, index,
  // and subject for the replace function invocation. If the RegExp contains
  // named captures, they are also passed as the last argument.

  // The number of captures plus one for the match.
  const int m = match_indices->NumberOfCaptureRegisters() / 2;

  bool has_named_captures = false;
  Handle<FixedArray> capture_map;
  if (m > 1) {
    // The existence of capture groups implies IRREGEXP kind.
    DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);

    Object maybe_capture_map = regexp->CaptureNameMap();
    if (maybe_capture_map->IsFixedArray()) {
      has_named_captures = true;
      capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
    }
  }

  // (uint32_t)-1 signals that the argument count would be too large.
  const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
  if (argc == static_cast<uint32_t>(-1)) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewRangeError(MessageTemplate::kTooManyArguments));
  }
  ScopedVector<Handle<Object>> argv(argc);

  // argv[0] is the match, argv[1..m-1] are the captures; captures for which
  // the getter reports failure are passed as undefined.
  int cursor = 0;
  for (int j = 0; j < m; j++) {
    bool ok;
    Handle<String> capture =
        RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
    if (ok) {
      argv[cursor++] = capture;
    } else {
      argv[cursor++] = factory->undefined_value();
    }
  }

  argv[cursor++] = handle(Smi::FromInt(index), isolate);
  argv[cursor++] = subject;

  if (has_named_captures) {
    // Capture i lives at argv[i], so the capture index doubles as the argv
    // index when building the groups object.
    argv[cursor++] = ConstructNamedCaptureGroupsObject(
        isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
  }

  DCHECK_EQ(cursor, argc);

  Handle<Object> replacement_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, replacement_obj,
      Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
                      argv.start()));

  Handle<String> replacement;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, replacement, Object::ToString(isolate, replacement_obj));

  // Result = prefix + replacement + part of the subject after the match.
  builder.AppendString(replacement);
  builder.AppendString(
      factory->NewSubString(subject, end_of_match, subject->length()));

  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
}
1504
1505namespace {
1506
1507V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1508 Handle<Object> object,
1509 uint32_t* out) {
1510 if (object->IsUndefined(isolate)) {
1511 *out = kMaxUInt32;
1512 return object;
1513 }
1514
1515 Handle<Object> number;
1516 ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1517 Object);
1518 *out = NumberToUint32(*number);
1519 return object;
1520}
1521
1522Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1523 Handle<FixedArray> elems,
1524 int num_elems) {
1525 return isolate->factory()->NewJSArrayWithElements(
1526 FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1527}
1528
1529} // namespace
1530
// Slow path for:
// ES#sec-regexp.prototype-@@split
// RegExp.prototype [ @@split ] ( string, limit )
//
// Arguments: the receiver, the string to split and the limit. Builds a
// sticky "splitter" from the receiver's species constructor and repeatedly
// executes it at increasing positions of {string}, collecting the pieces
// between matches plus the captures of each match, until the end of the
// string is reached or {limit} elements have been collected.
RUNTIME_FUNCTION(Runtime_RegExpSplit) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
  CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);

  Factory* factory = isolate->factory();

  // Constructor used to create the splitter; the regexp function is passed
  // to SpeciesConstructor as the default.
  Handle<JSFunction> regexp_fun = isolate->regexp_function();
  Handle<Object> ctor;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));

  Handle<Object> flags_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, flags_obj,
      JSObject::GetProperty(isolate, recv, factory->flags_string()));

  Handle<String> flags;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
                                     Object::ToString(isolate, flags_obj));

  // The flags string is searched for 'u' (unicode) and 'y' (sticky).
  Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
  const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);

  Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
  const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);

  // The splitter is always created with the sticky flag; append 'y' if the
  // receiver's flags do not already contain it.
  Handle<String> new_flags = flags;
  if (!sticky) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
                                       factory->NewConsString(flags, y_str));
  }

  // splitter = new ctor(recv, new_flags)
  Handle<JSReceiver> splitter;
  {
    const int argc = 2;

    ScopedVector<Handle<Object>> argv(argc);
    argv[0] = recv;
    argv[1] = new_flags;

    Handle<Object> splitter_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, splitter_obj,
        Execution::New(isolate, ctor, argc, argv.start()));

    splitter = Handle<JSReceiver>::cast(splitter_obj);
  }

  // An undefined limit becomes kMaxUInt32 (see ToUint32 above).
  uint32_t limit;
  RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));

  const uint32_t length = string->length();

  if (limit == 0) return *factory->NewJSArray(0);

  // Empty input: the result is [] if the splitter matches the empty string,
  // and [string] otherwise.
  if (length == 0) {
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    if (!result->IsNull(isolate)) return *factory->NewJSArray(0);

    Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
    elems->set(0, *string);
    return *factory->NewJSArrayWithElements(elems);
  }

  static const int kInitialArraySize = 8;
  Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
  uint32_t num_elems = 0;

  // {string_index} is the position at which the splitter is tried next;
  // {prev_string_index} is where the piece currently being built starts.
  uint32_t string_index = 0;
  uint32_t prev_string_index = 0;
  while (string_index < length) {
    RETURN_FAILURE_ON_EXCEPTION(
        isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));

    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    // No match at this position: try the next one.
    if (result->IsNull(isolate)) {
      string_index = static_cast<uint32_t>(
          RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
      continue;
    }

    Handle<Object> last_index_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));

    // End of the match, clamped to the string length. A match that ends at
    // the start of the current piece is skipped.
    const uint32_t end =
        std::min(PositiveNumberToUint32(*last_index_obj), length);
    if (end == prev_string_index) {
      string_index = static_cast<uint32_t>(
          RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
      continue;
    }

    // Emit the piece between the previous match end and this match position.
    {
      Handle<String> substr =
          factory->NewSubString(string, prev_string_index, string_index);
      elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
      if (num_elems == limit) {
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    prev_string_index = end;

    Handle<Object> num_captures_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj,
        Object::GetProperty(isolate, result,
                            isolate->factory()->length_string()));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
    const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);

    // Append elements 1..length-1 of the exec result (element 0 is skipped).
    for (uint32_t i = 1; i < num_captures; i++) {
      Handle<Object> capture;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, capture, Object::GetElement(isolate, result, i));
      elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
      if (num_elems == limit) {
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    // Continue searching right after the match.
    string_index = prev_string_index;
  }

  // Emit the trailing piece after the last match.
  {
    Handle<String> substr =
        factory->NewSubString(string, prev_string_index, length);
    elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
  }

  return *NewJSArrayWithElements(isolate, elems, num_elems);
}
1684
// Slow path for:
// ES#sec-regexp.prototype-@@replace
// RegExp.prototype [ @@replace ] ( string, replaceValue )
//
// Arguments: the receiver, the subject string and the replace value (a
// callable, or anything else, which is converted to a string pattern).
// Unmodified JSRegExps with a string replace value take the RegExpReplace
// fast path. Otherwise all exec results are collected first and the output
// string is assembled from them afterwards.
RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
  Handle<Object> replace_obj = args.at(2);

  Factory* factory = isolate->factory();

  string = String::Flatten(isolate, string);

  const bool functional_replace = replace_obj->IsCallable();

  // {replace} is only set for non-functional replaces.
  Handle<String> replace;
  if (!functional_replace) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
                                       Object::ToString(isolate, replace_obj));
  }

  // Fast-path for unmodified JSRegExps (and non-functional replace).
  if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
    // We should never get here with functional replace because unmodified
    // regexp and functional replace should be fully handled in CSA code.
    CHECK(!functional_replace);
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result,
        RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
    DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
    return *result;
  }

  const uint32_t length = string->length();

  Handle<Object> global_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, global_obj,
      JSReceiver::GetProperty(isolate, recv, factory->global_string()));
  const bool global = global_obj->BooleanValue(isolate);

  // For global replaces, also read "unicode" and reset lastIndex to 0.
  bool unicode = false;
  if (global) {
    Handle<Object> unicode_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, unicode_obj,
        JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
    unicode = unicode_obj->BooleanValue(isolate);

    RETURN_FAILURE_ON_EXCEPTION(isolate,
                                RegExpUtils::SetLastIndex(isolate, recv, 0));
  }

  Zone zone(isolate->allocator(), ZONE_NAME);
  ZoneVector<Handle<Object>> results(&zone);

  // Phase 1: collect exec results until exec returns null (or after the
  // first result for non-global replaces).
  while (true) {
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
                                                 factory->undefined_value()));

    if (result->IsNull(isolate)) break;

    results.push_back(result);
    if (!global) break;

    Handle<Object> match_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
                                       Object::GetElement(isolate, result, 0));

    Handle<String> match;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
                                       Object::ToString(isolate, match_obj));

    // After an empty match, lastIndex is advanced explicitly.
    if (match->length() == 0) {
      RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
                                               isolate, recv, string, unicode));
    }
  }

  // TODO(jgruber): Look into ReplacementStringBuilder instead.
  IncrementalStringBuilder builder(isolate);
  // Position in {string} up to which output has been produced so far.
  uint32_t next_source_position = 0;

  // Phase 2: compute the replacement for every collected result and append
  // it, together with the untouched text before it, to the output.
  for (const auto& result : results) {
    HandleScope handle_scope(isolate);
    Handle<Object> captures_length_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, captures_length_obj,
        Object::GetProperty(isolate, result, factory->length_string()));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, captures_length_obj,
        Object::ToLength(isolate, captures_length_obj));
    const uint32_t captures_length =
        PositiveNumberToUint32(*captures_length_obj);

    Handle<Object> match_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
                                       Object::GetElement(isolate, result, 0));

    Handle<String> match;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
                                       Object::ToString(isolate, match_obj));

    const int match_length = match->length();

    Handle<Object> position_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, position_obj,
        Object::GetProperty(isolate, result, factory->index_string()));

    // The reported match index is clamped to the string length.
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, position_obj, Object::ToInteger(isolate, position_obj));
    const uint32_t position =
        std::min(PositiveNumberToUint32(*position_obj), length);

    // Do not reserve capacity since captures_length is user-controlled.
    ZoneVector<Handle<Object>> captures(&zone);

    // Elements 0..captures_length-1 of the result, each converted to a string
    // unless undefined. Element 0 (the match) is included.
    for (uint32_t n = 0; n < captures_length; n++) {
      Handle<Object> capture;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, capture, Object::GetElement(isolate, result, n));

      if (!capture->IsUndefined(isolate)) {
        ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
                                           Object::ToString(isolate, capture));
      }
      captures.push_back(capture);
    }

    Handle<Object> groups_obj = isolate->factory()->undefined_value();
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, groups_obj,
        Object::GetProperty(isolate, result, factory->groups_string()));

    const bool has_named_captures = !groups_obj->IsUndefined(isolate);

    Handle<String> replacement;
    if (functional_replace) {
      // Call the replacer with (captures..., position, string[, groups]) and
      // stringify its result. (uint32_t)-1 signals too many arguments.
      const uint32_t argc =
          GetArgcForReplaceCallable(captures_length, has_named_captures);
      if (argc == static_cast<uint32_t>(-1)) {
        THROW_NEW_ERROR_RETURN_FAILURE(
            isolate, NewRangeError(MessageTemplate::kTooManyArguments));
      }

      ScopedVector<Handle<Object>> argv(argc);

      int cursor = 0;
      for (uint32_t j = 0; j < captures_length; j++) {
        argv[cursor++] = captures[j];
      }

      argv[cursor++] = handle(Smi::FromInt(position), isolate);
      argv[cursor++] = string;
      if (has_named_captures) argv[cursor++] = groups_obj;

      DCHECK_EQ(cursor, argc);

      Handle<Object> replacement_obj;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, replacement_obj,
          Execution::Call(isolate, replace_obj, factory->undefined_value(),
                          argc, argv.start()));

      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, replacement, Object::ToString(isolate, replacement_obj));
    } else {
      // Expand the replacement pattern; a defined groups value is converted
      // to an object first.
      DCHECK(!functional_replace);
      if (!groups_obj->IsUndefined(isolate)) {
        ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
            isolate, groups_obj, Object::ToObject(isolate, groups_obj));
      }
      VectorBackedMatch m(isolate, string, match, position, &captures,
                          groups_obj);
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, replacement, String::GetSubstitution(isolate, &m, replace));
    }

    // Results positioned before already-emitted output are ignored.
    if (position >= next_source_position) {
      builder.AppendString(
          factory->NewSubString(string, next_source_position, position));
      builder.AppendString(replacement);

      next_source_position = position + match_length;
    }
  }

  // Append whatever remains of the subject after the last replacement.
  if (next_source_position < length) {
    builder.AppendString(
        factory->NewSubString(string, next_source_position, length));
  }

  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
}
1886
1887RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1888 HandleScope scope(isolate);
1889 DCHECK_EQ(3, args.length());
1890 // TODO(pwong): To follow the spec more closely and simplify calling code,
1891 // this could handle the canonicalization of pattern and flags. See
1892 // https://tc39.github.io/ecma262/#sec-regexpinitialize
1893 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1894 CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1895 CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1896
1897 RETURN_FAILURE_ON_EXCEPTION(isolate,
1898 JSRegExp::Initialize(regexp, source, flags));
1899
1900 return *regexp;
1901}
1902
1903RUNTIME_FUNCTION(Runtime_IsRegExp) {
1904 SealHandleScope shs(isolate);
1905 DCHECK_EQ(1, args.length());
1906 CONVERT_ARG_CHECKED(Object, obj, 0);
1907 return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1908}
1909
1910} // namespace internal
1911} // namespace v8
1912