1 | // Copyright 2014 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #include <functional> |
6 | |
7 | #include "src/arguments-inl.h" |
8 | #include "src/conversions-inl.h" |
9 | #include "src/counters.h" |
10 | #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop. |
11 | #include "src/isolate-inl.h" |
12 | #include "src/message-template.h" |
13 | #include "src/objects/js-array-inl.h" |
14 | #include "src/regexp/jsregexp-inl.h" |
15 | #include "src/regexp/regexp-utils.h" |
16 | #include "src/runtime/runtime-utils.h" |
17 | #include "src/string-builder-inl.h" |
18 | #include "src/string-search.h" |
19 | #include "src/zone/zone-chunk-list.h" |
20 | |
21 | namespace v8 { |
22 | namespace internal { |
23 | |
24 | namespace { |
25 | |
26 | // Returns -1 for failure. |
27 | uint32_t GetArgcForReplaceCallable(uint32_t num_captures, |
28 | bool has_named_captures) { |
29 | const uint32_t kAdditionalArgsWithoutNamedCaptures = 2; |
30 | const uint32_t kAdditionalArgsWithNamedCaptures = 3; |
31 | if (num_captures > Code::kMaxArguments) return -1; |
32 | uint32_t argc = has_named_captures |
33 | ? num_captures + kAdditionalArgsWithNamedCaptures |
34 | : num_captures + kAdditionalArgsWithoutNamedCaptures; |
35 | STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() - |
36 | kAdditionalArgsWithNamedCaptures); |
37 | return (argc > Code::kMaxArguments) ? -1 : argc; |
38 | } |
39 | |
40 | // Looks up the capture of the given name. Returns the (1-based) numbered |
41 | // capture index or -1 on failure. |
42 | int LookupNamedCapture(const std::function<bool(String)>& name_matches, |
43 | FixedArray capture_name_map) { |
44 | // TODO(jgruber): Sort capture_name_map and do binary search via |
45 | // internalized strings. |
46 | |
47 | int maybe_capture_index = -1; |
48 | const int named_capture_count = capture_name_map->length() >> 1; |
49 | for (int j = 0; j < named_capture_count; j++) { |
50 | // The format of {capture_name_map} is documented at |
51 | // JSRegExp::kIrregexpCaptureNameMapIndex. |
52 | const int name_ix = j * 2; |
53 | const int index_ix = j * 2 + 1; |
54 | |
55 | String capture_name = String::cast(capture_name_map->get(name_ix)); |
56 | if (!name_matches(capture_name)) continue; |
57 | |
58 | maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix)); |
59 | break; |
60 | } |
61 | |
62 | return maybe_capture_index; |
63 | } |
64 | |
65 | } // namespace |
66 | |
// Pre-parsed representation of a replacement pattern. Compile() parses the
// replacement string once into a list of parts (literal slices of the
// replacement string and references into the subject such as $`, $', $&, $n
// and $<name>); Apply() then emits the replacement text for a single match
// from that list without parsing the pattern again.
class CompiledReplacement {
 public:
  // Both internal lists are allocated in {zone}.
  explicit CompiledReplacement(Zone* zone)
      : parts_(zone), replacement_substrings_(zone) {}

  // Return whether the replacement is simple.
  bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
               Handle<String> replacement, int capture_count,
               int subject_length);

  // Use Apply only if Compile returned false.
  void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
             int32_t* match);

  // Number of distinct parts of the replacement pattern.
  int parts() { return static_cast<int>(parts_.size()); }

 private:
  // Kinds of parts. Values start at 1 so that non-positive tags remain
  // available for the temporary substring encoding described below.
  enum PartType {
    SUBJECT_PREFIX = 1,
    SUBJECT_SUFFIX,
    SUBJECT_CAPTURE,
    REPLACEMENT_SUBSTRING,
    REPLACEMENT_STRING,
    EMPTY_REPLACEMENT,
    NUMBER_OF_PART_TYPES
  };

  struct ReplacementPart {
    // The whole match is represented as capture number 0.
    static inline ReplacementPart SubjectMatch() {
      return ReplacementPart(SUBJECT_CAPTURE, 0);
    }
    static inline ReplacementPart SubjectCapture(int capture_index) {
      return ReplacementPart(SUBJECT_CAPTURE, capture_index);
    }
    static inline ReplacementPart SubjectPrefix() {
      return ReplacementPart(SUBJECT_PREFIX, 0);
    }
    // Stores the subject length in data; Apply() uses it as the end of the
    // suffix slice.
    static inline ReplacementPart SubjectSuffix(int subject_length) {
      return ReplacementPart(SUBJECT_SUFFIX, subject_length);
    }
    static inline ReplacementPart ReplacementString() {
      return ReplacementPart(REPLACEMENT_STRING, 0);
    }
    static inline ReplacementPart EmptyReplacement() {
      return ReplacementPart(EMPTY_REPLACEMENT, 0);
    }
    // Temporary encoding of the non-empty slice [from, to) of the replacement
    // string: the tag holds the negated start index, data holds the end.
    static inline ReplacementPart ReplacementSubString(int from, int to) {
      DCHECK_LE(0, from);
      DCHECK_GT(to, from);
      return ReplacementPart(-from, to);
    }

    // If tag <= 0 then it is the negation of a start index of a substring of
    // the replacement pattern, otherwise it's a value from PartType.
    ReplacementPart(int tag, int data) : tag(tag), data(data) {
      // Must be non-positive or a PartType value.
      DCHECK(tag < NUMBER_OF_PART_TYPES);
    }
    // Either a value of PartType or a non-positive number that is
    // the negation of an index into the replacement string.
    int tag;
    // The data value's interpretation depends on the value of tag:
    // tag == SUBJECT_PREFIX: data is unused.
    // tag == SUBJECT_SUFFIX: data is the length of the subject string.
    // tag == SUBJECT_CAPTURE: data is the number of the capture.
    // tag == REPLACEMENT_SUBSTRING ||
    // tag == REPLACEMENT_STRING:    data is index into array of substrings
    //                               of the replacement string.
    // tag == EMPTY_REPLACEMENT: data is unused.
    // tag <= 0: Temporary representation of the substring of the replacement
    //           string ranging over -tag .. data.
    //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
    //           substring objects.
    int data;
  };

  // Parses {characters} (the replacement pattern) and appends the resulting
  // parts to {parts}. {capture_name_map} may be null, in which case "$<" is
  // left as literal text. Returns true if no substitution sequence was
  // recognized and the pattern is non-empty, i.e. the replacement string can
  // be used as is; in that case nothing is appended to {parts}.
  template <typename Char>
  bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
                               Vector<Char> characters,
                               FixedArray capture_name_map, int capture_count,
                               int subject_length) {
    // Equivalent to String::GetSubstitution, except that this method converts
    // the replacement string into an internal representation that avoids
    // repeated parsing when used repeatedly.
    int length = characters.length();
    // Start of the literal text that has not yet been emitted as a part.
    int last = 0;
    for (int i = 0; i < length; i++) {
      Char c = characters[i];
      if (c == '$') {
        int next_index = i + 1;
        if (next_index == length) {  // No next character!
          break;
        }
        Char c2 = characters[next_index];
        switch (c2) {
          case '$':
            // "$$" stands for a single literal "$".
            if (i > last) {
              // There is a substring before. Include the first "$".
              parts->push_back(
                  ReplacementPart::ReplacementSubString(last, next_index));
              last = next_index + 1;  // Continue after the second "$".
            } else {
              // Let the next substring start with the second "$".
              last = next_index;
            }
            i = next_index;
            break;
          case '`':
            // "$`": the part of the subject preceding the match.
            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(ReplacementPart::SubjectPrefix());
            i = next_index;
            last = i + 1;
            break;
          case '\'':
            // "$'": the part of the subject following the match.
            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
            i = next_index;
            last = i + 1;
            break;
          case '&':
            // "$&": the matched substring itself.
            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(ReplacementPart::SubjectMatch());
            i = next_index;
            last = i + 1;
            break;
          case '0':
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9': {
            int capture_ref = c2 - '0';
            if (capture_ref > capture_count) {
              // No such capture: the sequence stays literal text. This
              // continues the enclosing for loop, skipping the digit.
              i = next_index;
              continue;
            }
            int second_digit_index = next_index + 1;
            if (second_digit_index < length) {
              // Peek ahead to see if we have two digits.
              Char c3 = characters[second_digit_index];
              if ('0' <= c3 && c3 <= '9') {  // Double digits.
                int double_digit_ref = capture_ref * 10 + c3 - '0';
                // Only consume the second digit if the two-digit number
                // refers to an existing capture.
                if (double_digit_ref <= capture_count) {
                  next_index = second_digit_index;
                  capture_ref = double_digit_ref;
                }
              }
            }
            // A reference that evaluates to 0 emits no part and leaves
            // {last} untouched, so its text remains literal.
            if (capture_ref > 0) {
              if (i > last) {
                parts->push_back(
                    ReplacementPart::ReplacementSubString(last, i));
              }
              DCHECK(capture_ref <= capture_count);
              parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
              last = next_index + 1;
            }
            i = next_index;
            break;
          }
          case '<': {
            // Without a capture name map, "$<" is plain text.
            if (capture_name_map.is_null()) {
              i = next_index;
              break;
            }

            // Scan until the next '>', and let the enclosed substring be the
            // groupName.

            const int name_start_index = next_index + 1;
            int closing_bracket_index = -1;
            for (int j = name_start_index; j < length; j++) {
              if (characters[j] == '>') {
                closing_bracket_index = j;
                break;
              }
            }

            // If no closing bracket is found, '$<' is treated as a string
            // literal.
            if (closing_bracket_index == -1) {
              i = next_index;
              break;
            }

            Vector<Char> requested_name =
                characters.SubVector(name_start_index, closing_bracket_index);

            // Let capture be ? Get(namedCaptures, groupName).

            const int capture_index = LookupNamedCapture(
                [=](String capture_name) {
                  return capture_name->IsEqualTo(requested_name);
                },
                capture_name_map);

            // If capture is undefined or does not exist, replace the text
            // through the following '>' with the empty string.
            // Otherwise, replace the text through the following '>' with
            // ? ToString(capture).

            DCHECK(capture_index == -1 ||
                   (1 <= capture_index && capture_index <= capture_count));

            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(
                (capture_index == -1)
                    ? ReplacementPart::EmptyReplacement()
                    : ReplacementPart::SubjectCapture(capture_index));
            last = closing_bracket_index + 1;
            i = closing_bracket_index;
            break;
          }
          default:
            // Any other character after "$": both stay literal text.
            i = next_index;
            break;
        }
      }
    }
    if (length > last) {
      if (last == 0) {
        // Replacement is simple.  Do not use Apply to do the replacement.
        return true;
      } else {
        // Emit the literal text following the last substitution.
        parts->push_back(ReplacementPart::ReplacementSubString(last, length));
      }
    }
    return false;
  }

  // Parts of the replacement pattern, in output order.
  ZoneChunkList<ReplacementPart> parts_;
  // String objects referenced by REPLACEMENT_SUBSTRING and REPLACEMENT_STRING
  // parts through their data index.
  ZoneVector<Handle<String>> replacement_substrings_;
};
313 | |
314 | bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp, |
315 | Handle<String> replacement, int capture_count, |
316 | int subject_length) { |
317 | { |
318 | DisallowHeapAllocation no_gc; |
319 | String::FlatContent content = replacement->GetFlatContent(no_gc); |
320 | DCHECK(content.IsFlat()); |
321 | |
322 | FixedArray capture_name_map; |
323 | if (capture_count > 0) { |
324 | DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
325 | Object maybe_capture_name_map = regexp->CaptureNameMap(); |
326 | if (maybe_capture_name_map->IsFixedArray()) { |
327 | capture_name_map = FixedArray::cast(maybe_capture_name_map); |
328 | } |
329 | } |
330 | |
331 | bool simple; |
332 | if (content.IsOneByte()) { |
333 | simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(), |
334 | capture_name_map, capture_count, |
335 | subject_length); |
336 | } else { |
337 | DCHECK(content.IsTwoByte()); |
338 | simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(), |
339 | capture_name_map, capture_count, |
340 | subject_length); |
341 | } |
342 | if (simple) return true; |
343 | } |
344 | |
345 | // Find substrings of replacement string and create them as String objects. |
346 | int substring_index = 0; |
347 | for (ReplacementPart& part : parts_) { |
348 | int tag = part.tag; |
349 | if (tag <= 0) { // A replacement string slice. |
350 | int from = -tag; |
351 | int to = part.data; |
352 | replacement_substrings_.push_back( |
353 | isolate->factory()->NewSubString(replacement, from, to)); |
354 | part.tag = REPLACEMENT_SUBSTRING; |
355 | part.data = substring_index; |
356 | substring_index++; |
357 | } else if (tag == REPLACEMENT_STRING) { |
358 | replacement_substrings_.push_back(replacement); |
359 | part.data = substring_index; |
360 | substring_index++; |
361 | } |
362 | } |
363 | return false; |
364 | } |
365 | |
366 | |
367 | void CompiledReplacement::Apply(ReplacementStringBuilder* builder, |
368 | int match_from, int match_to, int32_t* match) { |
369 | DCHECK_LT(0, parts_.size()); |
370 | for (ReplacementPart& part : parts_) { |
371 | switch (part.tag) { |
372 | case SUBJECT_PREFIX: |
373 | if (match_from > 0) builder->AddSubjectSlice(0, match_from); |
374 | break; |
375 | case SUBJECT_SUFFIX: { |
376 | int subject_length = part.data; |
377 | if (match_to < subject_length) { |
378 | builder->AddSubjectSlice(match_to, subject_length); |
379 | } |
380 | break; |
381 | } |
382 | case SUBJECT_CAPTURE: { |
383 | int capture = part.data; |
384 | int from = match[capture * 2]; |
385 | int to = match[capture * 2 + 1]; |
386 | if (from >= 0 && to > from) { |
387 | builder->AddSubjectSlice(from, to); |
388 | } |
389 | break; |
390 | } |
391 | case REPLACEMENT_SUBSTRING: |
392 | case REPLACEMENT_STRING: |
393 | builder->AddString(replacement_substrings_[part.data]); |
394 | break; |
395 | case EMPTY_REPLACEMENT: |
396 | break; |
397 | default: |
398 | UNREACHABLE(); |
399 | } |
400 | } |
401 | } |
402 | |
403 | void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern, |
404 | std::vector<int>* indices, unsigned int limit) { |
405 | DCHECK_LT(0, limit); |
406 | // Collect indices of pattern in subject using memchr. |
407 | // Stop after finding at most limit values. |
408 | const uint8_t* subject_start = subject.start(); |
409 | const uint8_t* subject_end = subject_start + subject.length(); |
410 | const uint8_t* pos = subject_start; |
411 | while (limit > 0) { |
412 | pos = reinterpret_cast<const uint8_t*>( |
413 | memchr(pos, pattern, subject_end - pos)); |
414 | if (pos == nullptr) return; |
415 | indices->push_back(static_cast<int>(pos - subject_start)); |
416 | pos++; |
417 | limit--; |
418 | } |
419 | } |
420 | |
421 | void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern, |
422 | std::vector<int>* indices, unsigned int limit) { |
423 | DCHECK_LT(0, limit); |
424 | const uc16* subject_start = subject.start(); |
425 | const uc16* subject_end = subject_start + subject.length(); |
426 | for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { |
427 | if (*pos == pattern) { |
428 | indices->push_back(static_cast<int>(pos - subject_start)); |
429 | limit--; |
430 | } |
431 | } |
432 | } |
433 | |
434 | template <typename SubjectChar, typename PatternChar> |
435 | void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject, |
436 | Vector<const PatternChar> pattern, |
437 | std::vector<int>* indices, unsigned int limit) { |
438 | DCHECK_LT(0, limit); |
439 | // Collect indices of pattern in subject. |
440 | // Stop after finding at most limit values. |
441 | int pattern_length = pattern.length(); |
442 | int index = 0; |
443 | StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
444 | while (limit > 0) { |
445 | index = search.Search(subject, index); |
446 | if (index < 0) return; |
447 | indices->push_back(index); |
448 | index += pattern_length; |
449 | limit--; |
450 | } |
451 | } |
452 | |
453 | void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern, |
454 | std::vector<int>* indices, unsigned int limit) { |
455 | { |
456 | DisallowHeapAllocation no_gc; |
457 | String::FlatContent subject_content = subject->GetFlatContent(no_gc); |
458 | String::FlatContent pattern_content = pattern->GetFlatContent(no_gc); |
459 | DCHECK(subject_content.IsFlat()); |
460 | DCHECK(pattern_content.IsFlat()); |
461 | if (subject_content.IsOneByte()) { |
462 | Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); |
463 | if (pattern_content.IsOneByte()) { |
464 | Vector<const uint8_t> pattern_vector = |
465 | pattern_content.ToOneByteVector(); |
466 | if (pattern_vector.length() == 1) { |
467 | FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, |
468 | limit); |
469 | } else { |
470 | FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
471 | limit); |
472 | } |
473 | } else { |
474 | FindStringIndices(isolate, subject_vector, |
475 | pattern_content.ToUC16Vector(), indices, limit); |
476 | } |
477 | } else { |
478 | Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); |
479 | if (pattern_content.IsOneByte()) { |
480 | Vector<const uint8_t> pattern_vector = |
481 | pattern_content.ToOneByteVector(); |
482 | if (pattern_vector.length() == 1) { |
483 | FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
484 | limit); |
485 | } else { |
486 | FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
487 | limit); |
488 | } |
489 | } else { |
490 | Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector(); |
491 | if (pattern_vector.length() == 1) { |
492 | FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
493 | limit); |
494 | } else { |
495 | FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
496 | limit); |
497 | } |
498 | } |
499 | } |
500 | } |
501 | } |
502 | |
503 | namespace { |
504 | std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) { |
505 | std::vector<int>* list = isolate->regexp_indices(); |
506 | list->clear(); |
507 | return list; |
508 | } |
509 | |
510 | void TruncateRegexpIndicesList(Isolate* isolate) { |
511 | // Same size as smallest zone segment, preserving behavior from the |
512 | // runtime zone. |
513 | static const int kMaxRegexpIndicesListCapacity = 8 * KB; |
514 | std::vector<int>* indicies = isolate->regexp_indices(); |
515 | if (indicies->capacity() > kMaxRegexpIndicesListCapacity) { |
516 | // Throw away backing storage. |
517 | indicies->clear(); |
518 | indicies->shrink_to_fit(); |
519 | } |
520 | } |
521 | } // namespace |
522 | |
// Replaces every occurrence of the ATOM pattern of {pattern_regexp} in
// {subject} with {replacement}, which is copied verbatim (no "$"
// substitution is performed here). The result is written directly into a
// freshly allocated sequential string whose encoding is selected by
// {ResultSeqString}. Returns {subject} itself if the pattern does not occur,
// the empty string if the result is empty, or a failure value if allocating
// the result fails. On success {last_match_info} is updated to describe the
// last occurrence. {subject} and {replacement} must be flat.
template <typename ResultSeqString>
V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
    Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
  DCHECK(subject->IsFlat());
  DCHECK(replacement->IsFlat());

  std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);

  DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
  String pattern =
      String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
  int subject_len = subject->length();
  int pattern_len = pattern->length();
  int replacement_len = replacement->length();

  // Collect the start index of every occurrence; the limit is the largest
  // 32-bit unsigned value.
  FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);

  // Note: this early return and the ones below do not call
  // TruncateRegexpIndicesList.
  if (indices->empty()) return *subject;

  // Detect integer overflow.
  // The result length is computed in 64 bits: each occurrence changes the
  // length by (replacement_len - pattern_len).
  int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
                           static_cast<int64_t>(pattern_len)) *
                              static_cast<int64_t>(indices->size()) +
                          static_cast<int64_t>(subject_len);
  int result_len;
  if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
    STATIC_ASSERT(String::kMaxLength < kMaxInt);
    result_len = kMaxInt;  // Provoke exception.
  } else {
    result_len = static_cast<int>(result_len_64);
  }
  if (result_len == 0) {
    return ReadOnlyRoots(isolate).empty_string();
  }

  // Read position in the subject and write position in the result.
  int subject_pos = 0;
  int result_pos = 0;

  MaybeHandle<SeqString> maybe_res;
  if (ResultSeqString::kHasOneByteEncoding) {
    maybe_res = isolate->factory()->NewRawOneByteString(result_len);
  } else {
    maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
  }
  Handle<SeqString> untyped_res;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
  Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);

  // From here on the result's characters are written through raw pointers.
  // NOTE(review): no_gc stays in scope across the SetLastMatchInfo call
  // below - confirm that call cannot allocate here.
  DisallowHeapAllocation no_gc;
  for (int index : *indices) {
    // Copy non-matched subject content.
    if (subject_pos < index) {
      String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
                          subject_pos, index);
      result_pos += index - subject_pos;
    }

    // Replace match.
    if (replacement_len > 0) {
      String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
                          replacement_len);
      result_pos += replacement_len;
    }

    // Skip over the matched text in the subject.
    subject_pos = index + pattern_len;
  }
  // Add remaining subject content at the end.
  if (subject_pos < subject_len) {
    String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
                        subject_pos, subject_len);
  }

  // Record the last occurrence as the most recent match, with a capture
  // count of 0.
  int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
  RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, 0,
                               match_indices);

  TruncateRegexpIndicesList(isolate);

  return *result;
}
604 | |
605 | V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString( |
606 | Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, |
607 | Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) { |
608 | DCHECK(subject->IsFlat()); |
609 | DCHECK(replacement->IsFlat()); |
610 | |
611 | int capture_count = regexp->CaptureCount(); |
612 | int subject_length = subject->length(); |
613 | |
614 | JSRegExp::Type typeTag = regexp->TypeTag(); |
615 | if (typeTag == JSRegExp::IRREGEXP) { |
616 | // Ensure the RegExp is compiled so we can access the capture-name map. |
617 | if (RegExpImpl::IrregexpPrepare(isolate, regexp, subject) == -1) { |
618 | DCHECK(isolate->has_pending_exception()); |
619 | return ReadOnlyRoots(isolate).exception(); |
620 | } |
621 | } |
622 | |
623 | // CompiledReplacement uses zone allocation. |
624 | Zone zone(isolate->allocator(), ZONE_NAME); |
625 | CompiledReplacement compiled_replacement(&zone); |
626 | const bool simple_replace = compiled_replacement.Compile( |
627 | isolate, regexp, replacement, capture_count, subject_length); |
628 | |
629 | // Shortcut for simple non-regexp global replacements |
630 | if (typeTag == JSRegExp::ATOM && simple_replace) { |
631 | if (subject->IsOneByteRepresentation() && |
632 | replacement->IsOneByteRepresentation()) { |
633 | return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( |
634 | isolate, subject, regexp, replacement, last_match_info); |
635 | } else { |
636 | return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( |
637 | isolate, subject, regexp, replacement, last_match_info); |
638 | } |
639 | } |
640 | |
641 | RegExpImpl::GlobalCache global_cache(regexp, subject, isolate); |
642 | if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); |
643 | |
644 | int32_t* current_match = global_cache.FetchNext(); |
645 | if (current_match == nullptr) { |
646 | if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); |
647 | return *subject; |
648 | } |
649 | |
650 | // Guessing the number of parts that the final result string is built |
651 | // from. Global regexps can match any number of times, so we guess |
652 | // conservatively. |
653 | int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1; |
654 | ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts); |
655 | |
656 | int prev = 0; |
657 | |
658 | do { |
659 | int start = current_match[0]; |
660 | int end = current_match[1]; |
661 | |
662 | if (prev < start) { |
663 | builder.AddSubjectSlice(prev, start); |
664 | } |
665 | |
666 | if (simple_replace) { |
667 | builder.AddString(replacement); |
668 | } else { |
669 | compiled_replacement.Apply(&builder, start, end, current_match); |
670 | } |
671 | prev = end; |
672 | |
673 | current_match = global_cache.FetchNext(); |
674 | } while (current_match != nullptr); |
675 | |
676 | if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception(); |
677 | |
678 | if (prev < subject_length) { |
679 | builder.AddSubjectSlice(prev, subject_length); |
680 | } |
681 | |
682 | RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count, |
683 | global_cache.LastSuccessfulMatch()); |
684 | |
685 | RETURN_RESULT_OR_FAILURE(isolate, builder.ToString()); |
686 | } |
687 | |
// Removes every match of the global {regexp} from {subject}, i.e. replaces
// each match with the empty string. ATOM regexps are forwarded to
// StringReplaceGlobalAtomRegExpWithString. Otherwise the unmatched pieces of
// the subject are copied into a sequential string (encoding selected by
// {ResultSeqString}) that is shortened in place afterwards. Returns {subject}
// itself when nothing matches, and the exception sentinel when executing the
// regexp fails. {subject} must be flat.
template <typename ResultSeqString>
V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
    Handle<RegExpMatchInfo> last_match_info) {
  DCHECK(subject->IsFlat());

  // Shortcut for simple non-regexp global replacements
  if (regexp->TypeTag() == JSRegExp::ATOM) {
    Handle<String> empty_string = isolate->factory()->empty_string();
    if (subject->IsOneByteRepresentation()) {
      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
          isolate, subject, regexp, empty_string, last_match_info);
    } else {
      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
          isolate, subject, regexp, empty_string, last_match_info);
    }
  }

  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  int32_t* current_match = global_cache.FetchNext();
  if (current_match == nullptr) {
    // Either the search failed with an exception or nothing matched.
    if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
    return *subject;
  }

  int start = current_match[0];
  int end = current_match[1];
  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  // Size the result for the subject minus the first match only. Later
  // matches can only make the result shorter; the surplus is trimmed at the
  // end.
  int new_length = subject_length - (end - start);
  if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();

  Handle<ResultSeqString> answer;
  if (ResultSeqString::kHasOneByteEncoding) {
    answer = Handle<ResultSeqString>::cast(
        isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
  } else {
    answer = Handle<ResultSeqString>::cast(
        isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
  }

  // {prev} is the end of the previous match in the subject, {position} the
  // write position in {answer}.
  int prev = 0;
  int position = 0;

  // The answer's characters are written through raw pointers from here on.
  DisallowHeapAllocation no_gc;
  do {
    start = current_match[0];
    end = current_match[1];
    if (prev < start) {
      // Add substring subject[prev;start] to answer string.
      String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
                          start);
      position += start - prev;
    }
    prev = end;

    current_match = global_cache.FetchNext();
  } while (current_match != nullptr);

  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
                               global_cache.LastSuccessfulMatch());

  if (prev < subject_length) {
    // Add substring subject[prev;length] to answer string.
    String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
                        subject_length);
    position += subject_length - prev;
  }

  if (position == 0) return ReadOnlyRoots(isolate).empty_string();

  // Shorten string and fill
  // {delta} is the difference between the object size that was allocated and
  // the object size needed for the characters actually written.
  int string_size = ResultSeqString::SizeFor(position);
  int allocated_string_size = ResultSeqString::SizeFor(new_length);
  int delta = allocated_string_size - string_size;

  answer->set_length(position);
  if (delta == 0) return *answer;

  Address end_of_string = answer->address() + string_size;
  Heap* heap = isolate->heap();

  // The trimming is performed on a newly allocated object, which is on a
  // freshly allocated page or on an already swept page. Hence, the sweeper
  // thread can not get confused with the filler creation. No synchronization
  // needed.
  // TODO(hpayer): We should shrink the large object page if the size
  // of the object changed significantly.
  if (!heap->IsLargeObject(*answer)) {
    heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
  }
  return *answer;
}
786 | |
// Runtime entry: splits a string around a non-empty string separator.
// Arguments: 0 - the subject string, 1 - the separator pattern, 2 - the
// maximum number of parts (must be greater than 0). Returns a JSArray of the
// resulting substrings. Results computed with the maximum limit
// (0xFFFFFFFF) are looked up in and entered into the RegExpResultsCache.
RUNTIME_FUNCTION(Runtime_StringSplit) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(3, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
  CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
  CHECK_LT(0, limit);

  int subject_length = subject->length();
  int pattern_length = pattern->length();
  CHECK_LT(0, pattern_length);

  // Only results for the maximum limit are cached.
  if (limit == 0xFFFFFFFFu) {
    FixedArray last_match_cache_unused;
    Handle<Object> cached_answer(
        RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
                                   &last_match_cache_unused,
                                   RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
        isolate);
    // Any result other than Smi::kZero is treated as a cache hit.
    if (*cached_answer != Smi::kZero) {
      // The cache FixedArray is a COW-array and can therefore be reused.
      Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
          Handle<FixedArray>::cast(cached_answer));
      return *result;
    }
  }

  // The limit can be very large (0xFFFFFFFFu), but since the pattern
  // isn't empty, the number of parts is bounded by the number of
  // non-overlapping occurrences of the pattern in the subject, plus one.

  subject = String::Flatten(isolate, subject);
  pattern = String::Flatten(isolate, pattern);

  std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);

  FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);

  // Unless the limit has been reached, the end of the subject terminates the
  // final part.
  if (static_cast<uint32_t>(indices->size()) < limit) {
    indices->push_back(subject_length);
  }

  // The list indices now contains the end of each part to create.

  // Create JSArray of substrings separated by separator.
  int part_count = static_cast<int>(indices->size());

  Handle<JSArray> result =
      isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
                                     INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);

  DCHECK(result->HasObjectElements());

  Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);

  if (part_count == 1 && indices->at(0) == subject_length) {
    // The separator does not occur: the single part is the subject itself.
    elements->set(0, *subject);
  } else {
    int part_start = 0;
    FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
      int part_end = indices->at(i);
      Handle<String> substring =
          isolate->factory()->NewProperSubString(subject, part_start, part_end);
      elements->set(i, *substring);
      // The next part starts right after the separator.
      part_start = part_end + pattern_length;
    });
  }

  if (limit == 0xFFFFFFFFu) {
    if (result->HasObjectElements()) {
      RegExpResultsCache::Enter(isolate, subject, pattern, elements,
                                isolate->factory()->empty_fixed_array(),
                                RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
    }
  }

  TruncateRegexpIndicesList(isolate);

  return *result;
}
867 | |
868 | RUNTIME_FUNCTION(Runtime_RegExpExec) { |
869 | HandleScope scope(isolate); |
870 | DCHECK_EQ(4, args.length()); |
871 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
872 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
873 | CONVERT_INT32_ARG_CHECKED(index, 2); |
874 | CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3); |
875 | // Due to the way the JS calls are constructed this must be less than the |
876 | // length of a string, i.e. it is always a Smi. We check anyway for security. |
877 | CHECK_LE(0, index); |
878 | CHECK_GE(subject->length(), index); |
879 | isolate->counters()->regexp_entry_runtime()->Increment(); |
880 | RETURN_RESULT_OR_FAILURE(isolate, RegExpImpl::Exec(isolate, regexp, subject, |
881 | index, last_match_info)); |
882 | } |
883 | |
884 | namespace { |
885 | |
886 | class MatchInfoBackedMatch : public String::Match { |
887 | public: |
888 | MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp, |
889 | Handle<String> subject, |
890 | Handle<RegExpMatchInfo> match_info) |
891 | : isolate_(isolate), match_info_(match_info) { |
892 | subject_ = String::Flatten(isolate, subject); |
893 | |
894 | if (regexp->TypeTag() == JSRegExp::IRREGEXP) { |
895 | Object o = regexp->CaptureNameMap(); |
896 | has_named_captures_ = o->IsFixedArray(); |
897 | if (has_named_captures_) { |
898 | capture_name_map_ = handle(FixedArray::cast(o), isolate); |
899 | } |
900 | } else { |
901 | has_named_captures_ = false; |
902 | } |
903 | } |
904 | |
905 | Handle<String> GetMatch() override { |
906 | return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr); |
907 | } |
908 | |
909 | Handle<String> GetPrefix() override { |
910 | const int match_start = match_info_->Capture(0); |
911 | return isolate_->factory()->NewSubString(subject_, 0, match_start); |
912 | } |
913 | |
914 | Handle<String> GetSuffix() override { |
915 | const int match_end = match_info_->Capture(1); |
916 | return isolate_->factory()->NewSubString(subject_, match_end, |
917 | subject_->length()); |
918 | } |
919 | |
920 | bool HasNamedCaptures() override { return has_named_captures_; } |
921 | |
922 | int CaptureCount() override { |
923 | return match_info_->NumberOfCaptureRegisters() / 2; |
924 | } |
925 | |
926 | MaybeHandle<String> GetCapture(int i, bool* capture_exists) override { |
927 | Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter( |
928 | isolate_, match_info_, i, capture_exists); |
929 | return (*capture_exists) ? Object::ToString(isolate_, capture_obj) |
930 | : isolate_->factory()->empty_string(); |
931 | } |
932 | |
933 | MaybeHandle<String> GetNamedCapture(Handle<String> name, |
934 | CaptureState* state) override { |
935 | DCHECK(has_named_captures_); |
936 | const int capture_index = LookupNamedCapture( |
937 | [=](String capture_name) { return capture_name->Equals(*name); }, |
938 | *capture_name_map_); |
939 | |
940 | if (capture_index == -1) { |
941 | *state = INVALID; |
942 | return name; // Arbitrary string handle. |
943 | } |
944 | |
945 | DCHECK(1 <= capture_index && capture_index <= CaptureCount()); |
946 | |
947 | bool capture_exists; |
948 | Handle<String> capture_value; |
949 | ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value, |
950 | GetCapture(capture_index, &capture_exists), |
951 | String); |
952 | |
953 | if (!capture_exists) { |
954 | *state = UNMATCHED; |
955 | return isolate_->factory()->empty_string(); |
956 | } else { |
957 | *state = MATCHED; |
958 | return capture_value; |
959 | } |
960 | } |
961 | |
962 | private: |
963 | Isolate* isolate_; |
964 | Handle<String> subject_; |
965 | Handle<RegExpMatchInfo> match_info_; |
966 | |
967 | bool has_named_captures_; |
968 | Handle<FixedArray> capture_name_map_; |
969 | }; |
970 | |
971 | class VectorBackedMatch : public String::Match { |
972 | public: |
973 | VectorBackedMatch(Isolate* isolate, Handle<String> subject, |
974 | Handle<String> match, int match_position, |
975 | ZoneVector<Handle<Object>>* captures, |
976 | Handle<Object> groups_obj) |
977 | : isolate_(isolate), |
978 | match_(match), |
979 | match_position_(match_position), |
980 | captures_(captures) { |
981 | subject_ = String::Flatten(isolate, subject); |
982 | |
983 | DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver()); |
984 | has_named_captures_ = !groups_obj->IsUndefined(isolate); |
985 | if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj); |
986 | } |
987 | |
988 | Handle<String> GetMatch() override { return match_; } |
989 | |
990 | Handle<String> GetPrefix() override { |
991 | return isolate_->factory()->NewSubString(subject_, 0, match_position_); |
992 | } |
993 | |
994 | Handle<String> GetSuffix() override { |
995 | const int match_end_position = match_position_ + match_->length(); |
996 | return isolate_->factory()->NewSubString(subject_, match_end_position, |
997 | subject_->length()); |
998 | } |
999 | |
1000 | bool HasNamedCaptures() override { return has_named_captures_; } |
1001 | |
1002 | int CaptureCount() override { return static_cast<int>(captures_->size()); } |
1003 | |
1004 | MaybeHandle<String> GetCapture(int i, bool* capture_exists) override { |
1005 | Handle<Object> capture_obj = captures_->at(i); |
1006 | if (capture_obj->IsUndefined(isolate_)) { |
1007 | *capture_exists = false; |
1008 | return isolate_->factory()->empty_string(); |
1009 | } |
1010 | *capture_exists = true; |
1011 | return Object::ToString(isolate_, capture_obj); |
1012 | } |
1013 | |
1014 | MaybeHandle<String> GetNamedCapture(Handle<String> name, |
1015 | CaptureState* state) override { |
1016 | DCHECK(has_named_captures_); |
1017 | |
1018 | Maybe<bool> maybe_capture_exists = |
1019 | JSReceiver::HasProperty(groups_obj_, name); |
1020 | if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>(); |
1021 | |
1022 | if (!maybe_capture_exists.FromJust()) { |
1023 | *state = INVALID; |
1024 | return name; // Arbitrary string handle. |
1025 | } |
1026 | |
1027 | Handle<Object> capture_obj; |
1028 | ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj, |
1029 | Object::GetProperty(isolate_, groups_obj_, name), |
1030 | String); |
1031 | if (capture_obj->IsUndefined(isolate_)) { |
1032 | *state = UNMATCHED; |
1033 | return isolate_->factory()->empty_string(); |
1034 | } else { |
1035 | *state = MATCHED; |
1036 | return Object::ToString(isolate_, capture_obj); |
1037 | } |
1038 | } |
1039 | |
1040 | private: |
1041 | Isolate* isolate_; |
1042 | Handle<String> subject_; |
1043 | Handle<String> match_; |
1044 | const int match_position_; |
1045 | ZoneVector<Handle<Object>>* captures_; |
1046 | |
1047 | bool has_named_captures_; |
1048 | Handle<JSReceiver> groups_obj_; |
1049 | }; |
1050 | |
1051 | // Create the groups object (see also the RegExp result creation in |
1052 | // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo). |
1053 | Handle<JSObject> ConstructNamedCaptureGroupsObject( |
1054 | Isolate* isolate, Handle<FixedArray> capture_map, |
1055 | const std::function<Object(int)>& f_get_capture) { |
1056 | Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto(); |
1057 | |
1058 | const int capture_count = capture_map->length() >> 1; |
1059 | for (int i = 0; i < capture_count; i++) { |
1060 | const int name_ix = i * 2; |
1061 | const int index_ix = i * 2 + 1; |
1062 | |
1063 | Handle<String> capture_name(String::cast(capture_map->get(name_ix)), |
1064 | isolate); |
1065 | const int capture_ix = Smi::ToInt(capture_map->get(index_ix)); |
1066 | DCHECK(1 <= capture_ix && capture_ix <= capture_count); |
1067 | |
1068 | Handle<Object> capture_value(f_get_capture(capture_ix), isolate); |
1069 | DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString()); |
1070 | |
1071 | JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE); |
1072 | } |
1073 | |
1074 | return groups; |
1075 | } |
1076 | |
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info.  See comment on that function.
//
// Runs |regexp| repeatedly over the flat |subject| through a
// RegExpImpl::GlobalCache and collects, in order, the subject slices between
// matches and one entry per match: the matched string when |has_capture| is
// false, or a JSArray of [match, capture 1..n, match start, subject
// (, groups object)] when it is true. The collected entries become the
// contents of |result_array| and |last_match_array| is updated with the last
// successful match.
//
// Returns |result_array| when there was at least one match (or a cached
// answer was found), null when there was no match at all, and the exception
// sentinel when the GlobalCache reports an exception.
template <bool has_capture>
static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
                                   Handle<JSRegExp> regexp,
                                   Handle<RegExpMatchInfo> last_match_array,
                                   Handle<JSArray> result_array) {
  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
  DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
  DCHECK(subject->IsFlat());

  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  // Only subjects longer than this consult and populate RegExpResultsCache.
  static const int kMinLengthToCache = 0x1000;

  if (subject_length > kMinLengthToCache) {
    FixedArray last_match_cache;
    Object cached_answer = RegExpResultsCache::Lookup(
        isolate->heap(), *subject, regexp->data(), &last_match_cache,
        RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    if (cached_answer->IsFixedArray()) {
      // Cache hit. Copy the cached capture registers (two per capture, plus
      // two for the whole match) into a temporary C array so that they can be
      // handed to SetLastMatchInfo below.
      int capture_registers = (capture_count + 1) * 2;
      int32_t* last_match = NewArray<int32_t>(capture_registers);
      for (int i = 0; i < capture_registers; i++) {
        last_match[i] = Smi::ToInt(last_match_cache->get(i));
      }
      Handle<FixedArray> cached_fixed_array =
          Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
      // The cache FixedArray is a COW-array and we need to return a copy.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              cached_fixed_array, isolate->factory()->fixed_array_map());
      JSArray::SetContent(result_array, copied_fixed_array);
      RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
                                   capture_count, last_match);
      // Release the temporary register copy allocated with NewArray above.
      DeleteArray(last_match);
      return *result_array;
    }
  }

  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  // Ensured in Runtime_RegExpExecMultiple.
  DCHECK(result_array->HasObjectElements());
  // Build into result_array's existing backing store, unless it has fewer
  // than 16 slots, in which case a fresh 16-element array is used instead.
  Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
                                     isolate);
  if (result_elements->length() < 16) {
    result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
  }

  FixedArrayBuilder builder(result_elements);

  // Position to search from.
  // match_start stays at -1 until the first match has been fetched; it is
  // used after the loop to tell whether anything matched.
  int match_start = -1;
  int match_end = 0;
  bool first = true;

  // Two smis before and after the match, for very long strings.
  static const int kMaxBuilderEntriesPerRegExpMatch = 5;

  while (true) {
    // FetchNext returns the registers of the next match, or nullptr when the
    // loop should stop; whether an exception occurred is checked after the
    // loop.
    int32_t* current_match = global_cache.FetchNext();
    if (current_match == nullptr) break;
    match_start = current_match[0];
    builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
    // Record the unmatched text between the previous match and this one.
    if (match_end < match_start) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                match_start);
    }
    match_end = current_match[1];
    {
      // Avoid accumulating new handles inside loop.
      HandleScope temp_scope(isolate);
      Handle<String> match;
      // Only the first match goes through NewSubString; all later ones use
      // NewProperSubString.
      if (!first) {
        match = isolate->factory()->NewProperSubString(subject, match_start,
                                                       match_end);
      } else {
        match =
            isolate->factory()->NewSubString(subject, match_start, match_end);
        first = false;
      }

      if (has_capture) {
        // Arguments array to replace function is match, captures, index and
        // subject, i.e., 3 + capture count in total. If the RegExp contains
        // named captures, they are also passed as the last argument.

        Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
        const bool has_named_captures = maybe_capture_map->IsFixedArray();

        const int argc =
            has_named_captures ? 4 + capture_count : 3 + capture_count;

        Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
        int cursor = 0;

        elements->set(cursor++, *match);
        for (int i = 1; i <= capture_count; i++) {
          // A negative start register marks a capture that did not
          // participate in the match; it is stored as undefined.
          int start = current_match[i * 2];
          if (start >= 0) {
            int end = current_match[i * 2 + 1];
            DCHECK(start <= end);
            Handle<String> substring =
                isolate->factory()->NewSubString(subject, start, end);
            elements->set(cursor++, *substring);
          } else {
            DCHECK_GT(0, current_match[i * 2 + 1]);
            elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
          }
        }

        elements->set(cursor++, Smi::FromInt(match_start));
        elements->set(cursor++, *subject);

        if (has_named_captures) {
          // The groups object reads capture values back out of |elements|,
          // where slot ix holds capture ix (slot 0 is the match itself).
          Handle<FixedArray> capture_map =
              Handle<FixedArray>::cast(maybe_capture_map);
          Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
              isolate, capture_map, [=](int ix) { return elements->get(ix); });
          elements->set(cursor++, *groups);
        }

        DCHECK_EQ(cursor, argc);
        builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
      } else {
        builder.Add(*match);
      }
    }
  }

  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  if (match_start >= 0) {
    // Finished matching, with at least one match.
    // Record the unmatched tail of the subject, if any.
    if (match_end < subject_length) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                subject_length);
    }

    RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
                                 capture_count,
                                 global_cache.LastSuccessfulMatch());

    if (subject_length > kMinLengthToCache) {
      // Store the last successful match into the array for caching.
      // TODO(yangguo): do not expose last match to JS and simplify caching.
      int capture_registers = (capture_count + 1) * 2;
      Handle<FixedArray> last_match_cache =
          isolate->factory()->NewFixedArray(capture_registers);
      int32_t* last_match = global_cache.LastSuccessfulMatch();
      for (int i = 0; i < capture_registers; i++) {
        last_match_cache->set(i, Smi::FromInt(last_match[i]));
      }
      Handle<FixedArray> result_fixed_array =
          FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
      // Cache the result and copy the FixedArray into a COW array.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              result_fixed_array, isolate->factory()->fixed_array_map());
      RegExpResultsCache::Enter(
          isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
          last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    }
    return *builder.ToJSArray(result_array);
  } else {
    return ReadOnlyRoots(isolate).null_value();  // No matches at all.
  }
}
1248 | |
1249 | // Legacy implementation of RegExp.prototype[Symbol.replace] which |
1250 | // doesn't properly call the underlying exec method. |
1251 | V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace( |
1252 | Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string, |
1253 | Handle<String> replace) { |
1254 | // Functional fast-paths are dispatched directly by replace builtin. |
1255 | DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); |
1256 | |
1257 | Factory* factory = isolate->factory(); |
1258 | |
1259 | const int flags = regexp->GetFlags(); |
1260 | const bool global = (flags & JSRegExp::kGlobal) != 0; |
1261 | const bool sticky = (flags & JSRegExp::kSticky) != 0; |
1262 | |
1263 | replace = String::Flatten(isolate, replace); |
1264 | |
1265 | Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); |
1266 | |
1267 | if (!global) { |
1268 | // Non-global regexp search, string replace. |
1269 | |
1270 | uint32_t last_index = 0; |
1271 | if (sticky) { |
1272 | Handle<Object> last_index_obj(regexp->last_index(), isolate); |
1273 | ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj, |
1274 | Object::ToLength(isolate, last_index_obj), |
1275 | String); |
1276 | last_index = PositiveNumberToUint32(*last_index_obj); |
1277 | } |
1278 | |
1279 | Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(), |
1280 | isolate); |
1281 | |
1282 | // A lastIndex exceeding the string length always returns null (signalling |
1283 | // failure) in RegExpBuiltinExec, thus we can skip the call. |
1284 | if (last_index <= static_cast<uint32_t>(string->length())) { |
1285 | ASSIGN_RETURN_ON_EXCEPTION(isolate, match_indices_obj, |
1286 | RegExpImpl::Exec(isolate, regexp, string, |
1287 | last_index, last_match_info), |
1288 | String); |
1289 | } |
1290 | |
1291 | if (match_indices_obj->IsNull(isolate)) { |
1292 | if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER); |
1293 | return string; |
1294 | } |
1295 | |
1296 | auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj); |
1297 | |
1298 | const int start_index = match_indices->Capture(0); |
1299 | const int end_index = match_indices->Capture(1); |
1300 | |
1301 | if (sticky) { |
1302 | regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER); |
1303 | } |
1304 | |
1305 | IncrementalStringBuilder builder(isolate); |
1306 | builder.AppendString(factory->NewSubString(string, 0, start_index)); |
1307 | |
1308 | if (replace->length() > 0) { |
1309 | MatchInfoBackedMatch m(isolate, regexp, string, match_indices); |
1310 | Handle<String> replacement; |
1311 | ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement, |
1312 | String::GetSubstitution(isolate, &m, replace), |
1313 | String); |
1314 | builder.AppendString(replacement); |
1315 | } |
1316 | |
1317 | builder.AppendString( |
1318 | factory->NewSubString(string, end_index, string->length())); |
1319 | return builder.Finish(); |
1320 | } else { |
1321 | // Global regexp search, string replace. |
1322 | DCHECK(global); |
1323 | RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0), |
1324 | String); |
1325 | |
1326 | if (replace->length() == 0) { |
1327 | if (string->IsOneByteRepresentation()) { |
1328 | Object result = |
1329 | StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>( |
1330 | isolate, string, regexp, last_match_info); |
1331 | return handle(String::cast(result), isolate); |
1332 | } else { |
1333 | Object result = |
1334 | StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>( |
1335 | isolate, string, regexp, last_match_info); |
1336 | return handle(String::cast(result), isolate); |
1337 | } |
1338 | } |
1339 | |
1340 | Object result = StringReplaceGlobalRegExpWithString( |
1341 | isolate, string, regexp, replace, last_match_info); |
1342 | if (result->IsString()) { |
1343 | return handle(String::cast(result), isolate); |
1344 | } else { |
1345 | return MaybeHandle<String>(); |
1346 | } |
1347 | } |
1348 | |
1349 | UNREACHABLE(); |
1350 | } |
1351 | |
1352 | } // namespace |
1353 | |
1354 | // This is only called for StringReplaceGlobalRegExpWithFunction. |
1355 | RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) { |
1356 | HandleScope handles(isolate); |
1357 | DCHECK_EQ(4, args.length()); |
1358 | |
1359 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
1360 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); |
1361 | CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2); |
1362 | CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); |
1363 | |
1364 | DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); |
1365 | CHECK(result_array->HasObjectElements()); |
1366 | |
1367 | subject = String::Flatten(isolate, subject); |
1368 | CHECK(regexp->GetFlags() & JSRegExp::kGlobal); |
1369 | |
1370 | Object result; |
1371 | if (regexp->CaptureCount() == 0) { |
1372 | result = SearchRegExpMultiple<false>(isolate, subject, regexp, |
1373 | last_match_info, result_array); |
1374 | } else { |
1375 | result = SearchRegExpMultiple<true>(isolate, subject, regexp, |
1376 | last_match_info, result_array); |
1377 | } |
1378 | DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); |
1379 | return result; |
1380 | } |
1381 | |
1382 | RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) { |
1383 | HandleScope scope(isolate); |
1384 | DCHECK_EQ(3, args.length()); |
1385 | CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); |
1386 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); |
1387 | CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2); |
1388 | |
1389 | DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp)); |
1390 | DCHECK(replace_obj->map()->is_callable()); |
1391 | |
1392 | Factory* factory = isolate->factory(); |
1393 | Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); |
1394 | |
1395 | const int flags = regexp->GetFlags(); |
1396 | DCHECK_EQ(flags & JSRegExp::kGlobal, 0); |
1397 | |
1398 | // TODO(jgruber): This should be an easy port to CSA with massive payback. |
1399 | |
1400 | const bool sticky = (flags & JSRegExp::kSticky) != 0; |
1401 | uint32_t last_index = 0; |
1402 | if (sticky) { |
1403 | Handle<Object> last_index_obj(regexp->last_index(), isolate); |
1404 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1405 | isolate, last_index_obj, Object::ToLength(isolate, last_index_obj)); |
1406 | last_index = PositiveNumberToUint32(*last_index_obj); |
1407 | } |
1408 | |
1409 | Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(), |
1410 | isolate); |
1411 | |
1412 | // A lastIndex exceeding the string length always returns null (signalling |
1413 | // failure) in RegExpBuiltinExec, thus we can skip the call. |
1414 | if (last_index <= static_cast<uint32_t>(subject->length())) { |
1415 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1416 | isolate, match_indices_obj, |
1417 | RegExpImpl::Exec(isolate, regexp, subject, last_index, |
1418 | last_match_info)); |
1419 | } |
1420 | |
1421 | if (match_indices_obj->IsNull(isolate)) { |
1422 | if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER); |
1423 | return *subject; |
1424 | } |
1425 | |
1426 | Handle<RegExpMatchInfo> match_indices = |
1427 | Handle<RegExpMatchInfo>::cast(match_indices_obj); |
1428 | |
1429 | const int index = match_indices->Capture(0); |
1430 | const int end_of_match = match_indices->Capture(1); |
1431 | |
1432 | if (sticky) { |
1433 | regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER); |
1434 | } |
1435 | |
1436 | IncrementalStringBuilder builder(isolate); |
1437 | builder.AppendString(factory->NewSubString(subject, 0, index)); |
1438 | |
1439 | // Compute the parameter list consisting of the match, captures, index, |
1440 | // and subject for the replace function invocation. If the RegExp contains |
1441 | // named captures, they are also passed as the last argument. |
1442 | |
1443 | // The number of captures plus one for the match. |
1444 | const int m = match_indices->NumberOfCaptureRegisters() / 2; |
1445 | |
1446 | bool has_named_captures = false; |
1447 | Handle<FixedArray> capture_map; |
1448 | if (m > 1) { |
1449 | // The existence of capture groups implies IRREGEXP kind. |
1450 | DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
1451 | |
1452 | Object maybe_capture_map = regexp->CaptureNameMap(); |
1453 | if (maybe_capture_map->IsFixedArray()) { |
1454 | has_named_captures = true; |
1455 | capture_map = handle(FixedArray::cast(maybe_capture_map), isolate); |
1456 | } |
1457 | } |
1458 | |
1459 | const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures); |
1460 | if (argc == static_cast<uint32_t>(-1)) { |
1461 | THROW_NEW_ERROR_RETURN_FAILURE( |
1462 | isolate, NewRangeError(MessageTemplate::kTooManyArguments)); |
1463 | } |
1464 | ScopedVector<Handle<Object>> argv(argc); |
1465 | |
1466 | int cursor = 0; |
1467 | for (int j = 0; j < m; j++) { |
1468 | bool ok; |
1469 | Handle<String> capture = |
1470 | RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok); |
1471 | if (ok) { |
1472 | argv[cursor++] = capture; |
1473 | } else { |
1474 | argv[cursor++] = factory->undefined_value(); |
1475 | } |
1476 | } |
1477 | |
1478 | argv[cursor++] = handle(Smi::FromInt(index), isolate); |
1479 | argv[cursor++] = subject; |
1480 | |
1481 | if (has_named_captures) { |
1482 | argv[cursor++] = ConstructNamedCaptureGroupsObject( |
1483 | isolate, capture_map, [&argv](int ix) { return *argv[ix]; }); |
1484 | } |
1485 | |
1486 | DCHECK_EQ(cursor, argc); |
1487 | |
1488 | Handle<Object> replacement_obj; |
1489 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1490 | isolate, replacement_obj, |
1491 | Execution::Call(isolate, replace_obj, factory->undefined_value(), argc, |
1492 | argv.start())); |
1493 | |
1494 | Handle<String> replacement; |
1495 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1496 | isolate, replacement, Object::ToString(isolate, replacement_obj)); |
1497 | |
1498 | builder.AppendString(replacement); |
1499 | builder.AppendString( |
1500 | factory->NewSubString(subject, end_of_match, subject->length())); |
1501 | |
1502 | RETURN_RESULT_OR_FAILURE(isolate, builder.Finish()); |
1503 | } |
1504 | |
1505 | namespace { |
1506 | |
1507 | V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate, |
1508 | Handle<Object> object, |
1509 | uint32_t* out) { |
1510 | if (object->IsUndefined(isolate)) { |
1511 | *out = kMaxUInt32; |
1512 | return object; |
1513 | } |
1514 | |
1515 | Handle<Object> number; |
1516 | ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object), |
1517 | Object); |
1518 | *out = NumberToUint32(*number); |
1519 | return object; |
1520 | } |
1521 | |
1522 | Handle<JSArray> NewJSArrayWithElements(Isolate* isolate, |
1523 | Handle<FixedArray> elems, |
1524 | int num_elems) { |
1525 | return isolate->factory()->NewJSArrayWithElements( |
1526 | FixedArray::ShrinkOrEmpty(isolate, elems, num_elems)); |
1527 | } |
1528 | |
1529 | } // namespace |
1530 | |
// Slow path for:
// ES#sec-regexp.prototype-@@split
// RegExp.prototype [ @@split ] ( string, limit )
//
// Arguments: the receiver |recv|, the |string| to split and the |limit|
// object. A splitter is constructed from the receiver's species constructor
// with the sticky flag forced on, and is then executed at successive
// positions of |string|. Returns a JSArray with the pieces between matches
// and any captures of each match, truncated to |limit| entries. Any exception
// thrown by the user-observable operations is propagated as failure.
RUNTIME_FUNCTION(Runtime_RegExpSplit) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
  CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);

  Factory* factory = isolate->factory();

  // Determine the constructor for the splitter, defaulting to the regexp
  // function.
  Handle<JSFunction> regexp_fun = isolate->regexp_function();
  Handle<Object> ctor;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));

  // Read and stringify the receiver's "flags" property.
  Handle<Object> flags_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, flags_obj,
      JSObject::GetProperty(isolate, recv, factory->flags_string()));

  Handle<String> flags;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
                                     Object::ToString(isolate, flags_obj));

  Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
  const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);

  Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
  const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);

  // The splitter must be sticky: append 'y' unless it is already present.
  Handle<String> new_flags = flags;
  if (!sticky) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
                                       factory->NewConsString(flags, y_str));
  }

  // Construct the splitter as `new ctor(recv, new_flags)`.
  Handle<JSReceiver> splitter;
  {
    const int argc = 2;

    ScopedVector<Handle<Object>> argv(argc);
    argv[0] = recv;
    argv[1] = new_flags;

    Handle<Object> splitter_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, splitter_obj,
        Execution::New(isolate, ctor, argc, argv.start()));

    splitter = Handle<JSReceiver>::cast(splitter_obj);
  }

  // An undefined limit becomes kMaxUInt32 (see ToUint32 above).
  uint32_t limit;
  RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));

  const uint32_t length = string->length();

  if (limit == 0) return *factory->NewJSArray(0);

  // Empty input: the result is an empty array if the splitter matches the
  // empty string, and a one-element array holding |string| otherwise.
  if (length == 0) {
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    if (!result->IsNull(isolate)) return *factory->NewJSArray(0);

    Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
    elems->set(0, *string);
    return *factory->NewJSArrayWithElements(elems);
  }

  // Output elements are accumulated in |elems|, which is grown on demand by
  // FixedArray::SetAndGrow; |num_elems| counts the slots in use.
  static const int kInitialArraySize = 8;
  Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
  uint32_t num_elems = 0;

  // |string_index| is the position at which the next match is attempted;
  // |prev_string_index| is where the piece currently being collected starts.
  uint32_t string_index = 0;
  uint32_t prev_string_index = 0;
  while (string_index < length) {
    RETURN_FAILURE_ON_EXCEPTION(
        isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));

    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    // No match at this position: move on via AdvanceStringIndex (|unicode| is
    // forwarded to it) and try again.
    if (result->IsNull(isolate)) {
      string_index = static_cast<uint32_t>(
          RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
      continue;
    }

    // The end of the match is the splitter's lastIndex, clamped to the
    // string length.
    Handle<Object> last_index_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));

    const uint32_t end =
        std::min(PositiveNumberToUint32(*last_index_obj), length);
    // A match that ends where the current piece starts is skipped: advance
    // and retry.
    if (end == prev_string_index) {
      string_index = static_cast<uint32_t>(
          RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
      continue;
    }

    // Emit the piece between the previous match and this one.
    {
      Handle<String> substr =
          factory->NewSubString(string, prev_string_index, string_index);
      elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
      if (num_elems == limit) {
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    prev_string_index = end;

    // Append the captures of this match: elements 1..length-1 of the exec
    // result (element 0 is not appended).
    Handle<Object> num_captures_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj,
        Object::GetProperty(isolate, result,
                            isolate->factory()->length_string()));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
    const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);

    for (uint32_t i = 1; i < num_captures; i++) {
      Handle<Object> capture;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, capture, Object::GetElement(isolate, result, i));
      elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
      if (num_elems == limit) {
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    // Continue searching right after the end of this match.
    string_index = prev_string_index;
  }

  // Emit the trailing piece after the last match.
  {
    Handle<String> substr =
        factory->NewSubString(string, prev_string_index, length);
    elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
  }

  return *NewJSArrayWithElements(isolate, elems, num_elems);
}
1684 | |
1685 | // Slow path for: |
1686 | // ES#sec-regexp.prototype-@@replace |
1687 | // RegExp.prototype [ @@replace ] ( string, replaceValue ) |
1688 | RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) { |
1689 | HandleScope scope(isolate); |
1690 | DCHECK_EQ(3, args.length()); |
1691 | |
1692 | CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0); |
1693 | CONVERT_ARG_HANDLE_CHECKED(String, string, 1); |
1694 | Handle<Object> replace_obj = args.at(2); |
1695 | |
1696 | Factory* factory = isolate->factory(); |
1697 | |
1698 | string = String::Flatten(isolate, string); |
1699 | |
1700 | const bool functional_replace = replace_obj->IsCallable(); |
1701 | |
1702 | Handle<String> replace; |
1703 | if (!functional_replace) { |
1704 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace, |
1705 | Object::ToString(isolate, replace_obj)); |
1706 | } |
1707 | |
1708 | // Fast-path for unmodified JSRegExps (and non-functional replace). |
1709 | if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) { |
1710 | // We should never get here with functional replace because unmodified |
1711 | // regexp and functional replace should be fully handled in CSA code. |
1712 | CHECK(!functional_replace); |
1713 | Handle<Object> result; |
1714 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1715 | isolate, result, |
1716 | RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace)); |
1717 | DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv)); |
1718 | return *result; |
1719 | } |
1720 | |
1721 | const uint32_t length = string->length(); |
1722 | |
1723 | Handle<Object> global_obj; |
1724 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1725 | isolate, global_obj, |
1726 | JSReceiver::GetProperty(isolate, recv, factory->global_string())); |
1727 | const bool global = global_obj->BooleanValue(isolate); |
1728 | |
1729 | bool unicode = false; |
1730 | if (global) { |
1731 | Handle<Object> unicode_obj; |
1732 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1733 | isolate, unicode_obj, |
1734 | JSReceiver::GetProperty(isolate, recv, factory->unicode_string())); |
1735 | unicode = unicode_obj->BooleanValue(isolate); |
1736 | |
1737 | RETURN_FAILURE_ON_EXCEPTION(isolate, |
1738 | RegExpUtils::SetLastIndex(isolate, recv, 0)); |
1739 | } |
1740 | |
1741 | Zone zone(isolate->allocator(), ZONE_NAME); |
1742 | ZoneVector<Handle<Object>> results(&zone); |
1743 | |
1744 | while (true) { |
1745 | Handle<Object> result; |
1746 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1747 | isolate, result, RegExpUtils::RegExpExec(isolate, recv, string, |
1748 | factory->undefined_value())); |
1749 | |
1750 | if (result->IsNull(isolate)) break; |
1751 | |
1752 | results.push_back(result); |
1753 | if (!global) break; |
1754 | |
1755 | Handle<Object> match_obj; |
1756 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj, |
1757 | Object::GetElement(isolate, result, 0)); |
1758 | |
1759 | Handle<String> match; |
1760 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match, |
1761 | Object::ToString(isolate, match_obj)); |
1762 | |
1763 | if (match->length() == 0) { |
1764 | RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex( |
1765 | isolate, recv, string, unicode)); |
1766 | } |
1767 | } |
1768 | |
1769 | // TODO(jgruber): Look into ReplacementStringBuilder instead. |
1770 | IncrementalStringBuilder builder(isolate); |
1771 | uint32_t next_source_position = 0; |
1772 | |
1773 | for (const auto& result : results) { |
1774 | HandleScope handle_scope(isolate); |
1775 | Handle<Object> captures_length_obj; |
1776 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1777 | isolate, captures_length_obj, |
1778 | Object::GetProperty(isolate, result, factory->length_string())); |
1779 | |
1780 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1781 | isolate, captures_length_obj, |
1782 | Object::ToLength(isolate, captures_length_obj)); |
1783 | const uint32_t captures_length = |
1784 | PositiveNumberToUint32(*captures_length_obj); |
1785 | |
1786 | Handle<Object> match_obj; |
1787 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj, |
1788 | Object::GetElement(isolate, result, 0)); |
1789 | |
1790 | Handle<String> match; |
1791 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match, |
1792 | Object::ToString(isolate, match_obj)); |
1793 | |
1794 | const int match_length = match->length(); |
1795 | |
1796 | Handle<Object> position_obj; |
1797 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1798 | isolate, position_obj, |
1799 | Object::GetProperty(isolate, result, factory->index_string())); |
1800 | |
1801 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1802 | isolate, position_obj, Object::ToInteger(isolate, position_obj)); |
1803 | const uint32_t position = |
1804 | std::min(PositiveNumberToUint32(*position_obj), length); |
1805 | |
1806 | // Do not reserve capacity since captures_length is user-controlled. |
1807 | ZoneVector<Handle<Object>> captures(&zone); |
1808 | |
1809 | for (uint32_t n = 0; n < captures_length; n++) { |
1810 | Handle<Object> capture; |
1811 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1812 | isolate, capture, Object::GetElement(isolate, result, n)); |
1813 | |
1814 | if (!capture->IsUndefined(isolate)) { |
1815 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture, |
1816 | Object::ToString(isolate, capture)); |
1817 | } |
1818 | captures.push_back(capture); |
1819 | } |
1820 | |
1821 | Handle<Object> groups_obj = isolate->factory()->undefined_value(); |
1822 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1823 | isolate, groups_obj, |
1824 | Object::GetProperty(isolate, result, factory->groups_string())); |
1825 | |
1826 | const bool has_named_captures = !groups_obj->IsUndefined(isolate); |
1827 | |
1828 | Handle<String> replacement; |
1829 | if (functional_replace) { |
1830 | const uint32_t argc = |
1831 | GetArgcForReplaceCallable(captures_length, has_named_captures); |
1832 | if (argc == static_cast<uint32_t>(-1)) { |
1833 | THROW_NEW_ERROR_RETURN_FAILURE( |
1834 | isolate, NewRangeError(MessageTemplate::kTooManyArguments)); |
1835 | } |
1836 | |
1837 | ScopedVector<Handle<Object>> argv(argc); |
1838 | |
1839 | int cursor = 0; |
1840 | for (uint32_t j = 0; j < captures_length; j++) { |
1841 | argv[cursor++] = captures[j]; |
1842 | } |
1843 | |
1844 | argv[cursor++] = handle(Smi::FromInt(position), isolate); |
1845 | argv[cursor++] = string; |
1846 | if (has_named_captures) argv[cursor++] = groups_obj; |
1847 | |
1848 | DCHECK_EQ(cursor, argc); |
1849 | |
1850 | Handle<Object> replacement_obj; |
1851 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1852 | isolate, replacement_obj, |
1853 | Execution::Call(isolate, replace_obj, factory->undefined_value(), |
1854 | argc, argv.start())); |
1855 | |
1856 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1857 | isolate, replacement, Object::ToString(isolate, replacement_obj)); |
1858 | } else { |
1859 | DCHECK(!functional_replace); |
1860 | if (!groups_obj->IsUndefined(isolate)) { |
1861 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1862 | isolate, groups_obj, Object::ToObject(isolate, groups_obj)); |
1863 | } |
1864 | VectorBackedMatch m(isolate, string, match, position, &captures, |
1865 | groups_obj); |
1866 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1867 | isolate, replacement, String::GetSubstitution(isolate, &m, replace)); |
1868 | } |
1869 | |
1870 | if (position >= next_source_position) { |
1871 | builder.AppendString( |
1872 | factory->NewSubString(string, next_source_position, position)); |
1873 | builder.AppendString(replacement); |
1874 | |
1875 | next_source_position = position + match_length; |
1876 | } |
1877 | } |
1878 | |
1879 | if (next_source_position < length) { |
1880 | builder.AppendString( |
1881 | factory->NewSubString(string, next_source_position, length)); |
1882 | } |
1883 | |
1884 | RETURN_RESULT_OR_FAILURE(isolate, builder.Finish()); |
1885 | } |
1886 | |
1887 | RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) { |
1888 | HandleScope scope(isolate); |
1889 | DCHECK_EQ(3, args.length()); |
1890 | // TODO(pwong): To follow the spec more closely and simplify calling code, |
1891 | // this could handle the canonicalization of pattern and flags. See |
1892 | // https://tc39.github.io/ecma262/#sec-regexpinitialize |
1893 | CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); |
1894 | CONVERT_ARG_HANDLE_CHECKED(String, source, 1); |
1895 | CONVERT_ARG_HANDLE_CHECKED(String, flags, 2); |
1896 | |
1897 | RETURN_FAILURE_ON_EXCEPTION(isolate, |
1898 | JSRegExp::Initialize(regexp, source, flags)); |
1899 | |
1900 | return *regexp; |
1901 | } |
1902 | |
1903 | RUNTIME_FUNCTION(Runtime_IsRegExp) { |
1904 | SealHandleScope shs(isolate); |
1905 | DCHECK_EQ(1, args.length()); |
1906 | CONVERT_ARG_CHECKED(Object, obj, 0); |
1907 | return isolate->heap()->ToBoolean(obj->IsJSRegExp()); |
1908 | } |
1909 | |
1910 | } // namespace internal |
1911 | } // namespace v8 |
1912 | |