1// Copyright 2011 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// A simple interpreter for the Irregexp byte code.
6
7#include "src/regexp/interpreter-irregexp.h"
8
9#include "src/ast/ast.h"
10#include "src/objects-inl.h"
11#include "src/regexp/bytecodes-irregexp.h"
12#include "src/regexp/jsregexp.h"
13#include "src/regexp/regexp-macro-assembler.h"
14#include "src/unicode.h"
15#include "src/utils.h"
16
17#ifdef V8_INTL_SUPPORT
18#include "unicode/uchar.h"
19#endif // V8_INTL_SUPPORT
20
21namespace v8 {
22namespace internal {
23
24static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
25 int len, Vector<const uc16> subject,
26 bool unicode) {
27 Address offset_a =
28 reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
29 Address offset_b =
30 reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
31 size_t length = len * kUC16Size;
32 return RegExpMacroAssembler::CaseInsensitiveCompareUC16(
33 offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
34}
35
36
37static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
38 int len, Vector<const uint8_t> subject,
39 bool unicode) {
40 // For Latin1 characters the unicode flag makes no difference.
41 for (int i = 0; i < len; i++) {
42 unsigned int old_char = subject[from++];
43 unsigned int new_char = subject[current++];
44 if (old_char == new_char) continue;
45 // Convert both characters to lower case.
46 old_char |= 0x20;
47 new_char |= 0x20;
48 if (old_char != new_char) return false;
49 // Not letters in the ASCII range and Latin-1 range.
50 if (!(old_char - 'a' <= 'z' - 'a') &&
51 !(old_char - 224 <= 254 - 224 && old_char != 247)) {
52 return false;
53 }
54 }
55 return true;
56}
57
58
59#ifdef DEBUG
60static void TraceInterpreter(const byte* code_base,
61 const byte* pc,
62 int stack_depth,
63 int current_position,
64 uint32_t current_char,
65 int bytecode_length,
66 const char* bytecode_name) {
67 if (FLAG_trace_regexp_bytecodes) {
68 bool printable = (current_char < 127 && current_char >= 32);
69 const char* format =
70 printable ?
71 "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
72 "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
73 PrintF(format,
74 pc - code_base,
75 stack_depth,
76 current_position,
77 current_char,
78 printable ? current_char : '.',
79 bytecode_name);
80 for (int i = 0; i < bytecode_length; i++) {
81 printf(", %02x", pc[i]);
82 }
83 printf(" ");
84 for (int i = 1; i < bytecode_length; i++) {
85 unsigned char b = pc[i];
86 if (b < 127 && b >= 32) {
87 printf("%c", b);
88 } else {
89 printf(".");
90 }
91 }
92 printf("\n");
93 }
94}
95
96
// BYTECODE(name) opens the switch case for opcode BC_<name>. In this DEBUG
// variant it also calls TraceInterpreter, passing the locals code_base, pc,
// backtrack_sp, backtrack_stack_base, current and current_char by name, so the
// macro can only be expanded where all of those identifiers are in scope.
// BC_<name>_LENGTH is forwarded as the instruction length and the stringified
// opcode name as the label.
97#define BYTECODE(name) \
98 case BC_##name: \
99 TraceInterpreter(code_base, \
100 pc, \
101 static_cast<int>(backtrack_sp - backtrack_stack_base), \
102 current, \
103 current_char, \
104 BC_##name##_LENGTH, \
105 #name);
106#else
// Non-DEBUG variant: expands to the bare case label, with no tracing call.
107#define BYTECODE(name) \
108 case BC_##name:
109#endif
110
111
112static int32_t Load32Aligned(const byte* pc) {
113 DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
114 return *reinterpret_cast<const int32_t *>(pc);
115}
116
117
118static int32_t Load16Aligned(const byte* pc) {
119 DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
120 return *reinterpret_cast<const uint16_t *>(pc);
121}
122
123
124// A simple abstraction over the backtracking stack used by the interpreter.
125// This backtracking stack does not grow automatically, but it ensures that the
126// the memory held by the stack is released or remembered in a cache if the
127// matching terminates.
128class BacktrackStack {
129 public:
130 BacktrackStack() { data_ = NewArray<int>(kBacktrackStackSize); }
131
132 ~BacktrackStack() {
133 DeleteArray(data_);
134 }
135
136 int* data() const { return data_; }
137
138 int max_size() const { return kBacktrackStackSize; }
139
140 private:
141 static const int kBacktrackStackSize = 10000;
142
143 int* data_;
144
145 DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
146};
147
148namespace {
149
150IrregexpInterpreter::Result StackOverflow(Isolate* isolate) {
151 // We abort interpreter execution after the stack overflow is thrown, and thus
152 // allow allocation here despite the outer DisallowHeapAllocationScope.
153 AllowHeapAllocation yes_gc;
154 isolate->StackOverflow();
155 return IrregexpInterpreter::EXCEPTION;
156}
157
158// Runs all pending interrupts. Callers must update unhandlified object
159// references after this function completes.
160IrregexpInterpreter::Result HandleInterrupts(Isolate* isolate,
161 Handle<String> subject_string) {
162 DisallowHeapAllocation no_gc;
163
164 StackLimitCheck check(isolate);
165 if (check.JsHasOverflowed()) {
166 // A real stack overflow.
167 return StackOverflow(isolate);
168 }
169
170 const bool was_one_byte =
171 String::IsOneByteRepresentationUnderneath(*subject_string);
172
173 Object result;
174 {
175 AllowHeapAllocation yes_gc;
176 result = isolate->stack_guard()->HandleInterrupts();
177 }
178
179 if (result->IsException(isolate)) {
180 return IrregexpInterpreter::EXCEPTION;
181 }
182
183 // If we changed between a LATIN1 and a UC16 string, we need to restart
184 // regexp matching with the appropriate template instantiation of RawMatch.
185 if (String::IsOneByteRepresentationUnderneath(*subject_string) !=
186 was_one_byte) {
187 return IrregexpInterpreter::RETRY;
188 }
189
190 return IrregexpInterpreter::SUCCESS;
191}
192
193template <typename Char>
194void UpdateCodeAndSubjectReferences(Isolate* isolate,
195 Handle<ByteArray> code_array,
196 Handle<String> subject_string,
197 const byte** code_base_out,
198 const byte** pc_out,
199 Vector<const Char>* subject_string_out) {
200 DisallowHeapAllocation no_gc;
201
202 if (*code_base_out != code_array->GetDataStartAddress()) {
203 const intptr_t pc_offset = *pc_out - *code_base_out;
204 DCHECK_GT(pc_offset, 0);
205 *code_base_out = code_array->GetDataStartAddress();
206 *pc_out = *code_base_out + pc_offset;
207 }
208
209 DCHECK(subject_string->IsFlat());
210 *subject_string_out = subject_string->GetCharVector<Char>(no_gc);
211}
212
// Runs the Irregexp bytecode stored in |code_array| against |subject|.
//
// Char is the subject's character type (Match instantiates this with uint8_t
// and uc16). |subject_string| is the handle from which |subject| was taken; it
// is used to refresh |subject| after interrupts have run. |registers| is the
// register file read and written by the register bytecodes. |current| is the
// starting position in |subject| and |current_char| the initially loaded
// character.
//
// Each iteration loads one aligned 32-bit instruction word at |pc|, dispatches
// on (insn & BYTECODE_MASK) and takes a packed operand from
// (insn >> BYTECODE_SHIFT). Additional operands, where an instruction has
// them, are read from pc + 4, pc + 8, ... Jump targets are stored as offsets
// that are added to |code_base|.
//
// Returns SUCCESS on BC_SUCCEED, FAILURE on BC_FAIL, the result of
// StackOverflow() when a push finds the backtrack stack full, and any
// non-SUCCESS result of HandleInterrupts() (EXCEPTION or RETRY) from POP_BT.
213template <typename Char>
214IrregexpInterpreter::Result RawMatch(Isolate* isolate,
215 Handle<ByteArray> code_array,
216 Handle<String> subject_string,
217 Vector<const Char> subject, int* registers,
218 int current, uint32_t current_char) {
219 DisallowHeapAllocation no_gc;
220
 // Raw pointers into the bytecode array; POP_BT refreshes them after running
 // interrupts.
221 const byte* pc = code_array->GetDataStartAddress();
222 const byte* code_base = pc;
223
224 // BacktrackStack ensures that the memory allocated for the backtracking stack
225 // is returned to the system or cached if there is no stack being cached at
226 // the moment.
227 BacktrackStack backtrack_stack;
228 int* backtrack_stack_base = backtrack_stack.data();
229 int* backtrack_sp = backtrack_stack_base;
 // Number of free slots left on the backtrack stack. Pushes decrement it and
 // pops increment it.
230 int backtrack_stack_space = backtrack_stack.max_size();
231#ifdef DEBUG
232 if (FLAG_trace_regexp_bytecodes) {
233 PrintF("\n\nStart bytecode interpreter\n\n");
234 }
235#endif
236 while (true) {
237 int32_t insn = Load32Aligned(pc);
238 switch (insn & BYTECODE_MASK) {
239 BYTECODE(BREAK)
240 UNREACHABLE();
 // The three PUSH_* instructions each take one stack slot; if none is left
 // the match is aborted via StackOverflow().
241 BYTECODE(PUSH_CP)
242 if (--backtrack_stack_space < 0) {
243 return StackOverflow(isolate);
244 }
245 *backtrack_sp++ = current;
246 pc += BC_PUSH_CP_LENGTH;
247 break;
 // Pushes the 32-bit operand at pc + 4. POP_BT later adds the popped value
 // to code_base to obtain the pc to resume at.
248 BYTECODE(PUSH_BT)
249 if (--backtrack_stack_space < 0) {
250 return StackOverflow(isolate);
251 }
252 *backtrack_sp++ = Load32Aligned(pc + 4);
253 pc += BC_PUSH_BT_LENGTH;
254 break;
255 BYTECODE(PUSH_REGISTER)
256 if (--backtrack_stack_space < 0) {
257 return StackOverflow(isolate);
258 }
259 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
260 pc += BC_PUSH_REGISTER_LENGTH;
261 break;
 // Register instructions: the packed operand selects the register, and the
 // immediate (if any) is the 32-bit word at pc + 4.
262 BYTECODE(SET_REGISTER)
263 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
264 pc += BC_SET_REGISTER_LENGTH;
265 break;
266 BYTECODE(ADVANCE_REGISTER)
267 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
268 pc += BC_ADVANCE_REGISTER_LENGTH;
269 break;
270 BYTECODE(SET_REGISTER_TO_CP)
271 registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
272 pc += BC_SET_REGISTER_TO_CP_LENGTH;
273 break;
274 BYTECODE(SET_CP_TO_REGISTER)
275 current = registers[insn >> BYTECODE_SHIFT];
276 pc += BC_SET_CP_TO_REGISTER_LENGTH;
277 break;
 // The stack pointer is saved to a register as a depth (element count from
 // the stack base), not as a raw pointer.
278 BYTECODE(SET_REGISTER_TO_SP)
279 registers[insn >> BYTECODE_SHIFT] =
280 static_cast<int>(backtrack_sp - backtrack_stack_base);
281 pc += BC_SET_REGISTER_TO_SP_LENGTH;
282 break;
 // Restores the stack pointer from a saved depth and recomputes the free
 // space so that it matches the restored depth.
283 BYTECODE(SET_SP_TO_REGISTER)
284 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
285 backtrack_stack_space = backtrack_stack.max_size() -
286 static_cast<int>(backtrack_sp - backtrack_stack_base);
287 pc += BC_SET_SP_TO_REGISTER_LENGTH;
288 break;
289 BYTECODE(POP_CP)
290 backtrack_stack_space++;
291 --backtrack_sp;
292 current = *backtrack_sp;
293 pc += BC_POP_CP_LENGTH;
294 break;
295 // clang-format off
 // Backtrack: interrupts are handled on every POP_BT. Any result other than
 // SUCCESS ends the match with that result. Since handling interrupts may
 // allocate, code_base, pc and subject are refreshed from their handles
 // before the popped offset is turned into the new pc.
296 BYTECODE(POP_BT) {
297 IrregexpInterpreter::Result return_code = HandleInterrupts(
298 isolate, subject_string);
299 if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
300
301 UpdateCodeAndSubjectReferences(isolate, code_array, subject_string,
302 &code_base, &pc, &subject);
303
304 backtrack_stack_space++;
305 --backtrack_sp;
306 pc = code_base + *backtrack_sp;
307 break;
308 }
309 BYTECODE(POP_REGISTER) // clang-format on
310 backtrack_stack_space++;
311 --backtrack_sp;
312 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
313 pc += BC_POP_REGISTER_LENGTH;
314 break;
315 BYTECODE(FAIL)
316 return IrregexpInterpreter::FAILURE;
317 BYTECODE(SUCCEED)
318 return IrregexpInterpreter::SUCCESS;
 // The position delta is the packed operand of the signed instruction word.
319 BYTECODE(ADVANCE_CP)
320 current += insn >> BYTECODE_SHIFT;
321 pc += BC_ADVANCE_CP_LENGTH;
322 break;
323 BYTECODE(GOTO)
324 pc = code_base + Load32Aligned(pc + 4);
325 break;
326 BYTECODE(ADVANCE_CP_AND_GOTO)
327 current += insn >> BYTECODE_SHIFT;
328 pc = code_base + Load32Aligned(pc + 4);
329 break;
 // If the top stack entry equals the current position, discard it and jump
 // to the target at pc + 4; otherwise continue with the next instruction.
330 BYTECODE(CHECK_GREEDY)
331 if (current == backtrack_sp[-1]) {
332 backtrack_sp--;
333 backtrack_stack_space++;
334 pc = code_base + Load32Aligned(pc + 4);
335 } else {
336 pc += BC_CHECK_GREEDY_LENGTH;
337 }
338 break;
 // Checked loads: if the requested characters do not all lie inside the
 // subject, jump to the target at pc + 4 instead of loading.
339 BYTECODE(LOAD_CURRENT_CHAR) {
340 int pos = current + (insn >> BYTECODE_SHIFT);
341 if (pos >= subject.length() || pos < 0) {
342 pc = code_base + Load32Aligned(pc + 4);
343 } else {
344 current_char = subject[pos];
345 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
346 }
347 break;
348 }
 // The *_UNCHECKED loads perform no bounds test of their own.
 // NOTE(review): presumably the bytecode producer guarantees the position is
 // in range before emitting these - confirm against the assembler.
349 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
350 int pos = current + (insn >> BYTECODE_SHIFT);
351 current_char = subject[pos];
352 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
353 break;
354 }
 // Packs two characters into current_char: subject[pos] in the low bits and
 // subject[pos + 1] shifted up by the width of one Char.
355 BYTECODE(LOAD_2_CURRENT_CHARS) {
356 int pos = current + (insn >> BYTECODE_SHIFT);
357 if (pos + 2 > subject.length() || pos < 0) {
358 pc = code_base + Load32Aligned(pc + 4);
359 } else {
360 Char next = subject[pos + 1];
361 current_char =
362 (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
363 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
364 }
365 break;
366 }
367 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
368 int pos = current + (insn >> BYTECODE_SHIFT);
369 Char next = subject[pos + 1];
370 current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
371 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
372 break;
373 }
 // Four-character loads are only expected for one-byte subjects (see the
 // DCHECK); the four bytes are packed into current_char, lowest byte first.
374 BYTECODE(LOAD_4_CURRENT_CHARS) {
375 DCHECK_EQ(1, sizeof(Char));
376 int pos = current + (insn >> BYTECODE_SHIFT);
377 if (pos + 4 > subject.length() || pos < 0) {
378 pc = code_base + Load32Aligned(pc + 4);
379 } else {
380 Char next1 = subject[pos + 1];
381 Char next2 = subject[pos + 2];
382 Char next3 = subject[pos + 3];
383 current_char = (subject[pos] |
384 (next1 << 8) |
385 (next2 << 16) |
386 (next3 << 24));
387 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
388 }
389 break;
390 }
391 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
392 DCHECK_EQ(1, sizeof(Char));
393 int pos = current + (insn >> BYTECODE_SHIFT);
394 Char next1 = subject[pos + 1];
395 Char next2 = subject[pos + 2];
396 Char next3 = subject[pos + 3];
397 current_char = (subject[pos] |
398 (next1 << 8) |
399 (next2 << 16) |
400 (next3 << 24));
401 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
402 break;
403 }
 // Character tests. The *_4_CHARS forms take the comparison value from the
 // 32-bit word at pc + 4; the single-character forms take it from the packed
 // operand. When the test holds the instruction jumps to the target stored
 // after its operands, otherwise execution continues with the next
 // instruction.
404 BYTECODE(CHECK_4_CHARS) {
405 uint32_t c = Load32Aligned(pc + 4);
406 if (c == current_char) {
407 pc = code_base + Load32Aligned(pc + 8);
408 } else {
409 pc += BC_CHECK_4_CHARS_LENGTH;
410 }
411 break;
412 }
413 BYTECODE(CHECK_CHAR) {
414 uint32_t c = (insn >> BYTECODE_SHIFT);
415 if (c == current_char) {
416 pc = code_base + Load32Aligned(pc + 4);
417 } else {
418 pc += BC_CHECK_CHAR_LENGTH;
419 }
420 break;
421 }
422 BYTECODE(CHECK_NOT_4_CHARS) {
423 uint32_t c = Load32Aligned(pc + 4);
424 if (c != current_char) {
425 pc = code_base + Load32Aligned(pc + 8);
426 } else {
427 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
428 }
429 break;
430 }
431 BYTECODE(CHECK_NOT_CHAR) {
432 uint32_t c = (insn >> BYTECODE_SHIFT);
433 if (c != current_char) {
434 pc = code_base + Load32Aligned(pc + 4);
435 } else {
436 pc += BC_CHECK_NOT_CHAR_LENGTH;
437 }
438 break;
439 }
 // AND_CHECK_* variants mask current_char with a 32-bit operand before
 // comparing.
440 BYTECODE(AND_CHECK_4_CHARS) {
441 uint32_t c = Load32Aligned(pc + 4);
442 if (c == (current_char & Load32Aligned(pc + 8))) {
443 pc = code_base + Load32Aligned(pc + 12);
444 } else {
445 pc += BC_AND_CHECK_4_CHARS_LENGTH;
446 }
447 break;
448 }
449 BYTECODE(AND_CHECK_CHAR) {
450 uint32_t c = (insn >> BYTECODE_SHIFT);
451 if (c == (current_char & Load32Aligned(pc + 4))) {
452 pc = code_base + Load32Aligned(pc + 8);
453 } else {
454 pc += BC_AND_CHECK_CHAR_LENGTH;
455 }
456 break;
457 }
458 BYTECODE(AND_CHECK_NOT_4_CHARS) {
459 uint32_t c = Load32Aligned(pc + 4);
460 if (c != (current_char & Load32Aligned(pc + 8))) {
461 pc = code_base + Load32Aligned(pc + 12);
462 } else {
463 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
464 }
465 break;
466 }
467 BYTECODE(AND_CHECK_NOT_CHAR) {
468 uint32_t c = (insn >> BYTECODE_SHIFT);
469 if (c != (current_char & Load32Aligned(pc + 4))) {
470 pc = code_base + Load32Aligned(pc + 8);
471 } else {
472 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
473 }
474 break;
475 }
 // Subtracts the 16-bit operand at pc + 4 from current_char, masks the
 // result with the 16-bit operand at pc + 6, and jumps if it differs from
 // the packed operand.
476 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
477 uint32_t c = (insn >> BYTECODE_SHIFT);
478 uint32_t minus = Load16Aligned(pc + 4);
479 uint32_t mask = Load16Aligned(pc + 6);
480 if (c != ((current_char - minus) & mask)) {
481 pc = code_base + Load32Aligned(pc + 8);
482 } else {
483 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
484 }
485 break;
486 }
 // Range tests use inclusive 16-bit bounds stored at pc + 4 (from) and
 // pc + 6 (to).
487 BYTECODE(CHECK_CHAR_IN_RANGE) {
488 uint32_t from = Load16Aligned(pc + 4);
489 uint32_t to = Load16Aligned(pc + 6);
490 if (from <= current_char && current_char <= to) {
491 pc = code_base + Load32Aligned(pc + 8);
492 } else {
493 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
494 }
495 break;
496 }
497 BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
498 uint32_t from = Load16Aligned(pc + 4);
499 uint32_t to = Load16Aligned(pc + 6);
500 if (from > current_char || current_char > to) {
501 pc = code_base + Load32Aligned(pc + 8);
502 } else {
503 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
504 }
505 break;
506 }
 // The bit table is embedded in the instruction starting at pc + 8. The
 // masked character selects the byte, the low bits of the character select
 // the bit within it; a set bit jumps to the target at pc + 4.
507 BYTECODE(CHECK_BIT_IN_TABLE) {
508 int mask = RegExpMacroAssembler::kTableMask;
509 byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
510 int bit = (current_char & (kBitsPerByte - 1));
511 if ((b & (1 << bit)) != 0) {
512 pc = code_base + Load32Aligned(pc + 4);
513 } else {
514 pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
515 }
516 break;
517 }
518 BYTECODE(CHECK_LT) {
519 uint32_t limit = (insn >> BYTECODE_SHIFT);
520 if (current_char < limit) {
521 pc = code_base + Load32Aligned(pc + 4);
522 } else {
523 pc += BC_CHECK_LT_LENGTH;
524 }
525 break;
526 }
527 BYTECODE(CHECK_GT) {
528 uint32_t limit = (insn >> BYTECODE_SHIFT);
529 if (current_char > limit) {
530 pc = code_base + Load32Aligned(pc + 4);
531 } else {
532 pc += BC_CHECK_GT_LENGTH;
533 }
534 break;
535 }
 // Register comparisons: the packed operand selects the register, the
 // comparison value is at pc + 4 and the jump target at pc + 8.
536 BYTECODE(CHECK_REGISTER_LT)
537 if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
538 pc = code_base + Load32Aligned(pc + 8);
539 } else {
540 pc += BC_CHECK_REGISTER_LT_LENGTH;
541 }
542 break;
543 BYTECODE(CHECK_REGISTER_GE)
544 if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
545 pc = code_base + Load32Aligned(pc + 8);
546 } else {
547 pc += BC_CHECK_REGISTER_GE_LENGTH;
548 }
549 break;
550 BYTECODE(CHECK_REGISTER_EQ_POS)
551 if (registers[insn >> BYTECODE_SHIFT] == current) {
552 pc = code_base + Load32Aligned(pc + 4);
553 } else {
554 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
555 }
556 break;
 // Compares two registers (the second index is at pc + 4): equal values
 // continue with the next instruction, unequal values jump to pc + 8.
557 BYTECODE(CHECK_NOT_REGS_EQUAL)
558 if (registers[insn >> BYTECODE_SHIFT] ==
559 registers[Load32Aligned(pc + 4)]) {
560 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
561 } else {
562 pc = code_base + Load32Aligned(pc + 8);
563 }
564 break;
 // Back references. The selected register and the one after it hold the
 // start and end of the referenced range. With a negative start or a
 // non-positive length no comparison is made and execution simply
 // continues. Otherwise the text at the current position must equal the
 // referenced text: on a mismatch (or if the subject is too short) jump to
 // the target at pc + 4, on a match advance current past the matched text.
565 BYTECODE(CHECK_NOT_BACK_REF) {
566 int from = registers[insn >> BYTECODE_SHIFT];
567 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
568 if (from >= 0 && len > 0) {
569 if (current + len > subject.length() ||
570 CompareChars(&subject[from], &subject[current], len) != 0) {
571 pc = code_base + Load32Aligned(pc + 4);
572 break;
573 }
574 current += len;
575 }
576 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
577 break;
578 }
 // Backward form: compares against the |len| characters that end at the
 // current position and, on a match, moves current back by |len|.
579 BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
580 int from = registers[insn >> BYTECODE_SHIFT];
581 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
582 if (from >= 0 && len > 0) {
583 if (current - len < 0 ||
584 CompareChars(&subject[from], &subject[current - len], len) != 0) {
585 pc = code_base + Load32Aligned(pc + 4);
586 break;
587 }
588 current -= len;
589 }
590 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
591 break;
592 }
 // The unicode and non-unicode case-insensitive opcodes share one body;
 // |unicode| records which of the two was dispatched and is forwarded to
 // BackRefMatchesNoCase.
 // NOTE(review): pc is advanced by the non-unicode opcode's length for both
 // opcodes, which assumes the two lengths are equal - confirm in
 // bytecodes-irregexp.h.
593 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
594 V8_FALLTHROUGH;
595 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
596 bool unicode =
597 (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
598 int from = registers[insn >> BYTECODE_SHIFT];
599 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
600 if (from >= 0 && len > 0) {
601 if (current + len > subject.length() ||
602 !BackRefMatchesNoCase(isolate, from, current, len, subject,
603 unicode)) {
604 pc = code_base + Load32Aligned(pc + 4);
605 break;
606 }
607 current += len;
608 }
609 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
610 break;
611 }
 // Backward counterpart of the shared case-insensitive body above.
612 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
613 V8_FALLTHROUGH;
614 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
615 bool unicode = (insn & BYTECODE_MASK) ==
616 BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
617 int from = registers[insn >> BYTECODE_SHIFT];
618 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
619 if (from >= 0 && len > 0) {
620 if (current - len < 0 ||
621 !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
622 unicode)) {
623 pc = code_base + Load32Aligned(pc + 4);
624 break;
625 }
626 current -= len;
627 }
628 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
629 break;
630 }
631 BYTECODE(CHECK_AT_START)
632 if (current == 0) {
633 pc = code_base + Load32Aligned(pc + 4);
634 } else {
635 pc += BC_CHECK_AT_START_LENGTH;
636 }
637 break;
 // Continues with the next instruction when current plus the packed offset
 // is 0 (i.e. at the start); any other position jumps to the target at
 // pc + 4.
638 BYTECODE(CHECK_NOT_AT_START)
639 if (current + (insn >> BYTECODE_SHIFT) == 0) {
640 pc += BC_CHECK_NOT_AT_START_LENGTH;
641 } else {
642 pc = code_base + Load32Aligned(pc + 4);
643 }
644 break;
 // If more than |by| characters remain after the current position, move
 // current forward so that exactly |by| remain, and reload current_char with
 // the character just before the new position. The operand is extracted
 // from the instruction word treated as unsigned.
645 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
646 int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
647 if (subject.length() - current > by) {
648 current = subject.length() - by;
649 current_char = subject[current - 1];
650 }
651 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
652 break;
653 }
654 default:
655 UNREACHABLE();
656 break;
657 }
658 }
659}
660
661} // namespace
662
663// static
664IrregexpInterpreter::Result IrregexpInterpreter::Match(
665 Isolate* isolate, Handle<ByteArray> code_array,
666 Handle<String> subject_string, int* registers, int start_position) {
667 DCHECK(subject_string->IsFlat());
668
669 // Note: Heap allocation *is* allowed in two situations:
670 // 1. When creating & throwing a stack overflow exception. The interpreter
671 // aborts afterwards, and thus possible-moved objects are never used.
672 // 2. When handling interrupts. We manually relocate unhandlified references
673 // after interrupts have run.
674 DisallowHeapAllocation no_gc;
675
676 uc16 previous_char = '\n';
677 String::FlatContent subject_content = subject_string->GetFlatContent(no_gc);
678 if (subject_content.IsOneByte()) {
679 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
680 if (start_position != 0) previous_char = subject_vector[start_position - 1];
681 return RawMatch(isolate, code_array, subject_string, subject_vector,
682 registers, start_position, previous_char);
683 } else {
684 DCHECK(subject_content.IsTwoByte());
685 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
686 if (start_position != 0) previous_char = subject_vector[start_position - 1];
687 return RawMatch(isolate, code_array, subject_string, subject_vector,
688 registers, start_position, previous_char);
689 }
690}
691
692} // namespace internal
693} // namespace v8
694