1 | // Copyright 2011 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | // A simple interpreter for the Irregexp byte code. |
6 | |
7 | #include "src/regexp/interpreter-irregexp.h" |
8 | |
9 | #include "src/ast/ast.h" |
10 | #include "src/objects-inl.h" |
11 | #include "src/regexp/bytecodes-irregexp.h" |
12 | #include "src/regexp/jsregexp.h" |
13 | #include "src/regexp/regexp-macro-assembler.h" |
14 | #include "src/unicode.h" |
15 | #include "src/utils.h" |
16 | |
17 | #ifdef V8_INTL_SUPPORT |
18 | #include "unicode/uchar.h" |
19 | #endif // V8_INTL_SUPPORT |
20 | |
21 | namespace v8 { |
22 | namespace internal { |
23 | |
24 | static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, |
25 | int len, Vector<const uc16> subject, |
26 | bool unicode) { |
27 | Address offset_a = |
28 | reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from))); |
29 | Address offset_b = |
30 | reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current))); |
31 | size_t length = len * kUC16Size; |
32 | return RegExpMacroAssembler::CaseInsensitiveCompareUC16( |
33 | offset_a, offset_b, length, unicode ? nullptr : isolate) == 1; |
34 | } |
35 | |
36 | |
37 | static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, |
38 | int len, Vector<const uint8_t> subject, |
39 | bool unicode) { |
40 | // For Latin1 characters the unicode flag makes no difference. |
41 | for (int i = 0; i < len; i++) { |
42 | unsigned int old_char = subject[from++]; |
43 | unsigned int new_char = subject[current++]; |
44 | if (old_char == new_char) continue; |
45 | // Convert both characters to lower case. |
46 | old_char |= 0x20; |
47 | new_char |= 0x20; |
48 | if (old_char != new_char) return false; |
49 | // Not letters in the ASCII range and Latin-1 range. |
50 | if (!(old_char - 'a' <= 'z' - 'a') && |
51 | !(old_char - 224 <= 254 - 224 && old_char != 247)) { |
52 | return false; |
53 | } |
54 | } |
55 | return true; |
56 | } |
57 | |
58 | |
#ifdef DEBUG
// Prints a one-line trace of the instruction at |pc| when
// FLAG_trace_regexp_bytecodes is set; does nothing otherwise.
//
//   code_base         start of the bytecode; |pc| is printed relative to it.
//   pc                address of the instruction being traced.
//   stack_depth       value printed as "sp".
//   current_position  value printed as "curpos".
//   current_char      value printed as "curchar", in hex and, if it is in the
//                     printable ASCII range [32, 127), also as a character.
//   bytecode_length   number of bytes of the instruction to dump.
//   bytecode_name     mnemonic printed as "bc".
//
// After the summary, all |bytecode_length| bytes are dumped in hex, followed
// by a character rendering of every byte after the first ('.' for bytes
// outside the printable ASCII range).
static void TraceInterpreter(const byte* code_base,
                             const byte* pc,
                             int stack_depth,
                             int current_position,
                             uint32_t current_char,
                             int bytecode_length,
                             const char* bytecode_name) {
  if (!FLAG_trace_regexp_bytecodes) return;

  const bool printable = (current_char < 127 && current_char >= 32);
  const char* format =
      printable
          ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s"
          : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
  // |pc - code_base| is a ptrdiff_t, but the %02x conversion consumes an
  // unsigned int. Because |format| is not a literal the compiler cannot
  // diagnose the mismatch, so narrow the offset explicitly.
  const unsigned int pc_offset = static_cast<unsigned int>(pc - code_base);
  PrintF(format,
         pc_offset,
         stack_depth,
         current_position,
         current_char,
         printable ? current_char : '.',
         bytecode_name);

  // Raw bytes of the instruction, in hex.
  for (int i = 0; i < bytecode_length; i++) {
    printf(", %02x", pc[i]);
  }
  printf(" ");

  // Character rendering of the bytes following the first one.
  for (int i = 1; i < bytecode_length; i++) {
    unsigned char b = pc[i];
    if (b < 127 && b >= 32) {
      printf("%c", b);
    } else {
      printf(".");
    }
  }
  printf("\n");
}


// Opens the switch case for bytecode |name|. In DEBUG builds the case starts
// by tracing the instruction; the macro relies on the locals |code_base|,
// |pc|, |backtrack_sp|, |backtrack_stack_base|, |current| and |current_char|
// being in scope at the expansion site.
#define BYTECODE(name)                                                      \
  case BC_##name:                                                           \
    TraceInterpreter(code_base,                                             \
                     pc,                                                    \
                     static_cast<int>(backtrack_sp - backtrack_stack_base), \
                     current,                                               \
                     current_char,                                          \
                     BC_##name##_LENGTH,                                    \
                     #name);
#else
// Opens the switch case for bytecode |name| (no tracing outside DEBUG builds).
#define BYTECODE(name)                                                      \
  case BC_##name:
#endif
110 | |
111 | |
112 | static int32_t Load32Aligned(const byte* pc) { |
113 | DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3); |
114 | return *reinterpret_cast<const int32_t *>(pc); |
115 | } |
116 | |
117 | |
118 | static int32_t Load16Aligned(const byte* pc) { |
119 | DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1); |
120 | return *reinterpret_cast<const uint16_t *>(pc); |
121 | } |
122 | |
123 | |
124 | // A simple abstraction over the backtracking stack used by the interpreter. |
125 | // This backtracking stack does not grow automatically, but it ensures that the |
126 | // the memory held by the stack is released or remembered in a cache if the |
127 | // matching terminates. |
128 | class BacktrackStack { |
129 | public: |
130 | BacktrackStack() { data_ = NewArray<int>(kBacktrackStackSize); } |
131 | |
132 | ~BacktrackStack() { |
133 | DeleteArray(data_); |
134 | } |
135 | |
136 | int* data() const { return data_; } |
137 | |
138 | int max_size() const { return kBacktrackStackSize; } |
139 | |
140 | private: |
141 | static const int kBacktrackStackSize = 10000; |
142 | |
143 | int* data_; |
144 | |
145 | DISALLOW_COPY_AND_ASSIGN(BacktrackStack); |
146 | }; |
147 | |
148 | namespace { |
149 | |
150 | IrregexpInterpreter::Result StackOverflow(Isolate* isolate) { |
151 | // We abort interpreter execution after the stack overflow is thrown, and thus |
152 | // allow allocation here despite the outer DisallowHeapAllocationScope. |
153 | AllowHeapAllocation yes_gc; |
154 | isolate->StackOverflow(); |
155 | return IrregexpInterpreter::EXCEPTION; |
156 | } |
157 | |
158 | // Runs all pending interrupts. Callers must update unhandlified object |
159 | // references after this function completes. |
160 | IrregexpInterpreter::Result HandleInterrupts(Isolate* isolate, |
161 | Handle<String> subject_string) { |
162 | DisallowHeapAllocation no_gc; |
163 | |
164 | StackLimitCheck check(isolate); |
165 | if (check.JsHasOverflowed()) { |
166 | // A real stack overflow. |
167 | return StackOverflow(isolate); |
168 | } |
169 | |
170 | const bool was_one_byte = |
171 | String::IsOneByteRepresentationUnderneath(*subject_string); |
172 | |
173 | Object result; |
174 | { |
175 | AllowHeapAllocation yes_gc; |
176 | result = isolate->stack_guard()->HandleInterrupts(); |
177 | } |
178 | |
179 | if (result->IsException(isolate)) { |
180 | return IrregexpInterpreter::EXCEPTION; |
181 | } |
182 | |
183 | // If we changed between a LATIN1 and a UC16 string, we need to restart |
184 | // regexp matching with the appropriate template instantiation of RawMatch. |
185 | if (String::IsOneByteRepresentationUnderneath(*subject_string) != |
186 | was_one_byte) { |
187 | return IrregexpInterpreter::RETRY; |
188 | } |
189 | |
190 | return IrregexpInterpreter::SUCCESS; |
191 | } |
192 | |
193 | template <typename Char> |
194 | void UpdateCodeAndSubjectReferences(Isolate* isolate, |
195 | Handle<ByteArray> code_array, |
196 | Handle<String> subject_string, |
197 | const byte** code_base_out, |
198 | const byte** pc_out, |
199 | Vector<const Char>* subject_string_out) { |
200 | DisallowHeapAllocation no_gc; |
201 | |
202 | if (*code_base_out != code_array->GetDataStartAddress()) { |
203 | const intptr_t pc_offset = *pc_out - *code_base_out; |
204 | DCHECK_GT(pc_offset, 0); |
205 | *code_base_out = code_array->GetDataStartAddress(); |
206 | *pc_out = *code_base_out + pc_offset; |
207 | } |
208 | |
209 | DCHECK(subject_string->IsFlat()); |
210 | *subject_string_out = subject_string->GetCharVector<Char>(no_gc); |
211 | } |
212 | |
// Interprets the Irregexp bytecode stored in |code_array| against |subject|.
//
// |registers| is read and written by the register bytecodes, |current| is the
// starting position in |subject|, and |current_char| is the initial value of
// the current-character variable. |subject_string| is used for interrupt
// handling and for refreshing |subject| afterwards (see POP_BT).
//
// Returns SUCCESS when BC_SUCCEED is executed, FAILURE when BC_FAIL is
// executed, the result of StackOverflow() when a push finds no space left on
// the backtrack stack, or any non-SUCCESS result reported by
// HandleInterrupts() while executing BC_POP_BT.
213 | template <typename Char> |
214 | IrregexpInterpreter::Result RawMatch(Isolate* isolate, |
215 | Handle<ByteArray> code_array, |
216 | Handle<String> subject_string, |
217 | Vector<const Char> subject, int* registers, |
218 | int current, uint32_t current_char) { |
219 | DisallowHeapAllocation no_gc; |
220 | |
// |pc| walks the bytecode; |code_base| is kept so that jump targets, which are
// stored as offsets, can be turned back into addresses.
221 | const byte* pc = code_array->GetDataStartAddress(); |
222 | const byte* code_base = pc; |
223 | |
224 | // BacktrackStack ensures that the memory allocated for the backtracking stack |
225 | // is returned to the system or cached if there is no stack being cached at |
226 | // the moment. |
227 | BacktrackStack backtrack_stack; |
228 | int* backtrack_stack_base = backtrack_stack.data(); |
229 | int* backtrack_sp = backtrack_stack_base; |
// Number of free slots left on the backtrack stack; decremented on every push
// and incremented on every pop.
230 | int backtrack_stack_space = backtrack_stack.max_size(); |
231 | #ifdef DEBUG |
232 | if (FLAG_trace_regexp_bytecodes) { |
233 | PrintF("\n\nStart bytecode interpreter\n\n" ); |
234 | } |
235 | #endif |
// Dispatch loop. Each iteration loads the 32-bit instruction word at |pc|. The
// opcode is selected with BYTECODE_MASK; where an instruction carries an
// inline operand it is obtained with insn >> BYTECODE_SHIFT. Further operands
// are read from pc + 4, pc + 8, ... Every case either advances |pc| by the
// instruction's BC_*_LENGTH or sets it to code_base + <target offset>.
236 | while (true) { |
237 | int32_t insn = Load32Aligned(pc); |
238 | switch (insn & BYTECODE_MASK) { |
239 | BYTECODE(BREAK) |
240 | UNREACHABLE(); |
// Push bytecodes: each one consumes a slot of |backtrack_stack_space| and
// returns the StackOverflow() result if none is left.
241 | BYTECODE(PUSH_CP) |
242 | if (--backtrack_stack_space < 0) { |
243 | return StackOverflow(isolate); |
244 | } |
245 | *backtrack_sp++ = current; |
246 | pc += BC_PUSH_CP_LENGTH; |
247 | break; |
248 | BYTECODE(PUSH_BT) |
249 | if (--backtrack_stack_space < 0) { |
250 | return StackOverflow(isolate); |
251 | } |
252 | *backtrack_sp++ = Load32Aligned(pc + 4); |
253 | pc += BC_PUSH_BT_LENGTH; |
254 | break; |
255 | BYTECODE(PUSH_REGISTER) |
256 | if (--backtrack_stack_space < 0) { |
257 | return StackOverflow(isolate); |
258 | } |
259 | *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; |
260 | pc += BC_PUSH_REGISTER_LENGTH; |
261 | break; |
// Register bytecodes: the register index is the inline operand.
262 | BYTECODE(SET_REGISTER) |
263 | registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); |
264 | pc += BC_SET_REGISTER_LENGTH; |
265 | break; |
266 | BYTECODE(ADVANCE_REGISTER) |
267 | registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); |
268 | pc += BC_ADVANCE_REGISTER_LENGTH; |
269 | break; |
270 | BYTECODE(SET_REGISTER_TO_CP) |
271 | registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4); |
272 | pc += BC_SET_REGISTER_TO_CP_LENGTH; |
273 | break; |
274 | BYTECODE(SET_CP_TO_REGISTER) |
275 | current = registers[insn >> BYTECODE_SHIFT]; |
276 | pc += BC_SET_CP_TO_REGISTER_LENGTH; |
277 | break; |
// The stack pointer is saved as a depth relative to the stack base.
278 | BYTECODE(SET_REGISTER_TO_SP) |
279 | registers[insn >> BYTECODE_SHIFT] = |
280 | static_cast<int>(backtrack_sp - backtrack_stack_base); |
281 | pc += BC_SET_REGISTER_TO_SP_LENGTH; |
282 | break; |
// Restoring the stack pointer also recomputes the remaining free space.
283 | BYTECODE(SET_SP_TO_REGISTER) |
284 | backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT]; |
285 | backtrack_stack_space = backtrack_stack.max_size() - |
286 | static_cast<int>(backtrack_sp - backtrack_stack_base); |
287 | pc += BC_SET_SP_TO_REGISTER_LENGTH; |
288 | break; |
289 | BYTECODE(POP_CP) |
290 | backtrack_stack_space++; |
291 | --backtrack_sp; |
292 | current = *backtrack_sp; |
293 | pc += BC_POP_CP_LENGTH; |
294 | break; |
295 | // clang-format off |
// POP_BT is the only bytecode that services interrupts. Any result other than
// SUCCESS is returned to the caller as is. Afterwards |code_base|, |pc| and
// |subject| are refreshed before the backtrack target, an offset relative to
// |code_base|, is popped and jumped to.
296 | BYTECODE(POP_BT) { |
297 | IrregexpInterpreter::Result return_code = HandleInterrupts( |
298 | isolate, subject_string); |
299 | if (return_code != IrregexpInterpreter::SUCCESS) return return_code; |
300 | |
301 | UpdateCodeAndSubjectReferences(isolate, code_array, subject_string, |
302 | &code_base, &pc, &subject); |
303 | |
304 | backtrack_stack_space++; |
305 | --backtrack_sp; |
306 | pc = code_base + *backtrack_sp; |
307 | break; |
308 | } |
309 | BYTECODE(POP_REGISTER) // clang-format on |
310 | backtrack_stack_space++; |
311 | --backtrack_sp; |
312 | registers[insn >> BYTECODE_SHIFT] = *backtrack_sp; |
313 | pc += BC_POP_REGISTER_LENGTH; |
314 | break; |
// The two bytecodes that terminate interpretation normally.
315 | BYTECODE(FAIL) |
316 | return IrregexpInterpreter::FAILURE; |
317 | BYTECODE(SUCCEED) |
318 | return IrregexpInterpreter::SUCCESS; |
319 | BYTECODE(ADVANCE_CP) |
320 | current += insn >> BYTECODE_SHIFT; |
321 | pc += BC_ADVANCE_CP_LENGTH; |
322 | break; |
323 | BYTECODE(GOTO) |
324 | pc = code_base + Load32Aligned(pc + 4); |
325 | break; |
326 | BYTECODE(ADVANCE_CP_AND_GOTO) |
327 | current += insn >> BYTECODE_SHIFT; |
328 | pc = code_base + Load32Aligned(pc + 4); |
329 | break; |
// If the entry on top of the backtrack stack equals |current|, drop it and
// jump to the target; otherwise continue with the next instruction.
330 | BYTECODE(CHECK_GREEDY) |
331 | if (current == backtrack_sp[-1]) { |
332 | backtrack_sp--; |
333 | backtrack_stack_space++; |
334 | pc = code_base + Load32Aligned(pc + 4); |
335 | } else { |
336 | pc += BC_CHECK_GREEDY_LENGTH; |
337 | } |
338 | break; |
// Character loads. The checked variants jump to the target at pc + 4 when the
// requested range is not fully inside the subject; the UNCHECKED variants
// perform no bounds check.
339 | BYTECODE(LOAD_CURRENT_CHAR) { |
340 | int pos = current + (insn >> BYTECODE_SHIFT); |
341 | if (pos >= subject.length() || pos < 0) { |
342 | pc = code_base + Load32Aligned(pc + 4); |
343 | } else { |
344 | current_char = subject[pos]; |
345 | pc += BC_LOAD_CURRENT_CHAR_LENGTH; |
346 | } |
347 | break; |
348 | } |
349 | BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) { |
350 | int pos = current + (insn >> BYTECODE_SHIFT); |
351 | current_char = subject[pos]; |
352 | pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH; |
353 | break; |
354 | } |
// Two characters are packed into |current_char|: the first one in the low
// bits, the second one shifted up by the width of Char.
355 | BYTECODE(LOAD_2_CURRENT_CHARS) { |
356 | int pos = current + (insn >> BYTECODE_SHIFT); |
357 | if (pos + 2 > subject.length() || pos < 0) { |
358 | pc = code_base + Load32Aligned(pc + 4); |
359 | } else { |
360 | Char next = subject[pos + 1]; |
361 | current_char = |
362 | (subject[pos] | (next << (kBitsPerByte * sizeof(Char)))); |
363 | pc += BC_LOAD_2_CURRENT_CHARS_LENGTH; |
364 | } |
365 | break; |
366 | } |
367 | BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) { |
368 | int pos = current + (insn >> BYTECODE_SHIFT); |
369 | Char next = subject[pos + 1]; |
370 | current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char)))); |
371 | pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH; |
372 | break; |
373 | } |
// Four characters are packed one per byte, so these bytecodes are expected
// only for one-byte subjects (see the DCHECK).
374 | BYTECODE(LOAD_4_CURRENT_CHARS) { |
375 | DCHECK_EQ(1, sizeof(Char)); |
376 | int pos = current + (insn >> BYTECODE_SHIFT); |
377 | if (pos + 4 > subject.length() || pos < 0) { |
378 | pc = code_base + Load32Aligned(pc + 4); |
379 | } else { |
380 | Char next1 = subject[pos + 1]; |
381 | Char next2 = subject[pos + 2]; |
382 | Char next3 = subject[pos + 3]; |
383 | current_char = (subject[pos] | |
384 | (next1 << 8) | |
385 | (next2 << 16) | |
386 | (next3 << 24)); |
387 | pc += BC_LOAD_4_CURRENT_CHARS_LENGTH; |
388 | } |
389 | break; |
390 | } |
391 | BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) { |
392 | DCHECK_EQ(1, sizeof(Char)); |
393 | int pos = current + (insn >> BYTECODE_SHIFT); |
394 | Char next1 = subject[pos + 1]; |
395 | Char next2 = subject[pos + 2]; |
396 | Char next3 = subject[pos + 3]; |
397 | current_char = (subject[pos] | |
398 | (next1 << 8) | |
399 | (next2 << 16) | |
400 | (next3 << 24)); |
401 | pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH; |
402 | break; |
403 | } |
// Character tests. Each one jumps to its target when the stated condition
// holds and otherwise continues with the next instruction. The *_4_CHARS
// variants read the comparison value from pc + 4 instead of the inline
// operand.
404 | BYTECODE(CHECK_4_CHARS) { |
405 | uint32_t c = Load32Aligned(pc + 4); |
406 | if (c == current_char) { |
407 | pc = code_base + Load32Aligned(pc + 8); |
408 | } else { |
409 | pc += BC_CHECK_4_CHARS_LENGTH; |
410 | } |
411 | break; |
412 | } |
413 | BYTECODE(CHECK_CHAR) { |
414 | uint32_t c = (insn >> BYTECODE_SHIFT); |
415 | if (c == current_char) { |
416 | pc = code_base + Load32Aligned(pc + 4); |
417 | } else { |
418 | pc += BC_CHECK_CHAR_LENGTH; |
419 | } |
420 | break; |
421 | } |
422 | BYTECODE(CHECK_NOT_4_CHARS) { |
423 | uint32_t c = Load32Aligned(pc + 4); |
424 | if (c != current_char) { |
425 | pc = code_base + Load32Aligned(pc + 8); |
426 | } else { |
427 | pc += BC_CHECK_NOT_4_CHARS_LENGTH; |
428 | } |
429 | break; |
430 | } |
431 | BYTECODE(CHECK_NOT_CHAR) { |
432 | uint32_t c = (insn >> BYTECODE_SHIFT); |
433 | if (c != current_char) { |
434 | pc = code_base + Load32Aligned(pc + 4); |
435 | } else { |
436 | pc += BC_CHECK_NOT_CHAR_LENGTH; |
437 | } |
438 | break; |
439 | } |
// Masked comparisons: |current_char| is ANDed with a mask operand before it is
// compared with |c|.
440 | BYTECODE(AND_CHECK_4_CHARS) { |
441 | uint32_t c = Load32Aligned(pc + 4); |
442 | if (c == (current_char & Load32Aligned(pc + 8))) { |
443 | pc = code_base + Load32Aligned(pc + 12); |
444 | } else { |
445 | pc += BC_AND_CHECK_4_CHARS_LENGTH; |
446 | } |
447 | break; |
448 | } |
449 | BYTECODE(AND_CHECK_CHAR) { |
450 | uint32_t c = (insn >> BYTECODE_SHIFT); |
451 | if (c == (current_char & Load32Aligned(pc + 4))) { |
452 | pc = code_base + Load32Aligned(pc + 8); |
453 | } else { |
454 | pc += BC_AND_CHECK_CHAR_LENGTH; |
455 | } |
456 | break; |
457 | } |
458 | BYTECODE(AND_CHECK_NOT_4_CHARS) { |
459 | uint32_t c = Load32Aligned(pc + 4); |
460 | if (c != (current_char & Load32Aligned(pc + 8))) { |
461 | pc = code_base + Load32Aligned(pc + 12); |
462 | } else { |
463 | pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH; |
464 | } |
465 | break; |
466 | } |
467 | BYTECODE(AND_CHECK_NOT_CHAR) { |
468 | uint32_t c = (insn >> BYTECODE_SHIFT); |
469 | if (c != (current_char & Load32Aligned(pc + 4))) { |
470 | pc = code_base + Load32Aligned(pc + 8); |
471 | } else { |
472 | pc += BC_AND_CHECK_NOT_CHAR_LENGTH; |
473 | } |
474 | break; |
475 | } |
// Subtracts |minus| from |current_char| and applies |mask| before comparing;
// both operands are 16-bit values stored at pc + 4 and pc + 6.
476 | BYTECODE(MINUS_AND_CHECK_NOT_CHAR) { |
477 | uint32_t c = (insn >> BYTECODE_SHIFT); |
478 | uint32_t minus = Load16Aligned(pc + 4); |
479 | uint32_t mask = Load16Aligned(pc + 6); |
480 | if (c != ((current_char - minus) & mask)) { |
481 | pc = code_base + Load32Aligned(pc + 8); |
482 | } else { |
483 | pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH; |
484 | } |
485 | break; |
486 | } |
// Range tests; both bounds are inclusive 16-bit values.
487 | BYTECODE(CHECK_CHAR_IN_RANGE) { |
488 | uint32_t from = Load16Aligned(pc + 4); |
489 | uint32_t to = Load16Aligned(pc + 6); |
490 | if (from <= current_char && current_char <= to) { |
491 | pc = code_base + Load32Aligned(pc + 8); |
492 | } else { |
493 | pc += BC_CHECK_CHAR_IN_RANGE_LENGTH; |
494 | } |
495 | break; |
496 | } |
497 | BYTECODE(CHECK_CHAR_NOT_IN_RANGE) { |
498 | uint32_t from = Load16Aligned(pc + 4); |
499 | uint32_t to = Load16Aligned(pc + 6); |
500 | if (from > current_char || current_char > to) { |
501 | pc = code_base + Load32Aligned(pc + 8); |
502 | } else { |
503 | pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH; |
504 | } |
505 | break; |
506 | } |
// The bit table is embedded in the instruction starting at pc + 8. The masked
// character selects a byte of the table, and its low bits select a bit within
// that byte.
507 | BYTECODE(CHECK_BIT_IN_TABLE) { |
508 | int mask = RegExpMacroAssembler::kTableMask; |
509 | byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)]; |
510 | int bit = (current_char & (kBitsPerByte - 1)); |
511 | if ((b & (1 << bit)) != 0) { |
512 | pc = code_base + Load32Aligned(pc + 4); |
513 | } else { |
514 | pc += BC_CHECK_BIT_IN_TABLE_LENGTH; |
515 | } |
516 | break; |
517 | } |
518 | BYTECODE(CHECK_LT) { |
519 | uint32_t limit = (insn >> BYTECODE_SHIFT); |
520 | if (current_char < limit) { |
521 | pc = code_base + Load32Aligned(pc + 4); |
522 | } else { |
523 | pc += BC_CHECK_LT_LENGTH; |
524 | } |
525 | break; |
526 | } |
527 | BYTECODE(CHECK_GT) { |
528 | uint32_t limit = (insn >> BYTECODE_SHIFT); |
529 | if (current_char > limit) { |
530 | pc = code_base + Load32Aligned(pc + 4); |
531 | } else { |
532 | pc += BC_CHECK_GT_LENGTH; |
533 | } |
534 | break; |
535 | } |
// Register tests; the register index is the inline operand.
536 | BYTECODE(CHECK_REGISTER_LT) |
537 | if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) { |
538 | pc = code_base + Load32Aligned(pc + 8); |
539 | } else { |
540 | pc += BC_CHECK_REGISTER_LT_LENGTH; |
541 | } |
542 | break; |
543 | BYTECODE(CHECK_REGISTER_GE) |
544 | if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) { |
545 | pc = code_base + Load32Aligned(pc + 8); |
546 | } else { |
547 | pc += BC_CHECK_REGISTER_GE_LENGTH; |
548 | } |
549 | break; |
550 | BYTECODE(CHECK_REGISTER_EQ_POS) |
551 | if (registers[insn >> BYTECODE_SHIFT] == current) { |
552 | pc = code_base + Load32Aligned(pc + 4); |
553 | } else { |
554 | pc += BC_CHECK_REGISTER_EQ_POS_LENGTH; |
555 | } |
556 | break; |
// Note the inverted shape: equal registers continue with the next instruction,
// unequal registers take the jump.
557 | BYTECODE(CHECK_NOT_REGS_EQUAL) |
558 | if (registers[insn >> BYTECODE_SHIFT] == |
559 | registers[Load32Aligned(pc + 4)]) { |
560 | pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH; |
561 | } else { |
562 | pc = code_base + Load32Aligned(pc + 8); |
563 | } |
564 | break; |
// Back-reference checks. The capture is described by two consecutive
// registers: its start (|from|) and its end, from which |len| is derived. When
// |from| is negative or |len| is not positive, nothing is compared and
// |current| is left unchanged. Otherwise the text at |current| must equal the
// capture; on success |current| moves past the matched text, on mismatch (or
// when the text would not fit inside the subject) the jump at pc + 4 is taken.
565 | BYTECODE(CHECK_NOT_BACK_REF) { |
566 | int from = registers[insn >> BYTECODE_SHIFT]; |
567 | int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
568 | if (from >= 0 && len > 0) { |
569 | if (current + len > subject.length() || |
570 | CompareChars(&subject[from], &subject[current], len) != 0) { |
571 | pc = code_base + Load32Aligned(pc + 4); |
572 | break; |
573 | } |
574 | current += len; |
575 | } |
576 | pc += BC_CHECK_NOT_BACK_REF_LENGTH; |
577 | break; |
578 | } |
// Backward variant: compares the |len| characters that end at |current| and
// moves |current| backwards on success.
579 | BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) { |
580 | int from = registers[insn >> BYTECODE_SHIFT]; |
581 | int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
582 | if (from >= 0 && len > 0) { |
583 | if (current - len < 0 || |
584 | CompareChars(&subject[from], &subject[current - len], len) != 0) { |
585 | pc = code_base + Load32Aligned(pc + 4); |
586 | break; |
587 | } |
588 | current -= len; |
589 | } |
590 | pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; |
591 | break; |
592 | } |
// The UNICODE opcode shares the implementation of the plain NO_CASE opcode;
// the opcode is inspected again below to derive |unicode|.
593 | BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) |
594 | V8_FALLTHROUGH; |
595 | BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { |
596 | bool unicode = |
597 | (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE; |
598 | int from = registers[insn >> BYTECODE_SHIFT]; |
599 | int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
600 | if (from >= 0 && len > 0) { |
601 | if (current + len > subject.length() || |
602 | !BackRefMatchesNoCase(isolate, from, current, len, subject, |
603 | unicode)) { |
604 | pc = code_base + Load32Aligned(pc + 4); |
605 | break; |
606 | } |
607 | current += len; |
608 | } |
609 | pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; |
610 | break; |
611 | } |
// Same sharing scheme as above, for the backward case-insensitive variants.
612 | BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) |
613 | V8_FALLTHROUGH; |
614 | BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { |
615 | bool unicode = (insn & BYTECODE_MASK) == |
616 | BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD; |
617 | int from = registers[insn >> BYTECODE_SHIFT]; |
618 | int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
619 | if (from >= 0 && len > 0) { |
620 | if (current - len < 0 || |
621 | !BackRefMatchesNoCase(isolate, from, current - len, len, subject, |
622 | unicode)) { |
623 | pc = code_base + Load32Aligned(pc + 4); |
624 | break; |
625 | } |
626 | current -= len; |
627 | } |
628 | pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; |
629 | break; |
630 | } |
// Jumps when |current| is 0.
631 | BYTECODE(CHECK_AT_START) |
632 | if (current == 0) { |
633 | pc = code_base + Load32Aligned(pc + 4); |
634 | } else { |
635 | pc += BC_CHECK_AT_START_LENGTH; |
636 | } |
637 | break; |
// Jumps unless |current| plus the inline operand is 0.
638 | BYTECODE(CHECK_NOT_AT_START) |
639 | if (current + (insn >> BYTECODE_SHIFT) == 0) { |
640 | pc += BC_CHECK_NOT_AT_START_LENGTH; |
641 | } else { |
642 | pc = code_base + Load32Aligned(pc + 4); |
643 | } |
644 | break; |
// If more than |by| characters remain after |current|, skip ahead so that
// exactly |by| remain and reload |current_char| with the character just before
// the new position. The operand is extracted with an unsigned shift.
645 | BYTECODE(SET_CURRENT_POSITION_FROM_END) { |
646 | int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT; |
647 | if (subject.length() - current > by) { |
648 | current = subject.length() - by; |
649 | current_char = subject[current - 1]; |
650 | } |
651 | pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; |
652 | break; |
653 | } |
654 | default: |
655 | UNREACHABLE(); |
656 | break; |
657 | } |
658 | } |
659 | } |
660 | |
661 | } // namespace |
662 | |
663 | // static |
664 | IrregexpInterpreter::Result IrregexpInterpreter::Match( |
665 | Isolate* isolate, Handle<ByteArray> code_array, |
666 | Handle<String> subject_string, int* registers, int start_position) { |
667 | DCHECK(subject_string->IsFlat()); |
668 | |
669 | // Note: Heap allocation *is* allowed in two situations: |
670 | // 1. When creating & throwing a stack overflow exception. The interpreter |
671 | // aborts afterwards, and thus possible-moved objects are never used. |
672 | // 2. When handling interrupts. We manually relocate unhandlified references |
673 | // after interrupts have run. |
674 | DisallowHeapAllocation no_gc; |
675 | |
676 | uc16 previous_char = '\n'; |
677 | String::FlatContent subject_content = subject_string->GetFlatContent(no_gc); |
678 | if (subject_content.IsOneByte()) { |
679 | Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); |
680 | if (start_position != 0) previous_char = subject_vector[start_position - 1]; |
681 | return RawMatch(isolate, code_array, subject_string, subject_vector, |
682 | registers, start_position, previous_char); |
683 | } else { |
684 | DCHECK(subject_content.IsTwoByte()); |
685 | Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); |
686 | if (start_position != 0) previous_char = subject_vector[start_position - 1]; |
687 | return RawMatch(isolate, code_array, subject_string, subject_vector, |
688 | registers, start_position, previous_char); |
689 | } |
690 | } |
691 | |
692 | } // namespace internal |
693 | } // namespace v8 |
694 | |