1 | // Copyright 2012 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #if V8_TARGET_ARCH_X64 |
6 | |
7 | #include "src/regexp/x64/regexp-macro-assembler-x64.h" |
8 | |
9 | #include "src/heap/factory.h" |
10 | #include "src/log.h" |
11 | #include "src/macro-assembler.h" |
12 | #include "src/objects-inl.h" |
13 | #include "src/regexp/regexp-macro-assembler.h" |
14 | #include "src/regexp/regexp-stack.h" |
15 | #include "src/unicode.h" |
16 | |
17 | namespace v8 { |
18 | namespace internal { |
19 | |
20 | /* |
21 | * This assembler uses the following register assignment convention |
22 | * - rdx : Currently loaded character(s) as Latin1 or UC16. Must be loaded |
23 | * using LoadCurrentCharacter before using any of the dispatch methods. |
24 | * Temporarily stores the index of capture start after a matching pass |
25 | * for a global regexp. |
26 | * - rdi : Current position in input, as negative offset from end of string. |
27 | * Please notice that this is the byte offset, not the character |
28 | * offset! Is always a 32-bit signed (negative) offset, but must be |
29 | * maintained sign-extended to 64 bits, since it is used as index. |
30 | * - rsi : End of input (points to byte after last character in input), |
31 | * so that rsi+rdi points to the current character. |
32 | * - rbp : Frame pointer. Used to access arguments, local variables and |
33 | * RegExp registers. |
34 | * - rsp : Points to tip of C stack. |
35 | * - rcx : Points to tip of backtrack stack. The backtrack stack contains |
36 | * only 32-bit values. Most are offsets from some base (e.g., character |
37 | * positions from end of string or code location from Code pointer). |
38 | * - r8 : Code object pointer. Used to convert between absolute and |
39 | * code-object-relative addresses. |
40 | * |
41 | * The registers rax, rbx, r9 and r11 are free to use for computations. |
42 | * If changed to use r12+, they should be saved as callee-save registers. |
43 | * The macro assembler special register r13 (kRootRegister) isn't special |
44 | * during execution of RegExp code (it doesn't hold the value assumed when |
45 | * creating JS code), so Root related macro operations can be used. |
46 | * |
47 | * Each call to a C++ method should retain these registers. |
48 | * |
49 | * The stack will have the following content, in some order, indexable from the |
50 | * frame pointer (see, e.g., kStackHighEnd): |
51 | * - Isolate* isolate (address of the current isolate) |
52 | * - direct_call (if 1, direct call from JavaScript code, if 0 call |
53 | * through the runtime system) |
54 | * - stack_area_base (high end of the memory area to use as |
55 | * backtracking stack) |
56 | * - capture array size (may fit multiple sets of matches) |
57 | * - int* capture_array (int[num_saved_registers_], for output). |
58 | * - end of input (address of end of string) |
59 | * - start of input (address of first character in string) |
60 | * - start index (character index of start) |
61 | * - String input_string (input string) |
62 | * - return address |
63 | * - backup of callee save registers (rbx, possibly rsi and rdi). |
64 | * - success counter (only useful for global regexp to count matches) |
65 | * - Offset of location before start of input (effectively character |
66 | * string start - 1). Used to initialize capture registers to a |
67 | * non-position. |
68 | * - At start of string (if 1, we are starting at the start of the |
69 | * string, otherwise 0) |
70 | * - register 0 rbp[-n] (Only positions must be stored in the first |
71 | * - register 1 rbp[-n-8] num_saved_registers_ registers) |
72 | * - ... |
73 | * |
 * The first num_saved_registers_ registers are initialized to point to
 * "character -1" in the string (i.e., char_size() bytes before the first
 * character of the string). The remaining registers start out uninitialized.
77 | * |
 * The first nine values (isolate through input_string) must be provided by
 * the calling code by calling the code's entry address cast to a function
 * pointer with the following signature:
81 | * int (*match)(String input_string, |
82 | * int start_index, |
83 | * Address start, |
84 | * Address end, |
85 | * int* capture_output_array, |
86 | * int num_capture_registers, |
87 | * byte* stack_area_base, |
88 | * bool direct_call = false, |
89 | * Isolate* isolate); |
90 | */ |
91 | |
// Shorthand used throughout this file: a line written as `__ insn(args)`
// expands to ACCESS_MASM((&masm_)) insn(args), i.e. it goes through this
// assembler's embedded masm_ member.
#define __ ACCESS_MASM((&masm_))

// Namespace-scope definition of the static class constant. It has no
// initializer here, so the value must be given at the in-class declaration.
const int RegExpMacroAssemblerX64::kRegExpCodeSize;
95 | |
// Creates an assembler for the given character mode with a code buffer of
// kRegExpCodeSize bytes.
//
// registers_to_save is used both as the initial total register count
// (num_registers_) and as the number of saved registers
// (num_saved_registers_); it must be even (DCHECKed below).
//
// The generated code starts with a jump to entry_label_, which GetCode binds
// later when it emits the entry sequence. Code emitted after construction
// starts at start_label_, which the entry sequence jumps back to.
RegExpMacroAssemblerX64::RegExpMacroAssemblerX64(Isolate* isolate, Zone* zone,
                                                 Mode mode,
                                                 int registers_to_save)
    : NativeRegExpMacroAssembler(isolate, zone),
      masm_(isolate, CodeObjectRequired::kYes,
            NewAssemblerBuffer(kRegExpCodeSize)),
      no_root_array_scope_(&masm_),
      code_relative_fixup_positions_(zone),
      mode_(mode),
      num_registers_(registers_to_save),
      num_saved_registers_(registers_to_save),
      entry_label_(),
      start_label_(),
      success_label_(),
      backtrack_label_(),
      exit_label_() {
  DCHECK_EQ(0, registers_to_save % 2);
  __ jmp(&entry_label_);   // We'll write the entry code when we know more.
  __ bind(&start_label_);  // And then continue from here.
}
116 | |
117 | RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() { |
118 | // Unuse labels in case we throw away the assembler without calling GetCode. |
119 | entry_label_.Unuse(); |
120 | start_label_.Unuse(); |
121 | success_label_.Unuse(); |
122 | backtrack_label_.Unuse(); |
123 | exit_label_.Unuse(); |
124 | check_preempt_label_.Unuse(); |
125 | stack_overflow_label_.Unuse(); |
126 | } |
127 | |
128 | |
129 | int RegExpMacroAssemblerX64::stack_limit_slack() { |
130 | return RegExpStack::kStackLimitSlack; |
131 | } |
132 | |
133 | |
134 | void RegExpMacroAssemblerX64::AdvanceCurrentPosition(int by) { |
135 | if (by != 0) { |
136 | __ addq(rdi, Immediate(by * char_size())); |
137 | } |
138 | } |
139 | |
140 | |
141 | void RegExpMacroAssemblerX64::AdvanceRegister(int reg, int by) { |
142 | DCHECK_LE(0, reg); |
143 | DCHECK_GT(num_registers_, reg); |
144 | if (by != 0) { |
145 | __ addq(register_location(reg), Immediate(by)); |
146 | } |
147 | } |
148 | |
149 | |
150 | void RegExpMacroAssemblerX64::Backtrack() { |
151 | CheckPreemption(); |
152 | // Pop Code offset from backtrack stack, add Code and jump to location. |
153 | Pop(rbx); |
154 | __ addq(rbx, code_object_pointer()); |
155 | __ jmp(rbx); |
156 | } |
157 | |
158 | |
// Binds `label` at the current position of the embedded macro assembler by
// forwarding to its bind().
void RegExpMacroAssemblerX64::Bind(Label* label) {
  __ bind(label);
}
162 | |
163 | |
164 | void RegExpMacroAssemblerX64::CheckCharacter(uint32_t c, Label* on_equal) { |
165 | __ cmpl(current_character(), Immediate(c)); |
166 | BranchOrBacktrack(equal, on_equal); |
167 | } |
168 | |
169 | |
170 | void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) { |
171 | __ cmpl(current_character(), Immediate(limit)); |
172 | BranchOrBacktrack(greater, on_greater); |
173 | } |
174 | |
175 | |
176 | void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) { |
177 | __ leaq(rax, Operand(rdi, -char_size())); |
178 | __ cmpq(rax, Operand(rbp, kStringStartMinusOne)); |
179 | BranchOrBacktrack(equal, on_at_start); |
180 | } |
181 | |
182 | |
183 | void RegExpMacroAssemblerX64::CheckNotAtStart(int cp_offset, |
184 | Label* on_not_at_start) { |
185 | __ leaq(rax, Operand(rdi, -char_size() + cp_offset * char_size())); |
186 | __ cmpq(rax, Operand(rbp, kStringStartMinusOne)); |
187 | BranchOrBacktrack(not_equal, on_not_at_start); |
188 | } |
189 | |
190 | |
191 | void RegExpMacroAssemblerX64::CheckCharacterLT(uc16 limit, Label* on_less) { |
192 | __ cmpl(current_character(), Immediate(limit)); |
193 | BranchOrBacktrack(less, on_less); |
194 | } |
195 | |
196 | |
197 | void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) { |
198 | Label fallthrough; |
199 | __ cmpl(rdi, Operand(backtrack_stackpointer(), 0)); |
200 | __ j(not_equal, &fallthrough); |
201 | Drop(); |
202 | BranchOrBacktrack(no_condition, on_equal); |
203 | __ bind(&fallthrough); |
204 | } |
205 | |
206 | |
// Emits a case-insensitive back-reference check: the text captured between
// registers start_reg (start) and start_reg + 1 (end) is compared with the
// input at the current position.
//
// - start_reg:     index of the register holding the capture start; the end
//                  is read from start_reg + 1.
// - read_backward: if true, the input compared is the text that ends at the
//                  current position, and on success the position moves
//                  backwards by the capture length; otherwise forwards.
// - unicode:       only consulted when V8_INTL_SUPPORT is defined; it selects
//                  whether the UC16 helper receives 0 or the isolate address.
// - on_no_match:   target passed to BranchOrBacktrack on mismatch or when not
//                  enough input is available.
//
// A capture of length zero (empty or cleared) falls through without emitting
// any comparison. LATIN1 mode compares inline; UC16 mode calls the C function
// re_case_insensitive_compare_uc16.
void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
  Label fallthrough;
  ReadPositionFromRegister(rdx, start_reg);  // Offset of start of capture
  ReadPositionFromRegister(rbx, start_reg + 1);  // Offset of end of capture
  __ subq(rbx, rdx);  // Length of capture.

  // -----------------------
  // rdx  = Start offset of capture.
  // rbx = Length of capture

  // At this point, the capture registers are either both set or both cleared.
  // If the capture length is zero, then the capture is either empty or cleared.
  // Fall through in both cases.
  __ j(equal, &fallthrough);

  // -----------------------
  // rdx - Start of capture
  // rbx - length of capture
  // Check that there are sufficient characters left in the input.
  if (read_backward) {
    // Not enough room if position <= (string start - 1) + length.
    __ movl(rax, Operand(rbp, kStringStartMinusOne));
    __ addl(rax, rbx);
    __ cmpl(rdi, rax);
    BranchOrBacktrack(less_equal, on_no_match);
  } else {
    // rdi is a negative offset from the end of input, so a positive
    // position + length would reach past the end.
    __ movl(rax, rdi);
    __ addl(rax, rbx);
    BranchOrBacktrack(greater, on_no_match);
  }

  if (mode_ == LATIN1) {
    Label loop_increment;
    // The loop below jumps to on_no_match directly (not via
    // BranchOrBacktrack), so a null label is replaced by the backtrack label.
    if (on_no_match == nullptr) {
      on_no_match = &backtrack_label_;
    }

    __ leaq(r9, Operand(rsi, rdx, times_1, 0));
    __ leaq(r11, Operand(rsi, rdi, times_1, 0));
    if (read_backward) {
      __ subq(r11, rbx);  // Offset by length when matching backwards.
    }
    __ addq(rbx, r9);  // End of capture
    // ---------------------
    // r11 - current input character address
    // r9 - current capture character address
    // rbx - end of capture

    Label loop;
    __ bind(&loop);
    __ movzxbl(rdx, Operand(r9, 0));
    __ movzxbl(rax, Operand(r11, 0));
    // al - input character
    // dl - capture character
    __ cmpb(rax, rdx);
    __ j(equal, &loop_increment);

    // Mismatch, try case-insensitive match (converting letters to lower-case).
    // I.e., if or-ing with 0x20 makes values equal and in range 'a'-'z', it's
    // a match.
    __ orq(rax, Immediate(0x20));  // Convert match character to lower-case.
    __ orq(rdx, Immediate(0x20));  // Convert capture character to lower-case.
    __ cmpb(rax, rdx);
    __ j(not_equal, on_no_match);  // Definitely not equal.
    __ subb(rax, Immediate('a'));
    __ cmpb(rax, Immediate('z' - 'a'));
    __ j(below_equal, &loop_increment);  // In range 'a'-'z'.
    // Latin-1: Check for values in range [224,254] but not 247.
    __ subb(rax, Immediate(224 - 'a'));
    __ cmpb(rax, Immediate(254 - 224));
    __ j(above, on_no_match);  // Weren't Latin-1 letters.
    __ cmpb(rax, Immediate(247 - 224));  // Check for 247.
    __ j(equal, on_no_match);
    __ bind(&loop_increment);
    // Increment pointers into match and capture strings.
    __ addq(r11, Immediate(1));
    __ addq(r9, Immediate(1));
    // Compare to end of capture, and loop if not done.
    __ cmpq(r9, rbx);
    __ j(below, &loop);

    // Compute new value of character position after the matched part.
    __ movq(rdi, r11);
    __ subq(rdi, rsi);
    if (read_backward) {
      // Subtract match length if we matched backward.
      __ addq(rdi, register_location(start_reg));
      __ subq(rdi, register_location(start_reg + 1));
    }
  } else {
    DCHECK(mode_ == UC16);
    // Save important/volatile registers before calling C function.
#ifndef _WIN64
    // Caller save on Linux and callee save in Windows.
    __ pushq(rsi);
    __ pushq(rdi);
#endif
    __ pushq(backtrack_stackpointer());

    static const int num_arguments = 4;
    __ PrepareCallCFunction(num_arguments);

    // Put arguments into parameter registers. Parameters are
    //   Address byte_offset1 - Address captured substring's start.
    //   Address byte_offset2 - Address of current character position.
    //   size_t byte_length - length of capture in bytes(!)
    //   Isolate* isolate or 0 if unicode flag.
#ifdef _WIN64
    DCHECK(rcx == arg_reg_1);
    DCHECK(rdx == arg_reg_2);
    // Compute and set byte_offset1 (start of capture).
    __ leaq(rcx, Operand(rsi, rdx, times_1, 0));
    // Set byte_offset2.
    __ leaq(rdx, Operand(rsi, rdi, times_1, 0));
    if (read_backward) {
      __ subq(rdx, rbx);
    }
#else  // AMD64 calling convention
    DCHECK(rdi == arg_reg_1);
    DCHECK(rsi == arg_reg_2);
    // Compute byte_offset2 (current position = rsi+rdi).
    // It goes through rax because rdi and rsi are overwritten as argument
    // registers below.
    __ leaq(rax, Operand(rsi, rdi, times_1, 0));
    // Compute and set byte_offset1 (start of capture).
    __ leaq(rdi, Operand(rsi, rdx, times_1, 0));
    // Set byte_offset2.
    __ movq(rsi, rax);
    if (read_backward) {
      __ subq(rsi, rbx);
    }
#endif  // _WIN64

    // Set byte_length.
    __ movq(arg_reg_3, rbx);
    // Isolate.
#ifdef V8_INTL_SUPPORT
    if (unicode) {
      __ movq(arg_reg_4, Immediate(0));
    } else  // NOLINT
#endif      // V8_INTL_SUPPORT
    {
      __ LoadAddress(arg_reg_4, ExternalReference::isolate_address(isolate()));
    }

    { // NOLINT: Can't find a way to open this scope without confusing the
      // linter.
      AllowExternalCallThatCantCauseGC scope(&masm_);
      ExternalReference compare =
          ExternalReference::re_case_insensitive_compare_uc16(isolate());
      __ CallCFunction(compare, num_arguments);
    }

    // Restore original values before reacting on result value.
    __ Move(code_object_pointer(), masm_.CodeObject());
    __ popq(backtrack_stackpointer());
#ifndef _WIN64
    __ popq(rdi);
    __ popq(rsi);
#endif

    // Check if function returned non-zero for success or zero for failure.
    __ testq(rax, rax);
    BranchOrBacktrack(zero, on_no_match);
    // On success, advance position by length of capture.
    // Requires that rbx is callee save (true for both Win64 and AMD64 ABIs).
    if (read_backward) {
      __ subq(rdi, rbx);
    } else {
      __ addq(rdi, rbx);
    }
  }
  __ bind(&fallthrough);
}
379 | |
380 | |
// Emits a case-sensitive back-reference check: the text captured between
// registers start_reg (start) and start_reg + 1 (end) is compared, character
// by character, with the input at the current position.
//
// - start_reg:     index of the register holding the capture start; the end
//                  is read from start_reg + 1.
// - read_backward: if true, the input compared is the text that ends at the
//                  current position, and on success the position moves
//                  backwards by the capture length; otherwise forwards.
// - on_no_match:   target passed to BranchOrBacktrack on mismatch or when not
//                  enough input is available.
//
// A capture of length zero (empty or cleared) falls through without emitting
// any comparison.
void RegExpMacroAssemblerX64::CheckNotBackReference(int start_reg,
                                                    bool read_backward,
                                                    Label* on_no_match) {
  Label fallthrough;

  // Find length of back-referenced capture.
  ReadPositionFromRegister(rdx, start_reg);  // Offset of start of capture
  ReadPositionFromRegister(rax, start_reg + 1);  // Offset of end of capture
  __ subq(rax, rdx);  // Length to check.

  // At this point, the capture registers are either both set or both cleared.
  // If the capture length is zero, then the capture is either empty or cleared.
  // Fall through in both cases.
  __ j(equal, &fallthrough);

  // -----------------------
  // rdx - Start of capture
  // rax - length of capture
  // Check that there are sufficient characters left in the input.
  if (read_backward) {
    // Not enough room if position <= (string start - 1) + length.
    __ movl(rbx, Operand(rbp, kStringStartMinusOne));
    __ addl(rbx, rax);
    __ cmpl(rdi, rbx);
    BranchOrBacktrack(less_equal, on_no_match);
  } else {
    // rdi is a negative offset from the end of input, so a positive
    // position + length would reach past the end.
    __ movl(rbx, rdi);
    __ addl(rbx, rax);
    BranchOrBacktrack(greater, on_no_match);
  }

  // Compute pointers to match string and capture string
  __ leaq(rbx, Operand(rsi, rdi, times_1, 0));  // Start of match.
  if (read_backward) {
    __ subq(rbx, rax);  // Offset by length when matching backwards.
  }
  __ addq(rdx, rsi);  // Start of capture.
  __ leaq(r9, Operand(rdx, rax, times_1, 0));  // End of capture

  // -----------------------
  // rdx - current capture character address.
  // rbx - current input character address.
  // r9 - end of capture (the loop below stops when rdx reaches it).

  Label loop;
  __ bind(&loop);
  if (mode_ == LATIN1) {
    __ movzxbl(rax, Operand(rdx, 0));
    __ cmpb(rax, Operand(rbx, 0));
  } else {
    DCHECK(mode_ == UC16);
    __ movzxwl(rax, Operand(rdx, 0));
    __ cmpw(rax, Operand(rbx, 0));
  }
  BranchOrBacktrack(not_equal, on_no_match);
  // Increment pointers into capture and match string.
  __ addq(rbx, Immediate(char_size()));
  __ addq(rdx, Immediate(char_size()));
  // Check if we have reached end of match area.
  __ cmpq(rdx, r9);
  __ j(below, &loop);

  // Success.
  // Set current character position to position after match.
  __ movq(rdi, rbx);
  __ subq(rdi, rsi);
  if (read_backward) {
    // Subtract match length if we matched backward.
    __ addq(rdi, register_location(start_reg));
    __ subq(rdi, register_location(start_reg + 1));
  }

  __ bind(&fallthrough);
}
454 | |
455 | |
456 | void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c, |
457 | Label* on_not_equal) { |
458 | __ cmpl(current_character(), Immediate(c)); |
459 | BranchOrBacktrack(not_equal, on_not_equal); |
460 | } |
461 | |
462 | |
463 | void RegExpMacroAssemblerX64::CheckCharacterAfterAnd(uint32_t c, |
464 | uint32_t mask, |
465 | Label* on_equal) { |
466 | if (c == 0) { |
467 | __ testl(current_character(), Immediate(mask)); |
468 | } else { |
469 | __ movl(rax, Immediate(mask)); |
470 | __ andq(rax, current_character()); |
471 | __ cmpl(rax, Immediate(c)); |
472 | } |
473 | BranchOrBacktrack(equal, on_equal); |
474 | } |
475 | |
476 | |
477 | void RegExpMacroAssemblerX64::CheckNotCharacterAfterAnd(uint32_t c, |
478 | uint32_t mask, |
479 | Label* on_not_equal) { |
480 | if (c == 0) { |
481 | __ testl(current_character(), Immediate(mask)); |
482 | } else { |
483 | __ movl(rax, Immediate(mask)); |
484 | __ andq(rax, current_character()); |
485 | __ cmpl(rax, Immediate(c)); |
486 | } |
487 | BranchOrBacktrack(not_equal, on_not_equal); |
488 | } |
489 | |
490 | |
491 | void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd( |
492 | uc16 c, |
493 | uc16 minus, |
494 | uc16 mask, |
495 | Label* on_not_equal) { |
496 | DCHECK_GT(String::kMaxUtf16CodeUnit, minus); |
497 | __ leal(rax, Operand(current_character(), -minus)); |
498 | __ andl(rax, Immediate(mask)); |
499 | __ cmpl(rax, Immediate(c)); |
500 | BranchOrBacktrack(not_equal, on_not_equal); |
501 | } |
502 | |
503 | |
504 | void RegExpMacroAssemblerX64::CheckCharacterInRange( |
505 | uc16 from, |
506 | uc16 to, |
507 | Label* on_in_range) { |
508 | __ leal(rax, Operand(current_character(), -from)); |
509 | __ cmpl(rax, Immediate(to - from)); |
510 | BranchOrBacktrack(below_equal, on_in_range); |
511 | } |
512 | |
513 | |
514 | void RegExpMacroAssemblerX64::CheckCharacterNotInRange( |
515 | uc16 from, |
516 | uc16 to, |
517 | Label* on_not_in_range) { |
518 | __ leal(rax, Operand(current_character(), -from)); |
519 | __ cmpl(rax, Immediate(to - from)); |
520 | BranchOrBacktrack(above, on_not_in_range); |
521 | } |
522 | |
523 | |
524 | void RegExpMacroAssemblerX64::CheckBitInTable( |
525 | Handle<ByteArray> table, |
526 | Label* on_bit_set) { |
527 | __ Move(rax, table); |
528 | Register index = current_character(); |
529 | if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) { |
530 | __ movq(rbx, current_character()); |
531 | __ andq(rbx, Immediate(kTableMask)); |
532 | index = rbx; |
533 | } |
534 | __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize), |
535 | Immediate(0)); |
536 | BranchOrBacktrack(not_equal, on_bit_set); |
537 | } |
538 | |
539 | |
// Tries to emit a specialized check of the current character against the
// character class identified by `type` ('s', 'S', 'd', 'D', '.', 'n', 'w',
// 'W' or '*').
//
// Returns true if the check was emitted here (for '*' nothing needs to be
// emitted); on_no_match is then passed to BranchOrBacktrack for characters
// outside the class. Returns false, having emitted nothing, when there is no
// custom implementation for `type` in the current mode: 'S', 's' in UC16 mode
// and any unknown type.
bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
                                                         Label* on_no_match) {
  // Range checks (c in min..max) are generally implemented by an unsigned
  // (c - min) <= (max - min) check, using the sequence:
  //   leal(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))
  //   cmpl(rax, Immediate(max - min))
  switch (type) {
  case 's':
    // Match space-characters
    if (mode_ == LATIN1) {
      // One byte space characters are '\t'..'\r', ' ' and \u00a0.
      Label success;
      __ cmpl(current_character(), Immediate(' '));
      __ j(equal, &success, Label::kNear);
      // Check range 0x09..0x0D
      __ leal(rax, Operand(current_character(), -'\t'));
      __ cmpl(rax, Immediate('\r' - '\t'));
      __ j(below_equal, &success, Label::kNear);
      // \u00a0 (NBSP).
      // rax still holds (character - '\t'), hence the biased constant.
      __ cmpl(rax, Immediate(0x00A0 - '\t'));
      BranchOrBacktrack(not_equal, on_no_match);
      __ bind(&success);
      return true;
    }
    return false;
  case 'S':
    // The emitted code for generic character classes is good enough.
    return false;
  case 'd':
    // Match ASCII digits ('0'..'9')
    __ leal(rax, Operand(current_character(), -'0'));
    __ cmpl(rax, Immediate('9' - '0'));
    BranchOrBacktrack(above, on_no_match);
    return true;
  case 'D':
    // Match non ASCII-digits
    __ leal(rax, Operand(current_character(), -'0'));
    __ cmpl(rax, Immediate('9' - '0'));
    BranchOrBacktrack(below_equal, on_no_match);
    return true;
  case '.': {
    // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
    __ movl(rax, current_character());
    __ xorl(rax, Immediate(0x01));
    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
    __ subl(rax, Immediate(0x0B));
    __ cmpl(rax, Immediate(0x0C - 0x0B));
    BranchOrBacktrack(below_equal, on_no_match);
    if (mode_ == UC16) {
      // Compare original value to 0x2028 and 0x2029, using the already
      // computed (current_char ^ 0x01 - 0x0B). I.e., check for
      // 0x201D (0x2028 - 0x0B) or 0x201E.
      __ subl(rax, Immediate(0x2028 - 0x0B));
      __ cmpl(rax, Immediate(0x2029 - 0x2028));
      BranchOrBacktrack(below_equal, on_no_match);
    }
    return true;
  }
  case 'n': {
    // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
    __ movl(rax, current_character());
    __ xorl(rax, Immediate(0x01));
    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
    __ subl(rax, Immediate(0x0B));
    __ cmpl(rax, Immediate(0x0C - 0x0B));
    if (mode_ == LATIN1) {
      BranchOrBacktrack(above, on_no_match);
    } else {
      Label done;
      BranchOrBacktrack(below_equal, &done);
      // Compare original value to 0x2028 and 0x2029, using the already
      // computed (current_char ^ 0x01 - 0x0B). I.e., check for
      // 0x201D (0x2028 - 0x0B) or 0x201E.
      __ subl(rax, Immediate(0x2028 - 0x0B));
      __ cmpl(rax, Immediate(0x2029 - 0x2028));
      BranchOrBacktrack(above, on_no_match);
      __ bind(&done);
    }
    return true;
  }
  case 'w': {
    if (mode_ != LATIN1) {
      // Table is 256 entries, so all Latin1 characters can be tested.
      // Outside LATIN1 mode, anything above 'z' is rejected before the
      // table lookup.
      __ cmpl(current_character(), Immediate('z'));
      BranchOrBacktrack(above, on_no_match);
    }
    __ Move(rbx, ExternalReference::re_word_character_map(isolate()));
    DCHECK_EQ(0, word_character_map[0]);  // Character '\0' is not a word char.
    // The table byte is AND-ed with the character register itself.
    // NOTE(review): presumably word entries have enough bits set that this is
    // non-zero for every non-NUL word character - confirm against
    // word_character_map.
    __ testb(Operand(rbx, current_character(), times_1, 0),
             current_character());
    BranchOrBacktrack(zero, on_no_match);
    return true;
  }
  case 'W': {
    Label done;
    if (mode_ != LATIN1) {
      // Table is 256 entries, so all Latin1 characters can be tested.
      // Outside LATIN1 mode, anything above 'z' is accepted as a non-word
      // character without a table lookup.
      __ cmpl(current_character(), Immediate('z'));
      __ j(above, &done);
    }
    __ Move(rbx, ExternalReference::re_word_character_map(isolate()));
    DCHECK_EQ(0, word_character_map[0]);  // Character '\0' is not a word char.
    __ testb(Operand(rbx, current_character(), times_1, 0),
             current_character());
    BranchOrBacktrack(not_zero, on_no_match);
    if (mode_ != LATIN1) {
      __ bind(&done);
    }
    return true;
  }

  case '*':
    // Match any character.
    return true;
  // No custom implementation (yet): s(UC16), S(UC16).
  default:
    return false;
  }
}
659 | |
660 | |
661 | void RegExpMacroAssemblerX64::Fail() { |
662 | STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero. |
663 | if (!global()) { |
664 | __ Set(rax, FAILURE); |
665 | } |
666 | __ jmp(&exit_label_); |
667 | } |
668 | |
669 | |
670 | Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
671 | Label return_rax; |
672 | // Finalize code - write the entry point code now we know how many |
673 | // registers we need. |
674 | // Entry code: |
675 | __ bind(&entry_label_); |
676 | |
  // Tell the system that we have a stack frame. Because the type is MANUAL, no
  // code is generated.
679 | FrameScope scope(&masm_, StackFrame::MANUAL); |
680 | |
681 | // Actually emit code to start a new stack frame. |
682 | __ pushq(rbp); |
683 | __ movq(rbp, rsp); |
684 | // Save parameters and callee-save registers. Order here should correspond |
685 | // to order of kBackup_ebx etc. |
686 | #ifdef _WIN64 |
687 | // MSVC passes arguments in rcx, rdx, r8, r9, with backing stack slots. |
688 | // Store register parameters in pre-allocated stack slots, |
689 | __ movq(Operand(rbp, kInputString), rcx); |
690 | __ movq(Operand(rbp, kStartIndex), rdx); // Passed as int32 in edx. |
691 | __ movq(Operand(rbp, kInputStart), r8); |
692 | __ movq(Operand(rbp, kInputEnd), r9); |
693 | // Callee-save on Win64. |
694 | __ pushq(rsi); |
695 | __ pushq(rdi); |
696 | __ pushq(rbx); |
697 | #else |
698 | // GCC passes arguments in rdi, rsi, rdx, rcx, r8, r9 (and then on stack). |
699 | // Push register parameters on stack for reference. |
700 | DCHECK_EQ(kInputString, -1 * kSystemPointerSize); |
701 | DCHECK_EQ(kStartIndex, -2 * kSystemPointerSize); |
702 | DCHECK_EQ(kInputStart, -3 * kSystemPointerSize); |
703 | DCHECK_EQ(kInputEnd, -4 * kSystemPointerSize); |
704 | DCHECK_EQ(kRegisterOutput, -5 * kSystemPointerSize); |
705 | DCHECK_EQ(kNumOutputRegisters, -6 * kSystemPointerSize); |
706 | __ pushq(rdi); |
707 | __ pushq(rsi); |
708 | __ pushq(rdx); |
709 | __ pushq(rcx); |
710 | __ pushq(r8); |
711 | __ pushq(r9); |
712 | |
713 | __ pushq(rbx); // Callee-save |
714 | #endif |
715 | |
716 | __ Push(Immediate(0)); // Number of successful matches in a global regexp. |
717 | __ Push(Immediate(0)); // Make room for "string start - 1" constant. |
718 | |
719 | // Check if we have space on the stack for registers. |
720 | Label stack_limit_hit; |
721 | Label stack_ok; |
722 | |
723 | ExternalReference stack_limit = |
724 | ExternalReference::address_of_stack_limit(isolate()); |
725 | __ movq(rcx, rsp); |
726 | __ Move(kScratchRegister, stack_limit); |
727 | __ subq(rcx, Operand(kScratchRegister, 0)); |
728 | // Handle it if the stack pointer is already below the stack limit. |
729 | __ j(below_equal, &stack_limit_hit); |
730 | // Check if there is room for the variable number of registers above |
731 | // the stack limit. |
732 | __ cmpq(rcx, Immediate(num_registers_ * kSystemPointerSize)); |
733 | __ j(above_equal, &stack_ok); |
734 | // Exit with OutOfMemory exception. There is not enough space on the stack |
735 | // for our working registers. |
736 | __ Set(rax, EXCEPTION); |
737 | __ jmp(&return_rax); |
738 | |
739 | __ bind(&stack_limit_hit); |
740 | __ Move(code_object_pointer(), masm_.CodeObject()); |
741 | CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp. |
742 | __ testq(rax, rax); |
743 | // If returned value is non-zero, we exit with the returned value as result. |
744 | __ j(not_zero, &return_rax); |
745 | |
746 | __ bind(&stack_ok); |
747 | |
748 | // Allocate space on stack for registers. |
749 | __ subq(rsp, Immediate(num_registers_ * kSystemPointerSize)); |
750 | // Load string length. |
751 | __ movq(rsi, Operand(rbp, kInputEnd)); |
752 | // Load input position. |
753 | __ movq(rdi, Operand(rbp, kInputStart)); |
754 | // Set up rdi to be negative offset from string end. |
755 | __ subq(rdi, rsi); |
756 | // Set rax to address of char before start of the string |
757 | // (effectively string position -1). |
758 | __ movq(rbx, Operand(rbp, kStartIndex)); |
759 | __ negq(rbx); |
760 | if (mode_ == UC16) { |
761 | __ leaq(rax, Operand(rdi, rbx, times_2, -char_size())); |
762 | } else { |
763 | __ leaq(rax, Operand(rdi, rbx, times_1, -char_size())); |
764 | } |
765 | // Store this value in a local variable, for use when clearing |
766 | // position registers. |
767 | __ movq(Operand(rbp, kStringStartMinusOne), rax); |
768 | |
769 | #if V8_OS_WIN |
770 | // Ensure that we have written to each stack page, in order. Skipping a page |
771 | // on Windows can cause segmentation faults. Assuming page size is 4k. |
772 | const int kPageSize = 4096; |
773 | const int kRegistersPerPage = kPageSize / kSystemPointerSize; |
774 | for (int i = num_saved_registers_ + kRegistersPerPage - 1; |
775 | i < num_registers_; |
776 | i += kRegistersPerPage) { |
777 | __ movq(register_location(i), rax); // One write every page. |
778 | } |
779 | #endif // V8_OS_WIN |
780 | |
781 | // Initialize code object pointer. |
782 | __ Move(code_object_pointer(), masm_.CodeObject()); |
783 | |
784 | Label load_char_start_regexp, start_regexp; |
785 | // Load newline if index is at start, previous character otherwise. |
786 | __ cmpl(Operand(rbp, kStartIndex), Immediate(0)); |
787 | __ j(not_equal, &load_char_start_regexp, Label::kNear); |
788 | __ Set(current_character(), '\n'); |
789 | __ jmp(&start_regexp, Label::kNear); |
790 | |
791 | // Global regexp restarts matching here. |
792 | __ bind(&load_char_start_regexp); |
793 | // Load previous char as initial value of current character register. |
794 | LoadCurrentCharacterUnchecked(-1, 1); |
795 | __ bind(&start_regexp); |
796 | |
797 | // Initialize on-stack registers. |
798 | if (num_saved_registers_ > 0) { |
799 | // Fill saved registers with initial value = start offset - 1 |
800 | // Fill in stack push order, to avoid accessing across an unwritten |
801 | // page (a problem on Windows). |
802 | if (num_saved_registers_ > 8) { |
803 | __ Set(rcx, kRegisterZero); |
804 | Label init_loop; |
805 | __ bind(&init_loop); |
806 | __ movq(Operand(rbp, rcx, times_1, 0), rax); |
807 | __ subq(rcx, Immediate(kSystemPointerSize)); |
808 | __ cmpq(rcx, Immediate(kRegisterZero - |
809 | num_saved_registers_ * kSystemPointerSize)); |
810 | __ j(greater, &init_loop); |
811 | } else { // Unroll the loop. |
812 | for (int i = 0; i < num_saved_registers_; i++) { |
813 | __ movq(register_location(i), rax); |
814 | } |
815 | } |
816 | } |
817 | |
818 | // Initialize backtrack stack pointer. |
819 | __ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd)); |
820 | |
821 | __ jmp(&start_label_); |
822 | |
823 | // Exit code: |
824 | if (success_label_.is_linked()) { |
825 | // Save captures when successful. |
826 | __ bind(&success_label_); |
827 | if (num_saved_registers_ > 0) { |
828 | // copy captures to output |
829 | __ movq(rdx, Operand(rbp, kStartIndex)); |
830 | __ movq(rbx, Operand(rbp, kRegisterOutput)); |
831 | __ movq(rcx, Operand(rbp, kInputEnd)); |
832 | __ subq(rcx, Operand(rbp, kInputStart)); |
833 | if (mode_ == UC16) { |
834 | __ leaq(rcx, Operand(rcx, rdx, times_2, 0)); |
835 | } else { |
836 | __ addq(rcx, rdx); |
837 | } |
838 | for (int i = 0; i < num_saved_registers_; i++) { |
839 | __ movq(rax, register_location(i)); |
840 | if (i == 0 && global_with_zero_length_check()) { |
841 | // Keep capture start in rdx for the zero-length check later. |
842 | __ movq(rdx, rax); |
843 | } |
844 | __ addq(rax, rcx); // Convert to index from start, not end. |
845 | if (mode_ == UC16) { |
846 | __ sarq(rax, Immediate(1)); // Convert byte index to character index. |
847 | } |
848 | __ movl(Operand(rbx, i * kIntSize), rax); |
849 | } |
850 | } |
851 | |
852 | if (global()) { |
853 | // Restart matching if the regular expression is flagged as global. |
854 | // Increment success counter. |
855 | __ incq(Operand(rbp, kSuccessfulCaptures)); |
856 | // Capture results have been stored, so the number of remaining global |
857 | // output registers is reduced by the number of stored captures. |
858 | __ movsxlq(rcx, Operand(rbp, kNumOutputRegisters)); |
859 | __ subq(rcx, Immediate(num_saved_registers_)); |
860 | // Check whether we have enough room for another set of capture results. |
861 | __ cmpq(rcx, Immediate(num_saved_registers_)); |
862 | __ j(less, &exit_label_); |
863 | |
864 | __ movq(Operand(rbp, kNumOutputRegisters), rcx); |
865 | // Advance the location for output. |
866 | __ addq(Operand(rbp, kRegisterOutput), |
867 | Immediate(num_saved_registers_ * kIntSize)); |
868 | |
869 | // Prepare rax to initialize registers with its value in the next run. |
870 | __ movq(rax, Operand(rbp, kStringStartMinusOne)); |
871 | |
872 | if (global_with_zero_length_check()) { |
873 | // Special case for zero-length matches. |
874 | // rdx: capture start index |
875 | __ cmpq(rdi, rdx); |
876 | // Not a zero-length match, restart. |
877 | __ j(not_equal, &load_char_start_regexp); |
878 | // rdi (offset from the end) is zero if we already reached the end. |
879 | __ testq(rdi, rdi); |
880 | __ j(zero, &exit_label_, Label::kNear); |
881 | // Advance current position after a zero-length match. |
882 | Label advance; |
883 | __ bind(&advance); |
884 | if (mode_ == UC16) { |
885 | __ addq(rdi, Immediate(2)); |
886 | } else { |
887 | __ incq(rdi); |
888 | } |
889 | if (global_unicode()) CheckNotInSurrogatePair(0, &advance); |
890 | } |
891 | |
892 | __ jmp(&load_char_start_regexp); |
893 | } else { |
894 | __ movq(rax, Immediate(SUCCESS)); |
895 | } |
896 | } |
897 | |
898 | __ bind(&exit_label_); |
899 | if (global()) { |
900 | // Return the number of successful captures. |
901 | __ movq(rax, Operand(rbp, kSuccessfulCaptures)); |
902 | } |
903 | |
904 | __ bind(&return_rax); |
905 | #ifdef _WIN64 |
906 | // Restore callee save registers. |
907 | __ leaq(rsp, Operand(rbp, kLastCalleeSaveRegister)); |
908 | __ popq(rbx); |
909 | __ popq(rdi); |
910 | __ popq(rsi); |
911 | // Stack now at rbp. |
912 | #else |
913 | // Restore callee save register. |
914 | __ movq(rbx, Operand(rbp, kBackup_rbx)); |
915 | // Skip rsp to rbp. |
916 | __ movq(rsp, rbp); |
917 | #endif |
918 | // Exit function frame, restore previous one. |
919 | __ popq(rbp); |
920 | __ ret(0); |
921 | |
922 | // Backtrack code (branch target for conditional backtracks). |
923 | if (backtrack_label_.is_linked()) { |
924 | __ bind(&backtrack_label_); |
925 | Backtrack(); |
926 | } |
927 | |
928 | Label exit_with_exception; |
929 | |
930 | // Preempt-code |
931 | if (check_preempt_label_.is_linked()) { |
932 | SafeCallTarget(&check_preempt_label_); |
933 | |
934 | __ pushq(backtrack_stackpointer()); |
935 | __ pushq(rdi); |
936 | |
937 | CallCheckStackGuardState(); |
938 | __ testq(rax, rax); |
939 | // If returning non-zero, we should end execution with the given |
940 | // result as return value. |
941 | __ j(not_zero, &return_rax); |
942 | |
943 | // Restore registers. |
944 | __ Move(code_object_pointer(), masm_.CodeObject()); |
945 | __ popq(rdi); |
946 | __ popq(backtrack_stackpointer()); |
    // String might have moved: Reload rsi from frame.
948 | __ movq(rsi, Operand(rbp, kInputEnd)); |
949 | SafeReturn(); |
950 | } |
951 | |
952 | // Backtrack stack overflow code. |
953 | if (stack_overflow_label_.is_linked()) { |
954 | SafeCallTarget(&stack_overflow_label_); |
955 | // Reached if the backtrack-stack limit has been hit. |
956 | |
957 | // Save registers before calling C function |
958 | #ifndef _WIN64 |
959 | // Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI. |
960 | __ pushq(rsi); |
961 | __ pushq(rdi); |
962 | #endif |
963 | |
964 | // Call GrowStack(backtrack_stackpointer()) |
965 | static const int num_arguments = 3; |
966 | __ PrepareCallCFunction(num_arguments); |
967 | #ifdef _WIN64 |
968 | // Microsoft passes parameters in rcx, rdx, r8. |
969 | // First argument, backtrack stackpointer, is already in rcx. |
970 | __ leaq(rdx, Operand(rbp, kStackHighEnd)); // Second argument |
971 | __ LoadAddress(r8, ExternalReference::isolate_address(isolate())); |
972 | #else |
973 | // AMD64 ABI passes parameters in rdi, rsi, rdx. |
974 | __ movq(rdi, backtrack_stackpointer()); // First argument. |
975 | __ leaq(rsi, Operand(rbp, kStackHighEnd)); // Second argument. |
976 | __ LoadAddress(rdx, ExternalReference::isolate_address(isolate())); |
977 | #endif |
978 | ExternalReference grow_stack = |
979 | ExternalReference::re_grow_stack(isolate()); |
980 | __ CallCFunction(grow_stack, num_arguments); |
981 | // If return nullptr, we have failed to grow the stack, and |
982 | // must exit with a stack-overflow exception. |
983 | __ testq(rax, rax); |
984 | __ j(equal, &exit_with_exception); |
985 | // Otherwise use return value as new stack pointer. |
986 | __ movq(backtrack_stackpointer(), rax); |
987 | // Restore saved registers and continue. |
988 | __ Move(code_object_pointer(), masm_.CodeObject()); |
989 | #ifndef _WIN64 |
990 | __ popq(rdi); |
991 | __ popq(rsi); |
992 | #endif |
993 | SafeReturn(); |
994 | } |
995 | |
996 | if (exit_with_exception.is_linked()) { |
997 | // If any of the code above needed to exit with an exception. |
998 | __ bind(&exit_with_exception); |
999 | // Exit with Result EXCEPTION(-1) to signal thrown exception. |
1000 | __ Set(rax, EXCEPTION); |
1001 | __ jmp(&return_rax); |
1002 | } |
1003 | |
1004 | FixupCodeRelativePositions(); |
1005 | |
1006 | CodeDesc code_desc; |
1007 | Isolate* isolate = this->isolate(); |
1008 | masm_.GetCode(isolate, &code_desc); |
1009 | Handle<Code> code = |
1010 | isolate->factory()->NewCode(code_desc, Code::REGEXP, masm_.CodeObject()); |
1011 | PROFILE(isolate, RegExpCodeCreateEvent(AbstractCode::cast(*code), *source)); |
1012 | return Handle<HeapObject>::cast(code); |
1013 | } |
1014 | |
1015 | |
1016 | void RegExpMacroAssemblerX64::GoTo(Label* to) { |
1017 | BranchOrBacktrack(no_condition, to); |
1018 | } |
1019 | |
1020 | |
1021 | void RegExpMacroAssemblerX64::IfRegisterGE(int reg, |
1022 | int comparand, |
1023 | Label* if_ge) { |
1024 | __ cmpq(register_location(reg), Immediate(comparand)); |
1025 | BranchOrBacktrack(greater_equal, if_ge); |
1026 | } |
1027 | |
1028 | |
1029 | void RegExpMacroAssemblerX64::IfRegisterLT(int reg, |
1030 | int comparand, |
1031 | Label* if_lt) { |
1032 | __ cmpq(register_location(reg), Immediate(comparand)); |
1033 | BranchOrBacktrack(less, if_lt); |
1034 | } |
1035 | |
1036 | |
1037 | void RegExpMacroAssemblerX64::IfRegisterEqPos(int reg, |
1038 | Label* if_eq) { |
1039 | __ cmpq(rdi, register_location(reg)); |
1040 | BranchOrBacktrack(equal, if_eq); |
1041 | } |
1042 | |
1043 | |
1044 | RegExpMacroAssembler::IrregexpImplementation |
1045 | RegExpMacroAssemblerX64::Implementation() { |
1046 | return kX64Implementation; |
1047 | } |
1048 | |
1049 | |
1050 | void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset, |
1051 | Label* on_end_of_input, |
1052 | bool check_bounds, |
1053 | int characters) { |
1054 | DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works) |
1055 | if (check_bounds) { |
1056 | if (cp_offset >= 0) { |
1057 | CheckPosition(cp_offset + characters - 1, on_end_of_input); |
1058 | } else { |
1059 | CheckPosition(cp_offset, on_end_of_input); |
1060 | } |
1061 | } |
1062 | LoadCurrentCharacterUnchecked(cp_offset, characters); |
1063 | } |
1064 | |
1065 | |
1066 | void RegExpMacroAssemblerX64::PopCurrentPosition() { |
1067 | Pop(rdi); |
1068 | } |
1069 | |
1070 | |
1071 | void RegExpMacroAssemblerX64::PopRegister(int register_index) { |
1072 | Pop(rax); |
1073 | __ movq(register_location(register_index), rax); |
1074 | } |
1075 | |
1076 | |
1077 | void RegExpMacroAssemblerX64::PushBacktrack(Label* label) { |
1078 | Push(label); |
1079 | CheckStackLimit(); |
1080 | } |
1081 | |
1082 | |
1083 | void RegExpMacroAssemblerX64::PushCurrentPosition() { |
1084 | Push(rdi); |
1085 | } |
1086 | |
1087 | |
1088 | void RegExpMacroAssemblerX64::PushRegister(int register_index, |
1089 | StackCheckFlag check_stack_limit) { |
1090 | __ movq(rax, register_location(register_index)); |
1091 | Push(rax); |
1092 | if (check_stack_limit) CheckStackLimit(); |
1093 | } |
1094 | |
1095 | void RegExpMacroAssemblerX64::ReadCurrentPositionFromRegister(int reg) { |
1096 | __ movq(rdi, register_location(reg)); |
1097 | } |
1098 | |
1099 | |
1100 | void RegExpMacroAssemblerX64::ReadPositionFromRegister(Register dst, int reg) { |
1101 | __ movq(dst, register_location(reg)); |
1102 | } |
1103 | |
1104 | |
1105 | void RegExpMacroAssemblerX64::ReadStackPointerFromRegister(int reg) { |
1106 | __ movq(backtrack_stackpointer(), register_location(reg)); |
1107 | __ addq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd)); |
1108 | } |
1109 | |
1110 | |
1111 | void RegExpMacroAssemblerX64::SetCurrentPositionFromEnd(int by) { |
1112 | Label after_position; |
1113 | __ cmpq(rdi, Immediate(-by * char_size())); |
1114 | __ j(greater_equal, &after_position, Label::kNear); |
1115 | __ movq(rdi, Immediate(-by * char_size())); |
1116 | // On RegExp code entry (where this operation is used), the character before |
1117 | // the current position is expected to be already loaded. |
1118 | // We have advanced the position, so it's safe to read backwards. |
1119 | LoadCurrentCharacterUnchecked(-1, 1); |
1120 | __ bind(&after_position); |
1121 | } |
1122 | |
1123 | |
1124 | void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) { |
1125 | DCHECK(register_index >= num_saved_registers_); // Reserved for positions! |
1126 | __ movq(register_location(register_index), Immediate(to)); |
1127 | } |
1128 | |
1129 | |
1130 | bool RegExpMacroAssemblerX64::Succeed() { |
1131 | __ jmp(&success_label_); |
1132 | return global(); |
1133 | } |
1134 | |
1135 | |
1136 | void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg, |
1137 | int cp_offset) { |
1138 | if (cp_offset == 0) { |
1139 | __ movq(register_location(reg), rdi); |
1140 | } else { |
1141 | __ leaq(rax, Operand(rdi, cp_offset * char_size())); |
1142 | __ movq(register_location(reg), rax); |
1143 | } |
1144 | } |
1145 | |
1146 | |
1147 | void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) { |
1148 | DCHECK(reg_from <= reg_to); |
1149 | __ movq(rax, Operand(rbp, kStringStartMinusOne)); |
1150 | for (int reg = reg_from; reg <= reg_to; reg++) { |
1151 | __ movq(register_location(reg), rax); |
1152 | } |
1153 | } |
1154 | |
1155 | |
1156 | void RegExpMacroAssemblerX64::WriteStackPointerToRegister(int reg) { |
1157 | __ movq(rax, backtrack_stackpointer()); |
1158 | __ subq(rax, Operand(rbp, kStackHighEnd)); |
1159 | __ movq(register_location(reg), rax); |
1160 | } |
1161 | |
1162 | |
1163 | // Private methods: |
1164 | |
1165 | void RegExpMacroAssemblerX64::CallCheckStackGuardState() { |
1166 | // This function call preserves no register values. Caller should |
1167 | // store anything volatile in a C call or overwritten by this function. |
1168 | static const int num_arguments = 3; |
1169 | __ PrepareCallCFunction(num_arguments); |
1170 | #ifdef _WIN64 |
1171 | // Second argument: Code of self. (Do this before overwriting r8). |
1172 | __ movq(rdx, code_object_pointer()); |
1173 | // Third argument: RegExp code frame pointer. |
1174 | __ movq(r8, rbp); |
1175 | // First argument: Next address on the stack (will be address of |
1176 | // return address). |
1177 | __ leaq(rcx, Operand(rsp, -kSystemPointerSize)); |
1178 | #else |
1179 | // Third argument: RegExp code frame pointer. |
1180 | __ movq(rdx, rbp); |
1181 | // Second argument: Code of self. |
1182 | __ movq(rsi, code_object_pointer()); |
1183 | // First argument: Next address on the stack (will be address of |
1184 | // return address). |
1185 | __ leaq(rdi, Operand(rsp, -kSystemPointerSize)); |
1186 | #endif |
1187 | ExternalReference stack_check = |
1188 | ExternalReference::re_check_stack_guard_state(isolate()); |
1189 | __ CallCFunction(stack_check, num_arguments); |
1190 | } |
1191 | |
1192 | |
1193 | // Helper function for reading a value out of a stack frame. |
1194 | template <typename T> |
1195 | static T& frame_entry(Address re_frame, int frame_offset) { |
1196 | return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset)); |
1197 | } |
1198 | |
1199 | |
1200 | template <typename T> |
1201 | static T* frame_entry_address(Address re_frame, int frame_offset) { |
1202 | return reinterpret_cast<T*>(re_frame + frame_offset); |
1203 | } |
1204 | |
1205 | int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address, |
1206 | Address raw_code, |
1207 | Address re_frame) { |
1208 | Code re_code = Code::cast(Object(raw_code)); |
1209 | return NativeRegExpMacroAssembler::CheckStackGuardState( |
1210 | frame_entry<Isolate*>(re_frame, kIsolate), |
1211 | frame_entry<int>(re_frame, kStartIndex), |
1212 | frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, |
1213 | frame_entry_address<Address>(re_frame, kInputString), |
1214 | frame_entry_address<const byte*>(re_frame, kInputStart), |
1215 | frame_entry_address<const byte*>(re_frame, kInputEnd)); |
1216 | } |
1217 | |
1218 | |
1219 | Operand RegExpMacroAssemblerX64::register_location(int register_index) { |
1220 | DCHECK(register_index < (1<<30)); |
1221 | if (num_registers_ <= register_index) { |
1222 | num_registers_ = register_index + 1; |
1223 | } |
1224 | return Operand(rbp, kRegisterZero - register_index * kSystemPointerSize); |
1225 | } |
1226 | |
1227 | |
1228 | void RegExpMacroAssemblerX64::CheckPosition(int cp_offset, |
1229 | Label* on_outside_input) { |
1230 | if (cp_offset >= 0) { |
1231 | __ cmpl(rdi, Immediate(-cp_offset * char_size())); |
1232 | BranchOrBacktrack(greater_equal, on_outside_input); |
1233 | } else { |
1234 | __ leaq(rax, Operand(rdi, cp_offset * char_size())); |
1235 | __ cmpq(rax, Operand(rbp, kStringStartMinusOne)); |
1236 | BranchOrBacktrack(less_equal, on_outside_input); |
1237 | } |
1238 | } |
1239 | |
1240 | |
1241 | void RegExpMacroAssemblerX64::BranchOrBacktrack(Condition condition, |
1242 | Label* to) { |
1243 | if (condition < 0) { // No condition |
1244 | if (to == nullptr) { |
1245 | Backtrack(); |
1246 | return; |
1247 | } |
1248 | __ jmp(to); |
1249 | return; |
1250 | } |
1251 | if (to == nullptr) { |
1252 | __ j(condition, &backtrack_label_); |
1253 | return; |
1254 | } |
1255 | __ j(condition, to); |
1256 | } |
1257 | |
1258 | |
1259 | void RegExpMacroAssemblerX64::SafeCall(Label* to) { |
1260 | __ call(to); |
1261 | } |
1262 | |
1263 | |
1264 | void RegExpMacroAssemblerX64::SafeCallTarget(Label* label) { |
1265 | __ bind(label); |
1266 | __ subq(Operand(rsp, 0), code_object_pointer()); |
1267 | } |
1268 | |
1269 | |
1270 | void RegExpMacroAssemblerX64::SafeReturn() { |
1271 | __ addq(Operand(rsp, 0), code_object_pointer()); |
1272 | __ ret(0); |
1273 | } |
1274 | |
1275 | |
1276 | void RegExpMacroAssemblerX64::Push(Register source) { |
1277 | DCHECK(source != backtrack_stackpointer()); |
1278 | // Notice: This updates flags, unlike normal Push. |
1279 | __ subq(backtrack_stackpointer(), Immediate(kIntSize)); |
1280 | __ movl(Operand(backtrack_stackpointer(), 0), source); |
1281 | } |
1282 | |
1283 | |
1284 | void RegExpMacroAssemblerX64::Push(Immediate value) { |
1285 | // Notice: This updates flags, unlike normal Push. |
1286 | __ subq(backtrack_stackpointer(), Immediate(kIntSize)); |
1287 | __ movl(Operand(backtrack_stackpointer(), 0), value); |
1288 | } |
1289 | |
1290 | |
1291 | void RegExpMacroAssemblerX64::FixupCodeRelativePositions() { |
1292 | for (int position : code_relative_fixup_positions_) { |
1293 | // The position succeeds a relative label offset from position. |
1294 | // Patch the relative offset to be relative to the Code object pointer |
1295 | // instead. |
1296 | int patch_position = position - kIntSize; |
1297 | int offset = masm_.long_at(patch_position); |
1298 | masm_.long_at_put(patch_position, |
1299 | offset |
1300 | + position |
1301 | + Code::kHeaderSize |
1302 | - kHeapObjectTag); |
1303 | } |
1304 | code_relative_fixup_positions_.Rewind(0); |
1305 | } |
1306 | |
1307 | |
1308 | void RegExpMacroAssemblerX64::Push(Label* backtrack_target) { |
1309 | __ subq(backtrack_stackpointer(), Immediate(kIntSize)); |
1310 | __ movl(Operand(backtrack_stackpointer(), 0), backtrack_target); |
1311 | MarkPositionForCodeRelativeFixup(); |
1312 | } |
1313 | |
1314 | |
1315 | void RegExpMacroAssemblerX64::Pop(Register target) { |
1316 | DCHECK(target != backtrack_stackpointer()); |
1317 | __ movsxlq(target, Operand(backtrack_stackpointer(), 0)); |
1318 | // Notice: This updates flags, unlike normal Pop. |
1319 | __ addq(backtrack_stackpointer(), Immediate(kIntSize)); |
1320 | } |
1321 | |
1322 | |
1323 | void RegExpMacroAssemblerX64::Drop() { |
1324 | __ addq(backtrack_stackpointer(), Immediate(kIntSize)); |
1325 | } |
1326 | |
1327 | |
1328 | void RegExpMacroAssemblerX64::CheckPreemption() { |
1329 | // Check for preemption. |
1330 | Label no_preempt; |
1331 | ExternalReference stack_limit = |
1332 | ExternalReference::address_of_stack_limit(isolate()); |
1333 | __ load_rax(stack_limit); |
1334 | __ cmpq(rsp, rax); |
1335 | __ j(above, &no_preempt); |
1336 | |
1337 | SafeCall(&check_preempt_label_); |
1338 | |
1339 | __ bind(&no_preempt); |
1340 | } |
1341 | |
1342 | |
1343 | void RegExpMacroAssemblerX64::CheckStackLimit() { |
1344 | Label no_stack_overflow; |
1345 | ExternalReference stack_limit = |
1346 | ExternalReference::address_of_regexp_stack_limit(isolate()); |
1347 | __ load_rax(stack_limit); |
1348 | __ cmpq(backtrack_stackpointer(), rax); |
1349 | __ j(above, &no_stack_overflow); |
1350 | |
1351 | SafeCall(&stack_overflow_label_); |
1352 | |
1353 | __ bind(&no_stack_overflow); |
1354 | } |
1355 | |
1356 | |
1357 | void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset, |
1358 | int characters) { |
1359 | if (mode_ == LATIN1) { |
1360 | if (characters == 4) { |
1361 | __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); |
1362 | } else if (characters == 2) { |
1363 | __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); |
1364 | } else { |
1365 | DCHECK_EQ(1, characters); |
1366 | __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset)); |
1367 | } |
1368 | } else { |
1369 | DCHECK(mode_ == UC16); |
1370 | if (characters == 2) { |
1371 | __ movl(current_character(), |
1372 | Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); |
1373 | } else { |
1374 | DCHECK_EQ(1, characters); |
1375 | __ movzxwl(current_character(), |
1376 | Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); |
1377 | } |
1378 | } |
1379 | } |
1380 | |
1381 | #undef __ |
1382 | |
1383 | } // namespace internal |
1384 | } // namespace v8 |
1385 | |
1386 | #endif // V8_TARGET_ARCH_X64 |
1387 | |