1 | // Copyright 2013 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #include "src/compiler/backend/code-generator.h" |
6 | |
7 | #include <limits> |
8 | |
9 | #include "src/base/overflowing-math.h" |
10 | #include "src/compiler/backend/code-generator-impl.h" |
11 | #include "src/compiler/backend/gap-resolver.h" |
12 | #include "src/compiler/node-matchers.h" |
13 | #include "src/compiler/osr.h" |
14 | #include "src/heap/heap-inl.h" // crbug.com/v8/8499 |
15 | #include "src/macro-assembler.h" |
16 | #include "src/objects/smi.h" |
17 | #include "src/optimized-compilation-info.h" |
18 | #include "src/wasm/wasm-code-manager.h" |
19 | #include "src/wasm/wasm-objects.h" |
20 | #include "src/x64/assembler-x64.h" |
21 | |
22 | namespace v8 { |
23 | namespace internal { |
24 | namespace compiler { |
25 | |
26 | #define __ tasm()-> |
27 | |
28 | // Adds X64 specific methods for decoding operands. |
29 | class X64OperandConverter : public InstructionOperandConverter { |
30 | public: |
31 | X64OperandConverter(CodeGenerator* gen, Instruction* instr) |
32 | : InstructionOperandConverter(gen, instr) {} |
33 | |
34 | Immediate InputImmediate(size_t index) { |
35 | return ToImmediate(instr_->InputAt(index)); |
36 | } |
37 | |
38 | Operand InputOperand(size_t index, int = 0) { |
39 | return ToOperand(instr_->InputAt(index), extra); |
40 | } |
41 | |
42 | Operand OutputOperand() { return ToOperand(instr_->Output()); } |
43 | |
44 | Immediate ToImmediate(InstructionOperand* operand) { |
45 | Constant constant = ToConstant(operand); |
46 | if (constant.type() == Constant::kFloat64) { |
47 | DCHECK_EQ(0, constant.ToFloat64().AsUint64()); |
48 | return Immediate(0); |
49 | } |
50 | if (RelocInfo::IsWasmReference(constant.rmode())) { |
51 | return Immediate(constant.ToInt32(), constant.rmode()); |
52 | } |
53 | return Immediate(constant.ToInt32()); |
54 | } |
55 | |
56 | Operand ToOperand(InstructionOperand* op, int = 0) { |
57 | DCHECK(op->IsStackSlot() || op->IsFPStackSlot()); |
58 | return SlotToOperand(AllocatedOperand::cast(op)->index(), extra); |
59 | } |
60 | |
61 | Operand SlotToOperand(int slot_index, int = 0) { |
62 | FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index); |
63 | return Operand(offset.from_stack_pointer() ? rsp : rbp, |
64 | offset.offset() + extra); |
65 | } |
66 | |
67 | static size_t NextOffset(size_t* offset) { |
68 | size_t i = *offset; |
69 | (*offset)++; |
70 | return i; |
71 | } |
72 | |
73 | static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) { |
74 | STATIC_ASSERT(0 == static_cast<int>(times_1)); |
75 | STATIC_ASSERT(1 == static_cast<int>(times_2)); |
76 | STATIC_ASSERT(2 == static_cast<int>(times_4)); |
77 | STATIC_ASSERT(3 == static_cast<int>(times_8)); |
78 | int scale = static_cast<int>(mode - one); |
79 | DCHECK(scale >= 0 && scale < 4); |
80 | return static_cast<ScaleFactor>(scale); |
81 | } |
82 | |
83 | Operand MemoryOperand(size_t* offset) { |
84 | AddressingMode mode = AddressingModeField::decode(instr_->opcode()); |
85 | switch (mode) { |
86 | case kMode_MR: { |
87 | Register base = InputRegister(NextOffset(offset)); |
88 | int32_t disp = 0; |
89 | return Operand(base, disp); |
90 | } |
91 | case kMode_MRI: { |
92 | Register base = InputRegister(NextOffset(offset)); |
93 | int32_t disp = InputInt32(NextOffset(offset)); |
94 | return Operand(base, disp); |
95 | } |
96 | case kMode_MR1: |
97 | case kMode_MR2: |
98 | case kMode_MR4: |
99 | case kMode_MR8: { |
100 | Register base = InputRegister(NextOffset(offset)); |
101 | Register index = InputRegister(NextOffset(offset)); |
102 | ScaleFactor scale = ScaleFor(kMode_MR1, mode); |
103 | int32_t disp = 0; |
104 | return Operand(base, index, scale, disp); |
105 | } |
106 | case kMode_MR1I: |
107 | case kMode_MR2I: |
108 | case kMode_MR4I: |
109 | case kMode_MR8I: { |
110 | Register base = InputRegister(NextOffset(offset)); |
111 | Register index = InputRegister(NextOffset(offset)); |
112 | ScaleFactor scale = ScaleFor(kMode_MR1I, mode); |
113 | int32_t disp = InputInt32(NextOffset(offset)); |
114 | return Operand(base, index, scale, disp); |
115 | } |
116 | case kMode_M1: { |
117 | Register base = InputRegister(NextOffset(offset)); |
118 | int32_t disp = 0; |
119 | return Operand(base, disp); |
120 | } |
121 | case kMode_M2: |
122 | UNREACHABLE(); // Should use kModeMR with more compact encoding instead |
123 | return Operand(no_reg, 0); |
124 | case kMode_M4: |
125 | case kMode_M8: { |
126 | Register index = InputRegister(NextOffset(offset)); |
127 | ScaleFactor scale = ScaleFor(kMode_M1, mode); |
128 | int32_t disp = 0; |
129 | return Operand(index, scale, disp); |
130 | } |
131 | case kMode_M1I: |
132 | case kMode_M2I: |
133 | case kMode_M4I: |
134 | case kMode_M8I: { |
135 | Register index = InputRegister(NextOffset(offset)); |
136 | ScaleFactor scale = ScaleFor(kMode_M1I, mode); |
137 | int32_t disp = InputInt32(NextOffset(offset)); |
138 | return Operand(index, scale, disp); |
139 | } |
140 | case kMode_Root: { |
141 | Register base = kRootRegister; |
142 | int32_t disp = InputInt32(NextOffset(offset)); |
143 | return Operand(base, disp); |
144 | } |
145 | case kMode_None: |
146 | UNREACHABLE(); |
147 | } |
148 | UNREACHABLE(); |
149 | } |
150 | |
151 | Operand MemoryOperand(size_t first_input = 0) { |
152 | return MemoryOperand(&first_input); |
153 | } |
154 | }; |
155 | |
156 | namespace { |
157 | |
// Returns true iff the instruction's input at {index} is an immediate operand.
bool HasImmediateInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsImmediate();
}
161 | |
// Out-of-line code that materializes a float32 NaN in {result} by computing
// 0.0f / 0.0f.
class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    // Zero the register, then divide it by itself: 0/0 yields NaN.
    __ Xorps(result_, result_);
    __ Divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};
175 | |
// Out-of-line code that materializes a float64 NaN in {result} by computing
// 0.0 / 0.0 (double-precision counterpart of OutOfLineLoadFloat32NaN).
class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    // Zero the register, then divide it by itself: 0/0 yields NaN.
    __ Xorpd(result_, result_);
    __ Divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};
189 | |
// Out-of-line slow path for double-to-integer truncation: spills {input} to a
// freshly reserved stack slot, calls the DoubleToI stub/builtin (which reads
// the double at [rsp] and writes the result back there), then loads the
// result into {result} and releases the slot.
class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode,
                             UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
        stub_mode_(stub_mode),
        unwinding_info_writer_(unwinding_info_writer),
        isolate_(gen->isolate()),
        zone_(gen->zone()) {}

  void Generate() final {
    // Reserve one double-sized stack slot and keep the unwinder in sync with
    // the rsp adjustment.
    __ subq(rsp, Immediate(kDoubleSize));
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      kDoubleSize);
    __ Movsd(MemOperand(rsp, 0), input_);
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    // The callee stores the truncated value back into the stack slot.
    __ movl(result_, MemOperand(rsp, 0));
    __ addq(rsp, Immediate(kDoubleSize));
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      -kDoubleSize);
  }

 private:
  Register const result_;
  XMMRegister const input_;
  StubCallMode stub_mode_;
  UnwindingInfoWriter* const unwinding_info_writer_;
  Isolate* isolate_;
  // NOTE(review): zone_ is not referenced in Generate(); confirm it is needed.
  Zone* zone_;
};
230 | |
// Out-of-line write barrier: invoked after a store of {value} into
// {object}[{operand}] to inform the GC. Skips the barrier for Smis (when the
// mode allows) and for values on pages whose incoming pointers are not
// interesting, then calls the appropriate record-write stub.
class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
        stub_mode_(stub_mode),
        zone_(gen->zone()) {}

  void Generate() final {
    // Smis never need a barrier; only check when the mode does not already
    // guarantee the value is a pointer.
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      __ JumpIfSmi(value_, exit());
    }
    // Fast exit if the value's page is not interesting to the GC.
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    // Materialize the slot address for the stub call.
    __ leaq(scratch1_, operand_);

    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;

    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
    } else {
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode);
    }
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
  StubCallMode const stub_mode_;
  Zone* zone_;
};
285 | |
// Out-of-line code that raises a wasm trap. The trap id is taken from the
// instruction's last input; subclasses may supply it directly via
// GenerateWithTrapId().
class WasmOutOfLineTrap : public OutOfLineCode {
 public:
  WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
      : OutOfLineCode(gen), gen_(gen), instr_(instr) {}

  void Generate() override {
    X64OperandConverter i(gen_, instr_);
    // By convention the trap id is encoded as the instruction's last input.
    TrapId trap_id =
        static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
    GenerateWithTrapId(trap_id);
  }

 protected:
  CodeGenerator* gen_;

  void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }

 private:
  void GenerateCallToTrap(TrapId trap_id) {
    if (!gen_->wasm_runtime_exception_support()) {
      // We cannot test calls to the runtime in cctest/test-run-wasm.
      // Therefore we emit a call to C here instead of a call to the runtime.
      __ PrepareCallCFunction(0);
      __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
                       0);
      __ LeaveFrame(StackFrame::WASM_COMPILED);
      auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
      size_t pop_size =
          call_descriptor->StackParameterCount() * kSystemPointerSize;
      // Use rcx as a scratch register, we return anyways immediately.
      __ Ret(static_cast<int>(pop_size), rcx);
    } else {
      gen_->AssembleSourcePosition(instr_);
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
      // An empty reference map suffices: the trap call does not return, so no
      // values need to be kept alive across it.
      ReferenceMap* reference_map =
          new (gen_->zone()) ReferenceMap(gen_->zone());
      gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
                            Safepoint::kNoLazyDeopt);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    }
  }

  Instruction* instr_;
};
333 | |
// Trap handler landing pad for memory accesses protected by the trap-handler
// mechanism: registers the faulting pc so a signal at {pc_} lands here, then
// raises an out-of-bounds trap.
class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
 public:
  WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
      : WasmOutOfLineTrap(gen, instr), pc_(pc) {}

  void Generate() final {
    // Associate the protected instruction's pc with this landing pad.
    gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
    GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
  }

 private:
  int pc_;  // pc offset of the protected (possibly faulting) instruction.
};
347 | |
// If {opcode} marks the access as protected, allocates (zone-owned) a
// WasmProtectedInstructionTrap landing pad for the instruction at {pc}.
// The converter {i} is unused here; the parameter is kept so all call sites
// share one signature.
void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
                         InstructionCode opcode, Instruction* instr,
                         X64OperandConverter& i, int pc) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  if (access_mode == kMemoryAccessProtected) {
    // Zone-allocated; the OutOfLineCode base registers it with the generator.
    new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
  }
}
357 | |
// For loads marked as poisoned, masks the loaded value with the speculation
// poison register so that mis-speculated loads yield zero.
void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
                                   InstructionCode opcode, Instruction* instr,
                                   X64OperandConverter& i) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  if (access_mode == kMemoryAccessPoisoned) {
    Register value = i.OutputRegister();
    codegen->tasm()->andq(value, kSpeculationPoisonRegister);
  }
}
368 | |
369 | } // namespace |
370 | |
// Emits a unary instruction on the output, which may be a register or a
// stack slot.
#define ASSEMBLE_UNOP(asm_instr)         \
  do {                                   \
    if (instr->Output()->IsRegister()) { \
      __ asm_instr(i.OutputRegister());  \
    } else {                             \
      __ asm_instr(i.OutputOperand());   \
    }                                    \
  } while (false)

// Emits a binary instruction, dispatching on whether the right operand is a
// memory operand (addressing mode present), an immediate, a register, or a
// stack slot.
#define ASSEMBLE_BINOP(asm_instr)                                \
  do {                                                           \
    if (AddressingModeField::decode(instr->opcode()) !=          \
        kMode_None) {                                            \
      size_t index = 1;                                          \
      Operand right = i.MemoryOperand(&index);                   \
      __ asm_instr(i.InputRegister(0), right);                   \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (instr->InputAt(0)->IsRegister()) {                   \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (instr->InputAt(1)->IsRegister()) {                   \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (false)

// Emits a compare/test; like ASSEMBLE_BINOP but a memory operand may appear
// on the LEFT, in which case the remaining input is the right-hand side.
#define ASSEMBLE_COMPARE(asm_instr)                              \
  do {                                                           \
    if (AddressingModeField::decode(instr->opcode()) !=          \
        kMode_None) {                                            \
      size_t index = 0;                                          \
      Operand left = i.MemoryOperand(&index);                    \
      if (HasImmediateInput(instr, index)) {                     \
        __ asm_instr(left, i.InputImmediate(index));             \
      } else {                                                   \
        __ asm_instr(left, i.InputRegister(index));              \
      }                                                          \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (instr->InputAt(0)->IsRegister()) {                   \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (instr->InputAt(1)->IsRegister()) {                   \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (false)

// Emits a multiply: three-operand form with an immediate, otherwise the
// two-operand form accumulating into the output register.
#define ASSEMBLE_MULT(asm_instr)                              \
  do {                                                        \
    if (HasImmediateInput(instr, 1)) {                        \
      if (instr->InputAt(0)->IsRegister()) {                  \
        __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
                     i.InputImmediate(1));                    \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
                     i.InputImmediate(1));                    \
      }                                                       \
    } else {                                                  \
      if (instr->InputAt(1)->IsRegister()) {                  \
        __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
      }                                                       \
    }                                                         \
  } while (false)

// Emits a shift by an immediate count, or by cl (the _cl instruction
// variant) when the count is in a register.
#define ASSEMBLE_SHIFT(asm_instr, width)                                   \
  do {                                                                     \
    if (HasImmediateInput(instr, 1)) {                                     \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
      } else {                                                             \
        __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
      }                                                                    \
    } else {                                                               \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr##_cl(i.OutputRegister());                             \
      } else {                                                             \
        __ asm_instr##_cl(i.OutputOperand());                              \
      }                                                                    \
    }                                                                      \
  } while (false)

// Emits a sign/zero-extending move from memory, register, or stack slot into
// the output register.
#define ASSEMBLE_MOVX(asm_instr)                            \
  do {                                                      \
    if (instr->addressing_mode() != kMode_None) {           \
      __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (instr->InputAt(0)->IsRegister()) {           \
      __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (false)
476 | |
// Emits an SSE two-operand binop; input 0 is both source and destination, so
// the result is accumulated in place.
#define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
  do {                                                                  \
    if (instr->InputAt(1)->IsFPRegister()) {                            \
      __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
    } else {                                                            \
      __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
    }                                                                   \
  } while (false)

// Emits an SSE unary op from input 0 into the output FP register.
#define ASSEMBLE_SSE_UNOP(asm_instr)                                     \
  do {                                                                   \
    if (instr->InputAt(0)->IsFPRegister()) {                             \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0));  \
    } else {                                                             \
      __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));         \
    }                                                                    \
  } while (false)

// Emits an AVX three-operand binop (non-destructive destination); requires
// AVX, hence the CpuFeatureScope.
#define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
  do {                                                                 \
    CpuFeatureScope avx_scope(tasm(), AVX);                            \
    if (instr->InputAt(1)->IsFPRegister()) {                           \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
                   i.InputDoubleRegister(1));                          \
    } else {                                                           \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
                   i.InputOperand(1));                                 \
    }                                                                  \
  } while (false)

// Calls the two-argument ieee754 C helper (e.g. pow, atan2); arguments and
// result travel per the C ABI.
#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    __ PrepareCallCFunction(2);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
  } while (false)

// Calls the one-argument ieee754 C helper (e.g. sin, log).
#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    __ PrepareCallCFunction(1);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
  } while (false)
518 | |
// 32-bit atomic read-modify-write loop: load old value into rax, apply
// bin_inst into a temp, then lock-cmpxchg; retries until no other writer
// intervened (cmpxchg compares against rax).
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(rax, i.MemoryOperand(1));                       \
    __ movl(i.TempRegister(0), rax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)

// 64-bit variant of ASSEMBLE_ATOMIC_BINOP (movq for the temp copy).
#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                            \
    Label binop;                                                  \
    __ bind(&binop);                                              \
    __ mov_inst(rax, i.MemoryOperand(1));                         \
    __ movq(i.TempRegister(0), rax);                              \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
    __ lock();                                                    \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
    __ j(not_equal, &binop);                                      \
  } while (false)

// Emits a SIMD op whose source at {index} may be a register or memory.
#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index) \
  do {                                                  \
    if (instr->InputAt(index)->IsSimd128Register()) {   \
      __ opcode(dst_operand, i.InputSimd128Register(index)); \
    } else {                                            \
      __ opcode(dst_operand, i.InputOperand(index));    \
    }                                                   \
  } while (false)

// Like ASSEMBLE_SIMD_INSTR but with a trailing immediate (e.g. lane index).
#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)   \
  do {                                                             \
    if (instr->InputAt(index)->IsSimd128Register()) {              \
      __ opcode(dst_operand, i.InputSimd128Register(index), imm);  \
    } else {                                                       \
      __ opcode(dst_operand, i.InputOperand(index), imm);          \
    }                                                              \
  } while (false)

// Punpck-style shuffle: destination must equal input 0 (destructive form);
// with a single input the op unpacks the register with itself.
#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
  do {                                                   \
    XMMRegister dst = i.OutputSimd128Register();         \
    DCHECK_EQ(dst, i.InputSimd128Register(0));           \
    byte input_index = instr->InputCount() == 2 ? 1 : 0; \
    ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
  } while (false)

// Immediate-controlled shuffle requiring the given SSE level; destructive,
// so output must alias input 0.
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)               \
  do {                                                                 \
    CpuFeatureScope sse_scope(tasm(), SSELevel);                       \
    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
    __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
  } while (false)
575 | |
// Tears down the standard frame: restores rsp from rbp and pops the saved
// frame pointer, notifying the unwinding info writer first.
void CodeGenerator::AssembleDeconstructFrame() {
  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
  __ movq(rsp, rbp);
  __ popq(rbp);
}
581 | |
// Prepares for a tail call: restores the caller's frame pointer (saved at
// [rbp]) when a frame exists, then switches frame accesses to be rsp-based.
void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ movq(rbp, MemOperand(rbp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}
588 | |
// If the current frame is an arguments adaptor frame, drops it and its
// pushed arguments before a tail call; otherwise does nothing. {args_reg}
// holds the callee's argument count; the scratch registers must all differ.
void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register scratch1,
                                                     Register scratch2,
                                                     Register scratch3) {
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if current frame is an arguments adaptor frame.
  __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
          Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ j(not_equal, &done, Label::kNear);

  // Load arguments count from current arguments adaptor frame (note, it
  // does not include receiver).
  Register caller_args_count_reg = scratch1;
  __ SmiUntag(caller_args_count_reg,
              Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));

  ParameterCount callee_args_count(args_reg);
  __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
                        scratch3);
  __ bind(&done);
}
612 | |
613 | namespace { |
614 | |
615 | void AdjustStackPointerForTailCall(Assembler* assembler, |
616 | FrameAccessState* state, |
617 | int new_slot_above_sp, |
618 | bool allow_shrinkage = true) { |
619 | int current_sp_offset = state->GetSPToFPSlotCount() + |
620 | StandardFrameConstants::kFixedSlotCountAboveFp; |
621 | int stack_slot_delta = new_slot_above_sp - current_sp_offset; |
622 | if (stack_slot_delta > 0) { |
623 | assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize)); |
624 | state->IncreaseSPDelta(stack_slot_delta); |
625 | } else if (allow_shrinkage && stack_slot_delta < 0) { |
626 | assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize)); |
627 | state->IncreaseSPDelta(stack_slot_delta); |
628 | } |
629 | } |
630 | |
631 | void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) { |
632 | int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32); |
633 | assembler->movq(kScratchRegister, shuffle_mask); |
634 | assembler->Push(kScratchRegister); |
635 | shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32); |
636 | assembler->movq(kScratchRegister, shuffle_mask); |
637 | assembler->Push(kScratchRegister); |
638 | } |
639 | |
640 | } // namespace |
641 | |
// Before the gap moves of a tail call: turns the trailing run of
// push-compatible parallel moves into actual pushes (cheaper than
// sub-rsp + mov), then adjusts rsp to the final expected position.
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  // Only use pushes when the last push lands exactly at the first unused
  // slot, i.e. the moves form a contiguous run ending at the stack top.
  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    X64OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      // Bring rsp to just above this push's destination slot.
      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
      } else {
        // Pushes of non-scalar data types is not supported.
        UNIMPLEMENTED();
      }
      frame_access_state()->IncreaseSPDelta(1);
      // The move has been materialized as a push; remove it from the gap.
      move->Eliminate();
    }
  }
  // Final adjustment; shrinkage is disallowed so pushed args are preserved.
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, false);
}
677 | |
// After the gap moves of a tail call: performs the final stack pointer
// adjustment (shrinkage allowed) to reach {first_unused_stack_slot}.
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
}
683 | |
684 | // Check that {kJavaScriptCallCodeStartRegister} is correct. |
// Debug check: recomputes the code start address into rbx and asserts that
// it matches kJavaScriptCallCodeStartRegister.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  __ ComputeCodeStartAddress(rbx);
  __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
}
690 | |
691 | // Check if the code object is marked for deoptimization. If it is, then it |
692 | // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need |
693 | // to: |
694 | // 1. read from memory the word that contains that bit, which can be found in |
695 | // the flags in the referenced {CodeDataContainer} object; |
696 | // 2. test kMarkedForDeoptimizationBit in those flags; and |
697 | // 3. if it is not zero then it jumps to the builtin. |
void CodeGenerator::BailoutIfDeoptimized() {
  // The code start register points at the Code object's body, so compensate
  // for the header when loading the CodeDataContainer field.
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ LoadTaggedPointerField(rbx,
                            Operand(kJavaScriptCallCodeStartRegister, offset));
  __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
           Immediate(1 << Code::kMarkedForDeoptimizationBit));
  // Conditional jump: only taken when the deopt bit is set.
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET, not_zero);
}
707 | |
void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  // Set a mask which has all bits set in the normal case, but has all
  // bits cleared if we are speculatively executing the wrong PC.
  __ ComputeCodeStartAddress(rbx);
  // Start from all-zero poison, then conditionally replace with all-ones
  // (-1) when the code start register matches the recomputed address.
  __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
  __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
  __ movq(rbx, Immediate(-1));
  __ cmovq(equal, kSpeculationPoisonRegister, rbx);
}
717 | |
// Masks the incoming function, context, and stack pointer with the
// speculation poison so mis-speculated execution operates on zeroed values.
void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
  __ andq(kContextRegister, kSpeculationPoisonRegister);
  __ andq(rsp, kSpeculationPoisonRegister);
}
723 | |
724 | // Assembles an instruction after register allocation, producing machine code. |
725 | CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
726 | Instruction* instr) { |
727 | X64OperandConverter i(this, instr); |
728 | InstructionCode opcode = instr->opcode(); |
729 | ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode); |
730 | switch (arch_opcode) { |
731 | case kArchCallCodeObject: { |
732 | if (HasImmediateInput(instr, 0)) { |
733 | Handle<Code> code = i.InputCode(0); |
734 | __ Call(code, RelocInfo::CODE_TARGET); |
735 | } else { |
736 | Register reg = i.InputRegister(0); |
737 | DCHECK_IMPLIES( |
738 | HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), |
739 | reg == kJavaScriptCallCodeStartRegister); |
740 | __ LoadCodeObjectEntry(reg, reg); |
741 | if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
742 | __ RetpolineCall(reg); |
743 | } else { |
744 | __ call(reg); |
745 | } |
746 | } |
747 | RecordCallPosition(instr); |
748 | frame_access_state()->ClearSPDelta(); |
749 | break; |
750 | } |
751 | case kArchCallBuiltinPointer: { |
752 | DCHECK(!HasImmediateInput(instr, 0)); |
753 | Register builtin_pointer = i.InputRegister(0); |
754 | __ CallBuiltinPointer(builtin_pointer); |
755 | RecordCallPosition(instr); |
756 | frame_access_state()->ClearSPDelta(); |
757 | break; |
758 | } |
759 | case kArchCallWasmFunction: { |
760 | if (HasImmediateInput(instr, 0)) { |
761 | Constant constant = i.ToConstant(instr->InputAt(0)); |
762 | Address wasm_code = static_cast<Address>(constant.ToInt64()); |
763 | if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) { |
764 | __ near_call(wasm_code, constant.rmode()); |
765 | } else { |
766 | if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
767 | __ RetpolineCall(wasm_code, constant.rmode()); |
768 | } else { |
769 | __ Call(wasm_code, constant.rmode()); |
770 | } |
771 | } |
772 | } else { |
773 | Register reg = i.InputRegister(0); |
774 | if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
775 | __ RetpolineCall(reg); |
776 | } else { |
777 | __ call(reg); |
778 | } |
779 | } |
780 | RecordCallPosition(instr); |
781 | frame_access_state()->ClearSPDelta(); |
782 | break; |
783 | } |
784 | case kArchTailCallCodeObjectFromJSFunction: |
785 | case kArchTailCallCodeObject: { |
786 | if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) { |
787 | AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister, |
788 | i.TempRegister(0), i.TempRegister(1), |
789 | i.TempRegister(2)); |
790 | } |
791 | if (HasImmediateInput(instr, 0)) { |
792 | Handle<Code> code = i.InputCode(0); |
793 | __ Jump(code, RelocInfo::CODE_TARGET); |
794 | } else { |
795 | Register reg = i.InputRegister(0); |
796 | DCHECK_IMPLIES( |
797 | HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), |
798 | reg == kJavaScriptCallCodeStartRegister); |
799 | __ LoadCodeObjectEntry(reg, reg); |
800 | if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
801 | __ RetpolineJump(reg); |
802 | } else { |
803 | __ jmp(reg); |
804 | } |
805 | } |
806 | unwinding_info_writer_.MarkBlockWillExit(); |
807 | frame_access_state()->ClearSPDelta(); |
808 | frame_access_state()->SetFrameAccessToDefault(); |
809 | break; |
810 | } |
811 | case kArchTailCallWasm: { |
812 | if (HasImmediateInput(instr, 0)) { |
813 | Constant constant = i.ToConstant(instr->InputAt(0)); |
814 | Address wasm_code = static_cast<Address>(constant.ToInt64()); |
815 | if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) { |
816 | __ near_jmp(wasm_code, constant.rmode()); |
817 | } else { |
818 | __ Move(kScratchRegister, wasm_code, constant.rmode()); |
819 | __ jmp(kScratchRegister); |
820 | } |
821 | } else { |
822 | Register reg = i.InputRegister(0); |
823 | if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
824 | __ RetpolineJump(reg); |
825 | } else { |
826 | __ jmp(reg); |
827 | } |
828 | } |
829 | unwinding_info_writer_.MarkBlockWillExit(); |
830 | frame_access_state()->ClearSPDelta(); |
831 | frame_access_state()->SetFrameAccessToDefault(); |
832 | break; |
833 | } |
834 | case kArchTailCallAddress: { |
835 | CHECK(!HasImmediateInput(instr, 0)); |
836 | Register reg = i.InputRegister(0); |
837 | DCHECK_IMPLIES( |
838 | HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), |
839 | reg == kJavaScriptCallCodeStartRegister); |
840 | if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { |
841 | __ RetpolineJump(reg); |
842 | } else { |
843 | __ jmp(reg); |
844 | } |
845 | unwinding_info_writer_.MarkBlockWillExit(); |
846 | frame_access_state()->ClearSPDelta(); |
847 | frame_access_state()->SetFrameAccessToDefault(); |
848 | break; |
849 | } |
850 | case kArchCallJSFunction: { |
851 | Register func = i.InputRegister(0); |
852 | if (FLAG_debug_code) { |
853 | // Check the function's context matches the context argument. |
854 | __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset)); |
855 | __ Assert(equal, AbortReason::kWrongFunctionContext); |
856 | } |
857 | static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch" ); |
858 | __ LoadTaggedPointerField(rcx, |
859 | FieldOperand(func, JSFunction::kCodeOffset)); |
860 | __ CallCodeObject(rcx); |
861 | frame_access_state()->ClearSPDelta(); |
862 | RecordCallPosition(instr); |
863 | break; |
864 | } |
865 | case kArchPrepareCallCFunction: { |
866 | // Frame alignment requires using FP-relative frame addressing. |
867 | frame_access_state()->SetFrameAccessToFP(); |
868 | int const num_parameters = MiscField::decode(instr->opcode()); |
869 | __ PrepareCallCFunction(num_parameters); |
870 | break; |
871 | } |
872 | case kArchSaveCallerRegisters: { |
873 | fp_mode_ = |
874 | static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())); |
875 | DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); |
876 | // kReturnRegister0 should have been saved before entering the stub. |
877 | int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0); |
878 | DCHECK(IsAligned(bytes, kSystemPointerSize)); |
879 | DCHECK_EQ(0, frame_access_state()->sp_delta()); |
880 | frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); |
881 | DCHECK(!caller_registers_saved_); |
882 | caller_registers_saved_ = true; |
883 | break; |
884 | } |
885 | case kArchRestoreCallerRegisters: { |
886 | DCHECK(fp_mode_ == |
887 | static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()))); |
888 | DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); |
889 | // Don't overwrite the returned value. |
890 | int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0); |
891 | frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize)); |
892 | DCHECK_EQ(0, frame_access_state()->sp_delta()); |
893 | DCHECK(caller_registers_saved_); |
894 | caller_registers_saved_ = false; |
895 | break; |
896 | } |
897 | case kArchPrepareTailCall: |
898 | AssemblePrepareTailCall(); |
899 | break; |
900 | case kArchCallCFunction: { |
901 | int const num_parameters = MiscField::decode(instr->opcode()); |
902 | if (HasImmediateInput(instr, 0)) { |
903 | ExternalReference ref = i.InputExternalReference(0); |
904 | __ CallCFunction(ref, num_parameters); |
905 | } else { |
906 | Register func = i.InputRegister(0); |
907 | __ CallCFunction(func, num_parameters); |
908 | } |
909 | frame_access_state()->SetFrameAccessToDefault(); |
910 | // Ideally, we should decrement SP delta to match the change of stack |
911 | // pointer in CallCFunction. However, for certain architectures (e.g. |
912 | // ARM), there may be more strict alignment requirement, causing old SP |
913 | // to be saved on the stack. In those cases, we can not calculate the SP |
914 | // delta statically. |
915 | frame_access_state()->ClearSPDelta(); |
916 | if (caller_registers_saved_) { |
917 | // Need to re-sync SP delta introduced in kArchSaveCallerRegisters. |
918 | // Here, we assume the sequence to be: |
919 | // kArchSaveCallerRegisters; |
920 | // kArchCallCFunction; |
921 | // kArchRestoreCallerRegisters; |
922 | int bytes = |
923 | __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0); |
924 | frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); |
925 | } |
926 | // TODO(tebbi): Do we need an lfence here? |
927 | break; |
928 | } |
929 | case kArchJmp: |
930 | AssembleArchJump(i.InputRpo(0)); |
931 | break; |
932 | case kArchBinarySearchSwitch: |
933 | AssembleArchBinarySearchSwitch(instr); |
934 | break; |
935 | case kArchLookupSwitch: |
936 | AssembleArchLookupSwitch(instr); |
937 | break; |
938 | case kArchTableSwitch: |
939 | AssembleArchTableSwitch(instr); |
940 | break; |
941 | case kArchComment: |
942 | __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0))); |
943 | break; |
944 | case kArchDebugAbort: |
945 | DCHECK(i.InputRegister(0) == rdx); |
946 | if (!frame_access_state()->has_frame()) { |
947 | // We don't actually want to generate a pile of code for this, so just |
948 | // claim there is a stack frame, without generating one. |
949 | FrameScope scope(tasm(), StackFrame::NONE); |
950 | __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS), |
951 | RelocInfo::CODE_TARGET); |
952 | } else { |
953 | __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS), |
954 | RelocInfo::CODE_TARGET); |
955 | } |
956 | __ int3(); |
957 | unwinding_info_writer_.MarkBlockWillExit(); |
958 | break; |
959 | case kArchDebugBreak: |
960 | __ int3(); |
961 | break; |
962 | case kArchThrowTerminator: |
963 | unwinding_info_writer_.MarkBlockWillExit(); |
964 | break; |
965 | case kArchNop: |
966 | // don't emit code for nops. |
967 | break; |
968 | case kArchDeoptimize: { |
969 | int deopt_state_id = |
970 | BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); |
971 | CodeGenResult result = |
972 | AssembleDeoptimizerCall(deopt_state_id, current_source_position_); |
973 | if (result != kSuccess) return result; |
974 | unwinding_info_writer_.MarkBlockWillExit(); |
975 | break; |
976 | } |
977 | case kArchRet: |
978 | AssembleReturn(instr->InputAt(0)); |
979 | break; |
980 | case kArchStackPointer: |
981 | __ movq(i.OutputRegister(), rsp); |
982 | break; |
983 | case kArchFramePointer: |
984 | __ movq(i.OutputRegister(), rbp); |
985 | break; |
986 | case kArchParentFramePointer: |
987 | if (frame_access_state()->has_frame()) { |
988 | __ movq(i.OutputRegister(), Operand(rbp, 0)); |
989 | } else { |
990 | __ movq(i.OutputRegister(), rbp); |
991 | } |
992 | break; |
    case kArchTruncateDoubleToI: {
      // Truncate a float64 to int32 with JS semantics: the fast path uses a
      // single SSE conversion, the out-of-line path handles values outside
      // int64 range (and NaN) via a stub call.
      auto result = i.OutputRegister();
      auto input = i.InputDoubleRegister(0);
      auto ool = new (zone()) OutOfLineTruncateDoubleToI(
          this, result, input, DetermineStubCallMode(),
          &unwinding_info_writer_);
      // We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
      // use of Cvttsd2siq requires the movl below to avoid sign extension.
      __ Cvttsd2siq(result, input);
      // On overflow/NaN, cvttsd2si produces 0x8000000000000000 (INT64_MIN).
      // Comparing against 1 sets the overflow flag exactly for that value
      // (INT64_MIN - 1 overflows), so j(overflow) catches the failure case.
      __ cmpq(result, Immediate(1));
      __ j(overflow, ool->entry());
      __ bind(ool->exit());
      // Zero the upper 32 bits (see comment above).
      __ movl(result, result);
      break;
    }
    case kArchStoreWithWriteBarrier: {
      // Store a tagged value into an object and emit the GC write barrier.
      RecordWriteMode mode =
          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      Register object = i.InputRegister(0);
      size_t index = 0;
      // MemoryOperand advances |index| past the inputs it consumes, so the
      // value to store is the next remaining input.
      Operand operand = i.MemoryOperand(&index);
      Register value = i.InputRegister(index);
      Register scratch0 = i.TempRegister(0);
      Register scratch1 = i.TempRegister(1);
      // Slow path: records the slot in the remembered set / marks the value.
      auto ool = new (zone())
          OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
                               mode, DetermineStubCallMode());
      __ StoreTaggedField(operand, value);
      // Fast path: skip the barrier entirely unless the page containing
      // |object| is flagged as having interesting (old-to-new) pointers.
      __ CheckPageFlag(object, scratch0,
                       MemoryChunk::kPointersFromHereAreInterestingMask,
                       not_zero, ool->entry());
      __ bind(ool->exit());
      break;
    }
1027 | case kArchWordPoisonOnSpeculation: |
1028 | DCHECK_EQ(i.OutputRegister(), i.InputRegister(0)); |
1029 | __ andq(i.InputRegister(0), kSpeculationPoisonRegister); |
1030 | break; |
1031 | case kLFence: |
1032 | __ lfence(); |
1033 | break; |
1034 | case kArchStackSlot: { |
1035 | FrameOffset offset = |
1036 | frame_access_state()->GetFrameOffset(i.InputInt32(0)); |
1037 | Register base = offset.from_stack_pointer() ? rsp : rbp; |
1038 | __ leaq(i.OutputRegister(), Operand(base, offset.offset())); |
1039 | break; |
1040 | } |
1041 | case kIeee754Float64Acos: |
1042 | ASSEMBLE_IEEE754_UNOP(acos); |
1043 | break; |
1044 | case kIeee754Float64Acosh: |
1045 | ASSEMBLE_IEEE754_UNOP(acosh); |
1046 | break; |
1047 | case kIeee754Float64Asin: |
1048 | ASSEMBLE_IEEE754_UNOP(asin); |
1049 | break; |
1050 | case kIeee754Float64Asinh: |
1051 | ASSEMBLE_IEEE754_UNOP(asinh); |
1052 | break; |
1053 | case kIeee754Float64Atan: |
1054 | ASSEMBLE_IEEE754_UNOP(atan); |
1055 | break; |
1056 | case kIeee754Float64Atanh: |
1057 | ASSEMBLE_IEEE754_UNOP(atanh); |
1058 | break; |
1059 | case kIeee754Float64Atan2: |
1060 | ASSEMBLE_IEEE754_BINOP(atan2); |
1061 | break; |
1062 | case kIeee754Float64Cbrt: |
1063 | ASSEMBLE_IEEE754_UNOP(cbrt); |
1064 | break; |
1065 | case kIeee754Float64Cos: |
1066 | ASSEMBLE_IEEE754_UNOP(cos); |
1067 | break; |
1068 | case kIeee754Float64Cosh: |
1069 | ASSEMBLE_IEEE754_UNOP(cosh); |
1070 | break; |
1071 | case kIeee754Float64Exp: |
1072 | ASSEMBLE_IEEE754_UNOP(exp); |
1073 | break; |
1074 | case kIeee754Float64Expm1: |
1075 | ASSEMBLE_IEEE754_UNOP(expm1); |
1076 | break; |
1077 | case kIeee754Float64Log: |
1078 | ASSEMBLE_IEEE754_UNOP(log); |
1079 | break; |
1080 | case kIeee754Float64Log1p: |
1081 | ASSEMBLE_IEEE754_UNOP(log1p); |
1082 | break; |
1083 | case kIeee754Float64Log2: |
1084 | ASSEMBLE_IEEE754_UNOP(log2); |
1085 | break; |
1086 | case kIeee754Float64Log10: |
1087 | ASSEMBLE_IEEE754_UNOP(log10); |
1088 | break; |
1089 | case kIeee754Float64Pow: |
1090 | ASSEMBLE_IEEE754_BINOP(pow); |
1091 | break; |
1092 | case kIeee754Float64Sin: |
1093 | ASSEMBLE_IEEE754_UNOP(sin); |
1094 | break; |
1095 | case kIeee754Float64Sinh: |
1096 | ASSEMBLE_IEEE754_UNOP(sinh); |
1097 | break; |
1098 | case kIeee754Float64Tan: |
1099 | ASSEMBLE_IEEE754_UNOP(tan); |
1100 | break; |
1101 | case kIeee754Float64Tanh: |
1102 | ASSEMBLE_IEEE754_UNOP(tanh); |
1103 | break; |
1104 | case kX64Add32: |
1105 | ASSEMBLE_BINOP(addl); |
1106 | break; |
1107 | case kX64Add: |
1108 | ASSEMBLE_BINOP(addq); |
1109 | break; |
1110 | case kX64Sub32: |
1111 | ASSEMBLE_BINOP(subl); |
1112 | break; |
1113 | case kX64Sub: |
1114 | ASSEMBLE_BINOP(subq); |
1115 | break; |
1116 | case kX64And32: |
1117 | ASSEMBLE_BINOP(andl); |
1118 | break; |
1119 | case kX64And: |
1120 | ASSEMBLE_BINOP(andq); |
1121 | break; |
1122 | case kX64Cmp8: |
1123 | ASSEMBLE_COMPARE(cmpb); |
1124 | break; |
1125 | case kX64Cmp16: |
1126 | ASSEMBLE_COMPARE(cmpw); |
1127 | break; |
1128 | case kX64Cmp32: |
1129 | ASSEMBLE_COMPARE(cmpl); |
1130 | break; |
1131 | case kX64Cmp: |
1132 | ASSEMBLE_COMPARE(cmpq); |
1133 | break; |
1134 | case kX64Test8: |
1135 | ASSEMBLE_COMPARE(testb); |
1136 | break; |
1137 | case kX64Test16: |
1138 | ASSEMBLE_COMPARE(testw); |
1139 | break; |
1140 | case kX64Test32: |
1141 | ASSEMBLE_COMPARE(testl); |
1142 | break; |
1143 | case kX64Test: |
1144 | ASSEMBLE_COMPARE(testq); |
1145 | break; |
1146 | case kX64Imul32: |
1147 | ASSEMBLE_MULT(imull); |
1148 | break; |
1149 | case kX64Imul: |
1150 | ASSEMBLE_MULT(imulq); |
1151 | break; |
1152 | case kX64ImulHigh32: |
1153 | if (instr->InputAt(1)->IsRegister()) { |
1154 | __ imull(i.InputRegister(1)); |
1155 | } else { |
1156 | __ imull(i.InputOperand(1)); |
1157 | } |
1158 | break; |
1159 | case kX64UmulHigh32: |
1160 | if (instr->InputAt(1)->IsRegister()) { |
1161 | __ mull(i.InputRegister(1)); |
1162 | } else { |
1163 | __ mull(i.InputOperand(1)); |
1164 | } |
1165 | break; |
1166 | case kX64Idiv32: |
1167 | __ cdq(); |
1168 | __ idivl(i.InputRegister(1)); |
1169 | break; |
1170 | case kX64Idiv: |
1171 | __ cqo(); |
1172 | __ idivq(i.InputRegister(1)); |
1173 | break; |
1174 | case kX64Udiv32: |
1175 | __ xorl(rdx, rdx); |
1176 | __ divl(i.InputRegister(1)); |
1177 | break; |
1178 | case kX64Udiv: |
1179 | __ xorq(rdx, rdx); |
1180 | __ divq(i.InputRegister(1)); |
1181 | break; |
1182 | case kX64Not: |
1183 | ASSEMBLE_UNOP(notq); |
1184 | break; |
1185 | case kX64Not32: |
1186 | ASSEMBLE_UNOP(notl); |
1187 | break; |
1188 | case kX64Neg: |
1189 | ASSEMBLE_UNOP(negq); |
1190 | break; |
1191 | case kX64Neg32: |
1192 | ASSEMBLE_UNOP(negl); |
1193 | break; |
1194 | case kX64Or32: |
1195 | ASSEMBLE_BINOP(orl); |
1196 | break; |
1197 | case kX64Or: |
1198 | ASSEMBLE_BINOP(orq); |
1199 | break; |
1200 | case kX64Xor32: |
1201 | ASSEMBLE_BINOP(xorl); |
1202 | break; |
1203 | case kX64Xor: |
1204 | ASSEMBLE_BINOP(xorq); |
1205 | break; |
1206 | case kX64Shl32: |
1207 | ASSEMBLE_SHIFT(shll, 5); |
1208 | break; |
1209 | case kX64Shl: |
1210 | ASSEMBLE_SHIFT(shlq, 6); |
1211 | break; |
1212 | case kX64Shr32: |
1213 | ASSEMBLE_SHIFT(shrl, 5); |
1214 | break; |
1215 | case kX64Shr: |
1216 | ASSEMBLE_SHIFT(shrq, 6); |
1217 | break; |
1218 | case kX64Sar32: |
1219 | ASSEMBLE_SHIFT(sarl, 5); |
1220 | break; |
1221 | case kX64Sar: |
1222 | ASSEMBLE_SHIFT(sarq, 6); |
1223 | break; |
1224 | case kX64Ror32: |
1225 | ASSEMBLE_SHIFT(rorl, 5); |
1226 | break; |
1227 | case kX64Ror: |
1228 | ASSEMBLE_SHIFT(rorq, 6); |
1229 | break; |
1230 | case kX64Lzcnt: |
1231 | if (instr->InputAt(0)->IsRegister()) { |
1232 | __ Lzcntq(i.OutputRegister(), i.InputRegister(0)); |
1233 | } else { |
1234 | __ Lzcntq(i.OutputRegister(), i.InputOperand(0)); |
1235 | } |
1236 | break; |
1237 | case kX64Lzcnt32: |
1238 | if (instr->InputAt(0)->IsRegister()) { |
1239 | __ Lzcntl(i.OutputRegister(), i.InputRegister(0)); |
1240 | } else { |
1241 | __ Lzcntl(i.OutputRegister(), i.InputOperand(0)); |
1242 | } |
1243 | break; |
1244 | case kX64Tzcnt: |
1245 | if (instr->InputAt(0)->IsRegister()) { |
1246 | __ Tzcntq(i.OutputRegister(), i.InputRegister(0)); |
1247 | } else { |
1248 | __ Tzcntq(i.OutputRegister(), i.InputOperand(0)); |
1249 | } |
1250 | break; |
1251 | case kX64Tzcnt32: |
1252 | if (instr->InputAt(0)->IsRegister()) { |
1253 | __ Tzcntl(i.OutputRegister(), i.InputRegister(0)); |
1254 | } else { |
1255 | __ Tzcntl(i.OutputRegister(), i.InputOperand(0)); |
1256 | } |
1257 | break; |
1258 | case kX64Popcnt: |
1259 | if (instr->InputAt(0)->IsRegister()) { |
1260 | __ Popcntq(i.OutputRegister(), i.InputRegister(0)); |
1261 | } else { |
1262 | __ Popcntq(i.OutputRegister(), i.InputOperand(0)); |
1263 | } |
1264 | break; |
1265 | case kX64Popcnt32: |
1266 | if (instr->InputAt(0)->IsRegister()) { |
1267 | __ Popcntl(i.OutputRegister(), i.InputRegister(0)); |
1268 | } else { |
1269 | __ Popcntl(i.OutputRegister(), i.InputOperand(0)); |
1270 | } |
1271 | break; |
1272 | case kX64Bswap: |
1273 | __ bswapq(i.OutputRegister()); |
1274 | break; |
1275 | case kX64Bswap32: |
1276 | __ bswapl(i.OutputRegister()); |
1277 | break; |
1278 | case kSSEFloat32Cmp: |
1279 | ASSEMBLE_SSE_BINOP(Ucomiss); |
1280 | break; |
1281 | case kSSEFloat32Add: |
1282 | ASSEMBLE_SSE_BINOP(addss); |
1283 | break; |
1284 | case kSSEFloat32Sub: |
1285 | ASSEMBLE_SSE_BINOP(subss); |
1286 | break; |
1287 | case kSSEFloat32Mul: |
1288 | ASSEMBLE_SSE_BINOP(mulss); |
1289 | break; |
1290 | case kSSEFloat32Div: |
1291 | ASSEMBLE_SSE_BINOP(divss); |
1292 | // Don't delete this mov. It may improve performance on some CPUs, |
1293 | // when there is a (v)mulss depending on the result. |
1294 | __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); |
1295 | break; |
1296 | case kSSEFloat32Abs: { |
1297 | // TODO(bmeurer): Use RIP relative 128-bit constants. |
1298 | __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
1299 | __ psrlq(kScratchDoubleReg, 33); |
1300 | __ andps(i.OutputDoubleRegister(), kScratchDoubleReg); |
1301 | break; |
1302 | } |
1303 | case kSSEFloat32Neg: { |
1304 | // TODO(bmeurer): Use RIP relative 128-bit constants. |
1305 | __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
1306 | __ psllq(kScratchDoubleReg, 31); |
1307 | __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg); |
1308 | break; |
1309 | } |
1310 | case kSSEFloat32Sqrt: |
1311 | ASSEMBLE_SSE_UNOP(sqrtss); |
1312 | break; |
1313 | case kSSEFloat32ToFloat64: |
1314 | ASSEMBLE_SSE_UNOP(Cvtss2sd); |
1315 | break; |
1316 | case kSSEFloat32Round: { |
1317 | CpuFeatureScope sse_scope(tasm(), SSE4_1); |
1318 | RoundingMode const mode = |
1319 | static_cast<RoundingMode>(MiscField::decode(instr->opcode())); |
1320 | __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode); |
1321 | break; |
1322 | } |
1323 | case kSSEFloat32ToInt32: |
1324 | if (instr->InputAt(0)->IsFPRegister()) { |
1325 | __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0)); |
1326 | } else { |
1327 | __ Cvttss2si(i.OutputRegister(), i.InputOperand(0)); |
1328 | } |
1329 | break; |
1330 | case kSSEFloat32ToUint32: { |
1331 | if (instr->InputAt(0)->IsFPRegister()) { |
1332 | __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0)); |
1333 | } else { |
1334 | __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0)); |
1335 | } |
1336 | break; |
1337 | } |
1338 | case kSSEFloat64Cmp: |
1339 | ASSEMBLE_SSE_BINOP(Ucomisd); |
1340 | break; |
1341 | case kSSEFloat64Add: |
1342 | ASSEMBLE_SSE_BINOP(addsd); |
1343 | break; |
1344 | case kSSEFloat64Sub: |
1345 | ASSEMBLE_SSE_BINOP(subsd); |
1346 | break; |
1347 | case kSSEFloat64Mul: |
1348 | ASSEMBLE_SSE_BINOP(mulsd); |
1349 | break; |
1350 | case kSSEFloat64Div: |
1351 | ASSEMBLE_SSE_BINOP(divsd); |
1352 | // Don't delete this mov. It may improve performance on some CPUs, |
1353 | // when there is a (v)mulsd depending on the result. |
1354 | __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister()); |
1355 | break; |
    case kSSEFloat64Mod: {
      // Float64 modulus. SSE has no remainder instruction, so round-trip the
      // operands through the x87 FPU and use fprem (C-style truncated
      // remainder), staging values through a stack slot.
      __ subq(rsp, Immediate(kDoubleSize));
      unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                       kDoubleSize);
      // Move values to st(0) and st(1): divisor first, then dividend on top.
      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
      __ fld_d(Operand(rsp, 0));
      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
      __ fld_d(Operand(rsp, 0));
      // fprem computes only a partial remainder; loop until it reports
      // completion (C2 status flag clear).
      Label mod_loop;
      __ bind(&mod_loop);
      // This instruction traps on all kinds of inputs, but we are assuming
      // the floating point control word is set to ignore them all.
      __ fprem();
      // The following 2 instructions implicitly use rax: fnstsw_ax copies the
      // FPU status word (C2 lands in the parity-flag position) into ax.
      __ fnstsw_ax();
      if (CpuFeatures::IsSupported(SAHF)) {
        CpuFeatureScope sahf_scope(tasm(), SAHF);
        __ sahf();
      } else {
        // No SAHF (early Intel 64): transfer ah into EFLAGS via push/popfq.
        __ shrl(rax, Immediate(8));
        __ andl(rax, Immediate(0xFF));
        __ pushq(rax);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popfq();
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
      }
      // Parity flag now holds C2: set means fprem needs another iteration.
      __ j(parity_even, &mod_loop);
      // Move output to stack and clean up.
      __ fstp(1);
      __ fstp_d(Operand(rsp, 0));
      __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
      __ addq(rsp, Immediate(kDoubleSize));
      unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                       -kDoubleSize);
      break;
    }
    case kSSEFloat32Max: {
      // Float32 max with full JS semantics: NaN-propagating and
      // distinguishing -0 from +0 (max(-0, +0) must be +0).
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
      // Unordered comparison (either operand NaN) sets PF: produce NaN.
      __ j(parity_even, ool->entry());
      // input0 > input1: keep input0.
      __ j(above, &done_compare, Label::kNear);
      // input0 < input1: take input1.
      __ j(below, &compare_swap, Label::kNear);
      // Equal case — could be -0 vs +0. Check input0's sign bit (bit 0 of
      // movmskps): if it is set, input0 is -0, so prefer input1.
      __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
      __ testl(kScratchRegister, Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
1421 | case kSSEFloat32Min: { |
1422 | Label compare_swap, done_compare; |
1423 | if (instr->InputAt(1)->IsFPRegister()) { |
1424 | __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
1425 | } else { |
1426 | __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1)); |
1427 | } |
1428 | auto ool = |
1429 | new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister()); |
1430 | __ j(parity_even, ool->entry()); |
1431 | __ j(below, &done_compare, Label::kNear); |
1432 | __ j(above, &compare_swap, Label::kNear); |
1433 | if (instr->InputAt(1)->IsFPRegister()) { |
1434 | __ Movmskps(kScratchRegister, i.InputDoubleRegister(1)); |
1435 | } else { |
1436 | __ Movss(kScratchDoubleReg, i.InputOperand(1)); |
1437 | __ Movmskps(kScratchRegister, kScratchDoubleReg); |
1438 | } |
1439 | __ testl(kScratchRegister, Immediate(1)); |
1440 | __ j(zero, &done_compare, Label::kNear); |
1441 | __ bind(&compare_swap); |
1442 | if (instr->InputAt(1)->IsFPRegister()) { |
1443 | __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
1444 | } else { |
1445 | __ Movss(i.InputDoubleRegister(0), i.InputOperand(1)); |
1446 | } |
1447 | __ bind(&done_compare); |
1448 | __ bind(ool->exit()); |
1449 | break; |
1450 | } |
1451 | case kSSEFloat64Max: { |
1452 | Label compare_swap, done_compare; |
1453 | if (instr->InputAt(1)->IsFPRegister()) { |
1454 | __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
1455 | } else { |
1456 | __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); |
1457 | } |
1458 | auto ool = |
1459 | new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister()); |
1460 | __ j(parity_even, ool->entry()); |
1461 | __ j(above, &done_compare, Label::kNear); |
1462 | __ j(below, &compare_swap, Label::kNear); |
1463 | __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0)); |
1464 | __ testl(kScratchRegister, Immediate(1)); |
1465 | __ j(zero, &done_compare, Label::kNear); |
1466 | __ bind(&compare_swap); |
1467 | if (instr->InputAt(1)->IsFPRegister()) { |
1468 | __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
1469 | } else { |
1470 | __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1)); |
1471 | } |
1472 | __ bind(&done_compare); |
1473 | __ bind(ool->exit()); |
1474 | break; |
1475 | } |
1476 | case kSSEFloat64Min: { |
1477 | Label compare_swap, done_compare; |
1478 | if (instr->InputAt(1)->IsFPRegister()) { |
1479 | __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
1480 | } else { |
1481 | __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); |
1482 | } |
1483 | auto ool = |
1484 | new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister()); |
1485 | __ j(parity_even, ool->entry()); |
1486 | __ j(below, &done_compare, Label::kNear); |
1487 | __ j(above, &compare_swap, Label::kNear); |
1488 | if (instr->InputAt(1)->IsFPRegister()) { |
1489 | __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1)); |
1490 | } else { |
1491 | __ Movsd(kScratchDoubleReg, i.InputOperand(1)); |
1492 | __ Movmskpd(kScratchRegister, kScratchDoubleReg); |
1493 | } |
1494 | __ testl(kScratchRegister, Immediate(1)); |
1495 | __ j(zero, &done_compare, Label::kNear); |
1496 | __ bind(&compare_swap); |
1497 | if (instr->InputAt(1)->IsFPRegister()) { |
1498 | __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); |
1499 | } else { |
1500 | __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1)); |
1501 | } |
1502 | __ bind(&done_compare); |
1503 | __ bind(ool->exit()); |
1504 | break; |
1505 | } |
1506 | case kSSEFloat64Abs: { |
1507 | // TODO(bmeurer): Use RIP relative 128-bit constants. |
1508 | __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
1509 | __ psrlq(kScratchDoubleReg, 1); |
1510 | __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg); |
1511 | break; |
1512 | } |
1513 | case kSSEFloat64Neg: { |
1514 | // TODO(bmeurer): Use RIP relative 128-bit constants. |
1515 | __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
1516 | __ psllq(kScratchDoubleReg, 63); |
1517 | __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg); |
1518 | break; |
1519 | } |
1520 | case kSSEFloat64Sqrt: |
1521 | ASSEMBLE_SSE_UNOP(Sqrtsd); |
1522 | break; |
1523 | case kSSEFloat64Round: { |
1524 | CpuFeatureScope sse_scope(tasm(), SSE4_1); |
1525 | RoundingMode const mode = |
1526 | static_cast<RoundingMode>(MiscField::decode(instr->opcode())); |
1527 | __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode); |
1528 | break; |
1529 | } |
1530 | case kSSEFloat64ToFloat32: |
1531 | ASSEMBLE_SSE_UNOP(Cvtsd2ss); |
1532 | break; |
1533 | case kSSEFloat64ToInt32: |
1534 | if (instr->InputAt(0)->IsFPRegister()) { |
1535 | __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0)); |
1536 | } else { |
1537 | __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0)); |
1538 | } |
1539 | break; |
1540 | case kSSEFloat64ToUint32: { |
1541 | if (instr->InputAt(0)->IsFPRegister()) { |
1542 | __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0)); |
1543 | } else { |
1544 | __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0)); |
1545 | } |
1546 | if (MiscField::decode(instr->opcode())) { |
1547 | __ AssertZeroExtended(i.OutputRegister()); |
1548 | } |
1549 | break; |
1550 | } |
1551 | case kSSEFloat32ToInt64: |
1552 | if (instr->InputAt(0)->IsFPRegister()) { |
1553 | __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0)); |
1554 | } else { |
1555 | __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0)); |
1556 | } |
1557 | if (instr->OutputCount() > 1) { |
1558 | __ Set(i.OutputRegister(1), 1); |
1559 | Label done; |
1560 | Label fail; |
1561 | __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN)); |
1562 | if (instr->InputAt(0)->IsFPRegister()) { |
1563 | __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0)); |
1564 | } else { |
1565 | __ Ucomiss(kScratchDoubleReg, i.InputOperand(0)); |
1566 | } |
1567 | // If the input is NaN, then the conversion fails. |
1568 | __ j(parity_even, &fail); |
1569 | // If the input is INT64_MIN, then the conversion succeeds. |
1570 | __ j(equal, &done); |
1571 | __ cmpq(i.OutputRegister(0), Immediate(1)); |
1572 | // If the conversion results in INT64_MIN, but the input was not |
1573 | // INT64_MIN, then the conversion fails. |
1574 | __ j(no_overflow, &done); |
1575 | __ bind(&fail); |
1576 | __ Set(i.OutputRegister(1), 0); |
1577 | __ bind(&done); |
1578 | } |
1579 | break; |
1580 | case kSSEFloat64ToInt64: |
1581 | if (instr->InputAt(0)->IsFPRegister()) { |
1582 | __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0)); |
1583 | } else { |
1584 | __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0)); |
1585 | } |
1586 | if (instr->OutputCount() > 1) { |
1587 | __ Set(i.OutputRegister(1), 1); |
1588 | Label done; |
1589 | Label fail; |
1590 | __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN)); |
1591 | if (instr->InputAt(0)->IsFPRegister()) { |
1592 | __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0)); |
1593 | } else { |
1594 | __ Ucomisd(kScratchDoubleReg, i.InputOperand(0)); |
1595 | } |
1596 | // If the input is NaN, then the conversion fails. |
1597 | __ j(parity_even, &fail); |
1598 | // If the input is INT64_MIN, then the conversion succeeds. |
1599 | __ j(equal, &done); |
1600 | __ cmpq(i.OutputRegister(0), Immediate(1)); |
1601 | // If the conversion results in INT64_MIN, but the input was not |
1602 | // INT64_MIN, then the conversion fails. |
1603 | __ j(no_overflow, &done); |
1604 | __ bind(&fail); |
1605 | __ Set(i.OutputRegister(1), 0); |
1606 | __ bind(&done); |
1607 | } |
1608 | break; |
1609 | case kSSEFloat32ToUint64: { |
1610 | Label fail; |
1611 | if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0); |
1612 | if (instr->InputAt(0)->IsFPRegister()) { |
1613 | __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail); |
1614 | } else { |
1615 | __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail); |
1616 | } |
1617 | if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1); |
1618 | __ bind(&fail); |
1619 | break; |
1620 | } |
1621 | case kSSEFloat64ToUint64: { |
1622 | Label fail; |
1623 | if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0); |
1624 | if (instr->InputAt(0)->IsFPRegister()) { |
1625 | __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail); |
1626 | } else { |
1627 | __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail); |
1628 | } |
1629 | if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1); |
1630 | __ bind(&fail); |
1631 | break; |
1632 | } |
1633 | case kSSEInt32ToFloat64: |
1634 | if (instr->InputAt(0)->IsRegister()) { |
1635 | __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0)); |
1636 | } else { |
1637 | __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); |
1638 | } |
1639 | break; |
1640 | case kSSEInt32ToFloat32: |
1641 | if (instr->InputAt(0)->IsRegister()) { |
1642 | __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0)); |
1643 | } else { |
1644 | __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); |
1645 | } |
1646 | break; |
1647 | case kSSEInt64ToFloat32: |
1648 | if (instr->InputAt(0)->IsRegister()) { |
1649 | __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0)); |
1650 | } else { |
1651 | __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); |
1652 | } |
1653 | break; |
1654 | case kSSEInt64ToFloat64: |
1655 | if (instr->InputAt(0)->IsRegister()) { |
1656 | __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0)); |
1657 | } else { |
1658 | __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); |
1659 | } |
1660 | break; |
1661 | case kSSEUint64ToFloat32: |
1662 | if (instr->InputAt(0)->IsRegister()) { |
1663 | __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0)); |
1664 | } else { |
1665 | __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0)); |
1666 | } |
1667 | break; |
1668 | case kSSEUint64ToFloat64: |
1669 | if (instr->InputAt(0)->IsRegister()) { |
1670 | __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0)); |
1671 | } else { |
1672 | __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); |
1673 | } |
1674 | break; |
1675 | case kSSEUint32ToFloat64: |
1676 | if (instr->InputAt(0)->IsRegister()) { |
1677 | __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0)); |
1678 | } else { |
1679 | __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); |
1680 | } |
1681 | break; |
1682 | case kSSEUint32ToFloat32: |
1683 | if (instr->InputAt(0)->IsRegister()) { |
1684 | __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0)); |
1685 | } else { |
1686 | __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0)); |
1687 | } |
1688 | break; |
    // Extract/insert 32-bit halves of a float64. When the input is spilled
    // on the stack the halves can be read directly with a 32-bit load
    // (offset kDoubleSize / 2 selects the high word); otherwise the
    // Movd/Pextrd/Pinsrd forms operate on the XMM register.
    case kSSEFloat64ExtractLowWord32:
      if (instr->InputAt(0)->IsFPStackSlot()) {
        __ movl(i.OutputRegister(), i.InputOperand(0));
      } else {
        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
      }
      break;
    case kSSEFloat64ExtractHighWord32:
      if (instr->InputAt(0)->IsFPStackSlot()) {
        __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
      } else {
        // Lane index 1 selects bits 63..32 of the double.
        __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
      }
      break;
    case kSSEFloat64InsertLowWord32:
      if (instr->InputAt(1)->IsRegister()) {
        __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
      } else {
        __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
      }
      break;
    case kSSEFloat64InsertHighWord32:
      if (instr->InputAt(1)->IsRegister()) {
        __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
      } else {
        __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
      }
      break;
    // Load a 32-bit value into the low word of a double register,
    // zeroing the rest.
    case kSSEFloat64LoadLowWord32:
      if (instr->InputAt(0)->IsRegister()) {
        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    // AVX scalar float compare and arithmetic. The compare cases need an
    // explicit CpuFeatureScope because they emit vucomiss/vucomisd
    // directly; the binop cases go through ASSEMBLE_AVX_BINOP, which
    // handles register-vs-memory second operands.
    case kAVXFloat32Cmp: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      break;
    }
    case kAVXFloat32Add:
      ASSEMBLE_AVX_BINOP(vaddss);
      break;
    case kAVXFloat32Sub:
      ASSEMBLE_AVX_BINOP(vsubss);
      break;
    case kAVXFloat32Mul:
      ASSEMBLE_AVX_BINOP(vmulss);
      break;
    case kAVXFloat32Div:
      ASSEMBLE_AVX_BINOP(vdivss);
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulss depending on the result.
      __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kAVXFloat64Cmp: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      break;
    }
    case kAVXFloat64Add:
      ASSEMBLE_AVX_BINOP(vaddsd);
      break;
    case kAVXFloat64Sub:
      ASSEMBLE_AVX_BINOP(vsubsd);
      break;
    case kAVXFloat64Mul:
      ASSEMBLE_AVX_BINOP(vmulsd);
      break;
    case kAVXFloat64Div:
      ASSEMBLE_AVX_BINOP(vdivsd);
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulsd depending on the result.
      __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    // Float abs/neg are implemented as bit masks built in the scratch
    // register: pcmpeqd produces all-ones, then a shift leaves either the
    // sign bit (for neg, via xor) or everything but the sign bit (for abs,
    // via and) in the relevant 32/64-bit lane.
    case kAVXFloat32Abs: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      // Shift right by 33 leaves 0x7FFFFFFF in the low 32 bits of each
      // qword: a mask that clears the float32 sign bit.
      __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
      if (instr->InputAt(0)->IsFPRegister()) {
        __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
                  i.InputDoubleRegister(0));
      } else {
        __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
                  i.InputOperand(0));
      }
      break;
    }
    case kAVXFloat32Neg: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      // Shift left by 31 leaves only bit 31 set: the float32 sign bit.
      __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
      if (instr->InputAt(0)->IsFPRegister()) {
        __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
                  i.InputDoubleRegister(0));
      } else {
        __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
                  i.InputOperand(0));
      }
      break;
    }
    case kAVXFloat64Abs: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      // Mask clears bit 63: the float64 sign bit.
      __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
      if (instr->InputAt(0)->IsFPRegister()) {
        __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
                  i.InputDoubleRegister(0));
      } else {
        __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
                  i.InputOperand(0));
      }
      break;
    }
    case kAVXFloat64Neg: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      // Only bit 63 set: the float64 sign bit.
      __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
      if (instr->InputAt(0)->IsFPRegister()) {
        __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
                  i.InputDoubleRegister(0));
      } else {
        __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
                  i.InputOperand(0));
      }
      break;
    }
    case kSSEFloat64SilenceNaN:
      // Subtracting +0.0 is a no-op for ordinary values but quiets a
      // signaling NaN; input and output are the same register here.
      __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
      __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
      break;
    // 8/16-bit loads and stores. EmitOOLTrapIfNeeded records the current
    // pc so an out-of-line trap handler can be associated with this memory
    // access (used for wasm bounds checks); EmitWordLoadPoisoningIfNeeded
    // masks the loaded value under the speculation-poisoning mitigation.
    // AssertZeroExtended checks (debug-only) that the upper 32 bits of the
    // output are zero, which callers of 32-bit results rely on.
    case kX64Movsxbl:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      ASSEMBLE_MOVX(movsxbl);
      __ AssertZeroExtended(i.OutputRegister());
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    case kX64Movzxbl:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      ASSEMBLE_MOVX(movzxbl);
      __ AssertZeroExtended(i.OutputRegister());
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    case kX64Movsxbq:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      ASSEMBLE_MOVX(movsxbq);
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    case kX64Movzxbq:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      ASSEMBLE_MOVX(movzxbq);
      __ AssertZeroExtended(i.OutputRegister());
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    // Byte store; the value may be an immediate or a register.
    case kX64Movb: {
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      if (HasImmediateInput(instr, index)) {
        __ movb(operand, Immediate(i.InputInt8(index)));
      } else {
        __ movb(operand, i.InputRegister(index));
      }
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    }
    case kX64Movsxwl:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      ASSEMBLE_MOVX(movsxwl);
      __ AssertZeroExtended(i.OutputRegister());
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    case kX64Movzxwl:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      ASSEMBLE_MOVX(movzxwl);
      __ AssertZeroExtended(i.OutputRegister());
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    case kX64Movsxwq:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      ASSEMBLE_MOVX(movsxwq);
      break;
    case kX64Movzxwq:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      ASSEMBLE_MOVX(movzxwq);
      __ AssertZeroExtended(i.OutputRegister());
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    // 16-bit store; the value may be an immediate or a register.
    case kX64Movw: {
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      if (HasImmediateInput(instr, index)) {
        __ movw(operand, Immediate(i.InputInt16(index)));
      } else {
        __ movw(operand, i.InputRegister(index));
      }
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    }
    // 32-bit move. With an output it is a load (or a plain reg/operand
    // move when there is no addressing mode); without an output it is a
    // store of either an immediate or a register.
    case kX64Movl:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      if (instr->HasOutput()) {
        if (instr->addressing_mode() == kMode_None) {
          if (instr->InputAt(0)->IsRegister()) {
            __ movl(i.OutputRegister(), i.InputRegister(0));
          } else {
            __ movl(i.OutputRegister(), i.InputOperand(0));
          }
        } else {
          __ movl(i.OutputRegister(), i.MemoryOperand());
        }
        // movl implicitly zero-extends to 64 bits; check that in debug.
        __ AssertZeroExtended(i.OutputRegister());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        if (HasImmediateInput(instr, index)) {
          __ movl(operand, i.InputImmediate(index));
        } else {
          __ movl(operand, i.InputRegister(index));
        }
      }
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    // Sign-extending 32->64 bit load/move.
    case kX64Movsxlq:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      ASSEMBLE_MOVX(movsxlq);
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    // Pointer-compression support: loads that decompress a 32-bit tagged
    // value to a full pointer, and stores that compress.
    case kX64MovqDecompressTaggedSigned: {
      CHECK(instr->HasOutput());
      __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
      break;
    }
    case kX64MovqDecompressTaggedPointer: {
      CHECK(instr->HasOutput());
      __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
      break;
    }
    case kX64MovqDecompressAnyTagged: {
      CHECK(instr->HasOutput());
      __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
      break;
    }
    // Compressing store of a tagged value (immediate or register).
    case kX64MovqCompressTagged: {
      CHECK(!instr->HasOutput());
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      if (HasImmediateInput(instr, index)) {
        __ StoreTaggedField(operand, i.InputImmediate(index));
      } else {
        __ StoreTaggedField(operand, i.InputRegister(index));
      }
      break;
    }
    // Register-to-register decompression: a Smi only needs the sign
    // extension; a pointer additionally needs the isolate root added.
    case kX64DecompressSigned: {
      CHECK(instr->HasOutput());
      ASSEMBLE_MOVX(movsxlq);
      break;
    }
    case kX64DecompressPointer: {
      CHECK(instr->HasOutput());
      ASSEMBLE_MOVX(movsxlq);
      __ addq(i.OutputRegister(), kRootRegister);
      break;
    }
    case kX64DecompressAny: {
      CHECK(instr->HasOutput());
      ASSEMBLE_MOVX(movsxlq);
      // TODO(solanes): Do branchful compute?
      // Branchlessly compute |masked_root|:
      STATIC_ASSERT((kSmiTagSize == 1) && (kSmiTag < 32));
      Register masked_root = kScratchRegister;
      // masked_root = (value & kSmiTagMask) ? kRootRegister : 0, computed
      // without a branch: negating 0/1 yields 0 or all-ones, which then
      // masks the root register.
      __ movl(masked_root, i.OutputRegister());
      __ andl(masked_root, Immediate(kSmiTagMask));
      __ negq(masked_root);
      __ andq(masked_root, kRootRegister);
      // Now this add operation will either leave the value unchanged if it is a
      // smi or add the isolate root if it is a heap object.
      __ addq(i.OutputRegister(), masked_root);
      break;
    }
    // TODO(solanes): Combine into one Compress? They seem to be identical.
    // TODO(solanes): We might get away with doing a no-op in these three cases.
    // The movl instruction is the conservative way for the moment.
    case kX64CompressSigned: {
      ASSEMBLE_MOVX(movl);
      break;
    }
    case kX64CompressPointer: {
      ASSEMBLE_MOVX(movl);
      break;
    }
    case kX64CompressAny: {
      ASSEMBLE_MOVX(movl);
      break;
    }
    // 64-bit, float32, float64 and 128-bit moves. Each is a load when the
    // instruction has an output, otherwise a store.
    case kX64Movq:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      if (instr->HasOutput()) {
        __ movq(i.OutputRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        if (HasImmediateInput(instr, index)) {
          __ movq(operand, i.InputImmediate(index));
        } else {
          __ movq(operand, i.InputRegister(index));
        }
      }
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    case kX64Movss:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      if (instr->HasOutput()) {
        __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ movss(operand, i.InputDoubleRegister(index));
      }
      break;
    case kX64Movsd: {
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      if (instr->HasOutput()) {
        const MemoryAccessMode access_mode =
            static_cast<MemoryAccessMode>(MiscField::decode(opcode));
        if (access_mode == kMemoryAccessPoisoned) {
          // If we have to poison the loaded value, we load into a general
          // purpose register first, mask it with the poison, and move the
          // value from the general purpose register into the double register.
          __ movq(kScratchRegister, i.MemoryOperand());
          __ andq(kScratchRegister, kSpeculationPoisonRegister);
          __ Movq(i.OutputDoubleRegister(), kScratchRegister);
        } else {
          __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
        }
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Movsd(operand, i.InputDoubleRegister(index));
      }
      break;
    }
    case kX64Movdqu: {
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      if (instr->HasOutput()) {
        __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ movdqu(operand, i.InputSimd128Register(index));
      }
      break;
    }
    // Reinterpreting bitcasts between GP and FP registers. When the FP
    // source is spilled, a plain integer load of the slot does the cast;
    // otherwise Movd/Movq transfer the raw bits between register files.
    case kX64BitcastFI:  // float32 -> int32
      if (instr->InputAt(0)->IsFPStackSlot()) {
        __ movl(i.OutputRegister(), i.InputOperand(0));
      } else {
        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
      }
      break;
    case kX64BitcastDL:  // float64 -> int64
      if (instr->InputAt(0)->IsFPStackSlot()) {
        __ movq(i.OutputRegister(), i.InputOperand(0));
      } else {
        __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
      }
      break;
    case kX64BitcastIF:  // int32 -> float32
      if (instr->InputAt(0)->IsRegister()) {
        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kX64BitcastLD:  // int64 -> float64
      if (instr->InputAt(0)->IsRegister()) {
        __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kX64Lea32: {
      AddressingMode mode = AddressingModeField::decode(instr->opcode());
      // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
      // and addressing mode just happens to work out. The "addl"/"subl" forms
      // in these cases are faster based on measurements.
      if (i.InputRegister(0) == i.OutputRegister()) {
        if (mode == kMode_MRI) {
          int32_t constant_summand = i.InputInt32(1);
          DCHECK_NE(0, constant_summand);
          if (constant_summand > 0) {
            __ addl(i.OutputRegister(), Immediate(constant_summand));
          } else {
            // NegateWithWraparound avoids UB when negating INT32_MIN.
            __ subl(i.OutputRegister(),
                    Immediate(base::NegateWithWraparound(constant_summand)));
          }
        } else if (mode == kMode_MR1) {
          if (i.InputRegister(1) == i.OutputRegister()) {
            // dst = dst + dst  ==>  dst <<= 1.
            __ shll(i.OutputRegister(), Immediate(1));
          } else {
            __ addl(i.OutputRegister(), i.InputRegister(1));
          }
        } else if (mode == kMode_M2) {
          __ shll(i.OutputRegister(), Immediate(1));
        } else if (mode == kMode_M4) {
          __ shll(i.OutputRegister(), Immediate(2));
        } else if (mode == kMode_M8) {
          __ shll(i.OutputRegister(), Immediate(3));
        } else {
          __ leal(i.OutputRegister(), i.MemoryOperand());
        }
      } else if (mode == kMode_MR1 &&
                 i.InputRegister(1) == i.OutputRegister()) {
        __ addl(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ leal(i.OutputRegister(), i.MemoryOperand());
      }
      // All the shortened forms write a 32-bit result, which zero-extends.
      __ AssertZeroExtended(i.OutputRegister());
      break;
    }
    case kX64Lea: {
      AddressingMode mode = AddressingModeField::decode(instr->opcode());
      // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
      // and addressing mode just happens to work out. The "addq"/"subq" forms
      // in these cases are faster based on measurements.
      if (i.InputRegister(0) == i.OutputRegister()) {
        if (mode == kMode_MRI) {
          int32_t constant_summand = i.InputInt32(1);
          if (constant_summand > 0) {
            __ addq(i.OutputRegister(), Immediate(constant_summand));
          } else if (constant_summand < 0) {
            __ subq(i.OutputRegister(), Immediate(-constant_summand));
          }
        } else if (mode == kMode_MR1) {
          if (i.InputRegister(1) == i.OutputRegister()) {
            __ shlq(i.OutputRegister(), Immediate(1));
          } else {
            __ addq(i.OutputRegister(), i.InputRegister(1));
          }
        } else if (mode == kMode_M2) {
          __ shlq(i.OutputRegister(), Immediate(1));
        } else if (mode == kMode_M4) {
          __ shlq(i.OutputRegister(), Immediate(2));
        } else if (mode == kMode_M8) {
          __ shlq(i.OutputRegister(), Immediate(3));
        } else {
          __ leaq(i.OutputRegister(), i.MemoryOperand());
        }
      } else if (mode == kMode_MR1 &&
                 i.InputRegister(1) == i.OutputRegister()) {
        __ addq(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ leaq(i.OutputRegister(), i.MemoryOperand());
      }
      break;
    }
    case kX64Dec32:
      __ decl(i.OutputRegister());
      break;
    case kX64Inc32:
      __ incl(i.OutputRegister());
      break;
    // Push an argument onto the stack. Every branch must keep the frame
    // access state's SP delta and the unwinding info in sync with the
    // actual stack pointer adjustment it performs.
    case kX64Push:
      if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
        // Push directly from memory.
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ pushq(operand);
        frame_access_state()->IncreaseSPDelta(1);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
      } else if (HasImmediateInput(instr, 0)) {
        __ pushq(i.InputImmediate(0));
        frame_access_state()->IncreaseSPDelta(1);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
      } else if (instr->InputAt(0)->IsRegister()) {
        __ pushq(i.InputRegister(0));
        frame_access_state()->IncreaseSPDelta(1);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
      } else if (instr->InputAt(0)->IsFloatRegister() ||
                 instr->InputAt(0)->IsDoubleRegister()) {
        // FP registers can't be pushq'd: make room and store instead.
        // TODO(titzer): use another machine instruction?
        __ subq(rsp, Immediate(kDoubleSize));
        frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kDoubleSize);
        __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
      } else if (instr->InputAt(0)->IsSimd128Register()) {
        // TODO(titzer): use another machine instruction?
        __ subq(rsp, Immediate(kSimd128Size));
        frame_access_state()->IncreaseSPDelta(kSimd128Size /
                                              kSystemPointerSize);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSimd128Size);
        __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
      } else if (instr->InputAt(0)->IsStackSlot() ||
                 instr->InputAt(0)->IsFloatStackSlot() ||
                 instr->InputAt(0)->IsDoubleStackSlot()) {
        __ pushq(i.InputOperand(0));
        frame_access_state()->IncreaseSPDelta(1);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
      } else {
        // 128-bit stack slot: bounce through the scratch XMM register.
        DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
        __ Movups(kScratchDoubleReg, i.InputOperand(0));
        // TODO(titzer): use another machine instruction?
        __ subq(rsp, Immediate(kSimd128Size));
        frame_access_state()->IncreaseSPDelta(kSimd128Size /
                                              kSystemPointerSize);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSimd128Size);
        __ Movups(Operand(rsp, 0), kScratchDoubleReg);
      }
      break;
    // Store to an rsp-relative outgoing-argument slot; the slot index is
    // carried in the instruction's MiscField.
    case kX64Poke: {
      int slot = MiscField::decode(instr->opcode());
      if (HasImmediateInput(instr, 0)) {
        __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
      } else {
        __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
      }
      break;
    }
    // Load from an rbp-relative slot addressed from the top of the frame
    // (reverse_slot counts down from the total frame slot count).
    case kX64Peek: {
      int reverse_slot = i.InputInt32(0);
      int offset =
          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
      if (instr->OutputAt(0)->IsFPRegister()) {
        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
        if (op->representation() == MachineRepresentation::kFloat64) {
          __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
        } else {
          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
          __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
        }
      } else {
        __ movq(i.OutputRegister(), Operand(rbp, offset));
      }
      break;
    }
    // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
    case kX64F32x4Splat: {
      XMMRegister dst = i.OutputSimd128Register();
      if (instr->InputAt(0)->IsFPRegister()) {
        __ movss(dst, i.InputDoubleRegister(0));
      } else {
        __ movss(dst, i.InputOperand(0));
      }
      // Broadcast lane 0 into all four lanes.
      __ shufps(dst, dst, 0x0);
      break;
    }
    case kX64F32x4ExtractLane: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      // extractps writes to a GP register, so go through the scratch
      // register and back into an XMM register.
      __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
      __ movd(i.OutputDoubleRegister(), kScratchRegister);
      break;
    }
    case kX64F32x4ReplaceLane: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      // The insertps instruction uses imm8[5:4] to indicate the lane
      // that needs to be replaced.
      byte select = i.InputInt8(1) << 4 & 0x30;
      if (instr->InputAt(2)->IsFPRegister()) {
        __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
                    select);
      } else {
        __ insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
      }
      break;
    }
    case kX64F32x4SConvertI32x4: {
      __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kX64F32x4UConvertI32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // There is no unsigned packed conversion; split each lane into low
      // and high 16-bit halves, convert both exactly, and recombine.
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
      __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
      __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
      __ psrld(dst, 1);         // divide by 2 to get in unsigned range
      __ cvtdq2ps(dst, dst);    // convert hi exactly
      __ addps(dst, dst);       // double hi, exactly
      __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
      break;
    }
    // Packed abs/neg via sign-bit masks, same trick as the scalar AVX
    // cases above; the dst==src path must build the mask in scratch.
    case kX64F32x4Abs: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      if (dst == src) {
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ psrld(kScratchDoubleReg, 1);
        __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
      } else {
        __ pcmpeqd(dst, dst);
        __ psrld(dst, 1);
        __ andps(dst, i.InputSimd128Register(0));
      }
      break;
    }
    case kX64F32x4Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      if (dst == src) {
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ pslld(kScratchDoubleReg, 31);
        __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
      } else {
        __ pcmpeqd(dst, dst);
        __ pslld(dst, 31);
        __ xorps(dst, i.InputSimd128Register(0));
      }
      break;
    }
    case kX64F32x4RecipApprox: {
      __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kX64F32x4RecipSqrtApprox: {
      __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    // Packed float32 arithmetic. These are all destructive SSE forms, so
    // the instruction selector guarantees dst == input 0 (checked below).
    case kX64F32x4Add: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64F32x4AddHoriz: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE3);
      __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64F32x4Sub: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64F32x4Mul: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64F32x4Min: {
      XMMRegister src1 = i.InputSimd128Register(1),
                  dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      // The minps instruction doesn't propagate NaNs and +0's in its first
      // operand. Perform minps in both orders, merge the results, and adjust.
      __ movaps(kScratchDoubleReg, src1);
      __ minps(kScratchDoubleReg, dst);
      __ minps(dst, src1);
      // propagate -0's and NaNs, which may be non-canonical.
      __ orps(kScratchDoubleReg, dst);
      // Canonicalize NaNs by quieting and clearing the payload.
      __ cmpps(dst, kScratchDoubleReg, 3);  // predicate 3: unordered (NaN)
      __ orps(kScratchDoubleReg, dst);
      __ psrld(dst, 10);
      __ andnps(dst, kScratchDoubleReg);
      break;
    }
    case kX64F32x4Max: {
      XMMRegister src1 = i.InputSimd128Register(1),
                  dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      // The maxps instruction doesn't propagate NaNs and +0's in its first
      // operand. Perform maxps in both orders, merge the results, and adjust.
      __ movaps(kScratchDoubleReg, src1);
      __ maxps(kScratchDoubleReg, dst);
      __ maxps(dst, src1);
      // Find discrepancies.
      __ xorps(dst, kScratchDoubleReg);
      // Propagate NaNs, which may be non-canonical.
      __ orps(kScratchDoubleReg, dst);
      // Propagate sign discrepancy and (subtle) quiet NaNs.
      __ subps(kScratchDoubleReg, dst);
      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
      __ cmpps(dst, kScratchDoubleReg, 3);
      __ psrld(dst, 10);
      __ andnps(dst, kScratchDoubleReg);
      break;
    }
    // Packed compares; the cmpps immediate selects the predicate
    // (0 = equal, 4 = not-equal).
    case kX64F32x4Eq: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
      break;
    }
    case kX64F32x4Ne: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
      break;
    }
    case kX64F32x4Lt: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64F32x4Le: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4Splat: {
      XMMRegister dst = i.OutputSimd128Register();
      if (instr->InputAt(0)->IsRegister()) {
        __ movd(dst, i.InputRegister(0));
      } else {
        __ movd(dst, i.InputOperand(0));
      }
      // Broadcast lane 0 into all four lanes.
      __ pshufd(dst, dst, 0x0);
      break;
    }
    case kX64I32x4ExtractLane: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
      break;
    }
    case kX64I32x4ReplaceLane: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      if (instr->InputAt(2)->IsRegister()) {
        __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
                  i.InputInt8(1));
      } else {
        __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
      }
      break;
    }
    case kX64I32x4SConvertF32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      // cvttps2dq produces 0x80000000 for NaN and out-of-range lanes; the
      // surrounding code maps NaN to 0 and positive overflow to INT32_MAX.
      // NAN->0
      __ movaps(kScratchDoubleReg, dst);
      __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
      __ pand(dst, kScratchDoubleReg);
      // Set top bit if >= 0 (but not -0.0!)
      __ pxor(kScratchDoubleReg, dst);
      // Convert
      __ cvttps2dq(dst, dst);
      // Set top bit if >=0 is now < 0
      __ pand(kScratchDoubleReg, dst);
      __ psrad(kScratchDoubleReg, 31);
      // Set positive overflow lanes to 0x7FFFFFFF
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
    // Widening conversions from int16x8: the "High" variants first shift
    // the upper 8 bytes down with palignr, then sign-extend.
    case kX64I32x4SConvertI16x8Low: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kX64I32x4SConvertI16x8High: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      __ palignr(dst, i.InputSimd128Register(0), 8);
      __ pmovsxwd(dst, dst);
      break;
    }
    case kX64I32x4Neg: {
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      if (dst == src) {
        // In-place: psignd with all-ones (-1 per lane) negates each lane.
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ psignd(dst, kScratchDoubleReg);
      } else {
        // Otherwise compute 0 - src.
        __ pxor(dst, dst);
        __ psubd(dst, src);
      }
      break;
    }
    case kX64I32x4Shl: {
      __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kX64I32x4ShrS: {
      __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kX64I32x4Add: {
      __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4AddHoriz: {
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4Sub: {
      __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4Mul: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4MinS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4MaxS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4Eq: {
      __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4Ne: {
      // There is no pcmpneqd: compute equality, then invert all bits.
      __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kX64I32x4GtS: {
      __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4GeS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(1);
      // dst >= src  <=>  min(dst, src) == src ... per-lane via pminsd.
      __ pminsd(dst, src);
      __ pcmpeqd(dst, src);
      break;
    }
    // Convert f32x4 lanes to u32x4 with saturation. x64 only has a signed
    // cvttps2dq, so values above INT32_MAX are handled by converting
    // (src - max_signed) separately and adding it back in integer domain.
    case kX64I32x4UConvertF32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
      // NAN->0, negative->0
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
      __ maxps(dst, kScratchDoubleReg);
      // scratch: float representation of max_signed
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
      // tmp: convert (src-max_signed).
      // Positive overflow lanes -> 0x7FFFFFFF
      // Negative lanes -> 0
      __ movaps(tmp, dst);
      __ subps(tmp, kScratchDoubleReg);
      // scratch becomes a mask of lanes where src >= max_signed.
      __ cmpleps(kScratchDoubleReg, tmp);
      __ cvttps2dq(tmp, tmp);
      __ pxor(tmp, kScratchDoubleReg);
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
      __ pmaxsd(tmp, kScratchDoubleReg);
      // convert. Overflow lanes above max_signed will be 0x80000000
      __ cvttps2dq(dst, dst);
      // Add (src-max_signed) for overflow lanes.
      __ paddd(dst, tmp);
      break;
    }
    case kX64I32x4UConvertI16x8Low: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      // Zero-extend the low four uint16 lanes to uint32.
      __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kX64I32x4UConvertI16x8High: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // Shift the high four uint16 lanes into the low half, then
      // zero-extend each to uint32.
      __ palignr(dst, i.InputSimd128Register(0), 8);
      __ pmovzxwd(dst, dst);
      break;
    }
    case kX64I32x4ShrU: {
      __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kX64I32x4MinU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4MaxU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I32x4GtU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(1);
      // There is no unsigned pcmpgtd:
      // dst > src  <=>  NOT(max(dst, src) == src)  (per lane, unsigned).
      __ pmaxud(dst, src);
      __ pcmpeqd(dst, src);
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
    case kX64I32x4GeU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(1);
      // dst >= src  <=>  min(dst, src) == src  (per lane, unsigned).
      __ pminud(dst, src);
      __ pcmpeqd(dst, src);
      break;
    }
    case kX64S128Zero: {
      XMMRegister dst = i.OutputSimd128Register();
      __ xorps(dst, dst);
      break;
    }
    case kX64I16x8Splat: {
      XMMRegister dst = i.OutputSimd128Register();
      if (instr->InputAt(0)->IsRegister()) {
        __ movd(dst, i.InputRegister(0));
      } else {
        __ movd(dst, i.InputOperand(0));
      }
      // Duplicate word 0 across the low quadword, then broadcast the low
      // dword to all four dwords -> all eight word lanes hold the value.
      __ pshuflw(dst, dst, 0x0);
      __ pshufd(dst, dst, 0x0);
      break;
    }
    case kX64I16x8ExtractLane: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      Register dst = i.OutputRegister();
      // pextrw zero-extends; re-sign-extend to produce an int32 result.
      __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
      __ movsxwl(dst, dst);
      break;
    }
    case kX64I16x8ReplaceLane: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      // Input 1 is the lane index, input 2 the replacement value.
      if (instr->InputAt(2)->IsRegister()) {
        __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
                  i.InputInt8(1));
      } else {
        __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
      }
      break;
    }
    case kX64I16x8SConvertI8x16Low: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      // Sign-extend the low eight int8 lanes to int16.
      __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kX64I16x8SConvertI8x16High: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // Shift the high eight int8 lanes into the low half, then
      // sign-extend each to int16.
      __ palignr(dst, i.InputSimd128Register(0), 8);
      __ pmovsxbw(dst, dst);
      break;
    }
    case kX64I16x8Neg: {
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      if (dst == src) {
        // In place: psignw with an all-ones mask negates every lane.
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ psignw(dst, kScratchDoubleReg);
      } else {
        // Distinct registers: compute 0 - src into dst.
        __ pxor(dst, dst);
        __ psubw(dst, src);
      }
      break;
    }
    // i16x8 shifts and binary operations; same aliasing convention as the
    // i32x4 cases (output doubles as the left-hand operand).
    case kX64I16x8Shl: {
      __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kX64I16x8ShrS: {
      __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kX64I16x8SConvertI32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      // Narrow two i32x4 inputs to one i16x8 with signed saturation.
      __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8Add: {
      __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8AddSaturateS: {
      __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8AddHoriz: {
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8Sub: {
      __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8SubSaturateS: {
      __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8Mul: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      // pmullw keeps the low 16 bits of each 16x16 product.
      __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8MinS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8MaxS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8Eq: {
      __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8Ne: {
      // Ne = NOT(Eq): compute equality, then XOR with all-ones.
      __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kX64I16x8GtS: {
      __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8GeS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(1);
      // dst >= src  <=>  min(dst, src) == src  (per lane, signed).
      __ pminsw(dst, src);
      __ pcmpeqw(dst, src);
      break;
    }
    case kX64I16x8UConvertI8x16Low: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      // Zero-extend the low eight uint8 lanes to uint16.
      __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kX64I16x8UConvertI8x16High: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // Shift the high eight uint8 lanes into the low half, then
      // zero-extend each to uint16.
      __ palignr(dst, i.InputSimd128Register(0), 8);
      __ pmovzxbw(dst, dst);
      break;
    }
    case kX64I16x8ShrU: {
      __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kX64I16x8UConvertI32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // Change negative lanes to 0x7FFFFFFF
      // (packusdw treats its inputs as signed; clamping both inputs to
      // INT32_MAX first makes the unsigned saturation come out right).
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psrld(kScratchDoubleReg, 1);
      __ pminud(dst, kScratchDoubleReg);
      __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
      __ packusdw(dst, kScratchDoubleReg);
      break;
    }
    case kX64I16x8AddSaturateU: {
      __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8SubSaturateU: {
      __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8MinU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8MaxU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I16x8GtU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(1);
      // dst > src  <=>  NOT(max(dst, src) == src)  (per lane, unsigned).
      __ pmaxuw(dst, src);
      __ pcmpeqw(dst, src);
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
    case kX64I16x8GeU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(1);
      // dst >= src  <=>  min(dst, src) == src  (per lane, unsigned).
      __ pminuw(dst, src);
      __ pcmpeqw(dst, src);
      break;
    }
    case kX64I8x16Splat: {
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      XMMRegister dst = i.OutputSimd128Register();
      if (instr->InputAt(0)->IsRegister()) {
        __ movd(dst, i.InputRegister(0));
      } else {
        __ movd(dst, i.InputOperand(0));
      }
      // pshufb with an all-zero control mask broadcasts byte 0 to all
      // sixteen byte lanes.
      __ xorps(kScratchDoubleReg, kScratchDoubleReg);
      __ pshufb(dst, kScratchDoubleReg);
      break;
    }
    case kX64I8x16ExtractLane: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      Register dst = i.OutputRegister();
      // pextrb zero-extends; re-sign-extend to produce an int32 result.
      __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
      __ movsxbl(dst, dst);
      break;
    }
    case kX64I8x16ReplaceLane: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      // Input 1 is the lane index, input 2 the replacement value.
      if (instr->InputAt(2)->IsRegister()) {
        __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
                  i.InputInt8(1));
      } else {
        __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
      }
      break;
    }
    case kX64I8x16SConvertI16x8: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      // Narrow two i16x8 inputs to one i8x16 with signed saturation.
      __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16Neg: {
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      if (dst == src) {
        // In place: psignb with an all-ones mask negates every lane.
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ psignb(dst, kScratchDoubleReg);
      } else {
        // Distinct registers: compute 0 - src into dst.
        __ pxor(dst, dst);
        __ psubb(dst, src);
      }
      break;
    }
    // There are no 8-bit shift instructions on x64, so byte shifts are
    // synthesized from 16-bit shifts.
    case kX64I8x16Shl: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      int8_t shift = i.InputInt8(1) & 0x7;
      if (shift < 4) {
        // For small shifts, doubling is faster.
        for (int i = 0; i < shift; ++i) {
          __ paddb(dst, dst);
        }
      } else {
        // Mask off the unwanted bits before word-shifting.
        // Scratch ends up with 0x00??00?? ... where ?? = 0xFF >> shift,
        // replicated to every byte by packuswb.
        __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
        __ psrlw(kScratchDoubleReg, 8 + shift);
        __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
        __ pand(dst, kScratchDoubleReg);
        __ psllw(dst, shift);
      }
      break;
    }
    case kX64I8x16ShrS: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t shift = i.InputInt8(1) & 0x7;
      // Unpack the bytes into words, do arithmetic shifts, and repack.
      // Each source byte lands in the high byte of a word; shifting right
      // by 8 + shift discards the (garbage) low byte and yields the
      // arithmetically shifted value.
      __ punpckhbw(kScratchDoubleReg, src);
      __ punpcklbw(dst, src);
      __ psraw(kScratchDoubleReg, 8 + shift);
      __ psraw(dst, 8 + shift);
      __ packsswb(dst, kScratchDoubleReg);
      break;
    }
    case kX64I8x16Add: {
      __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16AddSaturateS: {
      __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16Sub: {
      __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16SubSaturateS: {
      __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    // There is no 8-bit multiply on x64; build it from two 16-bit
    // multiplies covering the odd and even bytes, then merge.
    case kX64I8x16Mul: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      XMMRegister right = i.InputSimd128Register(1);
      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
      // I16x8 view of I8x16
      // left = AAaa AAaa ... AAaa AAaa
      // right= BBbb BBbb ... BBbb BBbb
      // t = 00AA 00AA ... 00AA 00AA
      // s = 00BB 00BB ... 00BB 00BB
      __ movaps(tmp, dst);
      __ movaps(kScratchDoubleReg, right);
      __ psrlw(tmp, 8);
      __ psrlw(kScratchDoubleReg, 8);
      // dst = left * 256
      __ psllw(dst, 8);
      // t = I16x8Mul(t, s)
      //    => __PP __PP ...  __PP  __PP
      __ pmullw(tmp, kScratchDoubleReg);
      // dst = I16x8Mul(left * 256, right)
      //    => pp__ pp__ ...  pp__  pp__
      __ pmullw(dst, right);
      // t = I16x8Shl(t, 8)
      //    => PP00 PP00 ...  PP00  PP00
      __ psllw(tmp, 8);
      // dst = I16x8Shr(dst, 8)
      //    => 00pp 00pp ...  00pp  00pp
      __ psrlw(dst, 8);
      // dst = I16x8Or(dst, t)
      //    => PPpp PPpp ...  PPpp  PPpp
      __ por(dst, tmp);
      break;
    }
    case kX64I8x16MinS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16MaxS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16Eq: {
      __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16Ne: {
      // Ne = NOT(Eq): compute equality, then XOR with all-ones.
      __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kX64I8x16GtS: {
      __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16GeS: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(1);
      // dst >= src  <=>  min(dst, src) == src  (per lane, signed).
      __ pminsb(dst, src);
      __ pcmpeqb(dst, src);
      break;
    }
    case kX64I8x16UConvertI16x8: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // Change negative lanes to 0x7FFF
      // (packuswb treats its inputs as signed; clamping both inputs to
      // INT16_MAX first makes the unsigned saturation come out right).
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ psrlw(kScratchDoubleReg, 1);
      __ pminuw(dst, kScratchDoubleReg);
      __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
      __ packuswb(dst, kScratchDoubleReg);
      break;
    }
    case kX64I8x16ShrU: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t shift = i.InputInt8(1) & 0x7;
      // Unpack the bytes into words, do logical shifts, and repack.
      // Each source byte lands in the high byte of a word; shifting right
      // by 8 + shift discards the (garbage) low byte.
      __ punpckhbw(kScratchDoubleReg, src);
      __ punpcklbw(dst, src);
      __ psrlw(kScratchDoubleReg, 8 + shift);
      __ psrlw(dst, 8 + shift);
      __ packuswb(dst, kScratchDoubleReg);
      break;
    }
    case kX64I8x16AddSaturateU: {
      __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16SubSaturateU: {
      __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16MinU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16MaxU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I8x16GtU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(1);
      // dst > src  <=>  NOT(max(dst, src) == src)  (per lane, unsigned).
      __ pmaxub(dst, src);
      __ pcmpeqb(dst, src);
      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
    case kX64I8x16GeU: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(1);
      // dst >= src  <=>  min(dst, src) == src  (per lane, unsigned).
      __ pminub(dst, src);
      __ pcmpeqb(dst, src);
      break;
    }
    case kX64S128And: {
      __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64S128Or: {
      __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64S128Xor: {
      __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64S128Not: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      if (dst == src) {
        // In place: save src, set dst to all-ones, then XOR to invert.
        __ movaps(kScratchDoubleReg, dst);
        __ pcmpeqd(dst, dst);
        __ pxor(dst, kScratchDoubleReg);
      } else {
        // Distinct registers: all-ones XOR src.
        __ pcmpeqd(dst, dst);
        __ pxor(dst, src);
      }

      break;
    }
    case kX64S128Select: {
      // Mask used here is stored in dst.
      // Bitwise select: dst = (mask & v1) | (~mask & v2), computed as
      // ((v1 ^ v2) & mask) ^ v2 to save an instruction.
      XMMRegister dst = i.OutputSimd128Register();
      __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
      __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
      __ andps(dst, kScratchDoubleReg);
      __ xorps(dst, i.InputSimd128Register(2));
      break;
    }
    // General 8x16 shuffle. Inputs 1..4 (one operand) or 2..5 (two
    // operands) are four uint32 immediates packing the 16 lane indices.
    // A 16-byte-aligned stack buffer holds the pshufb control mask.
    case kX64S8x16Shuffle: {
      XMMRegister dst = i.OutputSimd128Register();
      Register tmp = i.TempRegister(0);
      // Prepare 16 byte aligned buffer for shuffle control mask
      // (save rsp in tmp, align down, restore at the end).
      __ movq(tmp, rsp);
      __ andq(rsp, Immediate(-16));
      if (instr->InputCount() == 5) {  // only one input operand
        uint32_t mask[4] = {};
        DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
        for (int j = 4; j > 0; j--) {
          mask[j - 1] = i.InputUint32(j);
        }

        SetupShuffleMaskOnStack(tasm(), mask);
        __ pshufb(dst, Operand(rsp, 0));
      } else {  // two input operands
        DCHECK_EQ(6, instr->InputCount());
        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
        // First mask: lanes < 16 pick from operand 0; others become 0x80,
        // which makes pshufb write zero for that byte.
        uint32_t mask[4] = {};
        for (int j = 5; j > 1; j--) {
          uint32_t lanes = i.InputUint32(j);
          for (int k = 0; k < 32; k += 8) {
            uint8_t lane = lanes >> k;
            mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
          }
        }
        SetupShuffleMaskOnStack(tasm(), mask);
        __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
        // Second mask: lanes >= 16 pick from operand 1 (index & 0x0F);
        // others zeroed. The two partial results are then OR-combined.
        uint32_t mask1[4] = {};
        if (instr->InputAt(1)->IsSimd128Register()) {
          XMMRegister src1 = i.InputSimd128Register(1);
          if (src1 != dst) __ movups(dst, src1);
        } else {
          __ movups(dst, i.InputOperand(1));
        }
        for (int j = 5; j > 1; j--) {
          uint32_t lanes = i.InputUint32(j);
          for (int k = 0; k < 32; k += 8) {
            uint8_t lane = lanes >> k;
            mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
          }
        }
        SetupShuffleMaskOnStack(tasm(), mask1);
        __ pshufb(dst, Operand(rsp, 0));
        __ por(dst, kScratchDoubleReg);
      }
      __ movq(rsp, tmp);
      break;
    }
    case kX64S32x4Swizzle: {
      DCHECK_EQ(2, instr->InputCount());
      // Single-operand 32-bit lane permutation via pshufd immediate.
      ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
                              i.InputInt8(1));
      break;
    }
    case kX64S32x4Shuffle: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
      int8_t shuffle = i.InputInt8(2);
      DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
      // Permute both operands with the same pshufd immediate, then
      // pblendw (input 3) picks per-word which operand supplies each lane.
      ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
      ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
      __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
      break;
    }
    case kX64S16x8Blend: {
      ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
      break;
    }
    case kX64S16x8HalfShuffle1: {
      // Shuffle the low and high quadword halves independently.
      XMMRegister dst = i.OutputSimd128Register();
      ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
      __ pshufhw(dst, dst, i.InputInt8(2));
      break;
    }
    case kX64S16x8HalfShuffle2: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // Half-shuffle both operands identically, then blend per word.
      ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
      __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
      ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
      __ pshufhw(dst, dst, i.InputInt8(3));
      __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
      break;
    }
    case kX64S8x16Alignr: {
      ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
      break;
    }
    case kX64S16x8Dup: {
      // Broadcast one 16-bit lane to all eight lanes: duplicate it within
      // its quadword half, then broadcast that half's dword with pshufd
      // (0x00 selects dword 0, 0xaa selects dword 2).
      XMMRegister dst = i.OutputSimd128Register();
      int8_t lane = i.InputInt8(1) & 0x7;
      int8_t lane4 = lane & 0x3;
      int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
      if (lane < 4) {
        ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
        __ pshufd(dst, dst, 0);
      } else {
        ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
        __ pshufd(dst, dst, 0xaa);
      }
      break;
    }
    case kX64S8x16Dup: {
      // Broadcast one 8-bit lane: widen the byte to a 16-bit lane via
      // self-unpack, then reuse the 16-bit dup sequence above.
      XMMRegister dst = i.OutputSimd128Register();
      int8_t lane = i.InputInt8(1) & 0xf;
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      if (lane < 8) {
        __ punpcklbw(dst, dst);
      } else {
        __ punpckhbw(dst, dst);
      }
      lane &= 0x7;
      int8_t lane4 = lane & 0x3;
      int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
      if (lane < 4) {
        __ pshuflw(dst, dst, half_dup);
        __ pshufd(dst, dst, 0);
      } else {
        __ pshufhw(dst, dst, half_dup);
        __ pshufd(dst, dst, 0xaa);
      }
      break;
    }
    // Interleave (unpack) ops at each lane width; the macro handles the
    // operand setup shared by all of them.
    case kX64S64x2UnpackHigh:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
      break;
    case kX64S32x4UnpackHigh:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
      break;
    case kX64S16x8UnpackHigh:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
      break;
    case kX64S8x16UnpackHigh:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
      break;
    case kX64S64x2UnpackLow:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
      break;
    case kX64S32x4UnpackLow:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
      break;
    case kX64S16x8UnpackLow:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
      break;
    case kX64S8x16UnpackLow:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
      break;
    // Unzip (de-interleave) ops: isolate the odd (High) or even (Low)
    // lanes of each operand, then pack the two results together. With a
    // single input (InputCount() == 1) the operand is unzipped with
    // itself.
    case kX64S16x8UnzipHigh: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src2 = dst;
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      if (instr->InputCount() == 2) {
        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
        __ psrld(kScratchDoubleReg, 16);
        src2 = kScratchDoubleReg;
      }
      // Shift the odd words down into the even positions, then narrow.
      __ psrld(dst, 16);
      __ packusdw(dst, src2);
      break;
    }
    case kX64S16x8UnzipLow: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src2 = dst;
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      // Zero the odd words (pblendw with a zero register), then narrow.
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
      if (instr->InputCount() == 2) {
        ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
        src2 = kScratchDoubleReg;
      }
      __ pblendw(dst, kScratchDoubleReg, 0xaa);
      __ packusdw(dst, src2);
      break;
    }
    case kX64S8x16UnzipHigh: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src2 = dst;
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      if (instr->InputCount() == 2) {
        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
        __ psrlw(kScratchDoubleReg, 8);
        src2 = kScratchDoubleReg;
      }
      // Shift the odd bytes down into the even positions, then narrow.
      __ psrlw(dst, 8);
      __ packuswb(dst, src2);
      break;
    }
    case kX64S8x16UnzipLow: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src2 = dst;
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      if (instr->InputCount() == 2) {
        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
        __ psllw(kScratchDoubleReg, 8);
        __ psrlw(kScratchDoubleReg, 8);
        src2 = kScratchDoubleReg;
      }
      // Clear the odd byte of each word (shift up then back down), keeping
      // only the even bytes, then narrow.
      __ psllw(dst, 8);
      __ psrlw(dst, 8);
      __ packuswb(dst, src2);
      break;
    }
    // Byte transpose ops: combine the even bytes of operand 0 with the
    // odd bytes of operand 1 (Low) or vice versa (High).
    case kX64S8x16TransposeLow: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      __ psllw(dst, 8);
      if (instr->InputCount() == 1) {
        __ movups(kScratchDoubleReg, dst);
      } else {
        DCHECK_EQ(2, instr->InputCount());
        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
        __ psllw(kScratchDoubleReg, 8);
      }
      // dst keeps its even bytes (shifted up then down); scratch keeps
      // operand 1's even bytes in the odd positions; OR merges them.
      __ psrlw(dst, 8);
      __ por(dst, kScratchDoubleReg);
      break;
    }
    case kX64S8x16TransposeHigh: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      __ psrlw(dst, 8);
      if (instr->InputCount() == 1) {
        __ movups(kScratchDoubleReg, dst);
      } else {
        DCHECK_EQ(2, instr->InputCount());
        ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
        __ psrlw(kScratchDoubleReg, 8);
      }
      // dst keeps operand 0's odd bytes in the even positions; scratch
      // holds operand 1's odd bytes shifted back up; OR merges them.
      __ psllw(kScratchDoubleReg, 8);
      __ por(dst, kScratchDoubleReg);
      break;
    }
    // Reverse bytes within each 8-, 4- or 2-byte group.
    case kX64S8x8Reverse:
    case kX64S8x4Reverse:
    case kX64S8x2Reverse: {
      DCHECK_EQ(1, instr->InputCount());
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      if (arch_opcode != kX64S8x2Reverse) {
        // First shuffle words into position.
        // NOTE(review): 0xB1 narrows to int8_t here; presumably the
        // assembler masks the immediate back to 8 bits -- confirm.
        int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
        __ pshuflw(dst, dst, shuffle_mask);
        __ pshufhw(dst, dst, shuffle_mask);
      }
      // Then swap the two bytes within each word.
      __ movaps(kScratchDoubleReg, dst);
      __ psrlw(kScratchDoubleReg, 8);
      __ psllw(dst, 8);
      __ por(dst, kScratchDoubleReg);
      break;
    }
    // AnyTrue: 1 if any bit of src is set, else 0. ptest sets ZF when
    // src AND src == 0, and cmovq(zero) then replaces the preloaded 1
    // with 0.
    case kX64S1x4AnyTrue:
    case kX64S1x8AnyTrue:
    case kX64S1x16AnyTrue: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      Register dst = i.OutputRegister();
      XMMRegister src = i.InputSimd128Register(0);
      Register tmp = i.TempRegister(0);
      __ xorq(tmp, tmp);
      __ movq(dst, Immediate(1));
      __ ptest(src, src);
      __ cmovq(zero, dst, tmp);
      break;
    }
    // AllTrue: 1 iff every bit of src is set (inputs are assumed to be
    // comparison masks with all-ones/all-zeros lanes -- presumably
    // guaranteed by the instruction selector). Tests ~src for zero.
    case kX64S1x4AllTrue:
    case kX64S1x8AllTrue:
    case kX64S1x16AllTrue: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      Register dst = i.OutputRegister();
      XMMRegister src = i.InputSimd128Register(0);
      Register tmp = i.TempRegister(0);
      __ movq(tmp, Immediate(1));
      __ xorq(dst, dst);
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(kScratchDoubleReg, src);
      __ ptest(kScratchDoubleReg, kScratchDoubleReg);
      __ cmovq(zero, dst, tmp);
      break;
    }
    case kX64StackCheck:
      // Compare rsp against the stack limit; the consumer of the flags
      // decides whether to call the stack-overflow runtime.
      __ CompareRoot(rsp, RootIndex::kStackLimit);
      break;
    // Atomic exchange: xchg with a memory operand is implicitly locked on
    // x64, so no lock prefix is needed. The narrow variants re-extend the
    // old value (returned in the input register) to 32 bits.
    case kWord32AtomicExchangeInt8: {
      __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
      __ movsxbl(i.InputRegister(0), i.InputRegister(0));
      break;
    }
    case kWord32AtomicExchangeUint8: {
      __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
      __ movzxbl(i.InputRegister(0), i.InputRegister(0));
      break;
    }
    case kWord32AtomicExchangeInt16: {
      __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
      __ movsxwl(i.InputRegister(0), i.InputRegister(0));
      break;
    }
    case kWord32AtomicExchangeUint16: {
      __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
      __ movzxwl(i.InputRegister(0), i.InputRegister(0));
      break;
    }
    case kWord32AtomicExchangeWord32: {
      __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
      break;
    }
    // Atomic compare-exchange: cmpxchg needs an explicit lock prefix.
    // Per the cmpxchg contract the expected value goes in (and the old
    // value comes back in) rax; the narrow variants re-extend it.
    case kWord32AtomicCompareExchangeInt8: {
      __ lock();
      __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
      __ movsxbl(rax, rax);
      break;
    }
    case kWord32AtomicCompareExchangeUint8: {
      __ lock();
      __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
      __ movzxbl(rax, rax);
      break;
    }
    case kWord32AtomicCompareExchangeInt16: {
      __ lock();
      __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
      __ movsxwl(rax, rax);
      break;
    }
    case kWord32AtomicCompareExchangeUint16: {
      __ lock();
      __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
      __ movzxwl(rax, rax);
      break;
    }
    case kWord32AtomicCompareExchangeWord32: {
      __ lock();
      __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
      break;
    }
3391 | #define ATOMIC_BINOP_CASE(op, inst) \ |
3392 | case kWord32Atomic##op##Int8: \ |
3393 | ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \ |
3394 | __ movsxbl(rax, rax); \ |
3395 | break; \ |
3396 | case kWord32Atomic##op##Uint8: \ |
3397 | ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \ |
3398 | __ movzxbl(rax, rax); \ |
3399 | break; \ |
3400 | case kWord32Atomic##op##Int16: \ |
3401 | ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \ |
3402 | __ movsxwl(rax, rax); \ |
3403 | break; \ |
3404 | case kWord32Atomic##op##Uint16: \ |
3405 | ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \ |
3406 | __ movzxwl(rax, rax); \ |
3407 | break; \ |
3408 | case kWord32Atomic##op##Word32: \ |
3409 | ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \ |
3410 | break; |
3411 | ATOMIC_BINOP_CASE(Add, addl) |
3412 | ATOMIC_BINOP_CASE(Sub, subl) |
3413 | ATOMIC_BINOP_CASE(And, andl) |
3414 | ATOMIC_BINOP_CASE(Or, orl) |
3415 | ATOMIC_BINOP_CASE(Xor, xorl) |
3416 | #undef ATOMIC_BINOP_CASE |
3417 | case kX64Word64AtomicExchangeUint8: { |
3418 | __ xchgb(i.InputRegister(0), i.MemoryOperand(1)); |
3419 | __ movzxbq(i.InputRegister(0), i.InputRegister(0)); |
3420 | break; |
3421 | } |
3422 | case kX64Word64AtomicExchangeUint16: { |
3423 | __ xchgw(i.InputRegister(0), i.MemoryOperand(1)); |
3424 | __ movzxwq(i.InputRegister(0), i.InputRegister(0)); |
3425 | break; |
3426 | } |
3427 | case kX64Word64AtomicExchangeUint32: { |
3428 | __ xchgl(i.InputRegister(0), i.MemoryOperand(1)); |
3429 | break; |
3430 | } |
3431 | case kX64Word64AtomicExchangeUint64: { |
3432 | __ xchgq(i.InputRegister(0), i.MemoryOperand(1)); |
3433 | break; |
3434 | } |
3435 | case kX64Word64AtomicCompareExchangeUint8: { |
3436 | __ lock(); |
3437 | __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1)); |
3438 | __ movzxbq(rax, rax); |
3439 | break; |
3440 | } |
3441 | case kX64Word64AtomicCompareExchangeUint16: { |
3442 | __ lock(); |
3443 | __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1)); |
3444 | __ movzxwq(rax, rax); |
3445 | break; |
3446 | } |
3447 | case kX64Word64AtomicCompareExchangeUint32: { |
3448 | __ lock(); |
3449 | __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1)); |
3450 | break; |
3451 | } |
3452 | case kX64Word64AtomicCompareExchangeUint64: { |
3453 | __ lock(); |
3454 | __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1)); |
3455 | break; |
3456 | } |
3457 | #define ATOMIC64_BINOP_CASE(op, inst) \ |
3458 | case kX64Word64Atomic##op##Uint8: \ |
3459 | ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \ |
3460 | __ movzxbq(rax, rax); \ |
3461 | break; \ |
3462 | case kX64Word64Atomic##op##Uint16: \ |
3463 | ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \ |
3464 | __ movzxwq(rax, rax); \ |
3465 | break; \ |
3466 | case kX64Word64Atomic##op##Uint32: \ |
3467 | ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \ |
3468 | break; \ |
3469 | case kX64Word64Atomic##op##Uint64: \ |
3470 | ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \ |
3471 | break; |
3472 | ATOMIC64_BINOP_CASE(Add, addq) |
3473 | ATOMIC64_BINOP_CASE(Sub, subq) |
3474 | ATOMIC64_BINOP_CASE(And, andq) |
3475 | ATOMIC64_BINOP_CASE(Or, orq) |
3476 | ATOMIC64_BINOP_CASE(Xor, xorq) |
3477 | #undef ATOMIC64_BINOP_CASE |
3478 | case kWord32AtomicLoadInt8: |
3479 | case kWord32AtomicLoadUint8: |
3480 | case kWord32AtomicLoadInt16: |
3481 | case kWord32AtomicLoadUint16: |
3482 | case kWord32AtomicLoadWord32: |
3483 | case kWord32AtomicStoreWord8: |
3484 | case kWord32AtomicStoreWord16: |
3485 | case kWord32AtomicStoreWord32: |
3486 | case kX64Word64AtomicLoadUint8: |
3487 | case kX64Word64AtomicLoadUint16: |
3488 | case kX64Word64AtomicLoadUint32: |
3489 | case kX64Word64AtomicLoadUint64: |
3490 | case kX64Word64AtomicStoreWord8: |
3491 | case kX64Word64AtomicStoreWord16: |
3492 | case kX64Word64AtomicStoreWord32: |
3493 | case kX64Word64AtomicStoreWord64: |
3494 | UNREACHABLE(); // Won't be generated by instruction selector. |
3495 | break; |
3496 | } |
3497 | return kSuccess; |
}  // NOLINT(readability/fn_size)
3499 | |
3500 | #undef ASSEMBLE_UNOP |
3501 | #undef ASSEMBLE_BINOP |
3502 | #undef ASSEMBLE_COMPARE |
3503 | #undef ASSEMBLE_MULT |
3504 | #undef ASSEMBLE_SHIFT |
3505 | #undef ASSEMBLE_MOVX |
3506 | #undef ASSEMBLE_SSE_BINOP |
3507 | #undef ASSEMBLE_SSE_UNOP |
3508 | #undef ASSEMBLE_AVX_BINOP |
3509 | #undef ASSEMBLE_IEEE754_BINOP |
3510 | #undef ASSEMBLE_IEEE754_UNOP |
3511 | #undef ASSEMBLE_ATOMIC_BINOP |
3512 | #undef ASSEMBLE_ATOMIC64_BINOP |
3513 | #undef ASSEMBLE_SIMD_INSTR |
3514 | #undef ASSEMBLE_SIMD_IMM_INSTR |
3515 | #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE |
3516 | #undef ASSEMBLE_SIMD_IMM_SHUFFLE |
3517 | |
3518 | namespace { |
3519 | |
3520 | Condition FlagsConditionToCondition(FlagsCondition condition) { |
3521 | switch (condition) { |
3522 | case kUnorderedEqual: |
3523 | case kEqual: |
3524 | return equal; |
3525 | case kUnorderedNotEqual: |
3526 | case kNotEqual: |
3527 | return not_equal; |
3528 | case kSignedLessThan: |
3529 | return less; |
3530 | case kSignedGreaterThanOrEqual: |
3531 | return greater_equal; |
3532 | case kSignedLessThanOrEqual: |
3533 | return less_equal; |
3534 | case kSignedGreaterThan: |
3535 | return greater; |
3536 | case kUnsignedLessThan: |
3537 | return below; |
3538 | case kUnsignedGreaterThanOrEqual: |
3539 | return above_equal; |
3540 | case kUnsignedLessThanOrEqual: |
3541 | return below_equal; |
3542 | case kUnsignedGreaterThan: |
3543 | return above; |
3544 | case kOverflow: |
3545 | return overflow; |
3546 | case kNotOverflow: |
3547 | return no_overflow; |
3548 | default: |
3549 | break; |
3550 | } |
3551 | UNREACHABLE(); |
3552 | } |
3553 | |
3554 | } // namespace |
3555 | |
// Assembles branches after this instruction.
void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
  Label::Distance flabel_distance =
      branch->fallthru ? Label::kNear : Label::kFar;
  Label* tlabel = branch->true_label;
  Label* flabel = branch->false_label;
  // Unordered comparisons dispatch on the parity flag first (x64 float
  // compares set PF when an operand is NaN): kUnorderedEqual treats NaN as
  // "not equal" (false path), kUnorderedNotEqual as "not equal" (true path).
  if (branch->condition == kUnorderedEqual) {
    __ j(parity_even, flabel, flabel_distance);
  } else if (branch->condition == kUnorderedNotEqual) {
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(branch->condition), tlabel);

  // If the false block follows in assembly order, simply fall through to it.
  if (!branch->fallthru) __ jmp(flabel, flabel_distance);
}
3571 | |
// Conditionally masks the speculation poison register after a branch:
// when the negated branch condition holds, cmov moves zero into
// kSpeculationPoisonRegister (Spectre-style misspeculation mitigation).
void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
                                            Instruction* instr) {
  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
    return;
  }

  condition = NegateFlagsCondition(condition);
  // movl does not affect the condition flags, so it is safe between the
  // compare and the cmov below.
  __ movl(kScratchRegister, Immediate(0));
  __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
           kScratchRegister);
}
3584 | |
// Assembles a deoptimization branch. Like AssembleArchBranch, but with
// --deopt-every-n-times it additionally forces the deopt (true) path every
// n-th time this branch would otherwise fall through, for stress testing.
void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
                                            BranchInfo* branch) {
  Label::Distance flabel_distance =
      branch->fallthru ? Label::kNear : Label::kFar;
  Label* tlabel = branch->true_label;
  Label* flabel = branch->false_label;
  Label nodeopt;
  // NaN handling: unordered comparisons dispatch on the parity flag first.
  if (branch->condition == kUnorderedEqual) {
    __ j(parity_even, flabel, flabel_distance);
  } else if (branch->condition == kUnorderedNotEqual) {
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(branch->condition), tlabel);

  if (FLAG_deopt_every_n_times > 0) {
    ExternalReference counter =
        ExternalReference::stress_deopt_count(isolate());

    // Preserve the condition flags (decl below clobbers them) and rax (the
    // counter is accessed through load_rax/store_rax).
    __ pushfq();
    __ pushq(rax);
    __ load_rax(counter);
    __ decl(rax);
    __ j(not_zero, &nodeopt);

    // Counter hit zero: reset it to n and take the deopt path.
    __ Set(rax, FLAG_deopt_every_n_times);
    __ store_rax(counter);
    __ popq(rax);
    __ popfq();
    __ jmp(tlabel);

    // Counter still non-zero: store the decremented value and continue.
    __ bind(&nodeopt);
    __ store_rax(counter);
    __ popq(rax);
    __ popfq();
  }

  if (!branch->fallthru) {
    __ jmp(flabel, flabel_distance);
  }
}
3625 | |
3626 | void CodeGenerator::AssembleArchJump(RpoNumber target) { |
3627 | if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target)); |
3628 | } |
3629 | |
// Assembles a conditional WASM trap: branches to an out-of-line trap stub
// when |condition| holds, otherwise falls through to |end|.
void CodeGenerator::AssembleArchTrap(Instruction* instr,
                                     FlagsCondition condition) {
  auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
  Label* tlabel = ool->entry();
  Label end;
  // Unordered (NaN) results dispatch on the parity flag before the main
  // condition test: "equal" is false for NaN (skip trap), "not equal" is
  // true (trap).
  if (condition == kUnorderedEqual) {
    __ j(parity_even, &end);
  } else if (condition == kUnorderedNotEqual) {
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(condition), tlabel);
  __ bind(&end);
}
3643 | |
// Assembles boolean materializations after this instruction.
void CodeGenerator::AssembleArchBoolean(Instruction* instr,
                                        FlagsCondition condition) {
  X64OperandConverter i(this, instr);
  Label done;

  // Materialize a full 64-bit 1 or 0 value. The result register is always the
  // last output of the instruction.
  Label check;
  DCHECK_NE(0u, instr->OutputCount());
  Register reg = i.OutputRegister(instr->OutputCount() - 1);
  // For unordered comparisons, the NaN case (parity flag set) has a fixed
  // answer; write it directly and skip the setcc below.
  if (condition == kUnorderedEqual) {
    __ j(parity_odd, &check, Label::kNear);
    __ movl(reg, Immediate(0));  // NaN involved => "equal" is false.
    __ jmp(&done, Label::kNear);
  } else if (condition == kUnorderedNotEqual) {
    __ j(parity_odd, &check, Label::kNear);
    __ movl(reg, Immediate(1));  // NaN involved => "not equal" is true.
    __ jmp(&done, Label::kNear);
  }
  __ bind(&check);
  __ setcc(FlagsConditionToCondition(condition), reg);
  // setcc writes only the low byte; zero-extend to the full register.
  __ movzxbl(reg, reg);
  __ bind(&done);
}
3669 | |
3670 | void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) { |
3671 | X64OperandConverter i(this, instr); |
3672 | Register input = i.InputRegister(0); |
3673 | std::vector<std::pair<int32_t, Label*>> cases; |
3674 | for (size_t index = 2; index < instr->InputCount(); index += 2) { |
3675 | cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))}); |
3676 | } |
3677 | AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(), |
3678 | cases.data() + cases.size()); |
3679 | } |
3680 | |
3681 | void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) { |
3682 | X64OperandConverter i(this, instr); |
3683 | Register input = i.InputRegister(0); |
3684 | for (size_t index = 2; index < instr->InputCount(); index += 2) { |
3685 | __ cmpl(input, Immediate(i.InputInt32(index + 0))); |
3686 | __ j(equal, GetLabel(i.InputRpo(index + 1))); |
3687 | } |
3688 | AssembleArchJump(i.InputRpo(1)); |
3689 | } |
3690 | |
3691 | void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) { |
3692 | X64OperandConverter i(this, instr); |
3693 | Register input = i.InputRegister(0); |
3694 | int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2); |
3695 | Label** cases = zone()->NewArray<Label*>(case_count); |
3696 | for (int32_t index = 0; index < case_count; ++index) { |
3697 | cases[index] = GetLabel(i.InputRpo(index + 2)); |
3698 | } |
3699 | Label* const table = AddJumpTable(cases, case_count); |
3700 | __ cmpl(input, Immediate(case_count)); |
3701 | __ j(above_equal, GetLabel(i.InputRpo(1))); |
3702 | __ leaq(kScratchRegister, Operand(table)); |
3703 | __ jmp(Operand(kScratchRegister, input, times_8, 0)); |
3704 | } |
3705 | |
namespace {

// Size in bytes of one XMM register spill slot; callee-saved FP registers
// are saved and restored in 16-byte (quad word) chunks. `constexpr` in an
// anonymous namespace already has internal linkage, so the previous
// `static` qualifier was redundant.
constexpr int kQuadWordSize = 16;

}  // namespace
3711 | |
3712 | void CodeGenerator::FinishFrame(Frame* frame) { |
3713 | auto call_descriptor = linkage()->GetIncomingDescriptor(); |
3714 | |
3715 | const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters(); |
3716 | if (saves_fp != 0) { |
3717 | frame->AlignSavedCalleeRegisterSlots(); |
3718 | if (saves_fp != 0) { // Save callee-saved XMM registers. |
3719 | const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp); |
3720 | frame->AllocateSavedCalleeRegisterSlots( |
3721 | saves_fp_count * (kQuadWordSize / kSystemPointerSize)); |
3722 | } |
3723 | } |
3724 | const RegList saves = call_descriptor->CalleeSavedRegisters(); |
3725 | if (saves != 0) { // Save callee-saved registers. |
3726 | int count = 0; |
3727 | for (int i = Register::kNumRegisters - 1; i >= 0; i--) { |
3728 | if (((1 << i) & saves)) { |
3729 | ++count; |
3730 | } |
3731 | } |
3732 | frame->AllocateSavedCalleeRegisterSlots(count); |
3733 | } |
3734 | } |
3735 | |
// Builds the stack frame on function entry: emits the prologue appropriate
// for the call kind, performs the WASM stack-overflow check for big frames,
// allocates the remaining stack slots, and saves callee-saved registers.
void CodeGenerator::AssembleConstructFrame() {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  if (frame_access_state()->has_frame()) {
    int pc_base = __ pc_offset();

    if (call_descriptor->IsCFunctionCall()) {
      // Plain C frame: push rbp; mov rbp, rsp.
      __ pushq(rbp);
      __ movq(rbp, rsp);
    } else if (call_descriptor->IsJSFunctionCall()) {
      __ Prologue();
      if (call_descriptor->PushArgumentCount()) {
        __ pushq(kJavaScriptCallArgCountRegister);
      }
    } else {
      __ StubPrologue(info()->GetOutputStackFrameType());
      if (call_descriptor->IsWasmFunctionCall()) {
        __ pushq(kWasmInstanceRegister);
      } else if (call_descriptor->IsWasmImportWrapper()) {
        // WASM import wrappers are passed a tuple in the place of the instance.
        // Unpack the tuple into the instance and the target callable.
        // This must be done here in the codegen because it cannot be expressed
        // properly in the graph.
        __ LoadTaggedPointerField(
            kJSFunctionRegister,
            FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
        __ LoadTaggedPointerField(
            kWasmInstanceRegister,
            FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
        __ pushq(kWasmInstanceRegister);
      }
    }

    unwinding_info_writer_.MarkFrameConstructed(pc_base);
  }
  int required_slots = frame()->GetTotalFrameSlotCount() -
                       call_descriptor->CalculateFixedFrameSize();

  if (info()->is_osr()) {
    // TurboFan OSR-compiled functions cannot be entered directly.
    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);

    // Unoptimized code jumps directly to this entrypoint while the unoptimized
    // frame is still on the stack. Optimized code uses OSR values directly from
    // the unoptimized frame. Thus, all that needs to be done is to allocate the
    // remaining stack slots.
    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --" );
    osr_pc_offset_ = __ pc_offset();
    required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
    ResetSpeculationPoison();
  }

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();

  if (required_slots > 0) {
    DCHECK(frame_access_state()->has_frame());
    if (info()->IsWasm() && required_slots > 128) {
      // For WebAssembly functions with big frames we have to do the stack
      // overflow check before we construct the frame. Otherwise we may not
      // have enough space on the stack to call the runtime for the stack
      // overflow.
      Label done;

      // If the frame is bigger than the stack, we throw the stack overflow
      // exception unconditionally. Thereby we can avoid the integer overflow
      // check in the condition code.
      if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
        // Compare rsp against (real stack limit + frame size); skip the
        // trap call only when the frame fits.
        __ movq(kScratchRegister,
                FieldOperand(kWasmInstanceRegister,
                             WasmInstanceObject::kRealStackLimitAddressOffset));
        __ movq(kScratchRegister, Operand(kScratchRegister, 0));
        __ addq(kScratchRegister,
                Immediate(required_slots * kSystemPointerSize));
        __ cmpq(rsp, kScratchRegister);
        __ j(above_equal, &done);
      }

      __ near_call(wasm::WasmCode::kWasmStackOverflow,
                   RelocInfo::WASM_STUB_CALL);
      // The trap call does not return; record a safepoint for it and assert
      // that control never comes back here.
      ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
      RecordSafepoint(reference_map, Safepoint::kSimple,
                      Safepoint::kNoLazyDeopt);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      __ bind(&done);
    }

    // Skip callee-saved and return slots, which are created below.
    required_slots -= base::bits::CountPopulation(saves);
    required_slots -= base::bits::CountPopulation(saves_fp) *
                      (kQuadWordSize / kSystemPointerSize);
    required_slots -= frame()->GetReturnSlotCount();
    if (required_slots > 0) {
      __ subq(rsp, Immediate(required_slots * kSystemPointerSize));
    }
  }

  if (saves_fp != 0) {  // Save callee-saved XMM registers.
    const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    const int stack_size = saves_fp_count * kQuadWordSize;
    // Adjust the stack pointer.
    __ subq(rsp, Immediate(stack_size));
    // Store the registers on the stack.
    int slot_idx = 0;
    for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
      if (!((1 << i) & saves_fp)) continue;
      __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
                XMMRegister::from_code(i));
      slot_idx++;
    }
  }

  if (saves != 0) {  // Save callee-saved registers.
    // Pushed from highest to lowest register code, so AssembleReturn can pop
    // them back in ascending order.
    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
      if (!((1 << i) & saves)) continue;
      __ pushq(Register::from_code(i));
    }
  }

  // Allocate return slots (located after callee-saved).
  if (frame()->GetReturnSlotCount() > 0) {
    __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
  }
}
3859 | |
// Tears down the frame and returns: restores callee-saved registers,
// deconstructs the frame, then pops stack parameters. |pop| is either an
// immediate (extra pop count known statically) or a register holding a
// dynamic pop count.
void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  // Restore registers.
  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (saves != 0) {
    // Return slots were allocated below the callee-saved registers in
    // AssembleConstructFrame; skip over them first.
    const int returns = frame()->GetReturnSlotCount();
    if (returns != 0) {
      __ addq(rsp, Immediate(returns * kSystemPointerSize));
    }
    for (int i = 0; i < Register::kNumRegisters; i++) {
      if (!((1 << i) & saves)) continue;
      __ popq(Register::from_code(i));
    }
  }
  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
  if (saves_fp != 0) {
    const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    const int stack_size = saves_fp_count * kQuadWordSize;
    // Load the registers from the stack.
    int slot_idx = 0;
    for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
      if (!((1 << i) & saves_fp)) continue;
      __ movdqu(XMMRegister::from_code(i),
                Operand(rsp, kQuadWordSize * slot_idx));
      slot_idx++;
    }
    // Adjust the stack pointer.
    __ addq(rsp, Immediate(stack_size));
  }

  unwinding_info_writer_.MarkBlockWillExit();

  // Might need rcx for scratch if pop_size is too big or if there is a variable
  // pop count.
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
  size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
  X64OperandConverter g(this, nullptr);
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
      // Canonicalize JSFunction return sites for now.
      if (return_label_.is_bound()) {
        // A canonical return site was already emitted; reuse it.
        __ jmp(&return_label_);
        return;
      } else {
        __ bind(&return_label_);
        AssembleDeconstructFrame();
      }
    } else {
      AssembleDeconstructFrame();
    }
  }

  if (pop->IsImmediate()) {
    // Static pop count: fold it into the ret immediate.
    pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
    CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
    __ Ret(static_cast<int>(pop_size), rcx);
  } else {
    // Dynamic pop count: pop the return address into a scratch register,
    // drop pop_reg * 8 + pop_size bytes of arguments, then jump back.
    Register pop_reg = g.ToRegister(pop);
    Register scratch_reg = pop_reg == rcx ? rdx : rcx;
    __ popq(scratch_reg);
    __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
    __ jmp(scratch_reg);
  }
}
3928 | |
3929 | void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); } |
3930 | |
// Emits a move from |source| to |destination|. Operands may be registers,
// stack slots, or constants, in GP, FP, or SIMD representation.
void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  X64OperandConverter g(this, nullptr);
  // Helper function to write the given constant to the dst register.
  auto MoveConstantToRegister = [&](Register dst, Constant src) {
    switch (src.type()) {
      case Constant::kInt32: {
        if (RelocInfo::IsWasmReference(src.rmode())) {
          // Wasm references use a full 64-bit move that carries reloc info.
          __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
        } else {
          int32_t value = src.ToInt32();
          if (value == 0) {
            // xor is a shorter encoding than mov with a zero immediate.
            __ xorl(dst, dst);
          } else {
            __ movl(dst, Immediate(value));
          }
        }
        break;
      }
      case Constant::kInt64:
        if (RelocInfo::IsWasmReference(src.rmode())) {
          __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
        } else {
          __ Set(dst, src.ToInt64());
        }
        break;
      case Constant::kFloat32:
        __ MoveNumber(dst, src.ToFloat32());
        break;
      case Constant::kFloat64:
        __ MoveNumber(dst, src.ToFloat64().value());
        break;
      case Constant::kExternalReference:
        __ Move(dst, src.ToExternalReference());
        break;
      case Constant::kHeapObject: {
        Handle<HeapObject> src_object = src.ToHeapObject();
        RootIndex index;
        if (IsMaterializableFromRoot(src_object, &index)) {
          // Well-known objects load faster from the roots table than via an
          // embedded handle.
          __ LoadRoot(dst, index);
        } else {
          __ Move(dst, src_object);
        }
        break;
      }
      case Constant::kDelayedStringConstant: {
        const StringConstantBase* src_constant = src.ToDelayedStringConstant();
        __ MoveStringConstant(dst, src_constant);
        break;
      }
      case Constant::kRpoNumber:
        UNREACHABLE();  // TODO(dcarney): load of labels on x64.
        break;
    }
  };
  // Helper function to write the given constant to the stack.
  auto MoveConstantToSlot = [&](Operand dst, Constant src) {
    // Small integers without reloc info can be stored directly; everything
    // else is materialized in the scratch register first and then spilled.
    if (!RelocInfo::IsWasmReference(src.rmode())) {
      switch (src.type()) {
        case Constant::kInt32:
          __ movq(dst, Immediate(src.ToInt32()));
          return;
        case Constant::kInt64:
          __ Set(dst, src.ToInt64());
          return;
        default:
          break;
      }
    }
    MoveConstantToRegister(kScratchRegister, src);
    __ movq(dst, kScratchRegister);
  };
  // Dispatch on the source and destination operand kinds.
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ movq(g.ToRegister(destination), g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      Operand dst = g.ToOperand(destination);
      if (source->IsRegister()) {
        __ movq(dst, g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        // Scalar FP uses movsd; full 128-bit SIMD values need movups.
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(dst, src);
        } else {
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      Operand src = g.ToOperand(source);
      if (source->IsStackSlot()) {
        __ movq(g.ToRegister(destination), src);
      } else {
        DCHECK(source->IsFPStackSlot());
        XMMRegister dst = g.ToDoubleRegister(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(dst, src);
        } else {
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      if (source->IsStackSlot()) {
        // Spill on demand to use a temporary register for memory-to-memory
        // moves.
        __ movq(kScratchRegister, src);
        __ movq(dst, kScratchRegister);
      } else {
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          DCHECK(source->IsSimd128StackSlot());
          __ Movups(kScratchDoubleReg, src);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        MoveConstantToRegister(g.ToRegister(destination), src);
      } else {
        DCHECK(destination->IsFPRegister());
        XMMRegister dst = g.ToDoubleRegister(destination);
        if (src.type() == Constant::kFloat32) {
          // TODO(turbofan): Can we do better here?
          __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      Operand dst = g.ToOperand(destination);
      if (destination->IsStackSlot()) {
        MoveConstantToSlot(dst, src);
      } else {
        DCHECK(destination->IsFPStackSlot());
        // Store the raw bit pattern of the float constant to the slot.
        if (src.type() == Constant::kFloat32) {
          __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ movq(kScratchRegister, src.ToFloat64().AsUint64());
          __ movq(dst, kScratchRegister);
        }
      }
      return;
    }
  }
  UNREACHABLE();
}
4106 | |
// Emits a swap of |source| and |destination| (register<->register,
// register<->stack, or stack<->stack), using the scratch registers and, for
// memory operands, push/pop through the stack.
void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  X64OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds. Not all
  // combinations are possible.
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister: {
      if (source->IsRegister()) {
        // Classic three-move swap through the scratch register.
        Register src = g.ToRegister(source);
        Register dst = g.ToRegister(destination);
        __ movq(kScratchRegister, src);
        __ movq(src, dst);
        __ movq(dst, kScratchRegister);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
        __ Movapd(kScratchDoubleReg, src);
        __ Movapd(src, dst);
        __ Movapd(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        // Use the stack as the temporary so no extra register is needed;
        // the SP-delta and unwinding updates keep frame accounting in sync
        // around the push/pop pair.
        __ pushq(src);
        frame_access_state()->IncreaseSPDelta(1);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ movq(src, g.ToOperand(destination));
        frame_access_state()->IncreaseSPDelta(-1);
        __ popq(g.ToOperand(destination));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        // Scalar FP swaps via movsd; full 128-bit values via movups.
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(src, dst);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          __ Movups(kScratchDoubleReg, src);
          __ Movups(src, dst);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      MachineRepresentation rep =
          LocationOperand::cast(source)->representation();
      if (rep != MachineRepresentation::kSimd128) {
        // One slot: save dst in the scratch register, push/pop src into
        // dst, then write the saved value into src.
        Register tmp = kScratchRegister;
        __ movq(tmp, dst);
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ movq(src, tmp);
      } else {
        // Without AVX, misaligned reads and writes will trap. Move using the
        // stack, in two parts.
        __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ pushq(g.ToOperand(source, kSystemPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(g.ToOperand(destination, kSystemPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ movups(src, kScratchDoubleReg);
      }
      return;
    }
    default:
      UNREACHABLE();
      break;
  }
}
4200 | |
4201 | void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) { |
4202 | for (size_t index = 0; index < target_count; ++index) { |
4203 | __ dq(targets[index]); |
4204 | } |
4205 | } |
4206 | |
4207 | #undef __ |
4208 | |
4209 | } // namespace compiler |
4210 | } // namespace internal |
4211 | } // namespace v8 |
4212 | |