1// Copyright 2013 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/compiler/backend/code-generator.h"
6
7#include <limits>
8
9#include "src/base/overflowing-math.h"
10#include "src/compiler/backend/code-generator-impl.h"
11#include "src/compiler/backend/gap-resolver.h"
12#include "src/compiler/node-matchers.h"
13#include "src/compiler/osr.h"
14#include "src/heap/heap-inl.h" // crbug.com/v8/8499
15#include "src/macro-assembler.h"
16#include "src/objects/smi.h"
17#include "src/optimized-compilation-info.h"
18#include "src/wasm/wasm-code-manager.h"
19#include "src/wasm/wasm-objects.h"
20#include "src/x64/assembler-x64.h"
21
22namespace v8 {
23namespace internal {
24namespace compiler {
25
26#define __ tasm()->
27
28// Adds X64 specific methods for decoding operands.
29class X64OperandConverter : public InstructionOperandConverter {
30 public:
31 X64OperandConverter(CodeGenerator* gen, Instruction* instr)
32 : InstructionOperandConverter(gen, instr) {}
33
34 Immediate InputImmediate(size_t index) {
35 return ToImmediate(instr_->InputAt(index));
36 }
37
38 Operand InputOperand(size_t index, int extra = 0) {
39 return ToOperand(instr_->InputAt(index), extra);
40 }
41
42 Operand OutputOperand() { return ToOperand(instr_->Output()); }
43
44 Immediate ToImmediate(InstructionOperand* operand) {
45 Constant constant = ToConstant(operand);
46 if (constant.type() == Constant::kFloat64) {
47 DCHECK_EQ(0, constant.ToFloat64().AsUint64());
48 return Immediate(0);
49 }
50 if (RelocInfo::IsWasmReference(constant.rmode())) {
51 return Immediate(constant.ToInt32(), constant.rmode());
52 }
53 return Immediate(constant.ToInt32());
54 }
55
56 Operand ToOperand(InstructionOperand* op, int extra = 0) {
57 DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
58 return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
59 }
60
61 Operand SlotToOperand(int slot_index, int extra = 0) {
62 FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
63 return Operand(offset.from_stack_pointer() ? rsp : rbp,
64 offset.offset() + extra);
65 }
66
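// Returns the current input index and post-increments it. MemoryOperand()
// below uses this to consume the instruction's address inputs in order.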
67 static size_t NextOffset(size_t* offset) {
68 size_t i = *offset;
69 (*offset)++;
70 return i;
71 }
72
73 static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
74 STATIC_ASSERT(0 == static_cast<int>(times_1));
75 STATIC_ASSERT(1 == static_cast<int>(times_2));
76 STATIC_ASSERT(2 == static_cast<int>(times_4));
77 STATIC_ASSERT(3 == static_cast<int>(times_8));
78 int scale = static_cast<int>(mode - one);
79 DCHECK(scale >= 0 && scale < 4);
80 return static_cast<ScaleFactor>(scale);
81 }
82
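// Decodes the addressing mode encoded in the instruction opcode and builds
// the corresponding x64 Operand, consuming base/index/displacement inputs
// starting at *offset.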
83 Operand MemoryOperand(size_t* offset) {
84 AddressingMode mode = AddressingModeField::decode(instr_->opcode());
85 switch (mode) {
86 case kMode_MR: {
87 Register base = InputRegister(NextOffset(offset));
88 int32_t disp = 0;
89 return Operand(base, disp);
90 }
91 case kMode_MRI: {
92 Register base = InputRegister(NextOffset(offset));
93 int32_t disp = InputInt32(NextOffset(offset));
94 return Operand(base, disp);
95 }
96 case kMode_MR1:
97 case kMode_MR2:
98 case kMode_MR4:
99 case kMode_MR8: {
100 Register base = InputRegister(NextOffset(offset));
101 Register index = InputRegister(NextOffset(offset));
102 ScaleFactor scale = ScaleFor(kMode_MR1, mode);
103 int32_t disp = 0;
104 return Operand(base, index, scale, disp);
105 }
106 case kMode_MR1I:
107 case kMode_MR2I:
108 case kMode_MR4I:
109 case kMode_MR8I: {
110 Register base = InputRegister(NextOffset(offset));
111 Register index = InputRegister(NextOffset(offset));
112 ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
113 int32_t disp = InputInt32(NextOffset(offset));
114 return Operand(base, index, scale, disp);
115 }
116 case kMode_M1: {
117 Register base = InputRegister(NextOffset(offset));
118 int32_t disp = 0;
119 return Operand(base, disp);
120 }
121 case kMode_M2:
122 UNREACHABLE(); // Should use kMode_MR with the more compact encoding instead
123 return Operand(no_reg, 0);
124 case kMode_M4:
125 case kMode_M8: {
126 Register index = InputRegister(NextOffset(offset));
127 ScaleFactor scale = ScaleFor(kMode_M1, mode);
128 int32_t disp = 0;
129 return Operand(index, scale, disp);
130 }
131 case kMode_M1I:
132 case kMode_M2I:
133 case kMode_M4I:
134 case kMode_M8I: {
135 Register index = InputRegister(NextOffset(offset));
136 ScaleFactor scale = ScaleFor(kMode_M1I, mode);
137 int32_t disp = InputInt32(NextOffset(offset));
138 return Operand(index, scale, disp);
139 }
140 case kMode_Root: {
141 Register base = kRootRegister;
142 int32_t disp = InputInt32(NextOffset(offset));
143 return Operand(base, disp);
144 }
145 case kMode_None:
146 UNREACHABLE();
147 }
148 UNREACHABLE();
149 }
150
151 Operand MemoryOperand(size_t first_input = 0) {
152 return MemoryOperand(&first_input);
153 }
154};
155
156namespace {
157
158bool HasImmediateInput(Instruction* instr, size_t index) {
159 return instr->InputAt(index)->IsImmediate();
160}
161
162class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
163 public:
164 OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
165 : OutOfLineCode(gen), result_(result) {}
166
167 void Generate() final {
168 __ Xorps(result_, result_);
169 __ Divss(result_, result_);
170 }
171
172 private:
173 XMMRegister const result_;
174};
175
176class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
177 public:
178 OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
179 : OutOfLineCode(gen), result_(result) {}
180
181 void Generate() final {
182 __ Xorpd(result_, result_);
183 __ Divsd(result_, result_);
184 }
185
186 private:
187 XMMRegister const result_;
188};
189
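// Out-of-line slow path for kArchTruncateDoubleToI: spills the input double
// to the stack, calls the DoubleToI wasm stub or builtin, and reads the
// 32-bit result back from the stack.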
190class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
191 public:
192 OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
193 XMMRegister input, StubCallMode stub_mode,
194 UnwindingInfoWriter* unwinding_info_writer)
195 : OutOfLineCode(gen),
196 result_(result),
197 input_(input),
198 stub_mode_(stub_mode),
199 unwinding_info_writer_(unwinding_info_writer),
200 isolate_(gen->isolate()),
201 zone_(gen->zone()) {}
202
203 void Generate() final {
204 __ subq(rsp, Immediate(kDoubleSize));
205 unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
206 kDoubleSize);
207 __ Movsd(MemOperand(rsp, 0), input_);
208 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
209 // A direct call to a wasm runtime stub defined in this module.
210 // Just encode the stub index. This will be patched when the code
211 // is added to the native module and copied into wasm code space.
212 __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
213 } else {
214 __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
215 }
216 __ movl(result_, MemOperand(rsp, 0));
217 __ addq(rsp, Immediate(kDoubleSize));
218 unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
219 -kDoubleSize);
220 }
221
222 private:
223 Register const result_;
224 XMMRegister const input_;
225 StubCallMode stub_mode_;
226 UnwindingInfoWriter* const unwinding_info_writer_;
227 Isolate* isolate_;
228 Zone* zone_;
229};
230
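// Out-of-line write barrier: skips Smi values (when the mode allows), skips
// values on pages the GC is not interested in, and otherwise computes the
// slot address and calls the record-write (or ephemeron key barrier) stub.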
231class OutOfLineRecordWrite final : public OutOfLineCode {
232 public:
233 OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
234 Register value, Register scratch0, Register scratch1,
235 RecordWriteMode mode, StubCallMode stub_mode)
236 : OutOfLineCode(gen),
237 object_(object),
238 operand_(operand),
239 value_(value),
240 scratch0_(scratch0),
241 scratch1_(scratch1),
242 mode_(mode),
243 stub_mode_(stub_mode),
244 zone_(gen->zone()) {}
245
246 void Generate() final {
247 if (mode_ > RecordWriteMode::kValueIsPointer) {
248 __ JumpIfSmi(value_, exit());
249 }
250 __ CheckPageFlag(value_, scratch0_,
251 MemoryChunk::kPointersToHereAreInterestingMask, zero,
252 exit());
253 __ leaq(scratch1_, operand_);
254
255 RememberedSetAction const remembered_set_action =
256 mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
257 : OMIT_REMEMBERED_SET;
258 SaveFPRegsMode const save_fp_mode =
259 frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
260
261 if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
262 __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
263 } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
264 // A direct call to a wasm runtime stub defined in this module.
265 // Just encode the stub index. This will be patched when the code
266 // is added to the native module and copied into wasm code space.
267 __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
268 save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
269 } else {
270 __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
271 save_fp_mode);
272 }
273 }
274
275 private:
276 Register const object_;
277 Operand const operand_;
278 Register const value_;
279 Register const scratch0_;
280 Register const scratch1_;
281 RecordWriteMode const mode_;
282 StubCallMode const stub_mode_;
283 Zone* zone_;
284};
285
286class WasmOutOfLineTrap : public OutOfLineCode {
287 public:
288 WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
289 : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
290
291 void Generate() override {
292 X64OperandConverter i(gen_, instr_);
293 TrapId trap_id =
294 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
295 GenerateWithTrapId(trap_id);
296 }
297
298 protected:
299 CodeGenerator* gen_;
300
301 void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
302
303 private:
304 void GenerateCallToTrap(TrapId trap_id) {
305 if (!gen_->wasm_runtime_exception_support()) {
306 // We cannot test calls to the runtime in cctest/test-run-wasm.
307 // Therefore we emit a call to C here instead of a call to the runtime.
308 __ PrepareCallCFunction(0);
309 __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
310 0);
311 __ LeaveFrame(StackFrame::WASM_COMPILED);
312 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
313 size_t pop_size =
314 call_descriptor->StackParameterCount() * kSystemPointerSize;
315 // Use rcx as a scratch register; we return immediately anyway.
316 __ Ret(static_cast<int>(pop_size), rcx);
317 } else {
318 gen_->AssembleSourcePosition(instr_);
319 // A direct call to a wasm runtime stub defined in this module.
320 // Just encode the stub index. This will be patched when the code
321 // is added to the native module and copied into wasm code space.
322 __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
323 ReferenceMap* reference_map =
324 new (gen_->zone()) ReferenceMap(gen_->zone());
325 gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
326 Safepoint::kNoLazyDeopt);
327 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
328 }
329 }
330
331 Instruction* instr_;
332};
333
334class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
335 public:
336 WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
337 : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
338
339 void Generate() final {
340 gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
341 GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
342 }
343
344 private:
345 int pc_;
346};
347
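// For protected (trap-handler based) memory accesses, registers an
// out-of-line trap that records the faulting pc and raises a memory
// out-of-bounds trap.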
348void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
349 InstructionCode opcode, Instruction* instr,
350 X64OperandConverter& i, int pc) {
351 const MemoryAccessMode access_mode =
352 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
353 if (access_mode == kMemoryAccessProtected) {
354 new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
355 }
356}
357
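// For poisoned loads, masks the loaded value with the speculation poison
// register so the value is zeroed on a mispredicted path.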
358void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
359 InstructionCode opcode, Instruction* instr,
360 X64OperandConverter& i) {
361 const MemoryAccessMode access_mode =
362 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
363 if (access_mode == kMemoryAccessPoisoned) {
364 Register value = i.OutputRegister();
365 codegen->tasm()->andq(value, kSpeculationPoisonRegister);
366 }
367}
368
369} // namespace
370
371#define ASSEMBLE_UNOP(asm_instr) \
372 do { \
373 if (instr->Output()->IsRegister()) { \
374 __ asm_instr(i.OutputRegister()); \
375 } else { \
376 __ asm_instr(i.OutputOperand()); \
377 } \
378 } while (false)
379
380#define ASSEMBLE_BINOP(asm_instr) \
381 do { \
382 if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
383 size_t index = 1; \
384 Operand right = i.MemoryOperand(&index); \
385 __ asm_instr(i.InputRegister(0), right); \
386 } else { \
387 if (HasImmediateInput(instr, 1)) { \
388 if (instr->InputAt(0)->IsRegister()) { \
389 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
390 } else { \
391 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
392 } \
393 } else { \
394 if (instr->InputAt(1)->IsRegister()) { \
395 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
396 } else { \
397 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
398 } \
399 } \
400 } \
401 } while (false)
402
403#define ASSEMBLE_COMPARE(asm_instr) \
404 do { \
405 if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
406 size_t index = 0; \
407 Operand left = i.MemoryOperand(&index); \
408 if (HasImmediateInput(instr, index)) { \
409 __ asm_instr(left, i.InputImmediate(index)); \
410 } else { \
411 __ asm_instr(left, i.InputRegister(index)); \
412 } \
413 } else { \
414 if (HasImmediateInput(instr, 1)) { \
415 if (instr->InputAt(0)->IsRegister()) { \
416 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
417 } else { \
418 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
419 } \
420 } else { \
421 if (instr->InputAt(1)->IsRegister()) { \
422 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
423 } else { \
424 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
425 } \
426 } \
427 } \
428 } while (false)
429
430#define ASSEMBLE_MULT(asm_instr) \
431 do { \
432 if (HasImmediateInput(instr, 1)) { \
433 if (instr->InputAt(0)->IsRegister()) { \
434 __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
435 i.InputImmediate(1)); \
436 } else { \
437 __ asm_instr(i.OutputRegister(), i.InputOperand(0), \
438 i.InputImmediate(1)); \
439 } \
440 } else { \
441 if (instr->InputAt(1)->IsRegister()) { \
442 __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
443 } else { \
444 __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \
445 } \
446 } \
447 } while (false)
448
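// Shifts/rotates by an immediate (masked to 5 bits for 32-bit ops, 6 bits
// for 64-bit ops) or by the count in cl when the shift amount is a register.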
449#define ASSEMBLE_SHIFT(asm_instr, width) \
450 do { \
451 if (HasImmediateInput(instr, 1)) { \
452 if (instr->Output()->IsRegister()) { \
453 __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
454 } else { \
455 __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \
456 } \
457 } else { \
458 if (instr->Output()->IsRegister()) { \
459 __ asm_instr##_cl(i.OutputRegister()); \
460 } else { \
461 __ asm_instr##_cl(i.OutputOperand()); \
462 } \
463 } \
464 } while (false)
465
466#define ASSEMBLE_MOVX(asm_instr) \
467 do { \
468 if (instr->addressing_mode() != kMode_None) { \
469 __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
470 } else if (instr->InputAt(0)->IsRegister()) { \
471 __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
472 } else { \
473 __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \
474 } \
475 } while (false)
476
477#define ASSEMBLE_SSE_BINOP(asm_instr) \
478 do { \
479 if (instr->InputAt(1)->IsFPRegister()) { \
480 __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
481 } else { \
482 __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
483 } \
484 } while (false)
485
486#define ASSEMBLE_SSE_UNOP(asm_instr) \
487 do { \
488 if (instr->InputAt(0)->IsFPRegister()) { \
489 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
490 } else { \
491 __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \
492 } \
493 } while (false)
494
495#define ASSEMBLE_AVX_BINOP(asm_instr) \
496 do { \
497 CpuFeatureScope avx_scope(tasm(), AVX); \
498 if (instr->InputAt(1)->IsFPRegister()) { \
499 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
500 i.InputDoubleRegister(1)); \
501 } else { \
502 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
503 i.InputOperand(1)); \
504 } \
505 } while (false)
506
507#define ASSEMBLE_IEEE754_BINOP(name) \
508 do { \
509 __ PrepareCallCFunction(2); \
510 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
511 } while (false)
512
513#define ASSEMBLE_IEEE754_UNOP(name) \
514 do { \
515 __ PrepareCallCFunction(1); \
516 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
517 } while (false)
518
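// Implements an atomic read-modify-write as a compare-and-swap loop: load the
// old value into rax, compute the new value in a temp register, then
// lock cmpxchg; retry if another thread changed the memory in the meantime.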
519#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
520 do { \
521 Label binop; \
522 __ bind(&binop); \
523 __ mov_inst(rax, i.MemoryOperand(1)); \
524 __ movl(i.TempRegister(0), rax); \
525 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
526 __ lock(); \
527 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
528 __ j(not_equal, &binop); \
529 } while (false)
530
531#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
532 do { \
533 Label binop; \
534 __ bind(&binop); \
535 __ mov_inst(rax, i.MemoryOperand(1)); \
536 __ movq(i.TempRegister(0), rax); \
537 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
538 __ lock(); \
539 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
540 __ j(not_equal, &binop); \
541 } while (false)
542
543#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index) \
544 do { \
545 if (instr->InputAt(index)->IsSimd128Register()) { \
546 __ opcode(dst_operand, i.InputSimd128Register(index)); \
547 } else { \
548 __ opcode(dst_operand, i.InputOperand(index)); \
549 } \
550 } while (false)
551
552#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm) \
553 do { \
554 if (instr->InputAt(index)->IsSimd128Register()) { \
555 __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
556 } else { \
557 __ opcode(dst_operand, i.InputOperand(index), imm); \
558 } \
559 } while (false)
560
561#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \
562 do { \
563 XMMRegister dst = i.OutputSimd128Register(); \
564 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
565 byte input_index = instr->InputCount() == 2 ? 1 : 0; \
566 ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \
567 } while (false)
568
569#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) \
570 do { \
571 CpuFeatureScope sse_scope(tasm(), SSELevel); \
572 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
573 __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
574 } while (false)
575
576void CodeGenerator::AssembleDeconstructFrame() {
577 unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
578 __ movq(rsp, rbp);
579 __ popq(rbp);
580}
581
582void CodeGenerator::AssemblePrepareTailCall() {
583 if (frame_access_state()->has_frame()) {
584 __ movq(rbp, MemOperand(rbp, 0));
585 }
586 frame_access_state()->SetFrameAccessToSP();
587}
588
589void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
590 Register scratch1,
591 Register scratch2,
592 Register scratch3) {
593 DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
594 Label done;
595
596 // Check if current frame is an arguments adaptor frame.
597 __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
598 Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
599 __ j(not_equal, &done, Label::kNear);
600
601 // Load the arguments count from the current arguments adaptor frame
602 // (note that it does not include the receiver).
603 Register caller_args_count_reg = scratch1;
604 __ SmiUntag(caller_args_count_reg,
605 Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
606
607 ParameterCount callee_args_count(args_reg);
608 __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
609 scratch3);
610 __ bind(&done);
611}
612
613namespace {
614
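// Grows (or, if allowed, shrinks) the stack so that the first unused slot
// above rsp matches new_slot_above_sp, keeping the frame access state's SP
// delta in sync.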
615void AdjustStackPointerForTailCall(Assembler* assembler,
616 FrameAccessState* state,
617 int new_slot_above_sp,
618 bool allow_shrinkage = true) {
619 int current_sp_offset = state->GetSPToFPSlotCount() +
620 StandardFrameConstants::kFixedSlotCountAboveFp;
621 int stack_slot_delta = new_slot_above_sp - current_sp_offset;
622 if (stack_slot_delta > 0) {
623 assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
624 state->IncreaseSPDelta(stack_slot_delta);
625 } else if (allow_shrinkage && stack_slot_delta < 0) {
626 assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
627 state->IncreaseSPDelta(stack_slot_delta);
628 }
629}
630
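// Pushes the 16-byte shuffle mask onto the stack, high quadword first, so
// that the mask ends up in memory order at rsp.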
631void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
632 int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
633 assembler->movq(kScratchRegister, shuffle_mask);
634 assembler->Push(kScratchRegister);
635 shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
636 assembler->movq(kScratchRegister, shuffle_mask);
637 assembler->Push(kScratchRegister);
638}
639
640} // namespace
641
642void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
643 int first_unused_stack_slot) {
644 CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
645 ZoneVector<MoveOperands*> pushes(zone());
646 GetPushCompatibleMoves(instr, flags, &pushes);
647
648 if (!pushes.empty() &&
649 (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
650 first_unused_stack_slot)) {
651 X64OperandConverter g(this, instr);
652 for (auto move : pushes) {
653 LocationOperand destination_location(
654 LocationOperand::cast(move->destination()));
655 InstructionOperand source(move->source());
656 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
657 destination_location.index());
658 if (source.IsStackSlot()) {
659 LocationOperand source_location(LocationOperand::cast(source));
660 __ Push(g.SlotToOperand(source_location.index()));
661 } else if (source.IsRegister()) {
662 LocationOperand source_location(LocationOperand::cast(source));
663 __ Push(source_location.GetRegister());
664 } else if (source.IsImmediate()) {
665 __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
666 } else {
667 // Pushes of non-scalar data types are not supported.
668 UNIMPLEMENTED();
669 }
670 frame_access_state()->IncreaseSPDelta(1);
671 move->Eliminate();
672 }
673 }
674 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
675 first_unused_stack_slot, false);
676}
677
678void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
679 int first_unused_stack_slot) {
680 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
681 first_unused_stack_slot);
682}
683
684// Check that {kJavaScriptCallCodeStartRegister} is correct.
685void CodeGenerator::AssembleCodeStartRegisterCheck() {
686 __ ComputeCodeStartAddress(rbx);
687 __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
688 __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
689}
690
691// Check if the code object is marked for deoptimization. If it is, then it
692// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
693// to:
694// 1. read from memory the word that contains that bit, which can be found in
695// the flags in the referenced {CodeDataContainer} object;
696// 2. test kMarkedForDeoptimizationBit in those flags; and
697 // 3. if it is not zero, jump to the builtin.
698void CodeGenerator::BailoutIfDeoptimized() {
699 int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
700 __ LoadTaggedPointerField(rbx,
701 Operand(kJavaScriptCallCodeStartRegister, offset));
702 __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
703 Immediate(1 << Code::kMarkedForDeoptimizationBit));
704 __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
705 RelocInfo::CODE_TARGET, not_zero);
706}
707
708void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
709 // Set a mask which has all bits set in the normal case, but has all
710 // bits cleared if we are speculatively executing the wrong PC.
711 __ ComputeCodeStartAddress(rbx);
712 __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
713 __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
714 __ movq(rbx, Immediate(-1));
715 __ cmovq(equal, kSpeculationPoisonRegister, rbx);
716}
717
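// Masks the incoming JS function, context, and stack pointer with the
// speculation poison so they become zero on a mispredicted code start.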
718void CodeGenerator::AssembleRegisterArgumentPoisoning() {
719 __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
720 __ andq(kContextRegister, kSpeculationPoisonRegister);
721 __ andq(rsp, kSpeculationPoisonRegister);
722}
723
724// Assembles an instruction after register allocation, producing machine code.
725CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
726 Instruction* instr) {
727 X64OperandConverter i(this, instr);
728 InstructionCode opcode = instr->opcode();
729 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
730 switch (arch_opcode) {
731 case kArchCallCodeObject: {
732 if (HasImmediateInput(instr, 0)) {
733 Handle<Code> code = i.InputCode(0);
734 __ Call(code, RelocInfo::CODE_TARGET);
735 } else {
736 Register reg = i.InputRegister(0);
737 DCHECK_IMPLIES(
738 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
739 reg == kJavaScriptCallCodeStartRegister);
740 __ LoadCodeObjectEntry(reg, reg);
741 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
742 __ RetpolineCall(reg);
743 } else {
744 __ call(reg);
745 }
746 }
747 RecordCallPosition(instr);
748 frame_access_state()->ClearSPDelta();
749 break;
750 }
751 case kArchCallBuiltinPointer: {
752 DCHECK(!HasImmediateInput(instr, 0));
753 Register builtin_pointer = i.InputRegister(0);
754 __ CallBuiltinPointer(builtin_pointer);
755 RecordCallPosition(instr);
756 frame_access_state()->ClearSPDelta();
757 break;
758 }
759 case kArchCallWasmFunction: {
760 if (HasImmediateInput(instr, 0)) {
761 Constant constant = i.ToConstant(instr->InputAt(0));
762 Address wasm_code = static_cast<Address>(constant.ToInt64());
763 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
764 __ near_call(wasm_code, constant.rmode());
765 } else {
766 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
767 __ RetpolineCall(wasm_code, constant.rmode());
768 } else {
769 __ Call(wasm_code, constant.rmode());
770 }
771 }
772 } else {
773 Register reg = i.InputRegister(0);
774 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
775 __ RetpolineCall(reg);
776 } else {
777 __ call(reg);
778 }
779 }
780 RecordCallPosition(instr);
781 frame_access_state()->ClearSPDelta();
782 break;
783 }
784 case kArchTailCallCodeObjectFromJSFunction:
785 case kArchTailCallCodeObject: {
786 if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
787 AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
788 i.TempRegister(0), i.TempRegister(1),
789 i.TempRegister(2));
790 }
791 if (HasImmediateInput(instr, 0)) {
792 Handle<Code> code = i.InputCode(0);
793 __ Jump(code, RelocInfo::CODE_TARGET);
794 } else {
795 Register reg = i.InputRegister(0);
796 DCHECK_IMPLIES(
797 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
798 reg == kJavaScriptCallCodeStartRegister);
799 __ LoadCodeObjectEntry(reg, reg);
800 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
801 __ RetpolineJump(reg);
802 } else {
803 __ jmp(reg);
804 }
805 }
806 unwinding_info_writer_.MarkBlockWillExit();
807 frame_access_state()->ClearSPDelta();
808 frame_access_state()->SetFrameAccessToDefault();
809 break;
810 }
811 case kArchTailCallWasm: {
812 if (HasImmediateInput(instr, 0)) {
813 Constant constant = i.ToConstant(instr->InputAt(0));
814 Address wasm_code = static_cast<Address>(constant.ToInt64());
815 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
816 __ near_jmp(wasm_code, constant.rmode());
817 } else {
818 __ Move(kScratchRegister, wasm_code, constant.rmode());
819 __ jmp(kScratchRegister);
820 }
821 } else {
822 Register reg = i.InputRegister(0);
823 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
824 __ RetpolineJump(reg);
825 } else {
826 __ jmp(reg);
827 }
828 }
829 unwinding_info_writer_.MarkBlockWillExit();
830 frame_access_state()->ClearSPDelta();
831 frame_access_state()->SetFrameAccessToDefault();
832 break;
833 }
834 case kArchTailCallAddress: {
835 CHECK(!HasImmediateInput(instr, 0));
836 Register reg = i.InputRegister(0);
837 DCHECK_IMPLIES(
838 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
839 reg == kJavaScriptCallCodeStartRegister);
840 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
841 __ RetpolineJump(reg);
842 } else {
843 __ jmp(reg);
844 }
845 unwinding_info_writer_.MarkBlockWillExit();
846 frame_access_state()->ClearSPDelta();
847 frame_access_state()->SetFrameAccessToDefault();
848 break;
849 }
850 case kArchCallJSFunction: {
851 Register func = i.InputRegister(0);
852 if (FLAG_debug_code) {
853 // Check the function's context matches the context argument.
854 __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
855 __ Assert(equal, AbortReason::kWrongFunctionContext);
856 }
857 static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
858 __ LoadTaggedPointerField(rcx,
859 FieldOperand(func, JSFunction::kCodeOffset));
860 __ CallCodeObject(rcx);
861 frame_access_state()->ClearSPDelta();
862 RecordCallPosition(instr);
863 break;
864 }
865 case kArchPrepareCallCFunction: {
866 // Frame alignment requires using FP-relative frame addressing.
867 frame_access_state()->SetFrameAccessToFP();
868 int const num_parameters = MiscField::decode(instr->opcode());
869 __ PrepareCallCFunction(num_parameters);
870 break;
871 }
872 case kArchSaveCallerRegisters: {
873 fp_mode_ =
874 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
875 DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
876 // kReturnRegister0 should have been saved before entering the stub.
877 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
878 DCHECK(IsAligned(bytes, kSystemPointerSize));
879 DCHECK_EQ(0, frame_access_state()->sp_delta());
880 frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
881 DCHECK(!caller_registers_saved_);
882 caller_registers_saved_ = true;
883 break;
884 }
885 case kArchRestoreCallerRegisters: {
886 DCHECK(fp_mode_ ==
887 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
888 DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
889 // Don't overwrite the returned value.
890 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
891 frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
892 DCHECK_EQ(0, frame_access_state()->sp_delta());
893 DCHECK(caller_registers_saved_);
894 caller_registers_saved_ = false;
895 break;
896 }
897 case kArchPrepareTailCall:
898 AssemblePrepareTailCall();
899 break;
900 case kArchCallCFunction: {
901 int const num_parameters = MiscField::decode(instr->opcode());
902 if (HasImmediateInput(instr, 0)) {
903 ExternalReference ref = i.InputExternalReference(0);
904 __ CallCFunction(ref, num_parameters);
905 } else {
906 Register func = i.InputRegister(0);
907 __ CallCFunction(func, num_parameters);
908 }
909 frame_access_state()->SetFrameAccessToDefault();
910 // Ideally, we should decrement the SP delta to match the change of the
911 // stack pointer in CallCFunction. However, on certain architectures (e.g.
912 // ARM) there may be a stricter alignment requirement, causing the old SP
913 // to be saved on the stack. In those cases, we cannot calculate the SP
914 // delta statically.
915 frame_access_state()->ClearSPDelta();
916 if (caller_registers_saved_) {
917 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
918 // Here, we assume the sequence to be:
919 // kArchSaveCallerRegisters;
920 // kArchCallCFunction;
921 // kArchRestoreCallerRegisters;
922 int bytes =
923 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
924 frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
925 }
926 // TODO(tebbi): Do we need an lfence here?
927 break;
928 }
929 case kArchJmp:
930 AssembleArchJump(i.InputRpo(0));
931 break;
932 case kArchBinarySearchSwitch:
933 AssembleArchBinarySearchSwitch(instr);
934 break;
935 case kArchLookupSwitch:
936 AssembleArchLookupSwitch(instr);
937 break;
938 case kArchTableSwitch:
939 AssembleArchTableSwitch(instr);
940 break;
941 case kArchComment:
942 __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
943 break;
944 case kArchDebugAbort:
945 DCHECK(i.InputRegister(0) == rdx);
946 if (!frame_access_state()->has_frame()) {
947 // We don't actually want to generate a pile of code for this, so just
948 // claim there is a stack frame, without generating one.
949 FrameScope scope(tasm(), StackFrame::NONE);
950 __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
951 RelocInfo::CODE_TARGET);
952 } else {
953 __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
954 RelocInfo::CODE_TARGET);
955 }
956 __ int3();
957 unwinding_info_writer_.MarkBlockWillExit();
958 break;
959 case kArchDebugBreak:
960 __ int3();
961 break;
962 case kArchThrowTerminator:
963 unwinding_info_writer_.MarkBlockWillExit();
964 break;
965 case kArchNop:
966 // don't emit code for nops.
967 break;
968 case kArchDeoptimize: {
969 int deopt_state_id =
970 BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
971 CodeGenResult result =
972 AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
973 if (result != kSuccess) return result;
974 unwinding_info_writer_.MarkBlockWillExit();
975 break;
976 }
977 case kArchRet:
978 AssembleReturn(instr->InputAt(0));
979 break;
980 case kArchStackPointer:
981 __ movq(i.OutputRegister(), rsp);
982 break;
983 case kArchFramePointer:
984 __ movq(i.OutputRegister(), rbp);
985 break;
986 case kArchParentFramePointer:
987 if (frame_access_state()->has_frame()) {
988 __ movq(i.OutputRegister(), Operand(rbp, 0));
989 } else {
990 __ movq(i.OutputRegister(), rbp);
991 }
992 break;
993 case kArchTruncateDoubleToI: {
994 auto result = i.OutputRegister();
995 auto input = i.InputDoubleRegister(0);
996 auto ool = new (zone()) OutOfLineTruncateDoubleToI(
997 this, result, input, DetermineStubCallMode(),
998 &unwinding_info_writer_);
999 // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
1000 // use of Cvttsd2siq requires the movl below to avoid sign extension.
1001 __ Cvttsd2siq(result, input);
1002 __ cmpq(result, Immediate(1));
1003 __ j(overflow, ool->entry());
1004 __ bind(ool->exit());
1005 __ movl(result, result);
1006 break;
1007 }
1008 case kArchStoreWithWriteBarrier: {
1009 RecordWriteMode mode =
1010 static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1011 Register object = i.InputRegister(0);
1012 size_t index = 0;
1013 Operand operand = i.MemoryOperand(&index);
1014 Register value = i.InputRegister(index);
1015 Register scratch0 = i.TempRegister(0);
1016 Register scratch1 = i.TempRegister(1);
1017 auto ool = new (zone())
1018 OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
1019 mode, DetermineStubCallMode());
1020 __ StoreTaggedField(operand, value);
1021 __ CheckPageFlag(object, scratch0,
1022 MemoryChunk::kPointersFromHereAreInterestingMask,
1023 not_zero, ool->entry());
1024 __ bind(ool->exit());
1025 break;
1026 }
1027 case kArchWordPoisonOnSpeculation:
1028 DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
1029 __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
1030 break;
1031 case kLFence:
1032 __ lfence();
1033 break;
1034 case kArchStackSlot: {
1035 FrameOffset offset =
1036 frame_access_state()->GetFrameOffset(i.InputInt32(0));
1037 Register base = offset.from_stack_pointer() ? rsp : rbp;
1038 __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1039 break;
1040 }
1041 case kIeee754Float64Acos:
1042 ASSEMBLE_IEEE754_UNOP(acos);
1043 break;
1044 case kIeee754Float64Acosh:
1045 ASSEMBLE_IEEE754_UNOP(acosh);
1046 break;
1047 case kIeee754Float64Asin:
1048 ASSEMBLE_IEEE754_UNOP(asin);
1049 break;
1050 case kIeee754Float64Asinh:
1051 ASSEMBLE_IEEE754_UNOP(asinh);
1052 break;
1053 case kIeee754Float64Atan:
1054 ASSEMBLE_IEEE754_UNOP(atan);
1055 break;
1056 case kIeee754Float64Atanh:
1057 ASSEMBLE_IEEE754_UNOP(atanh);
1058 break;
1059 case kIeee754Float64Atan2:
1060 ASSEMBLE_IEEE754_BINOP(atan2);
1061 break;
1062 case kIeee754Float64Cbrt:
1063 ASSEMBLE_IEEE754_UNOP(cbrt);
1064 break;
1065 case kIeee754Float64Cos:
1066 ASSEMBLE_IEEE754_UNOP(cos);
1067 break;
1068 case kIeee754Float64Cosh:
1069 ASSEMBLE_IEEE754_UNOP(cosh);
1070 break;
1071 case kIeee754Float64Exp:
1072 ASSEMBLE_IEEE754_UNOP(exp);
1073 break;
1074 case kIeee754Float64Expm1:
1075 ASSEMBLE_IEEE754_UNOP(expm1);
1076 break;
1077 case kIeee754Float64Log:
1078 ASSEMBLE_IEEE754_UNOP(log);
1079 break;
1080 case kIeee754Float64Log1p:
1081 ASSEMBLE_IEEE754_UNOP(log1p);
1082 break;
1083 case kIeee754Float64Log2:
1084 ASSEMBLE_IEEE754_UNOP(log2);
1085 break;
1086 case kIeee754Float64Log10:
1087 ASSEMBLE_IEEE754_UNOP(log10);
1088 break;
1089 case kIeee754Float64Pow:
1090 ASSEMBLE_IEEE754_BINOP(pow);
1091 break;
1092 case kIeee754Float64Sin:
1093 ASSEMBLE_IEEE754_UNOP(sin);
1094 break;
1095 case kIeee754Float64Sinh:
1096 ASSEMBLE_IEEE754_UNOP(sinh);
1097 break;
1098 case kIeee754Float64Tan:
1099 ASSEMBLE_IEEE754_UNOP(tan);
1100 break;
1101 case kIeee754Float64Tanh:
1102 ASSEMBLE_IEEE754_UNOP(tanh);
1103 break;
1104 case kX64Add32:
1105 ASSEMBLE_BINOP(addl);
1106 break;
1107 case kX64Add:
1108 ASSEMBLE_BINOP(addq);
1109 break;
1110 case kX64Sub32:
1111 ASSEMBLE_BINOP(subl);
1112 break;
1113 case kX64Sub:
1114 ASSEMBLE_BINOP(subq);
1115 break;
1116 case kX64And32:
1117 ASSEMBLE_BINOP(andl);
1118 break;
1119 case kX64And:
1120 ASSEMBLE_BINOP(andq);
1121 break;
1122 case kX64Cmp8:
1123 ASSEMBLE_COMPARE(cmpb);
1124 break;
1125 case kX64Cmp16:
1126 ASSEMBLE_COMPARE(cmpw);
1127 break;
1128 case kX64Cmp32:
1129 ASSEMBLE_COMPARE(cmpl);
1130 break;
1131 case kX64Cmp:
1132 ASSEMBLE_COMPARE(cmpq);
1133 break;
1134 case kX64Test8:
1135 ASSEMBLE_COMPARE(testb);
1136 break;
1137 case kX64Test16:
1138 ASSEMBLE_COMPARE(testw);
1139 break;
1140 case kX64Test32:
1141 ASSEMBLE_COMPARE(testl);
1142 break;
1143 case kX64Test:
1144 ASSEMBLE_COMPARE(testq);
1145 break;
1146 case kX64Imul32:
1147 ASSEMBLE_MULT(imull);
1148 break;
1149 case kX64Imul:
1150 ASSEMBLE_MULT(imulq);
1151 break;
1152 case kX64ImulHigh32:
1153 if (instr->InputAt(1)->IsRegister()) {
1154 __ imull(i.InputRegister(1));
1155 } else {
1156 __ imull(i.InputOperand(1));
1157 }
1158 break;
1159 case kX64UmulHigh32:
1160 if (instr->InputAt(1)->IsRegister()) {
1161 __ mull(i.InputRegister(1));
1162 } else {
1163 __ mull(i.InputOperand(1));
1164 }
1165 break;
1166 case kX64Idiv32:
1167 __ cdq();
1168 __ idivl(i.InputRegister(1));
1169 break;
1170 case kX64Idiv:
1171 __ cqo();
1172 __ idivq(i.InputRegister(1));
1173 break;
1174 case kX64Udiv32:
1175 __ xorl(rdx, rdx);
1176 __ divl(i.InputRegister(1));
1177 break;
1178 case kX64Udiv:
1179 __ xorq(rdx, rdx);
1180 __ divq(i.InputRegister(1));
1181 break;
1182 case kX64Not:
1183 ASSEMBLE_UNOP(notq);
1184 break;
1185 case kX64Not32:
1186 ASSEMBLE_UNOP(notl);
1187 break;
1188 case kX64Neg:
1189 ASSEMBLE_UNOP(negq);
1190 break;
1191 case kX64Neg32:
1192 ASSEMBLE_UNOP(negl);
1193 break;
1194 case kX64Or32:
1195 ASSEMBLE_BINOP(orl);
1196 break;
1197 case kX64Or:
1198 ASSEMBLE_BINOP(orq);
1199 break;
1200 case kX64Xor32:
1201 ASSEMBLE_BINOP(xorl);
1202 break;
1203 case kX64Xor:
1204 ASSEMBLE_BINOP(xorq);
1205 break;
1206 case kX64Shl32:
1207 ASSEMBLE_SHIFT(shll, 5);
1208 break;
1209 case kX64Shl:
1210 ASSEMBLE_SHIFT(shlq, 6);
1211 break;
1212 case kX64Shr32:
1213 ASSEMBLE_SHIFT(shrl, 5);
1214 break;
1215 case kX64Shr:
1216 ASSEMBLE_SHIFT(shrq, 6);
1217 break;
1218 case kX64Sar32:
1219 ASSEMBLE_SHIFT(sarl, 5);
1220 break;
1221 case kX64Sar:
1222 ASSEMBLE_SHIFT(sarq, 6);
1223 break;
1224 case kX64Ror32:
1225 ASSEMBLE_SHIFT(rorl, 5);
1226 break;
1227 case kX64Ror:
1228 ASSEMBLE_SHIFT(rorq, 6);
1229 break;
1230 case kX64Lzcnt:
1231 if (instr->InputAt(0)->IsRegister()) {
1232 __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1233 } else {
1234 __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1235 }
1236 break;
1237 case kX64Lzcnt32:
1238 if (instr->InputAt(0)->IsRegister()) {
1239 __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1240 } else {
1241 __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1242 }
1243 break;
1244 case kX64Tzcnt:
1245 if (instr->InputAt(0)->IsRegister()) {
1246 __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1247 } else {
1248 __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1249 }
1250 break;
1251 case kX64Tzcnt32:
1252 if (instr->InputAt(0)->IsRegister()) {
1253 __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1254 } else {
1255 __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1256 }
1257 break;
1258 case kX64Popcnt:
1259 if (instr->InputAt(0)->IsRegister()) {
1260 __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1261 } else {
1262 __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1263 }
1264 break;
1265 case kX64Popcnt32:
1266 if (instr->InputAt(0)->IsRegister()) {
1267 __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1268 } else {
1269 __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1270 }
1271 break;
1272 case kX64Bswap:
1273 __ bswapq(i.OutputRegister());
1274 break;
1275 case kX64Bswap32:
1276 __ bswapl(i.OutputRegister());
1277 break;
1278 case kSSEFloat32Cmp:
1279 ASSEMBLE_SSE_BINOP(Ucomiss);
1280 break;
1281 case kSSEFloat32Add:
1282 ASSEMBLE_SSE_BINOP(addss);
1283 break;
1284 case kSSEFloat32Sub:
1285 ASSEMBLE_SSE_BINOP(subss);
1286 break;
1287 case kSSEFloat32Mul:
1288 ASSEMBLE_SSE_BINOP(mulss);
1289 break;
1290 case kSSEFloat32Div:
1291 ASSEMBLE_SSE_BINOP(divss);
1292 // Don't delete this mov. It may improve performance on some CPUs,
1293 // when there is a (v)mulss depending on the result.
1294 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1295 break;
1296 case kSSEFloat32Abs: {
1297 // TODO(bmeurer): Use RIP relative 128-bit constants.
1298 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1299 __ psrlq(kScratchDoubleReg, 33);
1300 __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
1301 break;
1302 }
1303 case kSSEFloat32Neg: {
1304 // TODO(bmeurer): Use RIP relative 128-bit constants.
1305 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1306 __ psllq(kScratchDoubleReg, 31);
1307 __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
1308 break;
1309 }
1310 case kSSEFloat32Sqrt:
1311 ASSEMBLE_SSE_UNOP(sqrtss);
1312 break;
1313 case kSSEFloat32ToFloat64:
1314 ASSEMBLE_SSE_UNOP(Cvtss2sd);
1315 break;
1316 case kSSEFloat32Round: {
1317 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1318 RoundingMode const mode =
1319 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1320 __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1321 break;
1322 }
1323 case kSSEFloat32ToInt32:
1324 if (instr->InputAt(0)->IsFPRegister()) {
1325 __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1326 } else {
1327 __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1328 }
1329 break;
1330 case kSSEFloat32ToUint32: {
1331 if (instr->InputAt(0)->IsFPRegister()) {
1332 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1333 } else {
1334 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1335 }
1336 break;
1337 }
1338 case kSSEFloat64Cmp:
1339 ASSEMBLE_SSE_BINOP(Ucomisd);
1340 break;
1341 case kSSEFloat64Add:
1342 ASSEMBLE_SSE_BINOP(addsd);
1343 break;
1344 case kSSEFloat64Sub:
1345 ASSEMBLE_SSE_BINOP(subsd);
1346 break;
1347 case kSSEFloat64Mul:
1348 ASSEMBLE_SSE_BINOP(mulsd);
1349 break;
1350 case kSSEFloat64Div:
1351 ASSEMBLE_SSE_BINOP(divsd);
1352 // Don't delete this mov. It may improve performance on some CPUs,
1353 // when there is a (v)mulsd depending on the result.
1354 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1355 break;
1356 case kSSEFloat64Mod: {
1357 __ subq(rsp, Immediate(kDoubleSize));
1358 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1359 kDoubleSize);
1360 // Move values to st(0) and st(1).
1361 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1362 __ fld_d(Operand(rsp, 0));
1363 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1364 __ fld_d(Operand(rsp, 0));
1365 // Loop while fprem isn't done.
1366 Label mod_loop;
1367 __ bind(&mod_loop);
1368 // This instruction traps on all kinds of inputs, but we are assuming the
1369 // floating point control word is set to ignore them all.
1370 __ fprem();
1371 // The following 2 instructions implicitly use rax.
1372 __ fnstsw_ax();
1373 if (CpuFeatures::IsSupported(SAHF)) {
1374 CpuFeatureScope sahf_scope(tasm(), SAHF);
1375 __ sahf();
1376 } else {
1377 __ shrl(rax, Immediate(8));
1378 __ andl(rax, Immediate(0xFF));
1379 __ pushq(rax);
1380 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1381 kSystemPointerSize);
1382 __ popfq();
1383 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1384 -kSystemPointerSize);
1385 }
1386 __ j(parity_even, &mod_loop);
1387 // Move output to stack and clean up.
1388 __ fstp(1);
1389 __ fstp_d(Operand(rsp, 0));
1390 __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1391 __ addq(rsp, Immediate(kDoubleSize));
1392 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1393 -kDoubleSize);
1394 break;
1395 }
1396 case kSSEFloat32Max: {
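// NaN inputs are handled via the parity flag and the out-of-line NaN load;
// when the inputs compare equal, the sign bit of the first input is checked
// so that max(-0, +0) yields +0.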
1397 Label compare_swap, done_compare;
1398 if (instr->InputAt(1)->IsFPRegister()) {
1399 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1400 } else {
1401 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1402 }
1403 auto ool =
1404 new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1405 __ j(parity_even, ool->entry());
1406 __ j(above, &done_compare, Label::kNear);
1407 __ j(below, &compare_swap, Label::kNear);
1408 __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1409 __ testl(kScratchRegister, Immediate(1));
1410 __ j(zero, &done_compare, Label::kNear);
1411 __ bind(&compare_swap);
1412 if (instr->InputAt(1)->IsFPRegister()) {
1413 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1414 } else {
1415 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1416 }
1417 __ bind(&done_compare);
1418 __ bind(ool->exit());
1419 break;
1420 }
1421 case kSSEFloat32Min: {
1422 Label compare_swap, done_compare;
1423 if (instr->InputAt(1)->IsFPRegister()) {
1424 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1425 } else {
1426 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1427 }
1428 auto ool =
1429 new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1430 __ j(parity_even, ool->entry());
1431 __ j(below, &done_compare, Label::kNear);
1432 __ j(above, &compare_swap, Label::kNear);
1433 if (instr->InputAt(1)->IsFPRegister()) {
1434 __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1435 } else {
1436 __ Movss(kScratchDoubleReg, i.InputOperand(1));
1437 __ Movmskps(kScratchRegister, kScratchDoubleReg);
1438 }
1439 __ testl(kScratchRegister, Immediate(1));
1440 __ j(zero, &done_compare, Label::kNear);
1441 __ bind(&compare_swap);
1442 if (instr->InputAt(1)->IsFPRegister()) {
1443 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1444 } else {
1445 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1446 }
1447 __ bind(&done_compare);
1448 __ bind(ool->exit());
1449 break;
1450 }
1451 case kSSEFloat64Max: {
1452 Label compare_swap, done_compare;
1453 if (instr->InputAt(1)->IsFPRegister()) {
1454 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1455 } else {
1456 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1457 }
1458 auto ool =
1459 new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1460 __ j(parity_even, ool->entry());
1461 __ j(above, &done_compare, Label::kNear);
1462 __ j(below, &compare_swap, Label::kNear);
1463 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1464 __ testl(kScratchRegister, Immediate(1));
1465 __ j(zero, &done_compare, Label::kNear);
1466 __ bind(&compare_swap);
1467 if (instr->InputAt(1)->IsFPRegister()) {
1468 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1469 } else {
1470 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1471 }
1472 __ bind(&done_compare);
1473 __ bind(ool->exit());
1474 break;
1475 }
1476 case kSSEFloat64Min: {
1477 Label compare_swap, done_compare;
1478 if (instr->InputAt(1)->IsFPRegister()) {
1479 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1480 } else {
1481 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1482 }
1483 auto ool =
1484 new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1485 __ j(parity_even, ool->entry());
1486 __ j(below, &done_compare, Label::kNear);
1487 __ j(above, &compare_swap, Label::kNear);
1488 if (instr->InputAt(1)->IsFPRegister()) {
1489 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1490 } else {
1491 __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1492 __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1493 }
1494 __ testl(kScratchRegister, Immediate(1));
1495 __ j(zero, &done_compare, Label::kNear);
1496 __ bind(&compare_swap);
1497 if (instr->InputAt(1)->IsFPRegister()) {
1498 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1499 } else {
1500 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1501 }
1502 __ bind(&done_compare);
1503 __ bind(ool->exit());
1504 break;
1505 }
1506 case kSSEFloat64Abs: {
1507 // TODO(bmeurer): Use RIP relative 128-bit constants.
1508 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1509 __ psrlq(kScratchDoubleReg, 1);
1510 __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1511 break;
1512 }
1513 case kSSEFloat64Neg: {
1514 // TODO(bmeurer): Use RIP relative 128-bit constants.
1515 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1516 __ psllq(kScratchDoubleReg, 63);
1517 __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1518 break;
1519 }
1520 case kSSEFloat64Sqrt:
1521 ASSEMBLE_SSE_UNOP(Sqrtsd);
1522 break;
1523 case kSSEFloat64Round: {
1524 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1525 RoundingMode const mode =
1526 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1527 __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1528 break;
1529 }
1530 case kSSEFloat64ToFloat32:
1531 ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1532 break;
1533 case kSSEFloat64ToInt32:
1534 if (instr->InputAt(0)->IsFPRegister()) {
1535 __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1536 } else {
1537 __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1538 }
1539 break;
1540 case kSSEFloat64ToUint32: {
1541 if (instr->InputAt(0)->IsFPRegister()) {
1542 __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1543 } else {
1544 __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1545 }
1546 if (MiscField::decode(instr->opcode())) {
1547 __ AssertZeroExtended(i.OutputRegister());
1548 }
1549 break;
1550 }
1551 case kSSEFloat32ToInt64:
1552 if (instr->InputAt(0)->IsFPRegister()) {
1553 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1554 } else {
1555 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1556 }
1557 if (instr->OutputCount() > 1) {
1558 __ Set(i.OutputRegister(1), 1);
1559 Label done;
1560 Label fail;
1561 __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1562 if (instr->InputAt(0)->IsFPRegister()) {
1563 __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1564 } else {
1565 __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1566 }
1567 // If the input is NaN, then the conversion fails.
1568 __ j(parity_even, &fail);
1569 // If the input is INT64_MIN, then the conversion succeeds.
1570 __ j(equal, &done);
1571 __ cmpq(i.OutputRegister(0), Immediate(1));
1572 // If the conversion results in INT64_MIN, but the input was not
1573 // INT64_MIN, then the conversion fails.
1574 __ j(no_overflow, &done);
1575 __ bind(&fail);
1576 __ Set(i.OutputRegister(1), 0);
1577 __ bind(&done);
1578 }
1579 break;
1580 case kSSEFloat64ToInt64:
1581 if (instr->InputAt(0)->IsFPRegister()) {
1582 __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1583 } else {
1584 __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1585 }
1586 if (instr->OutputCount() > 1) {
1587 __ Set(i.OutputRegister(1), 1);
1588 Label done;
1589 Label fail;
1590 __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1591 if (instr->InputAt(0)->IsFPRegister()) {
1592 __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1593 } else {
1594 __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1595 }
1596 // If the input is NaN, then the conversion fails.
1597 __ j(parity_even, &fail);
1598 // If the input is INT64_MIN, then the conversion succeeds.
1599 __ j(equal, &done);
1600 __ cmpq(i.OutputRegister(0), Immediate(1));
1601 // If the conversion results in INT64_MIN, but the input was not
1602 // INT64_MIN, then the conversion fails.
1603 __ j(no_overflow, &done);
1604 __ bind(&fail);
1605 __ Set(i.OutputRegister(1), 0);
1606 __ bind(&done);
1607 }
1608 break;
1609 case kSSEFloat32ToUint64: {
1610 Label fail;
1611 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1612 if (instr->InputAt(0)->IsFPRegister()) {
1613 __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1614 } else {
1615 __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1616 }
1617 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1618 __ bind(&fail);
1619 break;
1620 }
1621 case kSSEFloat64ToUint64: {
1622 Label fail;
1623 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1624 if (instr->InputAt(0)->IsFPRegister()) {
1625 __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1626 } else {
1627 __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1628 }
1629 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1630 __ bind(&fail);
1631 break;
1632 }
1633 case kSSEInt32ToFloat64:
1634 if (instr->InputAt(0)->IsRegister()) {
1635 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1636 } else {
1637 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1638 }
1639 break;
1640 case kSSEInt32ToFloat32:
1641 if (instr->InputAt(0)->IsRegister()) {
1642 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1643 } else {
1644 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1645 }
1646 break;
1647 case kSSEInt64ToFloat32:
1648 if (instr->InputAt(0)->IsRegister()) {
1649 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1650 } else {
1651 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1652 }
1653 break;
1654 case kSSEInt64ToFloat64:
1655 if (instr->InputAt(0)->IsRegister()) {
1656 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1657 } else {
1658 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1659 }
1660 break;
1661 case kSSEUint64ToFloat32:
1662 if (instr->InputAt(0)->IsRegister()) {
1663 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1664 } else {
1665 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1666 }
1667 break;
1668 case kSSEUint64ToFloat64:
1669 if (instr->InputAt(0)->IsRegister()) {
1670 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1671 } else {
1672 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1673 }
1674 break;
1675 case kSSEUint32ToFloat64:
1676 if (instr->InputAt(0)->IsRegister()) {
1677 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1678 } else {
1679 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1680 }
1681 break;
1682 case kSSEUint32ToFloat32:
1683 if (instr->InputAt(0)->IsRegister()) {
1684 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1685 } else {
1686 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1687 }
1688 break;
1689 case kSSEFloat64ExtractLowWord32:
1690 if (instr->InputAt(0)->IsFPStackSlot()) {
1691 __ movl(i.OutputRegister(), i.InputOperand(0));
1692 } else {
1693 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1694 }
1695 break;
1696 case kSSEFloat64ExtractHighWord32:
1697 if (instr->InputAt(0)->IsFPStackSlot()) {
1698 __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1699 } else {
1700 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1701 }
1702 break;
1703 case kSSEFloat64InsertLowWord32:
1704 if (instr->InputAt(1)->IsRegister()) {
1705 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1706 } else {
1707 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1708 }
1709 break;
1710 case kSSEFloat64InsertHighWord32:
1711 if (instr->InputAt(1)->IsRegister()) {
1712 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1713 } else {
1714 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1715 }
1716 break;
1717 case kSSEFloat64LoadLowWord32:
1718 if (instr->InputAt(0)->IsRegister()) {
1719 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1720 } else {
1721 __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1722 }
1723 break;
1724 case kAVXFloat32Cmp: {
1725 CpuFeatureScope avx_scope(tasm(), AVX);
1726 if (instr->InputAt(1)->IsFPRegister()) {
1727 __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1728 } else {
1729 __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1730 }
1731 break;
1732 }
1733 case kAVXFloat32Add:
1734 ASSEMBLE_AVX_BINOP(vaddss);
1735 break;
1736 case kAVXFloat32Sub:
1737 ASSEMBLE_AVX_BINOP(vsubss);
1738 break;
1739 case kAVXFloat32Mul:
1740 ASSEMBLE_AVX_BINOP(vmulss);
1741 break;
1742 case kAVXFloat32Div:
1743 ASSEMBLE_AVX_BINOP(vdivss);
1744 // Don't delete this mov. It may improve performance on some CPUs when
1745 // there is a (v)mulss depending on the result.
1746 __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1747 break;
1748 case kAVXFloat64Cmp: {
1749 CpuFeatureScope avx_scope(tasm(), AVX);
1750 if (instr->InputAt(1)->IsFPRegister()) {
1751 __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1752 } else {
1753 __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1754 }
1755 break;
1756 }
1757 case kAVXFloat64Add:
1758 ASSEMBLE_AVX_BINOP(vaddsd);
1759 break;
1760 case kAVXFloat64Sub:
1761 ASSEMBLE_AVX_BINOP(vsubsd);
1762 break;
1763 case kAVXFloat64Mul:
1764 ASSEMBLE_AVX_BINOP(vmulsd);
1765 break;
1766 case kAVXFloat64Div:
1767 ASSEMBLE_AVX_BINOP(vdivsd);
1768 // Don't delete this mov. It may improve performance on some CPUs when
1769 // there is a (v)mulsd depending on the result.
1770 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1771 break;
1772 case kAVXFloat32Abs: {
1773 // TODO(bmeurer): Use RIP relative 128-bit constants.
1774 CpuFeatureScope avx_scope(tasm(), AVX);
1775 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1776 __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
1777 if (instr->InputAt(0)->IsFPRegister()) {
1778 __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1779 i.InputDoubleRegister(0));
1780 } else {
1781 __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1782 i.InputOperand(0));
1783 }
1784 break;
1785 }
1786 case kAVXFloat32Neg: {
1787 // TODO(bmeurer): Use RIP relative 128-bit constants.
1788 CpuFeatureScope avx_scope(tasm(), AVX);
1789 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1790 __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
1791 if (instr->InputAt(0)->IsFPRegister()) {
1792 __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1793 i.InputDoubleRegister(0));
1794 } else {
1795 __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1796 i.InputOperand(0));
1797 }
1798 break;
1799 }
1800 case kAVXFloat64Abs: {
1801 // TODO(bmeurer): Use RIP relative 128-bit constants.
1802 CpuFeatureScope avx_scope(tasm(), AVX);
1803 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1804 __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
1805 if (instr->InputAt(0)->IsFPRegister()) {
1806 __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1807 i.InputDoubleRegister(0));
1808 } else {
1809 __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1810 i.InputOperand(0));
1811 }
1812 break;
1813 }
1814 case kAVXFloat64Neg: {
1815 // TODO(bmeurer): Use RIP relative 128-bit constants.
1816 CpuFeatureScope avx_scope(tasm(), AVX);
1817 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1818 __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
1819 if (instr->InputAt(0)->IsFPRegister()) {
1820 __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1821 i.InputDoubleRegister(0));
1822 } else {
1823 __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1824 i.InputOperand(0));
1825 }
1826 break;
1827 }
1828 case kSSEFloat64SilenceNaN:
1829 __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1830 __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1831 break;
1832 case kX64Movsxbl:
1833 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1834 ASSEMBLE_MOVX(movsxbl);
1835 __ AssertZeroExtended(i.OutputRegister());
1836 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1837 break;
1838 case kX64Movzxbl:
1839 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1840 ASSEMBLE_MOVX(movzxbl);
1841 __ AssertZeroExtended(i.OutputRegister());
1842 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1843 break;
1844 case kX64Movsxbq:
1845 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1846 ASSEMBLE_MOVX(movsxbq);
1847 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1848 break;
1849 case kX64Movzxbq:
1850 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1851 ASSEMBLE_MOVX(movzxbq);
1852 __ AssertZeroExtended(i.OutputRegister());
1853 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1854 break;
1855 case kX64Movb: {
1856 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1857 size_t index = 0;
1858 Operand operand = i.MemoryOperand(&index);
1859 if (HasImmediateInput(instr, index)) {
1860 __ movb(operand, Immediate(i.InputInt8(index)));
1861 } else {
1862 __ movb(operand, i.InputRegister(index));
1863 }
1864 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1865 break;
1866 }
1867 case kX64Movsxwl:
1868 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1869 ASSEMBLE_MOVX(movsxwl);
1870 __ AssertZeroExtended(i.OutputRegister());
1871 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1872 break;
1873 case kX64Movzxwl:
1874 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1875 ASSEMBLE_MOVX(movzxwl);
1876 __ AssertZeroExtended(i.OutputRegister());
1877 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1878 break;
1879 case kX64Movsxwq:
1880 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1881 ASSEMBLE_MOVX(movsxwq);
1882 break;
1883 case kX64Movzxwq:
1884 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1885 ASSEMBLE_MOVX(movzxwq);
1886 __ AssertZeroExtended(i.OutputRegister());
1887 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1888 break;
1889 case kX64Movw: {
1890 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1891 size_t index = 0;
1892 Operand operand = i.MemoryOperand(&index);
1893 if (HasImmediateInput(instr, index)) {
1894 __ movw(operand, Immediate(i.InputInt16(index)));
1895 } else {
1896 __ movw(operand, i.InputRegister(index));
1897 }
1898 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1899 break;
1900 }
1901 case kX64Movl:
1902 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1903 if (instr->HasOutput()) {
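// A 32-bit load or register-to-register move implicitly zero-extends to 64
// bits on x64; AssertZeroExtended below checks that invariant.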
1904 if (instr->addressing_mode() == kMode_None) {
1905 if (instr->InputAt(0)->IsRegister()) {
1906 __ movl(i.OutputRegister(), i.InputRegister(0));
1907 } else {
1908 __ movl(i.OutputRegister(), i.InputOperand(0));
1909 }
1910 } else {
1911 __ movl(i.OutputRegister(), i.MemoryOperand());
1912 }
1913 __ AssertZeroExtended(i.OutputRegister());
1914 } else {
1915 size_t index = 0;
1916 Operand operand = i.MemoryOperand(&index);
1917 if (HasImmediateInput(instr, index)) {
1918 __ movl(operand, i.InputImmediate(index));
1919 } else {
1920 __ movl(operand, i.InputRegister(index));
1921 }
1922 }
1923 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1924 break;
1925 case kX64Movsxlq:
1926 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1927 ASSEMBLE_MOVX(movsxlq);
1928 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1929 break;
1930 case kX64MovqDecompressTaggedSigned: {
1931 CHECK(instr->HasOutput());
1932 __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
1933 break;
1934 }
1935 case kX64MovqDecompressTaggedPointer: {
1936 CHECK(instr->HasOutput());
1937 __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
1938 break;
1939 }
1940 case kX64MovqDecompressAnyTagged: {
1941 CHECK(instr->HasOutput());
1942 __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
1943 break;
1944 }
1945 case kX64MovqCompressTagged: {
1946 CHECK(!instr->HasOutput());
1947 size_t index = 0;
1948 Operand operand = i.MemoryOperand(&index);
1949 if (HasImmediateInput(instr, index)) {
1950 __ StoreTaggedField(operand, i.InputImmediate(index));
1951 } else {
1952 __ StoreTaggedField(operand, i.InputRegister(index));
1953 }
1954 break;
1955 }
1956 case kX64DecompressSigned: {
1957 CHECK(instr->HasOutput());
1958 ASSEMBLE_MOVX(movsxlq);
1959 break;
1960 }
1961 case kX64DecompressPointer: {
1962 CHECK(instr->HasOutput());
1963 ASSEMBLE_MOVX(movsxlq);
1964 __ addq(i.OutputRegister(), kRootRegister);
1965 break;
1966 }
1967 case kX64DecompressAny: {
1968 CHECK(instr->HasOutput());
1969 ASSEMBLE_MOVX(movsxlq);
1970 // TODO(solanes): Do branchful compute?
1971 // Branchlessly compute |masked_root|:
1972 STATIC_ASSERT((kSmiTagSize == 1) && (kSmiTag < 32));
1973 Register masked_root = kScratchRegister;
1974 __ movl(masked_root, i.OutputRegister());
1975 __ andl(masked_root, Immediate(kSmiTagMask));
1976 __ negq(masked_root);
1977 __ andq(masked_root, kRootRegister);
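// masked_root is now 0 for Smis (tag 0) and kRootRegister for heap objects.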
1978 // Now this add operation will either leave the value unchanged if it is a
1979 // smi or add the isolate root if it is a heap object.
1980 __ addq(i.OutputRegister(), masked_root);
1981 break;
1982 }
1983 // TODO(solanes): Combine into one Compress? They seem to be identical.
1984 // TODO(solanes): We might get away with doing a no-op in these three cases.
1985 // The movl instruction is the conservative way for the moment.
1986 case kX64CompressSigned: {
1987 ASSEMBLE_MOVX(movl);
1988 break;
1989 }
1990 case kX64CompressPointer: {
1991 ASSEMBLE_MOVX(movl);
1992 break;
1993 }
1994 case kX64CompressAny: {
1995 ASSEMBLE_MOVX(movl);
1996 break;
1997 }
1998 case kX64Movq:
1999 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2000 if (instr->HasOutput()) {
2001 __ movq(i.OutputRegister(), i.MemoryOperand());
2002 } else {
2003 size_t index = 0;
2004 Operand operand = i.MemoryOperand(&index);
2005 if (HasImmediateInput(instr, index)) {
2006 __ movq(operand, i.InputImmediate(index));
2007 } else {
2008 __ movq(operand, i.InputRegister(index));
2009 }
2010 }
2011 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2012 break;
2013 case kX64Movss:
2014 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2015 if (instr->HasOutput()) {
2016 __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
2017 } else {
2018 size_t index = 0;
2019 Operand operand = i.MemoryOperand(&index);
2020 __ movss(operand, i.InputDoubleRegister(index));
2021 }
2022 break;
2023 case kX64Movsd: {
2024 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2025 if (instr->HasOutput()) {
2026 const MemoryAccessMode access_mode =
2027 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
2028 if (access_mode == kMemoryAccessPoisoned) {
2029 // If we have to poison the loaded value, we load into a general
2030 // purpose register first, mask it with the poison, and move the
2031 // value from the general purpose register into the double register.
2032 __ movq(kScratchRegister, i.MemoryOperand());
2033 __ andq(kScratchRegister, kSpeculationPoisonRegister);
2034 __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2035 } else {
2036 __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2037 }
2038 } else {
2039 size_t index = 0;
2040 Operand operand = i.MemoryOperand(&index);
2041 __ Movsd(operand, i.InputDoubleRegister(index));
2042 }
2043 break;
2044 }
2045 case kX64Movdqu: {
2046 CpuFeatureScope sse_scope(tasm(), SSSE3);
2047 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2048 if (instr->HasOutput()) {
2049 __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2050 } else {
2051 size_t index = 0;
2052 Operand operand = i.MemoryOperand(&index);
2053 __ movdqu(operand, i.InputSimd128Register(index));
2054 }
2055 break;
2056 }
2057 case kX64BitcastFI:
2058 if (instr->InputAt(0)->IsFPStackSlot()) {
2059 __ movl(i.OutputRegister(), i.InputOperand(0));
2060 } else {
2061 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2062 }
2063 break;
2064 case kX64BitcastDL:
2065 if (instr->InputAt(0)->IsFPStackSlot()) {
2066 __ movq(i.OutputRegister(), i.InputOperand(0));
2067 } else {
2068 __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2069 }
2070 break;
2071 case kX64BitcastIF:
2072 if (instr->InputAt(0)->IsRegister()) {
2073 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2074 } else {
2075 __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
2076 }
2077 break;
2078 case kX64BitcastLD:
2079 if (instr->InputAt(0)->IsRegister()) {
2080 __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2081 } else {
2082 __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2083 }
2084 break;
2085 case kX64Lea32: {
2086 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2087 // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2088 // and addressing mode just happen to work out. The "addl"/"subl" forms
2089 // in these cases are faster based on measurements.
2090 if (i.InputRegister(0) == i.OutputRegister()) {
2091 if (mode == kMode_MRI) {
2092 int32_t constant_summand = i.InputInt32(1);
2093 DCHECK_NE(0, constant_summand);
2094 if (constant_summand > 0) {
2095 __ addl(i.OutputRegister(), Immediate(constant_summand));
2096 } else {
2097 __ subl(i.OutputRegister(),
2098 Immediate(base::NegateWithWraparound(constant_summand)));
2099 }
2100 } else if (mode == kMode_MR1) {
2101 if (i.InputRegister(1) == i.OutputRegister()) {
2102 __ shll(i.OutputRegister(), Immediate(1));
2103 } else {
2104 __ addl(i.OutputRegister(), i.InputRegister(1));
2105 }
2106 } else if (mode == kMode_M2) {
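// kMode_M{2,4,8} have no base register; the index is already in the output
// register, so shifting left by log2(scale) is sufficient.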
2107 __ shll(i.OutputRegister(), Immediate(1));
2108 } else if (mode == kMode_M4) {
2109 __ shll(i.OutputRegister(), Immediate(2));
2110 } else if (mode == kMode_M8) {
2111 __ shll(i.OutputRegister(), Immediate(3));
2112 } else {
2113 __ leal(i.OutputRegister(), i.MemoryOperand());
2114 }
2115 } else if (mode == kMode_MR1 &&
2116 i.InputRegister(1) == i.OutputRegister()) {
2117 __ addl(i.OutputRegister(), i.InputRegister(0));
2118 } else {
2119 __ leal(i.OutputRegister(), i.MemoryOperand());
2120 }
2121 __ AssertZeroExtended(i.OutputRegister());
2122 break;
2123 }
2124 case kX64Lea: {
2125 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2126 // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2127 // and addressing mode just happen to work out. The "addq"/"subq" forms
2128 // in these cases are faster based on measurements.
2129 if (i.InputRegister(0) == i.OutputRegister()) {
2130 if (mode == kMode_MRI) {
2131 int32_t constant_summand = i.InputInt32(1);
2132 if (constant_summand > 0) {
2133 __ addq(i.OutputRegister(), Immediate(constant_summand));
2134 } else if (constant_summand < 0) {
2135 __ subq(i.OutputRegister(), Immediate(base::NegateWithWraparound(constant_summand)));
2136 }
2137 } else if (mode == kMode_MR1) {
2138 if (i.InputRegister(1) == i.OutputRegister()) {
2139 __ shlq(i.OutputRegister(), Immediate(1));
2140 } else {
2141 __ addq(i.OutputRegister(), i.InputRegister(1));
2142 }
2143 } else if (mode == kMode_M2) {
2144 __ shlq(i.OutputRegister(), Immediate(1));
2145 } else if (mode == kMode_M4) {
2146 __ shlq(i.OutputRegister(), Immediate(2));
2147 } else if (mode == kMode_M8) {
2148 __ shlq(i.OutputRegister(), Immediate(3));
2149 } else {
2150 __ leaq(i.OutputRegister(), i.MemoryOperand());
2151 }
2152 } else if (mode == kMode_MR1 &&
2153 i.InputRegister(1) == i.OutputRegister()) {
2154 __ addq(i.OutputRegister(), i.InputRegister(0));
2155 } else {
2156 __ leaq(i.OutputRegister(), i.MemoryOperand());
2157 }
2158 break;
2159 }
2160 case kX64Dec32:
2161 __ decl(i.OutputRegister());
2162 break;
2163 case kX64Inc32:
2164 __ incl(i.OutputRegister());
2165 break;
2166 case kX64Push:
2167 if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
2168 size_t index = 0;
2169 Operand operand = i.MemoryOperand(&index);
2170 __ pushq(operand);
2171 frame_access_state()->IncreaseSPDelta(1);
2172 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2173 kSystemPointerSize);
2174 } else if (HasImmediateInput(instr, 0)) {
2175 __ pushq(i.InputImmediate(0));
2176 frame_access_state()->IncreaseSPDelta(1);
2177 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2178 kSystemPointerSize);
2179 } else if (instr->InputAt(0)->IsRegister()) {
2180 __ pushq(i.InputRegister(0));
2181 frame_access_state()->IncreaseSPDelta(1);
2182 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2183 kSystemPointerSize);
2184 } else if (instr->InputAt(0)->IsFloatRegister() ||
2185 instr->InputAt(0)->IsDoubleRegister()) {
2186 // TODO(titzer): use another machine instruction?
2187 __ subq(rsp, Immediate(kDoubleSize));
2188 frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
2189 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2190 kDoubleSize);
2191 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2192 } else if (instr->InputAt(0)->IsSimd128Register()) {
2193 // TODO(titzer): use another machine instruction?
2194 __ subq(rsp, Immediate(kSimd128Size));
2195 frame_access_state()->IncreaseSPDelta(kSimd128Size /
2196 kSystemPointerSize);
2197 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2198 kSimd128Size);
2199 __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2200 } else if (instr->InputAt(0)->IsStackSlot() ||
2201 instr->InputAt(0)->IsFloatStackSlot() ||
2202 instr->InputAt(0)->IsDoubleStackSlot()) {
2203 __ pushq(i.InputOperand(0));
2204 frame_access_state()->IncreaseSPDelta(1);
2205 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2206 kSystemPointerSize);
2207 } else {
2208 DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2209 __ Movups(kScratchDoubleReg, i.InputOperand(0));
2210 // TODO(titzer): use another machine instruction?
2211 __ subq(rsp, Immediate(kSimd128Size));
2212 frame_access_state()->IncreaseSPDelta(kSimd128Size /
2213 kSystemPointerSize);
2214 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2215 kSimd128Size);
2216 __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2217 }
2218 break;
2219 case kX64Poke: {
2220 int slot = MiscField::decode(instr->opcode());
2221 if (HasImmediateInput(instr, 0)) {
2222 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2223 } else {
2224 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2225 }
2226 break;
2227 }
2228 case kX64Peek: {
2229 int reverse_slot = i.InputInt32(0);
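// reverse_slot indexes slots from the end of the frame; subtract it from the
// total slot count and convert the result to an rbp-relative offset.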
2230 int offset =
2231 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2232 if (instr->OutputAt(0)->IsFPRegister()) {
2233 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2234 if (op->representation() == MachineRepresentation::kFloat64) {
2235 __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2236 } else {
2237 DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2238 __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2239 }
2240 } else {
2241 __ movq(i.OutputRegister(), Operand(rbp, offset));
2242 }
2243 break;
2244 }
2245 // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
2246 case kX64F32x4Splat: {
2247 XMMRegister dst = i.OutputSimd128Register();
2248 if (instr->InputAt(0)->IsFPRegister()) {
2249 __ movss(dst, i.InputDoubleRegister(0));
2250 } else {
2251 __ movss(dst, i.InputOperand(0));
2252 }
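// Broadcast the low lane into all four lanes.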
2253 __ shufps(dst, dst, 0x0);
2254 break;
2255 }
2256 case kX64F32x4ExtractLane: {
2257 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2258 __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2259 __ movd(i.OutputDoubleRegister(), kScratchRegister);
2260 break;
2261 }
2262 case kX64F32x4ReplaceLane: {
2263 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2264 // The insertps instruction uses imm8[5:4] to indicate the lane
2265 // that needs to be replaced.
2266 byte select = i.InputInt8(1) << 4 & 0x30;
2267 if (instr->InputAt(2)->IsFPRegister()) {
2268 __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2269 select);
2270 } else {
2271 __ insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2272 }
2273 break;
2274 }
2275 case kX64F32x4SConvertI32x4: {
2276 __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2277 break;
2278 }
2279 case kX64F32x4UConvertI32x4: {
2280 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2281 DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2282 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2283 XMMRegister dst = i.OutputSimd128Register();
2284 __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2285 __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
2286 __ psubd(dst, kScratchDoubleReg); // get hi 16 bits
2287 __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2288 __ psrld(dst, 1); // divide by 2 to get in unsigned range
2289 __ cvtdq2ps(dst, dst); // convert hi exactly
2290 __ addps(dst, dst); // double hi, exactly
2291 __ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2292 break;
2293 }
2294 case kX64F32x4Abs: {
2295 XMMRegister dst = i.OutputSimd128Register();
2296 XMMRegister src = i.InputSimd128Register(0);
2297 if (dst == src) {
2298 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2299 __ psrld(kScratchDoubleReg, 1);
2300 __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
2301 } else {
2302 __ pcmpeqd(dst, dst);
2303 __ psrld(dst, 1);
2304 __ andps(dst, i.InputSimd128Register(0));
2305 }
2306 break;
2307 }
2308 case kX64F32x4Neg: {
2309 XMMRegister dst = i.OutputSimd128Register();
2310 XMMRegister src = i.InputSimd128Register(0);
2311 if (dst == src) {
2312 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2313 __ pslld(kScratchDoubleReg, 31);
2314 __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2315 } else {
2316 __ pcmpeqd(dst, dst);
2317 __ pslld(dst, 31);
2318 __ xorps(dst, i.InputSimd128Register(0));
2319 }
2320 break;
2321 }
2322 case kX64F32x4RecipApprox: {
2323 __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2324 break;
2325 }
2326 case kX64F32x4RecipSqrtApprox: {
2327 __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2328 break;
2329 }
2330 case kX64F32x4Add: {
2331 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2332 __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2333 break;
2334 }
2335 case kX64F32x4AddHoriz: {
2336 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2337 CpuFeatureScope sse_scope(tasm(), SSE3);
2338 __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2339 break;
2340 }
2341 case kX64F32x4Sub: {
2342 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2343 __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2344 break;
2345 }
2346 case kX64F32x4Mul: {
2347 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2348 __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2349 break;
2350 }
2351 case kX64F32x4Min: {
2352 XMMRegister src1 = i.InputSimd128Register(1),
2353 dst = i.OutputSimd128Register();
2354 DCHECK_EQ(dst, i.InputSimd128Register(0));
2355 // The minps instruction doesn't propagate NaNs and +0's in its first
2356 // operand. Perform minps in both orders, merge the results, and adjust.
2357 __ movaps(kScratchDoubleReg, src1);
2358 __ minps(kScratchDoubleReg, dst);
2359 __ minps(dst, src1);
2360 // Propagate -0's and NaNs, which may be non-canonical.
2361 __ orps(kScratchDoubleReg, dst);
2362 // Canonicalize NaNs by quieting and clearing the payload.
2363 __ cmpps(dst, kScratchDoubleReg, 3);
2364 __ orps(kScratchDoubleReg, dst);
2365 __ psrld(dst, 10);
2366 __ andnps(dst, kScratchDoubleReg);
2367 break;
2368 }
2369 case kX64F32x4Max: {
2370 XMMRegister src1 = i.InputSimd128Register(1),
2371 dst = i.OutputSimd128Register();
2372 DCHECK_EQ(dst, i.InputSimd128Register(0));
2373 // The maxps instruction doesn't propagate NaNs and +0's in its first
2374 // operand. Perform maxps in both orders, merge the results, and adjust.
2375 __ movaps(kScratchDoubleReg, src1);
2376 __ maxps(kScratchDoubleReg, dst);
2377 __ maxps(dst, src1);
2378 // Find discrepancies.
2379 __ xorps(dst, kScratchDoubleReg);
2380 // Propagate NaNs, which may be non-canonical.
2381 __ orps(kScratchDoubleReg, dst);
2382 // Propagate sign discrepancy and (subtle) quiet NaNs.
2383 __ subps(kScratchDoubleReg, dst);
2384 // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2385 __ cmpps(dst, kScratchDoubleReg, 3);
2386 __ psrld(dst, 10);
2387 __ andnps(dst, kScratchDoubleReg);
2388 break;
2389 }
2390 case kX64F32x4Eq: {
2391 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2392 __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
2393 break;
2394 }
2395 case kX64F32x4Ne: {
2396 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2397 __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
2398 break;
2399 }
2400 case kX64F32x4Lt: {
2401 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2402 __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2403 break;
2404 }
2405 case kX64F32x4Le: {
2406 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2407 __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2408 break;
2409 }
2410 case kX64I32x4Splat: {
2411 XMMRegister dst = i.OutputSimd128Register();
2412 if (instr->InputAt(0)->IsRegister()) {
2413 __ movd(dst, i.InputRegister(0));
2414 } else {
2415 __ movd(dst, i.InputOperand(0));
2416 }
2417 __ pshufd(dst, dst, 0x0);
2418 break;
2419 }
2420 case kX64I32x4ExtractLane: {
2421 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2422 __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2423 break;
2424 }
2425 case kX64I32x4ReplaceLane: {
2426 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2427 if (instr->InputAt(2)->IsRegister()) {
2428 __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
2429 i.InputInt8(1));
2430 } else {
2431 __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2432 }
2433 break;
2434 }
2435 case kX64I32x4SConvertF32x4: {
2436 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2437 XMMRegister dst = i.OutputSimd128Register();
2438 // NAN->0
2439 __ movaps(kScratchDoubleReg, dst);
2440 __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
2441 __ pand(dst, kScratchDoubleReg);
2442 // Set top bit if >= 0 (but not -0.0!)
2443 __ pxor(kScratchDoubleReg, dst);
2444 // Convert
2445 __ cvttps2dq(dst, dst);
2446 // Set top bit if a lane that was >= 0 is now < 0 (positive overflow)
2447 __ pand(kScratchDoubleReg, dst);
2448 __ psrad(kScratchDoubleReg, 31);
2449 // Set positive overflow lanes to 0x7FFFFFFF
2450 __ pxor(dst, kScratchDoubleReg);
2451 break;
2452 }
2453 case kX64I32x4SConvertI16x8Low: {
2454 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2455 __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2456 break;
2457 }
2458 case kX64I32x4SConvertI16x8High: {
2459 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2460 XMMRegister dst = i.OutputSimd128Register();
2461 __ palignr(dst, i.InputSimd128Register(0), 8);
2462 __ pmovsxwd(dst, dst);
2463 break;
2464 }
2465 case kX64I32x4Neg: {
2466 CpuFeatureScope sse_scope(tasm(), SSSE3);
2467 XMMRegister dst = i.OutputSimd128Register();
2468 XMMRegister src = i.InputSimd128Register(0);
2469 if (dst == src) {
2470 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2471 __ psignd(dst, kScratchDoubleReg);
2472 } else {
2473 __ pxor(dst, dst);
2474 __ psubd(dst, src);
2475 }
2476 break;
2477 }
2478 case kX64I32x4Shl: {
2479 __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
2480 break;
2481 }
2482 case kX64I32x4ShrS: {
2483 __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
2484 break;
2485 }
2486 case kX64I32x4Add: {
2487 __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2488 break;
2489 }
2490 case kX64I32x4AddHoriz: {
2491 CpuFeatureScope sse_scope(tasm(), SSSE3);
2492 __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2493 break;
2494 }
2495 case kX64I32x4Sub: {
2496 __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2497 break;
2498 }
2499 case kX64I32x4Mul: {
2500 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2501 __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
2502 break;
2503 }
2504 case kX64I32x4MinS: {
2505 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2506 __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2507 break;
2508 }
2509 case kX64I32x4MaxS: {
2510 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2511 __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2512 break;
2513 }
2514 case kX64I32x4Eq: {
2515 __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2516 break;
2517 }
2518 case kX64I32x4Ne: {
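// SSE has no packed 'not equal'; compute equality and invert it by xoring
// with all ones.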
2519 __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2520 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2521 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2522 break;
2523 }
2524 case kX64I32x4GtS: {
2525 __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2526 break;
2527 }
2528 case kX64I32x4GeS: {
2529 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2530 XMMRegister dst = i.OutputSimd128Register();
2531 XMMRegister src = i.InputSimd128Register(1);
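// dst >= src (signed) iff min(dst, src) == src.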
2532 __ pminsd(dst, src);
2533 __ pcmpeqd(dst, src);
2534 break;
2535 }
2536 case kX64I32x4UConvertF32x4: {
2537 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2538 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2539 XMMRegister dst = i.OutputSimd128Register();
2540 XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2541 // NAN->0, negative->0
2542 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2543 __ maxps(dst, kScratchDoubleReg);
2544 // scratch: float representation of max_signed
2545 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2546 __ psrld(kScratchDoubleReg, 1); // 0x7fffffff
2547 __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
2548 // tmp: convert (src-max_signed).
2549 // Positive overflow lanes -> 0x7FFFFFFF
2550 // Negative lanes -> 0
2551 __ movaps(tmp, dst);
2552 __ subps(tmp, kScratchDoubleReg);
2553 __ cmpleps(kScratchDoubleReg, tmp);
2554 __ cvttps2dq(tmp, tmp);
2555 __ pxor(tmp, kScratchDoubleReg);
2556 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2557 __ pmaxsd(tmp, kScratchDoubleReg);
2558 // convert. Overflow lanes above max_signed will be 0x80000000
2559 __ cvttps2dq(dst, dst);
2560 // Add (src-max_signed) for overflow lanes.
2561 __ paddd(dst, tmp);
2562 break;
2563 }
2564 case kX64I32x4UConvertI16x8Low: {
2565 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2566 __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2567 break;
2568 }
2569 case kX64I32x4UConvertI16x8High: {
2570 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2571 XMMRegister dst = i.OutputSimd128Register();
2572 __ palignr(dst, i.InputSimd128Register(0), 8);
2573 __ pmovzxwd(dst, dst);
2574 break;
2575 }
2576 case kX64I32x4ShrU: {
2577 __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
2578 break;
2579 }
2580 case kX64I32x4MinU: {
2581 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2582 __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2583 break;
2584 }
2585 case kX64I32x4MaxU: {
2586 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2587 __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2588 break;
2589 }
2590 case kX64I32x4GtU: {
2591 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2592 XMMRegister dst = i.OutputSimd128Register();
2593 XMMRegister src = i.InputSimd128Register(1);
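// dst > src (unsigned) iff max(dst, src) != src: compute the equality and
// invert it.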
2594 __ pmaxud(dst, src);
2595 __ pcmpeqd(dst, src);
2596 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2597 __ pxor(dst, kScratchDoubleReg);
2598 break;
2599 }
2600 case kX64I32x4GeU: {
2601 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2602 XMMRegister dst = i.OutputSimd128Register();
2603 XMMRegister src = i.InputSimd128Register(1);
2604 __ pminud(dst, src);
2605 __ pcmpeqd(dst, src);
2606 break;
2607 }
2608 case kX64S128Zero: {
2609 XMMRegister dst = i.OutputSimd128Register();
2610 __ xorps(dst, dst);
2611 break;
2612 }
2613 case kX64I16x8Splat: {
2614 XMMRegister dst = i.OutputSimd128Register();
2615 if (instr->InputAt(0)->IsRegister()) {
2616 __ movd(dst, i.InputRegister(0));
2617 } else {
2618 __ movd(dst, i.InputOperand(0));
2619 }
2620 __ pshuflw(dst, dst, 0x0);
2621 __ pshufd(dst, dst, 0x0);
2622 break;
2623 }
2624 case kX64I16x8ExtractLane: {
2625 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2626 Register dst = i.OutputRegister();
2627 __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2628 __ movsxwl(dst, dst);
2629 break;
2630 }
2631 case kX64I16x8ReplaceLane: {
2632 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2633 if (instr->InputAt(2)->IsRegister()) {
2634 __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
2635 i.InputInt8(1));
2636 } else {
2637 __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2638 }
2639 break;
2640 }
2641 case kX64I16x8SConvertI8x16Low: {
2642 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2643 __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2644 break;
2645 }
2646 case kX64I16x8SConvertI8x16High: {
2647 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2648 XMMRegister dst = i.OutputSimd128Register();
2649 __ palignr(dst, i.InputSimd128Register(0), 8);
2650 __ pmovsxbw(dst, dst);
2651 break;
2652 }
2653 case kX64I16x8Neg: {
2654 CpuFeatureScope sse_scope(tasm(), SSSE3);
2655 XMMRegister dst = i.OutputSimd128Register();
2656 XMMRegister src = i.InputSimd128Register(0);
2657 if (dst == src) {
2658 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2659 __ psignw(dst, kScratchDoubleReg);
2660 } else {
2661 __ pxor(dst, dst);
2662 __ psubw(dst, src);
2663 }
2664 break;
2665 }
2666 case kX64I16x8Shl: {
2667 __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
2668 break;
2669 }
2670 case kX64I16x8ShrS: {
2671 __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
2672 break;
2673 }
2674 case kX64I16x8SConvertI32x4: {
2675 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2676 __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2677 break;
2678 }
2679 case kX64I16x8Add: {
2680 __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2681 break;
2682 }
2683 case kX64I16x8AddSaturateS: {
2684 __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2685 break;
2686 }
2687 case kX64I16x8AddHoriz: {
2688 CpuFeatureScope sse_scope(tasm(), SSSE3);
2689 __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2690 break;
2691 }
2692 case kX64I16x8Sub: {
2693 __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2694 break;
2695 }
2696 case kX64I16x8SubSaturateS: {
2697 __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2698 break;
2699 }
2700 case kX64I16x8Mul: {
2701 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2702 __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2703 break;
2704 }
2705 case kX64I16x8MinS: {
2706 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2707 __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2708 break;
2709 }
2710 case kX64I16x8MaxS: {
2711 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2712 __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2713 break;
2714 }
2715 case kX64I16x8Eq: {
2716 __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2717 break;
2718 }
2719 case kX64I16x8Ne: {
2720 __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2721 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2722 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2723 break;
2724 }
2725 case kX64I16x8GtS: {
2726 __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2727 break;
2728 }
2729 case kX64I16x8GeS: {
2730 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2731 XMMRegister dst = i.OutputSimd128Register();
2732 XMMRegister src = i.InputSimd128Register(1);
2733 __ pminsw(dst, src);
2734 __ pcmpeqw(dst, src);
2735 break;
2736 }
2737 case kX64I16x8UConvertI8x16Low: {
2738 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2739 __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2740 break;
2741 }
2742 case kX64I16x8UConvertI8x16High: {
2743 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2744 XMMRegister dst = i.OutputSimd128Register();
2745 __ palignr(dst, i.InputSimd128Register(0), 8);
2746 __ pmovzxbw(dst, dst);
2747 break;
2748 }
2749 case kX64I16x8ShrU: {
2750 __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
2751 break;
2752 }
2753 case kX64I16x8UConvertI32x4: {
2754 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2755 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2756 XMMRegister dst = i.OutputSimd128Register();
2757 // Change negative lanes to 0x7FFFFFFF
2758 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2759 __ psrld(kScratchDoubleReg, 1);
2760 __ pminud(dst, kScratchDoubleReg);
2761 __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
2762 __ packusdw(dst, kScratchDoubleReg);
2763 break;
2764 }
2765 case kX64I16x8AddSaturateU: {
2766 __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2767 break;
2768 }
2769 case kX64I16x8SubSaturateU: {
2770 __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2771 break;
2772 }
2773 case kX64I16x8MinU: {
2774 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2775 __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2776 break;
2777 }
2778 case kX64I16x8MaxU: {
2779 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2780 __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2781 break;
2782 }
2783 case kX64I16x8GtU: {
2784 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2785 XMMRegister dst = i.OutputSimd128Register();
2786 XMMRegister src = i.InputSimd128Register(1);
2787 __ pmaxuw(dst, src);
2788 __ pcmpeqw(dst, src);
2789 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2790 __ pxor(dst, kScratchDoubleReg);
2791 break;
2792 }
2793 case kX64I16x8GeU: {
2794 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2795 XMMRegister dst = i.OutputSimd128Register();
2796 XMMRegister src = i.InputSimd128Register(1);
2797 __ pminuw(dst, src);
2798 __ pcmpeqw(dst, src);
2799 break;
2800 }
2801 case kX64I8x16Splat: {
2802 CpuFeatureScope sse_scope(tasm(), SSSE3);
2803 XMMRegister dst = i.OutputSimd128Register();
2804 if (instr->InputAt(0)->IsRegister()) {
2805 __ movd(dst, i.InputRegister(0));
2806 } else {
2807 __ movd(dst, i.InputOperand(0));
2808 }
2809 __ xorps(kScratchDoubleReg, kScratchDoubleReg);
2810 __ pshufb(dst, kScratchDoubleReg);
2811 break;
2812 }
2813 case kX64I8x16ExtractLane: {
2814 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2815 Register dst = i.OutputRegister();
2816 __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
2817 __ movsxbl(dst, dst);
2818 break;
2819 }
2820 case kX64I8x16ReplaceLane: {
2821 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2822 if (instr->InputAt(2)->IsRegister()) {
2823 __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
2824 i.InputInt8(1));
2825 } else {
2826 __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2827 }
2828 break;
2829 }
2830 case kX64I8x16SConvertI16x8: {
2831 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2832 __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2833 break;
2834 }
2835 case kX64I8x16Neg: {
2836 CpuFeatureScope sse_scope(tasm(), SSSE3);
2837 XMMRegister dst = i.OutputSimd128Register();
2838 XMMRegister src = i.InputSimd128Register(0);
2839 if (dst == src) {
2840 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2841 __ psignb(dst, kScratchDoubleReg);
2842 } else {
2843 __ pxor(dst, dst);
2844 __ psubb(dst, src);
2845 }
2846 break;
2847 }
2848 case kX64I8x16Shl: {
2849 XMMRegister dst = i.OutputSimd128Register();
2850 DCHECK_EQ(dst, i.InputSimd128Register(0));
2851 int8_t shift = i.InputInt8(1) & 0x7;
2852 if (shift < 4) {
2853 // For small shifts, doubling is faster.
2854 for (int i = 0; i < shift; ++i) {
2855 __ paddb(dst, dst);
2856 }
2857 } else {
2858 // Mask off the unwanted bits before word-shifting.
2859 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2860 __ psrlw(kScratchDoubleReg, 8 + shift);
2861 __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
2862 __ pand(dst, kScratchDoubleReg);
2863 __ psllw(dst, shift);
2864 }
2865 break;
2866 }
2867 case kX64I8x16ShrS: {
2868 XMMRegister dst = i.OutputSimd128Register();
2869 XMMRegister src = i.InputSimd128Register(0);
2870 int8_t shift = i.InputInt8(1) & 0x7;
2871 // Unpack the bytes into words, do arithmetic shifts, and repack.
2872 __ punpckhbw(kScratchDoubleReg, src);
2873 __ punpcklbw(dst, src);
2874 __ psraw(kScratchDoubleReg, 8 + shift);
2875 __ psraw(dst, 8 + shift);
2876 __ packsswb(dst, kScratchDoubleReg);
2877 break;
2878 }
2879 case kX64I8x16Add: {
2880 __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2881 break;
2882 }
2883 case kX64I8x16AddSaturateS: {
2884 __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2885 break;
2886 }
2887 case kX64I8x16Sub: {
2888 __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2889 break;
2890 }
2891 case kX64I8x16SubSaturateS: {
2892 __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2893 break;
2894 }
2895 case kX64I8x16Mul: {
2896 XMMRegister dst = i.OutputSimd128Register();
2897 DCHECK_EQ(dst, i.InputSimd128Register(0));
2898 XMMRegister right = i.InputSimd128Register(1);
2899 XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2900 // I16x8 view of I8x16
2901 // left = AAaa AAaa ... AAaa AAaa
2902 // right= BBbb BBbb ... BBbb BBbb
2903 // t = 00AA 00AA ... 00AA 00AA
2904 // s = 00BB 00BB ... 00BB 00BB
2905 __ movaps(tmp, dst);
2906 __ movaps(kScratchDoubleReg, right);
2907 __ psrlw(tmp, 8);
2908 __ psrlw(kScratchDoubleReg, 8);
2909 // dst = left * 256
2910 __ psllw(dst, 8);
2911 // t = I16x8Mul(t, s)
2912 // => __PP __PP ... __PP __PP
2913 __ pmullw(tmp, kScratchDoubleReg);
2914 // dst = I16x8Mul(left * 256, right)
2915 // => pp__ pp__ ... pp__ pp__
2916 __ pmullw(dst, right);
2917 // t = I16x8Shl(t, 8)
2918 // => PP00 PP00 ... PP00 PP00
2919 __ psllw(tmp, 8);
2920 // dst = I16x8Shr(dst, 8)
2921 // => 00pp 00pp ... 00pp 00pp
2922 __ psrlw(dst, 8);
2923 // dst = I16x8Or(dst, t)
2924 // => PPpp PPpp ... PPpp PPpp
2925 __ por(dst, tmp);
2926 break;
2927 }
2928 case kX64I8x16MinS: {
2929 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2930 __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2931 break;
2932 }
2933 case kX64I8x16MaxS: {
2934 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2935 __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2936 break;
2937 }
2938 case kX64I8x16Eq: {
2939 __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2940 break;
2941 }
2942 case kX64I8x16Ne: {
2943 __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2944 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2945 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2946 break;
2947 }
2948 case kX64I8x16GtS: {
2949 __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2950 break;
2951 }
2952 case kX64I8x16GeS: {
2953 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2954 XMMRegister dst = i.OutputSimd128Register();
2955 XMMRegister src = i.InputSimd128Register(1);
2956 __ pminsb(dst, src);
2957 __ pcmpeqb(dst, src);
2958 break;
2959 }
2960 case kX64I8x16UConvertI16x8: {
2961 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2962 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2963 XMMRegister dst = i.OutputSimd128Register();
2964 // Change negative lanes to 0x7FFF
2965 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2966 __ psrlw(kScratchDoubleReg, 1);
2967 __ pminuw(dst, kScratchDoubleReg);
2968 __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
2969 __ packuswb(dst, kScratchDoubleReg);
2970 break;
2971 }
2972 case kX64I8x16ShrU: {
2973 XMMRegister dst = i.OutputSimd128Register();
2974 XMMRegister src = i.InputSimd128Register(0);
2975 int8_t shift = i.InputInt8(1) & 0x7;
2976 // Unpack the bytes into words, do logical shifts, and repack.
2977 __ punpckhbw(kScratchDoubleReg, src);
2978 __ punpcklbw(dst, src);
2979 __ psrlw(kScratchDoubleReg, 8 + shift);
2980 __ psrlw(dst, 8 + shift);
2981 __ packuswb(dst, kScratchDoubleReg);
2982 break;
2983 }
2984 case kX64I8x16AddSaturateU: {
2985 __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2986 break;
2987 }
2988 case kX64I8x16SubSaturateU: {
2989 __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2990 break;
2991 }
2992 case kX64I8x16MinU: {
2993 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2994 __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2995 break;
2996 }
2997 case kX64I8x16MaxU: {
2998 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2999 __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
3000 break;
3001 }
3002 case kX64I8x16GtU: {
3003 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3004 XMMRegister dst = i.OutputSimd128Register();
3005 XMMRegister src = i.InputSimd128Register(1);
3006 __ pmaxub(dst, src);
3007 __ pcmpeqb(dst, src);
3008 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3009 __ pxor(dst, kScratchDoubleReg);
3010 break;
3011 }
3012 case kX64I8x16GeU: {
3013 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3014 XMMRegister dst = i.OutputSimd128Register();
3015 XMMRegister src = i.InputSimd128Register(1);
3016 __ pminub(dst, src);
3017 __ pcmpeqb(dst, src);
3018 break;
3019 }
3020 case kX64S128And: {
3021 __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
3022 break;
3023 }
3024 case kX64S128Or: {
3025 __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
3026 break;
3027 }
3028 case kX64S128Xor: {
3029 __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
3030 break;
3031 }
3032 case kX64S128Not: {
3033 XMMRegister dst = i.OutputSimd128Register();
3034 XMMRegister src = i.InputSimd128Register(0);
3035 if (dst == src) {
3036 __ movaps(kScratchDoubleReg, dst);
3037 __ pcmpeqd(dst, dst);
3038 __ pxor(dst, kScratchDoubleReg);
3039 } else {
3040 __ pcmpeqd(dst, dst);
3041 __ pxor(dst, src);
3042 }
3043
3044 break;
3045 }
3046 case kX64S128Select: {
3047 // Mask used here is stored in dst.
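// Computes dst = (mask & (src1 ^ src2)) ^ src2, i.e. src1 bits where the
// mask is set and src2 bits where it is clear.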
3048 XMMRegister dst = i.OutputSimd128Register();
3049 __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3050 __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3051 __ andps(dst, kScratchDoubleReg);
3052 __ xorps(dst, i.InputSimd128Register(2));
3053 break;
3054 }
3055 case kX64S8x16Shuffle: {
3056 XMMRegister dst = i.OutputSimd128Register();
3057 Register tmp = i.TempRegister(0);
3058 // Prepare a 16-byte-aligned buffer for the shuffle control mask.
3059 __ movq(tmp, rsp);
3060 __ andq(rsp, Immediate(-16));
3061 if (instr->InputCount() == 5) { // only one input operand
3062 uint32_t mask[4] = {};
3063 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3064 for (int j = 4; j > 0; j--) {
3065 mask[j - 1] = i.InputUint32(j);
3066 }
3067
3068 SetupShuffleMaskOnStack(tasm(), mask);
3069 __ pshufb(dst, Operand(rsp, 0));
3070 } else { // two input operands
3071 DCHECK_EQ(6, instr->InputCount());
3072 ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
3073 uint32_t mask[4] = {};
3074 for (int j = 5; j > 1; j--) {
3075 uint32_t lanes = i.InputUint32(j);
3076 for (int k = 0; k < 32; k += 8) {
3077 uint8_t lane = lanes >> k;
3078 mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3079 }
3080 }
3081 SetupShuffleMaskOnStack(tasm(), mask);
3082 __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
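// kScratchDoubleReg now holds the lanes taken from the first input. Build a
// second mask that selects lanes >= 16 from the second input; the two halves
// are merged with por below.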
3083 uint32_t mask1[4] = {};
3084 if (instr->InputAt(1)->IsSimd128Register()) {
3085 XMMRegister src1 = i.InputSimd128Register(1);
3086 if (src1 != dst) __ movups(dst, src1);
3087 } else {
3088 __ movups(dst, i.InputOperand(1));
3089 }
3090 for (int j = 5; j > 1; j--) {
3091 uint32_t lanes = i.InputUint32(j);
3092 for (int k = 0; k < 32; k += 8) {
3093 uint8_t lane = lanes >> k;
3094 mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3095 }
3096 }
3097 SetupShuffleMaskOnStack(tasm(), mask1);
3098 __ pshufb(dst, Operand(rsp, 0));
3099 __ por(dst, kScratchDoubleReg);
3100 }
3101 __ movq(rsp, tmp);
3102 break;
3103 }
3104 case kX64S32x4Swizzle: {
3105 DCHECK_EQ(2, instr->InputCount());
3106 ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
3107 i.InputInt8(1));
3108 break;
3109 }
3110 case kX64S32x4Shuffle: {
3111 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3112 DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3113 int8_t shuffle = i.InputInt8(2);
3114 DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3115 ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
3116 ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
3117 __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
3118 break;
3119 }
3120 case kX64S16x8Blend: {
3121 ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3122 break;
3123 }
3124 case kX64S16x8HalfShuffle1: {
3125 XMMRegister dst = i.OutputSimd128Register();
3126 ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
3127 __ pshufhw(dst, dst, i.InputInt8(2));
3128 break;
3129 }
3130 case kX64S16x8HalfShuffle2: {
3131 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3132 XMMRegister dst = i.OutputSimd128Register();
3133 ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
3134 __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
3135 ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
3136 __ pshufhw(dst, dst, i.InputInt8(3));
3137 __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
3138 break;
3139 }
3140 case kX64S8x16Alignr: {
3141 ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3142 break;
3143 }
3144 case kX64S16x8Dup: {
3145 XMMRegister dst = i.OutputSimd128Register();
3146 int8_t lane = i.InputInt8(1) & 0x7;
3147 int8_t lane4 = lane & 0x3;
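// Replicate the 2-bit lane index into all four fields of the pshuflw/pshufhw
// immediate.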
3148 int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3149 if (lane < 4) {
3150 ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
3151 __ pshufd(dst, dst, 0);
3152 } else {
3153 ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
3154 __ pshufd(dst, dst, 0xaa);
3155 }
3156 break;
3157 }
3158 case kX64S8x16Dup: {
3159 XMMRegister dst = i.OutputSimd128Register();
3160 int8_t lane = i.InputInt8(1) & 0xf;
3161 DCHECK_EQ(dst, i.InputSimd128Register(0));
3162 if (lane < 8) {
3163 __ punpcklbw(dst, dst);
3164 } else {
3165 __ punpckhbw(dst, dst);
3166 }
3167 lane &= 0x7;
3168 int8_t lane4 = lane & 0x3;
3169 int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3170 if (lane < 4) {
3171 __ pshuflw(dst, dst, half_dup);
3172 __ pshufd(dst, dst, 0);
3173 } else {
3174 __ pshufhw(dst, dst, half_dup);
3175 __ pshufd(dst, dst, 0xaa);
3176 }
3177 break;
3178 }
3179 case kX64S64x2UnpackHigh:
3180 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3181 break;
3182 case kX64S32x4UnpackHigh:
3183 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3184 break;
3185 case kX64S16x8UnpackHigh:
3186 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3187 break;
3188 case kX64S8x16UnpackHigh:
3189 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3190 break;
3191 case kX64S64x2UnpackLow:
3192 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3193 break;
3194 case kX64S32x4UnpackLow:
3195 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3196 break;
3197 case kX64S16x8UnpackLow:
3198 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3199 break;
3200 case kX64S8x16UnpackLow:
3201 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3202 break;
3203 case kX64S16x8UnzipHigh: {
3204 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3205 XMMRegister dst = i.OutputSimd128Register();
3206 XMMRegister src2 = dst;
3207 DCHECK_EQ(dst, i.InputSimd128Register(0));
3208 if (instr->InputCount() == 2) {
3209 ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3210 __ psrld(kScratchDoubleReg, 16);
3211 src2 = kScratchDoubleReg;
3212 }
3213 __ psrld(dst, 16);
3214 __ packusdw(dst, src2);
3215 break;
3216 }
3217 case kX64S16x8UnzipLow: {
3218 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3219 XMMRegister dst = i.OutputSimd128Register();
3220 XMMRegister src2 = dst;
3221 DCHECK_EQ(dst, i.InputSimd128Register(0));
3222 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3223 if (instr->InputCount() == 2) {
3224 ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
3225 src2 = kScratchDoubleReg;
3226 }
3227 __ pblendw(dst, kScratchDoubleReg, 0xaa);
3228 __ packusdw(dst, src2);
3229 break;
3230 }
3231 case kX64S8x16UnzipHigh: {
3232 XMMRegister dst = i.OutputSimd128Register();
3233 XMMRegister src2 = dst;
3234 DCHECK_EQ(dst, i.InputSimd128Register(0));
3235 if (instr->InputCount() == 2) {
3236 ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3237 __ psrlw(kScratchDoubleReg, 8);
3238 src2 = kScratchDoubleReg;
3239 }
3240 __ psrlw(dst, 8);
3241 __ packuswb(dst, src2);
3242 break;
3243 }
3244 case kX64S8x16UnzipLow: {
3245 XMMRegister dst = i.OutputSimd128Register();
3246 XMMRegister src2 = dst;
3247 DCHECK_EQ(dst, i.InputSimd128Register(0));
3248 if (instr->InputCount() == 2) {
3249 ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3250 __ psllw(kScratchDoubleReg, 8);
3251 __ psrlw(kScratchDoubleReg, 8);
3252 src2 = kScratchDoubleReg;
3253 }
3254 __ psllw(dst, 8);
3255 __ psrlw(dst, 8);
3256 __ packuswb(dst, src2);
3257 break;
3258 }
3259 case kX64S8x16TransposeLow: {
3260 XMMRegister dst = i.OutputSimd128Register();
3261 DCHECK_EQ(dst, i.InputSimd128Register(0));
3262 __ psllw(dst, 8);
3263 if (instr->InputCount() == 1) {
3264 __ movups(kScratchDoubleReg, dst);
3265 } else {
3266 DCHECK_EQ(2, instr->InputCount());
3267 ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3268 __ psllw(kScratchDoubleReg, 8);
3269 }
3270 __ psrlw(dst, 8);
3271 __ por(dst, kScratchDoubleReg);
3272 break;
3273 }
3274 case kX64S8x16TransposeHigh: {
3275 XMMRegister dst = i.OutputSimd128Register();
3276 DCHECK_EQ(dst, i.InputSimd128Register(0));
3277 __ psrlw(dst, 8);
3278 if (instr->InputCount() == 1) {
3279 __ movups(kScratchDoubleReg, dst);
3280 } else {
3281 DCHECK_EQ(2, instr->InputCount());
3282 ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3283 __ psrlw(kScratchDoubleReg, 8);
3284 }
3285 __ psllw(kScratchDoubleReg, 8);
3286 __ por(dst, kScratchDoubleReg);
3287 break;
3288 }
3289 case kX64S8x8Reverse:
3290 case kX64S8x4Reverse:
3291 case kX64S8x2Reverse: {
3292 DCHECK_EQ(1, instr->InputCount());
3293 XMMRegister dst = i.OutputSimd128Register();
3294 DCHECK_EQ(dst, i.InputSimd128Register(0));
3295 if (arch_opcode != kX64S8x2Reverse) {
3296 // First shuffle words into position.
3297 int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
3298 __ pshuflw(dst, dst, shuffle_mask);
3299 __ pshufhw(dst, dst, shuffle_mask);
3300 }
3301 __ movaps(kScratchDoubleReg, dst);
3302 __ psrlw(kScratchDoubleReg, 8);
3303 __ psllw(dst, 8);
3304 __ por(dst, kScratchDoubleReg);
3305 break;
3306 }
3307 case kX64S1x4AnyTrue:
3308 case kX64S1x8AnyTrue:
3309 case kX64S1x16AnyTrue: {
3310 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3311 Register dst = i.OutputRegister();
3312 XMMRegister src = i.InputSimd128Register(0);
3313 Register tmp = i.TempRegister(0);
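// ptest sets ZF iff the source is all zeros; in that case the preloaded 1 in
// dst is replaced with 0.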
3314 __ xorq(tmp, tmp);
3315 __ movq(dst, Immediate(1));
3316 __ ptest(src, src);
3317 __ cmovq(zero, dst, tmp);
3318 break;
3319 }
3320 case kX64S1x4AllTrue:
3321 case kX64S1x8AllTrue:
3322 case kX64S1x16AllTrue: {
3323 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3324 Register dst = i.OutputRegister();
3325 XMMRegister src = i.InputSimd128Register(0);
3326 Register tmp = i.TempRegister(0);
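// All lanes are true iff NOT(src) is all zeros: invert src into the scratch
// register, ptest it, and set dst to 1 only when ZF is set.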
3327 __ movq(tmp, Immediate(1));
3328 __ xorq(dst, dst);
3329 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3330 __ pxor(kScratchDoubleReg, src);
3331 __ ptest(kScratchDoubleReg, kScratchDoubleReg);
3332 __ cmovq(zero, dst, tmp);
3333 break;
3334 }
3335 case kX64StackCheck:
3336 __ CompareRoot(rsp, RootIndex::kStackLimit);
3337 break;
3338 case kWord32AtomicExchangeInt8: {
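// xchg leaves the previous memory value in the input register; the exchange
// cases below then sign- or zero-extend it to the requested width.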
3339 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3340 __ movsxbl(i.InputRegister(0), i.InputRegister(0));
3341 break;
3342 }
3343 case kWord32AtomicExchangeUint8: {
3344 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3345 __ movzxbl(i.InputRegister(0), i.InputRegister(0));
3346 break;
3347 }
3348 case kWord32AtomicExchangeInt16: {
3349 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3350 __ movsxwl(i.InputRegister(0), i.InputRegister(0));
3351 break;
3352 }
3353 case kWord32AtomicExchangeUint16: {
3354 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3355 __ movzxwl(i.InputRegister(0), i.InputRegister(0));
3356 break;
3357 }
3358 case kWord32AtomicExchangeWord32: {
3359 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3360 break;
3361 }
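        // Compare-exchanges: cmpxchg implicitly compares against and updates
        // rax, which the instruction selector fixes as the expected-value
        // input and the result; the lock prefix makes the operation atomic.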
3362 case kWord32AtomicCompareExchangeInt8: {
3363 __ lock();
3364 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3365 __ movsxbl(rax, rax);
3366 break;
3367 }
3368 case kWord32AtomicCompareExchangeUint8: {
3369 __ lock();
3370 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3371 __ movzxbl(rax, rax);
3372 break;
3373 }
3374 case kWord32AtomicCompareExchangeInt16: {
3375 __ lock();
3376 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3377 __ movsxwl(rax, rax);
3378 break;
3379 }
3380 case kWord32AtomicCompareExchangeUint16: {
3381 __ lock();
3382 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3383 __ movzxwl(rax, rax);
3384 break;
3385 }
3386 case kWord32AtomicCompareExchangeWord32: {
3387 __ lock();
3388 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3389 break;
3390 }
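        // The atomic binops expand to a load / modify / lock cmpxchg retry
        // loop (see ASSEMBLE_ATOMIC_BINOP above) that leaves the old value in
        // rax, which is then sign- or zero-extended for the narrow cases.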
3391#define ATOMIC_BINOP_CASE(op, inst) \
3392 case kWord32Atomic##op##Int8: \
3393 ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3394 __ movsxbl(rax, rax); \
3395 break; \
3396 case kWord32Atomic##op##Uint8: \
3397 ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3398 __ movzxbl(rax, rax); \
3399 break; \
3400 case kWord32Atomic##op##Int16: \
3401 ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3402 __ movsxwl(rax, rax); \
3403 break; \
3404 case kWord32Atomic##op##Uint16: \
3405 ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3406 __ movzxwl(rax, rax); \
3407 break; \
3408 case kWord32Atomic##op##Word32: \
3409 ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
3410 break;
3411 ATOMIC_BINOP_CASE(Add, addl)
3412 ATOMIC_BINOP_CASE(Sub, subl)
3413 ATOMIC_BINOP_CASE(And, andl)
3414 ATOMIC_BINOP_CASE(Or, orl)
3415 ATOMIC_BINOP_CASE(Xor, xorl)
3416#undef ATOMIC_BINOP_CASE
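        // The 64-bit atomics follow the same patterns as the 32-bit ones
        // above; results narrower than 64 bits are zero-extended to the full
        // quadword.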
3417 case kX64Word64AtomicExchangeUint8: {
3418 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3419 __ movzxbq(i.InputRegister(0), i.InputRegister(0));
3420 break;
3421 }
3422 case kX64Word64AtomicExchangeUint16: {
3423 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3424 __ movzxwq(i.InputRegister(0), i.InputRegister(0));
3425 break;
3426 }
3427 case kX64Word64AtomicExchangeUint32: {
3428 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3429 break;
3430 }
3431 case kX64Word64AtomicExchangeUint64: {
3432 __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
3433 break;
3434 }
3435 case kX64Word64AtomicCompareExchangeUint8: {
3436 __ lock();
3437 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3438 __ movzxbq(rax, rax);
3439 break;
3440 }
3441 case kX64Word64AtomicCompareExchangeUint16: {
3442 __ lock();
3443 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3444 __ movzxwq(rax, rax);
3445 break;
3446 }
3447 case kX64Word64AtomicCompareExchangeUint32: {
3448 __ lock();
3449 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3450 break;
3451 }
3452 case kX64Word64AtomicCompareExchangeUint64: {
3453 __ lock();
3454 __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
3455 break;
3456 }
3457#define ATOMIC64_BINOP_CASE(op, inst) \
3458 case kX64Word64Atomic##op##Uint8: \
3459 ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
3460 __ movzxbq(rax, rax); \
3461 break; \
3462 case kX64Word64Atomic##op##Uint16: \
3463 ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
3464 __ movzxwq(rax, rax); \
3465 break; \
3466 case kX64Word64Atomic##op##Uint32: \
3467 ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
3468 break; \
3469 case kX64Word64Atomic##op##Uint64: \
3470 ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
3471 break;
3472 ATOMIC64_BINOP_CASE(Add, addq)
3473 ATOMIC64_BINOP_CASE(Sub, subq)
3474 ATOMIC64_BINOP_CASE(And, andq)
3475 ATOMIC64_BINOP_CASE(Or, orq)
3476 ATOMIC64_BINOP_CASE(Xor, xorq)
3477#undef ATOMIC64_BINOP_CASE
3478 case kWord32AtomicLoadInt8:
3479 case kWord32AtomicLoadUint8:
3480 case kWord32AtomicLoadInt16:
3481 case kWord32AtomicLoadUint16:
3482 case kWord32AtomicLoadWord32:
3483 case kWord32AtomicStoreWord8:
3484 case kWord32AtomicStoreWord16:
3485 case kWord32AtomicStoreWord32:
3486 case kX64Word64AtomicLoadUint8:
3487 case kX64Word64AtomicLoadUint16:
3488 case kX64Word64AtomicLoadUint32:
3489 case kX64Word64AtomicLoadUint64:
3490 case kX64Word64AtomicStoreWord8:
3491 case kX64Word64AtomicStoreWord16:
3492 case kX64Word64AtomicStoreWord32:
3493 case kX64Word64AtomicStoreWord64:
3494 UNREACHABLE(); // Won't be generated by instruction selector.
3495 break;
3496 }
3497 return kSuccess;
3498}  // NOLINT(readability/fn_size)
3499
3500#undef ASSEMBLE_UNOP
3501#undef ASSEMBLE_BINOP
3502#undef ASSEMBLE_COMPARE
3503#undef ASSEMBLE_MULT
3504#undef ASSEMBLE_SHIFT
3505#undef ASSEMBLE_MOVX
3506#undef ASSEMBLE_SSE_BINOP
3507#undef ASSEMBLE_SSE_UNOP
3508#undef ASSEMBLE_AVX_BINOP
3509#undef ASSEMBLE_IEEE754_BINOP
3510#undef ASSEMBLE_IEEE754_UNOP
3511#undef ASSEMBLE_ATOMIC_BINOP
3512#undef ASSEMBLE_ATOMIC64_BINOP
3513#undef ASSEMBLE_SIMD_INSTR
3514#undef ASSEMBLE_SIMD_IMM_INSTR
3515#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
3516#undef ASSEMBLE_SIMD_IMM_SHUFFLE
3517
3518namespace {
3519
3520Condition FlagsConditionToCondition(FlagsCondition condition) {
3521 switch (condition) {
3522 case kUnorderedEqual:
3523 case kEqual:
3524 return equal;
3525 case kUnorderedNotEqual:
3526 case kNotEqual:
3527 return not_equal;
3528 case kSignedLessThan:
3529 return less;
3530 case kSignedGreaterThanOrEqual:
3531 return greater_equal;
3532 case kSignedLessThanOrEqual:
3533 return less_equal;
3534 case kSignedGreaterThan:
3535 return greater;
3536 case kUnsignedLessThan:
3537 return below;
3538 case kUnsignedGreaterThanOrEqual:
3539 return above_equal;
3540 case kUnsignedLessThanOrEqual:
3541 return below_equal;
3542 case kUnsignedGreaterThan:
3543 return above;
3544 case kOverflow:
3545 return overflow;
3546 case kNotOverflow:
3547 return no_overflow;
3548 default:
3549 break;
3550 }
3551 UNREACHABLE();
3552}
3553
3554} // namespace
3555
3556// Assembles branches after this instruction.
3557void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3558 Label::Distance flabel_distance =
3559 branch->fallthru ? Label::kNear : Label::kFar;
3560 Label* tlabel = branch->true_label;
3561 Label* flabel = branch->false_label;
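      // Floating-point comparisons set the parity flag for NaN operands, so
      // for kUnorderedEqual a NaN must branch to the false label and for
      // kUnorderedNotEqual to the true label.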
3562 if (branch->condition == kUnorderedEqual) {
3563 __ j(parity_even, flabel, flabel_distance);
3564 } else if (branch->condition == kUnorderedNotEqual) {
3565 __ j(parity_even, tlabel);
3566 }
3567 __ j(FlagsConditionToCondition(branch->condition), tlabel);
3568
3569 if (!branch->fallthru) __ jmp(flabel, flabel_distance);
3570}
3571
3572void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
3573 Instruction* instr) {
3574 // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
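      // The poison register is cleared whenever the recorded condition does
      // not actually hold, i.e. whenever this code can only have been reached
      // through misspeculation.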
3575 if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
3576 return;
3577 }
3578
3579 condition = NegateFlagsCondition(condition);
3580 __ movl(kScratchRegister, Immediate(0));
3581 __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
3582 kScratchRegister);
3583}
3584
3585void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3586 BranchInfo* branch) {
3587 Label::Distance flabel_distance =
3588 branch->fallthru ? Label::kNear : Label::kFar;
3589 Label* tlabel = branch->true_label;
3590 Label* flabel = branch->false_label;
3591 Label nodeopt;
3592 if (branch->condition == kUnorderedEqual) {
3593 __ j(parity_even, flabel, flabel_distance);
3594 } else if (branch->condition == kUnorderedNotEqual) {
3595 __ j(parity_even, tlabel);
3596 }
3597 __ j(FlagsConditionToCondition(branch->condition), tlabel);
3598
3599 if (FLAG_deopt_every_n_times > 0) {
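        // Stress deoptimization: decrement a per-isolate counter and force
        // this deopt to be taken whenever the counter reaches zero, resetting
        // the counter at that point.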
3600 ExternalReference counter =
3601 ExternalReference::stress_deopt_count(isolate());
3602
3603 __ pushfq();
3604 __ pushq(rax);
3605 __ load_rax(counter);
3606 __ decl(rax);
3607 __ j(not_zero, &nodeopt);
3608
3609 __ Set(rax, FLAG_deopt_every_n_times);
3610 __ store_rax(counter);
3611 __ popq(rax);
3612 __ popfq();
3613 __ jmp(tlabel);
3614
3615 __ bind(&nodeopt);
3616 __ store_rax(counter);
3617 __ popq(rax);
3618 __ popfq();
3619 }
3620
3621 if (!branch->fallthru) {
3622 __ jmp(flabel, flabel_distance);
3623 }
3624}
3625
3626void CodeGenerator::AssembleArchJump(RpoNumber target) {
3627 if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
3628}
3629
3630void CodeGenerator::AssembleArchTrap(Instruction* instr,
3631 FlagsCondition condition) {
3632 auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
3633 Label* tlabel = ool->entry();
3634 Label end;
3635 if (condition == kUnorderedEqual) {
3636 __ j(parity_even, &end);
3637 } else if (condition == kUnorderedNotEqual) {
3638 __ j(parity_even, tlabel);
3639 }
3640 __ j(FlagsConditionToCondition(condition), tlabel);
3641 __ bind(&end);
3642}
3643
3644// Assembles boolean materializations after this instruction.
3645void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3646 FlagsCondition condition) {
3647 X64OperandConverter i(this, instr);
3648 Label done;
3649
3650 // Materialize a full 64-bit 1 or 0 value. The result register is always the
3651 // last output of the instruction.
3652 Label check;
3653 DCHECK_NE(0u, instr->OutputCount());
3654 Register reg = i.OutputRegister(instr->OutputCount() - 1);
3655 if (condition == kUnorderedEqual) {
3656 __ j(parity_odd, &check, Label::kNear);
3657 __ movl(reg, Immediate(0));
3658 __ jmp(&done, Label::kNear);
3659 } else if (condition == kUnorderedNotEqual) {
3660 __ j(parity_odd, &check, Label::kNear);
3661 __ movl(reg, Immediate(1));
3662 __ jmp(&done, Label::kNear);
3663 }
3664 __ bind(&check);
3665 __ setcc(FlagsConditionToCondition(condition), reg);
3666 __ movzxbl(reg, reg);
3667 __ bind(&done);
3668}
3669
3670void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3671 X64OperandConverter i(this, instr);
3672 Register input = i.InputRegister(0);
3673 std::vector<std::pair<int32_t, Label*>> cases;
3674 for (size_t index = 2; index < instr->InputCount(); index += 2) {
3675 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3676 }
3677 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3678 cases.data() + cases.size());
3679}
3680
3681void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
3682 X64OperandConverter i(this, instr);
3683 Register input = i.InputRegister(0);
3684 for (size_t index = 2; index < instr->InputCount(); index += 2) {
3685 __ cmpl(input, Immediate(i.InputInt32(index + 0)));
3686 __ j(equal, GetLabel(i.InputRpo(index + 1)));
3687 }
3688 AssembleArchJump(i.InputRpo(1));
3689}
3690
3691void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3692 X64OperandConverter i(this, instr);
3693 Register input = i.InputRegister(0);
3694 int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
3695 Label** cases = zone()->NewArray<Label*>(case_count);
3696 for (int32_t index = 0; index < case_count; ++index) {
3697 cases[index] = GetLabel(i.InputRpo(index + 2));
3698 }
3699 Label* const table = AddJumpTable(cases, case_count);
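      // Bounds-check the input against the case count (out-of-range values go
      // to the default block), then dispatch through the table of 8-byte
      // label addresses.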
3700 __ cmpl(input, Immediate(case_count));
3701 __ j(above_equal, GetLabel(i.InputRpo(1)));
3702 __ leaq(kScratchRegister, Operand(table));
3703 __ jmp(Operand(kScratchRegister, input, times_8, 0));
3704}
3705
3706namespace {
3707
3708static const int kQuadWordSize = 16;
3709
3710} // namespace
3711
3712void CodeGenerator::FinishFrame(Frame* frame) {
3713 auto call_descriptor = linkage()->GetIncomingDescriptor();
3714
3715 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3716 if (saves_fp != 0) {
3717 frame->AlignSavedCalleeRegisterSlots();
3718 if (saves_fp != 0) { // Save callee-saved XMM registers.
3719 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3720 frame->AllocateSavedCalleeRegisterSlots(
3721 saves_fp_count * (kQuadWordSize / kSystemPointerSize));
3722 }
3723 }
3724 const RegList saves = call_descriptor->CalleeSavedRegisters();
3725 if (saves != 0) { // Save callee-saved registers.
3726 int count = 0;
3727 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3728 if (((1 << i) & saves)) {
3729 ++count;
3730 }
3731 }
3732 frame->AllocateSavedCalleeRegisterSlots(count);
3733 }
3734}
3735
3736void CodeGenerator::AssembleConstructFrame() {
3737 auto call_descriptor = linkage()->GetIncomingDescriptor();
3738 if (frame_access_state()->has_frame()) {
3739 int pc_base = __ pc_offset();
3740
3741 if (call_descriptor->IsCFunctionCall()) {
3742 __ pushq(rbp);
3743 __ movq(rbp, rsp);
3744 } else if (call_descriptor->IsJSFunctionCall()) {
3745 __ Prologue();
3746 if (call_descriptor->PushArgumentCount()) {
3747 __ pushq(kJavaScriptCallArgCountRegister);
3748 }
3749 } else {
3750 __ StubPrologue(info()->GetOutputStackFrameType());
3751 if (call_descriptor->IsWasmFunctionCall()) {
3752 __ pushq(kWasmInstanceRegister);
3753 } else if (call_descriptor->IsWasmImportWrapper()) {
3754 // WASM import wrappers are passed a tuple in the place of the instance.
3755 // Unpack the tuple into the instance and the target callable.
3756 // This must be done here in the codegen because it cannot be expressed
3757 // properly in the graph.
3758 __ LoadTaggedPointerField(
3759 kJSFunctionRegister,
3760 FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
3761 __ LoadTaggedPointerField(
3762 kWasmInstanceRegister,
3763 FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
3764 __ pushq(kWasmInstanceRegister);
3765 }
3766 }
3767
3768 unwinding_info_writer_.MarkFrameConstructed(pc_base);
3769 }
3770 int required_slots = frame()->GetTotalFrameSlotCount() -
3771 call_descriptor->CalculateFixedFrameSize();
3772
3773 if (info()->is_osr()) {
3774 // TurboFan OSR-compiled functions cannot be entered directly.
3775 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3776
3777 // Unoptimized code jumps directly to this entrypoint while the unoptimized
3778 // frame is still on the stack. Optimized code uses OSR values directly from
3779 // the unoptimized frame. Thus, all that needs to be done is to allocate the
3780 // remaining stack slots.
3781 if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
3782 osr_pc_offset_ = __ pc_offset();
3783 required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
3784 ResetSpeculationPoison();
3785 }
3786
3787 const RegList saves = call_descriptor->CalleeSavedRegisters();
3788 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3789
3790 if (required_slots > 0) {
3791 DCHECK(frame_access_state()->has_frame());
3792 if (info()->IsWasm() && required_slots > 128) {
3793 // For WebAssembly functions with big frames we have to do the stack
3794 // overflow check before we construct the frame. Otherwise we may not
3795 // have enough space on the stack to call the runtime for the stack
3796 // overflow.
3797 Label done;
3798
3799 // If the frame is bigger than the stack, we throw the stack overflow
3800 // exception unconditionally. Thereby we can avoid the integer overflow
3801 // check in the condition code.
3802 if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
3803 __ movq(kScratchRegister,
3804 FieldOperand(kWasmInstanceRegister,
3805 WasmInstanceObject::kRealStackLimitAddressOffset));
3806 __ movq(kScratchRegister, Operand(kScratchRegister, 0));
3807 __ addq(kScratchRegister,
3808 Immediate(required_slots * kSystemPointerSize));
3809 __ cmpq(rsp, kScratchRegister);
3810 __ j(above_equal, &done);
3811 }
3812
3813 __ near_call(wasm::WasmCode::kWasmStackOverflow,
3814 RelocInfo::WASM_STUB_CALL);
3815 ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
3816 RecordSafepoint(reference_map, Safepoint::kSimple,
3817 Safepoint::kNoLazyDeopt);
3818 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
3819 __ bind(&done);
3820 }
3821
3822 // Skip callee-saved and return slots, which are created below.
3823 required_slots -= base::bits::CountPopulation(saves);
3824 required_slots -= base::bits::CountPopulation(saves_fp) *
3825 (kQuadWordSize / kSystemPointerSize);
3826 required_slots -= frame()->GetReturnSlotCount();
3827 if (required_slots > 0) {
3828 __ subq(rsp, Immediate(required_slots * kSystemPointerSize));
3829 }
3830 }
3831
3832 if (saves_fp != 0) { // Save callee-saved XMM registers.
3833 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3834 const int stack_size = saves_fp_count * kQuadWordSize;
3835 // Adjust the stack pointer.
3836 __ subq(rsp, Immediate(stack_size));
3837 // Store the registers on the stack.
3838 int slot_idx = 0;
3839 for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3840 if (!((1 << i) & saves_fp)) continue;
3841 __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
3842 XMMRegister::from_code(i));
3843 slot_idx++;
3844 }
3845 }
3846
3847 if (saves != 0) { // Save callee-saved registers.
3848 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3849 if (!((1 << i) & saves)) continue;
3850 __ pushq(Register::from_code(i));
3851 }
3852 }
3853
3854 // Allocate return slots (located after callee-saved).
3855 if (frame()->GetReturnSlotCount() > 0) {
3856 __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
3857 }
3858}
3859
3860void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
3861 auto call_descriptor = linkage()->GetIncomingDescriptor();
3862
3863 // Restore registers.
3864 const RegList saves = call_descriptor->CalleeSavedRegisters();
3865 if (saves != 0) {
3866 const int returns = frame()->GetReturnSlotCount();
3867 if (returns != 0) {
3868 __ addq(rsp, Immediate(returns * kSystemPointerSize));
3869 }
3870 for (int i = 0; i < Register::kNumRegisters; i++) {
3871 if (!((1 << i) & saves)) continue;
3872 __ popq(Register::from_code(i));
3873 }
3874 }
3875 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3876 if (saves_fp != 0) {
3877 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3878 const int stack_size = saves_fp_count * kQuadWordSize;
3879 // Load the registers from the stack.
3880 int slot_idx = 0;
3881 for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3882 if (!((1 << i) & saves_fp)) continue;
3883 __ movdqu(XMMRegister::from_code(i),
3884 Operand(rsp, kQuadWordSize * slot_idx));
3885 slot_idx++;
3886 }
3887 // Adjust the stack pointer.
3888 __ addq(rsp, Immediate(stack_size));
3889 }
3890
3891 unwinding_info_writer_.MarkBlockWillExit();
3892
3893 // Might need rcx for scratch if pop_size is too big or if there is a variable
3894 // pop count.
3895 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
3896 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
3897 size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
3898 X64OperandConverter g(this, nullptr);
3899 if (call_descriptor->IsCFunctionCall()) {
3900 AssembleDeconstructFrame();
3901 } else if (frame_access_state()->has_frame()) {
3902 if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
3903 // Canonicalize JSFunction return sites for now.
3904 if (return_label_.is_bound()) {
3905 __ jmp(&return_label_);
3906 return;
3907 } else {
3908 __ bind(&return_label_);
3909 AssembleDeconstructFrame();
3910 }
3911 } else {
3912 AssembleDeconstructFrame();
3913 }
3914 }
3915
3916 if (pop->IsImmediate()) {
3917 pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
3918 CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
3919 __ Ret(static_cast<int>(pop_size), rcx);
3920 } else {
3921 Register pop_reg = g.ToRegister(pop);
3922 Register scratch_reg = pop_reg == rcx ? rdx : rcx;
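        // Pop the return address into the scratch register, drop the static
        // and dynamic argument slots from the stack, then jump back to the
        // caller.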
3923 __ popq(scratch_reg);
3924 __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
3925 __ jmp(scratch_reg);
3926 }
3927}
3928
3929void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
3930
3931void CodeGenerator::AssembleMove(InstructionOperand* source,
3932 InstructionOperand* destination) {
3933 X64OperandConverter g(this, nullptr);
3934 // Helper function to write the given constant to the dst register.
3935 auto MoveConstantToRegister = [&](Register dst, Constant src) {
3936 switch (src.type()) {
3937 case Constant::kInt32: {
3938 if (RelocInfo::IsWasmReference(src.rmode())) {
3939 __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
3940 } else {
3941 int32_t value = src.ToInt32();
3942 if (value == 0) {
3943 __ xorl(dst, dst);
3944 } else {
3945 __ movl(dst, Immediate(value));
3946 }
3947 }
3948 break;
3949 }
3950 case Constant::kInt64:
3951 if (RelocInfo::IsWasmReference(src.rmode())) {
3952 __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
3953 } else {
3954 __ Set(dst, src.ToInt64());
3955 }
3956 break;
3957 case Constant::kFloat32:
3958 __ MoveNumber(dst, src.ToFloat32());
3959 break;
3960 case Constant::kFloat64:
3961 __ MoveNumber(dst, src.ToFloat64().value());
3962 break;
3963 case Constant::kExternalReference:
3964 __ Move(dst, src.ToExternalReference());
3965 break;
3966 case Constant::kHeapObject: {
3967 Handle<HeapObject> src_object = src.ToHeapObject();
3968 RootIndex index;
3969 if (IsMaterializableFromRoot(src_object, &index)) {
3970 __ LoadRoot(dst, index);
3971 } else {
3972 __ Move(dst, src_object);
3973 }
3974 break;
3975 }
3976 case Constant::kDelayedStringConstant: {
3977 const StringConstantBase* src_constant = src.ToDelayedStringConstant();
3978 __ MoveStringConstant(dst, src_constant);
3979 break;
3980 }
3981 case Constant::kRpoNumber:
3982 UNREACHABLE(); // TODO(dcarney): load of labels on x64.
3983 break;
3984 }
3985 };
3986 // Helper function to write the given constant to the stack.
3987 auto MoveConstantToSlot = [&](Operand dst, Constant src) {
3988 if (!RelocInfo::IsWasmReference(src.rmode())) {
3989 switch (src.type()) {
3990 case Constant::kInt32:
3991 __ movq(dst, Immediate(src.ToInt32()));
3992 return;
3993 case Constant::kInt64:
3994 __ Set(dst, src.ToInt64());
3995 return;
3996 default:
3997 break;
3998 }
3999 }
4000 MoveConstantToRegister(kScratchRegister, src);
4001 __ movq(dst, kScratchRegister);
4002 };
4003 // Dispatch on the source and destination operand kinds.
4004 switch (MoveType::InferMove(source, destination)) {
4005 case MoveType::kRegisterToRegister:
4006 if (source->IsRegister()) {
4007 __ movq(g.ToRegister(destination), g.ToRegister(source));
4008 } else {
4009 DCHECK(source->IsFPRegister());
4010 __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4011 }
4012 return;
4013 case MoveType::kRegisterToStack: {
4014 Operand dst = g.ToOperand(destination);
4015 if (source->IsRegister()) {
4016 __ movq(dst, g.ToRegister(source));
4017 } else {
4018 DCHECK(source->IsFPRegister());
4019 XMMRegister src = g.ToDoubleRegister(source);
4020 MachineRepresentation rep =
4021 LocationOperand::cast(source)->representation();
4022 if (rep != MachineRepresentation::kSimd128) {
4023 __ Movsd(dst, src);
4024 } else {
4025 __ Movups(dst, src);
4026 }
4027 }
4028 return;
4029 }
4030 case MoveType::kStackToRegister: {
4031 Operand src = g.ToOperand(source);
4032 if (source->IsStackSlot()) {
4033 __ movq(g.ToRegister(destination), src);
4034 } else {
4035 DCHECK(source->IsFPStackSlot());
4036 XMMRegister dst = g.ToDoubleRegister(destination);
4037 MachineRepresentation rep =
4038 LocationOperand::cast(source)->representation();
4039 if (rep != MachineRepresentation::kSimd128) {
4040 __ Movsd(dst, src);
4041 } else {
4042 __ Movups(dst, src);
4043 }
4044 }
4045 return;
4046 }
4047 case MoveType::kStackToStack: {
4048 Operand src = g.ToOperand(source);
4049 Operand dst = g.ToOperand(destination);
4050 if (source->IsStackSlot()) {
4051 // Spill on demand to use a temporary register for memory-to-memory
4052 // moves.
4053 __ movq(kScratchRegister, src);
4054 __ movq(dst, kScratchRegister);
4055 } else {
4056 MachineRepresentation rep =
4057 LocationOperand::cast(source)->representation();
4058 if (rep != MachineRepresentation::kSimd128) {
4059 __ Movsd(kScratchDoubleReg, src);
4060 __ Movsd(dst, kScratchDoubleReg);
4061 } else {
4062 DCHECK(source->IsSimd128StackSlot());
4063 __ Movups(kScratchDoubleReg, src);
4064 __ Movups(dst, kScratchDoubleReg);
4065 }
4066 }
4067 return;
4068 }
4069 case MoveType::kConstantToRegister: {
4070 Constant src = g.ToConstant(source);
4071 if (destination->IsRegister()) {
4072 MoveConstantToRegister(g.ToRegister(destination), src);
4073 } else {
4074 DCHECK(destination->IsFPRegister());
4075 XMMRegister dst = g.ToDoubleRegister(destination);
4076 if (src.type() == Constant::kFloat32) {
4077 // TODO(turbofan): Can we do better here?
4078 __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
4079 } else {
4080 DCHECK_EQ(src.type(), Constant::kFloat64);
4081 __ Move(dst, src.ToFloat64().AsUint64());
4082 }
4083 }
4084 return;
4085 }
4086 case MoveType::kConstantToStack: {
4087 Constant src = g.ToConstant(source);
4088 Operand dst = g.ToOperand(destination);
4089 if (destination->IsStackSlot()) {
4090 MoveConstantToSlot(dst, src);
4091 } else {
4092 DCHECK(destination->IsFPStackSlot());
4093 if (src.type() == Constant::kFloat32) {
4094 __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
4095 } else {
4096 DCHECK_EQ(src.type(), Constant::kFloat64);
4097 __ movq(kScratchRegister, src.ToFloat64().AsUint64());
4098 __ movq(dst, kScratchRegister);
4099 }
4100 }
4101 return;
4102 }
4103 }
4104 UNREACHABLE();
4105}
4106
4107void CodeGenerator::AssembleSwap(InstructionOperand* source,
4108 InstructionOperand* destination) {
4109 X64OperandConverter g(this, nullptr);
4110 // Dispatch on the source and destination operand kinds. Not all
4111 // combinations are possible.
4112 switch (MoveType::InferSwap(source, destination)) {
4113 case MoveType::kRegisterToRegister: {
4114 if (source->IsRegister()) {
4115 Register src = g.ToRegister(source);
4116 Register dst = g.ToRegister(destination);
4117 __ movq(kScratchRegister, src);
4118 __ movq(src, dst);
4119 __ movq(dst, kScratchRegister);
4120 } else {
4121 DCHECK(source->IsFPRegister());
4122 XMMRegister src = g.ToDoubleRegister(source);
4123 XMMRegister dst = g.ToDoubleRegister(destination);
4124 __ Movapd(kScratchDoubleReg, src);
4125 __ Movapd(src, dst);
4126 __ Movapd(dst, kScratchDoubleReg);
4127 }
4128 return;
4129 }
4130 case MoveType::kRegisterToStack: {
4131 if (source->IsRegister()) {
4132 Register src = g.ToRegister(source);
4133 __ pushq(src);
4134 frame_access_state()->IncreaseSPDelta(1);
4135 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4136 kSystemPointerSize);
4137 __ movq(src, g.ToOperand(destination));
4138 frame_access_state()->IncreaseSPDelta(-1);
4139 __ popq(g.ToOperand(destination));
4140 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4141 -kSystemPointerSize);
4142 } else {
4143 DCHECK(source->IsFPRegister());
4144 XMMRegister src = g.ToDoubleRegister(source);
4145 Operand dst = g.ToOperand(destination);
4146 MachineRepresentation rep =
4147 LocationOperand::cast(source)->representation();
4148 if (rep != MachineRepresentation::kSimd128) {
4149 __ Movsd(kScratchDoubleReg, src);
4150 __ Movsd(src, dst);
4151 __ Movsd(dst, kScratchDoubleReg);
4152 } else {
4153 __ Movups(kScratchDoubleReg, src);
4154 __ Movups(src, dst);
4155 __ Movups(dst, kScratchDoubleReg);
4156 }
4157 }
4158 return;
4159 }
4160 case MoveType::kStackToStack: {
4161 Operand src = g.ToOperand(source);
4162 Operand dst = g.ToOperand(destination);
4163 MachineRepresentation rep =
4164 LocationOperand::cast(source)->representation();
4165 if (rep != MachineRepresentation::kSimd128) {
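            // Keep the destination slot in the scratch register, copy the
            // source slot over it via a push/pop, then write the scratch back
            // to the source slot.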
4166 Register tmp = kScratchRegister;
4167 __ movq(tmp, dst);
4168 __ pushq(src); // Then use stack to copy src to destination.
4169 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4170 kSystemPointerSize);
4171 __ popq(dst);
4172 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4173 -kSystemPointerSize);
4174 __ movq(src, tmp);
4175 } else {
4176 // Without AVX, misaligned reads and writes will trap. Move using the
4177 // stack, in two parts.
4178 __ movups(kScratchDoubleReg, dst); // Save dst in scratch register.
4179 __ pushq(src); // Then use stack to copy src to destination.
4180 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4181 kSystemPointerSize);
4182 __ popq(dst);
4183 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4184 -kSystemPointerSize);
4185 __ pushq(g.ToOperand(source, kSystemPointerSize));
4186 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4187 kSystemPointerSize);
4188 __ popq(g.ToOperand(destination, kSystemPointerSize));
4189 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4190 -kSystemPointerSize);
4191 __ movups(src, kScratchDoubleReg);
4192 }
4193 return;
4194 }
4195 default:
4196 UNREACHABLE();
4197 break;
4198 }
4199}
4200
4201void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4202 for (size_t index = 0; index < target_count; ++index) {
4203 __ dq(targets[index]);
4204 }
4205}
4206
4207#undef __
4208
4209} // namespace compiler
4210} // namespace internal
4211} // namespace v8
4212