| 1 | /* |
| 2 | * Copyright (C) 2019 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #include "config.h" |
| 27 | #include "AirAllocateRegistersAndStackAndGenerateCode.h" |
| 28 | |
| 29 | #if ENABLE(B3_JIT) |
| 30 | |
| 31 | #include "AirArgInlines.h" |
| 32 | #include "AirBlockInsertionSet.h" |
| 33 | #include "AirCode.h" |
| 34 | #include "AirHandleCalleeSaves.h" |
| 35 | #include "AirLowerStackArgs.h" |
| 36 | #include "AirStackAllocation.h" |
| 37 | #include "AirTmpMap.h" |
| 38 | #include "CCallHelpers.h" |
| 39 | #include "DisallowMacroScratchRegisterUsage.h" |
| 40 | |
| 41 | namespace JSC { namespace B3 { namespace Air { |
| 42 | |
| 43 | GenerateAndAllocateRegisters::GenerateAndAllocateRegisters(Code& code) |
| 44 | : m_code(code) |
| 45 | , m_map(code) |
| 46 | { } |
| 47 | |
| 48 | ALWAYS_INLINE void GenerateAndAllocateRegisters::checkConsistency() |
| 49 | { |
| 50 | // This isn't exactly the right option for this but adding a new one for just this seems silly. |
| 51 | if (Options::validateGraph() || Options::validateGraphAtEachPhase()) { |
| 52 | m_code.forEachTmp([&] (Tmp tmp) { |
| 53 | Reg reg = m_map[tmp].reg; |
| 54 | if (!reg) |
| 55 | return; |
| 56 | |
| 57 | ASSERT(!m_availableRegs[tmp.bank()].contains(reg)); |
| 58 | ASSERT(m_currentAllocation->at(reg) == tmp); |
| 59 | }); |
| 60 | |
| 61 | for (Reg reg : RegisterSet::allRegisters()) { |
| 62 | if (isDisallowedRegister(reg)) |
| 63 | continue; |
| 64 | |
| 65 | Tmp tmp = m_currentAllocation->at(reg); |
| 66 | if (!tmp) { |
| 67 | ASSERT(m_availableRegs[bankForReg(reg)].contains(reg)); |
| 68 | continue; |
| 69 | } |
| 70 | |
| 71 | ASSERT(!m_availableRegs[tmp.bank()].contains(reg)); |
| 72 | ASSERT(m_map[tmp].reg == reg); |
| 73 | } |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | void GenerateAndAllocateRegisters::buildLiveRanges(UnifiedTmpLiveness& liveness) |
| 78 | { |
| 79 | m_liveRangeEnd = TmpMap<size_t>(m_code, 0); |
| 80 | |
| 81 | m_globalInstIndex = 0; |
| 82 | for (BasicBlock* block : m_code) { |
| 83 | for (Tmp tmp : liveness.liveAtHead(block)) { |
| 84 | if (!tmp.isReg()) |
| 85 | m_liveRangeEnd[tmp] = m_globalInstIndex; |
| 86 | } |
| 87 | for (Inst& inst : *block) { |
| 88 | inst.forEachTmpFast([&] (Tmp tmp) { |
| 89 | if (!tmp.isReg()) |
| 90 | m_liveRangeEnd[tmp] = m_globalInstIndex; |
| 91 | }); |
| 92 | ++m_globalInstIndex; |
| 93 | } |
| 94 | for (Tmp tmp : liveness.liveAtTail(block)) { |
| 95 | if (!tmp.isReg()) |
| 96 | m_liveRangeEnd[tmp] = m_globalInstIndex; |
| 97 | } |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | void GenerateAndAllocateRegisters::insertBlocksForFlushAfterTerminalPatchpoints() |
| 102 | { |
| 103 | BlockInsertionSet blockInsertionSet(m_code); |
| 104 | for (BasicBlock* block : m_code) { |
| 105 | Inst& inst = block->last(); |
| 106 | if (inst.kind.opcode != Patch) |
| 107 | continue; |
| 108 | |
| 109 | HashMap<Tmp, Arg*> needToDef; |
| 110 | |
| 111 | inst.forEachArg([&] (Arg& arg, Arg::Role role, Bank, Width) { |
| 112 | if (!arg.isTmp()) |
| 113 | return; |
| 114 | Tmp tmp = arg.tmp(); |
| 115 | if (Arg::isAnyDef(role) && !tmp.isReg()) |
| 116 | needToDef.add(tmp, &arg); |
| 117 | }); |
| 118 | |
| 119 | if (needToDef.isEmpty()) |
| 120 | continue; |
| 121 | |
| 122 | for (FrequentedBlock& frequentedSuccessor : block->successors()) { |
| 123 | BasicBlock* successor = frequentedSuccessor.block(); |
| 124 | BasicBlock* newBlock = blockInsertionSet.insertBefore(successor, successor->frequency()); |
| 125 | newBlock->appendInst(Inst(Jump, inst.origin)); |
| 126 | newBlock->setSuccessors(successor); |
| 127 | newBlock->addPredecessor(block); |
| 128 | frequentedSuccessor.block() = newBlock; |
| 129 | successor->replacePredecessor(block, newBlock); |
| 130 | |
| 131 | m_blocksAfterTerminalPatchForSpilling.add(newBlock, PatchSpillData { CCallHelpers::Jump(), CCallHelpers::Label(), needToDef }); |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | blockInsertionSet.execute(); |
| 136 | } |
| 137 | |
| 138 | static ALWAYS_INLINE CCallHelpers::Address callFrameAddr(CCallHelpers& jit, intptr_t offsetFromFP) |
| 139 | { |
| 140 | if (isX86()) { |
| 141 | ASSERT(Arg::addr(Air::Tmp(GPRInfo::callFrameRegister), offsetFromFP).isValidForm(Width64)); |
| 142 | return CCallHelpers::Address(GPRInfo::callFrameRegister, offsetFromFP); |
| 143 | } |
| 144 | |
| 145 | ASSERT(pinnedExtendedOffsetAddrRegister()); |
| 146 | auto addr = Arg::addr(Air::Tmp(GPRInfo::callFrameRegister), offsetFromFP); |
| 147 | if (addr.isValidForm(Width64)) |
| 148 | return CCallHelpers::Address(GPRInfo::callFrameRegister, offsetFromFP); |
| 149 | GPRReg reg = *pinnedExtendedOffsetAddrRegister(); |
| 150 | jit.move(CCallHelpers::TrustedImmPtr(offsetFromFP), reg); |
| 151 | jit.add64(GPRInfo::callFrameRegister, reg); |
| 152 | return CCallHelpers::Address(reg); |
| 153 | } |
| 154 | |
| 155 | ALWAYS_INLINE void GenerateAndAllocateRegisters::release(Tmp tmp, Reg reg) |
| 156 | { |
| 157 | ASSERT(reg); |
| 158 | ASSERT(m_currentAllocation->at(reg) == tmp); |
| 159 | m_currentAllocation->at(reg) = Tmp(); |
| 160 | ASSERT(!m_availableRegs[tmp.bank()].contains(reg)); |
| 161 | m_availableRegs[tmp.bank()].set(reg); |
| 162 | ASSERT(m_map[tmp].reg == reg); |
| 163 | m_map[tmp].reg = Reg(); |
| 164 | } |
| 165 | |
| 166 | |
| 167 | ALWAYS_INLINE void GenerateAndAllocateRegisters::flush(Tmp tmp, Reg reg) |
| 168 | { |
| 169 | ASSERT(tmp); |
| 170 | intptr_t offset = m_map[tmp].spillSlot->offsetFromFP(); |
| 171 | if (tmp.isGP()) |
| 172 | m_jit->store64(reg.gpr(), callFrameAddr(*m_jit, offset)); |
| 173 | else |
| 174 | m_jit->storeDouble(reg.fpr(), callFrameAddr(*m_jit, offset)); |
| 175 | } |
| 176 | |
| 177 | ALWAYS_INLINE void GenerateAndAllocateRegisters::spill(Tmp tmp, Reg reg) |
| 178 | { |
| 179 | ASSERT(reg); |
| 180 | ASSERT(m_map[tmp].reg == reg); |
| 181 | flush(tmp, reg); |
| 182 | release(tmp, reg); |
| 183 | } |
| 184 | |
| 185 | ALWAYS_INLINE void GenerateAndAllocateRegisters::alloc(Tmp tmp, Reg reg, bool isDef) |
| 186 | { |
| 187 | if (Tmp occupyingTmp = m_currentAllocation->at(reg)) |
| 188 | spill(occupyingTmp, reg); |
| 189 | else { |
| 190 | ASSERT(!m_currentAllocation->at(reg)); |
| 191 | ASSERT(m_availableRegs[tmp.bank()].get(reg)); |
| 192 | } |
| 193 | |
| 194 | m_map[tmp].reg = reg; |
| 195 | m_availableRegs[tmp.bank()].clear(reg); |
| 196 | m_currentAllocation->at(reg) = tmp; |
| 197 | |
| 198 | if (!isDef) { |
| 199 | intptr_t offset = m_map[tmp].spillSlot->offsetFromFP(); |
| 200 | if (tmp.bank() == GP) |
| 201 | m_jit->load64(callFrameAddr(*m_jit, offset), reg.gpr()); |
| 202 | else |
| 203 | m_jit->loadDouble(callFrameAddr(*m_jit, offset), reg.fpr()); |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | ALWAYS_INLINE void GenerateAndAllocateRegisters::freeDeadTmpsIfNeeded() |
| 208 | { |
| 209 | if (m_didAlreadyFreeDeadSlots) |
| 210 | return; |
| 211 | |
| 212 | m_didAlreadyFreeDeadSlots = true; |
| 213 | for (size_t i = 0; i < m_currentAllocation->size(); ++i) { |
| 214 | Tmp tmp = m_currentAllocation->at(i); |
| 215 | if (!tmp) |
| 216 | continue; |
| 217 | if (tmp.isReg()) |
| 218 | continue; |
| 219 | if (m_liveRangeEnd[tmp] >= m_globalInstIndex) |
| 220 | continue; |
| 221 | |
| 222 | release(tmp, Reg::fromIndex(i)); |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | ALWAYS_INLINE bool GenerateAndAllocateRegisters::assignTmp(Tmp& tmp, Bank bank, bool isDef) |
| 227 | { |
| 228 | ASSERT(!tmp.isReg()); |
| 229 | if (Reg reg = m_map[tmp].reg) { |
| 230 | ASSERT(!m_namedDefdRegs.contains(reg)); |
| 231 | tmp = Tmp(reg); |
| 232 | m_namedUsedRegs.set(reg); |
| 233 | ASSERT(!m_availableRegs[bank].get(reg)); |
| 234 | return true; |
| 235 | } |
| 236 | |
| 237 | if (!m_availableRegs[bank].numberOfSetRegisters()) |
| 238 | freeDeadTmpsIfNeeded(); |
| 239 | |
| 240 | if (m_availableRegs[bank].numberOfSetRegisters()) { |
| 241 | // We first take an available register. |
| 242 | for (Reg reg : m_registers[bank]) { |
| 243 | if (m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg)) |
| 244 | continue; |
| 245 | if (!m_availableRegs[bank].contains(reg)) |
| 246 | continue; |
| 247 | m_namedUsedRegs.set(reg); // At this point, it doesn't matter if we add it to the m_namedUsedRegs or m_namedDefdRegs. We just need to mark that we can't use it again. |
| 248 | alloc(tmp, reg, isDef); |
| 249 | tmp = Tmp(reg); |
| 250 | return true; |
| 251 | } |
| 252 | |
| 253 | RELEASE_ASSERT_NOT_REACHED(); |
| 254 | } |
| 255 | |
| 256 | // Nothing was available, let's make some room. |
| 257 | for (Reg reg : m_registers[bank]) { |
| 258 | if (m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg)) |
| 259 | continue; |
| 260 | |
| 261 | m_namedUsedRegs.set(reg); |
| 262 | |
| 263 | alloc(tmp, reg, isDef); |
| 264 | tmp = Tmp(reg); |
| 265 | return true; |
| 266 | } |
| 267 | |
| 268 | // This can happen if we have a #WarmAnys > #Available registers |
| 269 | return false; |
| 270 | } |
| 271 | |
| 272 | ALWAYS_INLINE bool GenerateAndAllocateRegisters::isDisallowedRegister(Reg reg) |
| 273 | { |
| 274 | return !m_allowedRegisters.get(reg); |
| 275 | } |
| 276 | |
| 277 | void GenerateAndAllocateRegisters::prepareForGeneration() |
| 278 | { |
| 279 | // We pessimistically assume we use all callee saves. |
| 280 | handleCalleeSaves(m_code, RegisterSet::calleeSaveRegisters()); |
| 281 | allocateEscapedStackSlots(m_code); |
| 282 | |
| 283 | // Each Tmp gets its own stack slot. |
| 284 | auto createStackSlot = [&] (const Tmp& tmp) { |
| 285 | TmpData data; |
| 286 | data.spillSlot = m_code.addStackSlot(8, StackSlotKind::Spill); |
| 287 | data.reg = Reg(); |
| 288 | m_map[tmp] = data; |
| 289 | #if !ASSERT_DISABLED |
| 290 | m_allTmps[tmp.bank()].append(tmp); |
| 291 | #endif |
| 292 | }; |
| 293 | |
| 294 | m_code.forEachTmp([&] (Tmp tmp) { |
| 295 | ASSERT(!tmp.isReg()); |
| 296 | createStackSlot(tmp); |
| 297 | }); |
| 298 | |
| 299 | m_allowedRegisters = RegisterSet(); |
| 300 | |
| 301 | forEachBank([&] (Bank bank) { |
| 302 | m_registers[bank] = m_code.regsInPriorityOrder(bank); |
| 303 | |
| 304 | for (Reg reg : m_registers[bank]) { |
| 305 | m_allowedRegisters.set(reg); |
| 306 | createStackSlot(Tmp(reg)); |
| 307 | } |
| 308 | }); |
| 309 | |
| 310 | { |
| 311 | unsigned nextIndex = 0; |
| 312 | for (StackSlot* slot : m_code.stackSlots()) { |
| 313 | if (slot->isLocked()) |
| 314 | continue; |
| 315 | intptr_t offset = -static_cast<intptr_t>(m_code.frameSize()) - static_cast<intptr_t>(nextIndex) * 8 - 8; |
| 316 | ++nextIndex; |
| 317 | slot->setOffsetFromFP(offset); |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | updateFrameSizeBasedOnStackSlots(m_code); |
| 322 | m_code.setStackIsAllocated(true); |
| 323 | |
| 324 | lowerStackArgs(m_code); |
| 325 | |
| 326 | // Verify none of these passes add any tmps. |
| 327 | #if !ASSERT_DISABLED |
| 328 | forEachBank([&] (Bank bank) { |
| 329 | ASSERT(m_allTmps[bank].size() - m_registers[bank].size() == m_code.numTmps(bank)); |
| 330 | }); |
| 331 | #endif |
| 332 | } |
| 333 | |
| 334 | void GenerateAndAllocateRegisters::generate(CCallHelpers& jit) |
| 335 | { |
| 336 | m_jit = &jit; |
| 337 | |
| 338 | TimingScope timingScope("Air::generateAndAllocateRegisters" ); |
| 339 | |
| 340 | insertBlocksForFlushAfterTerminalPatchpoints(); |
| 341 | |
| 342 | DisallowMacroScratchRegisterUsage disallowScratch(*m_jit); |
| 343 | |
| 344 | UnifiedTmpLiveness liveness(m_code); |
| 345 | buildLiveRanges(liveness); |
| 346 | |
| 347 | IndexMap<BasicBlock*, IndexMap<Reg, Tmp>> currentAllocationMap(m_code.size()); |
| 348 | { |
| 349 | IndexMap<Reg, Tmp> defaultCurrentAllocation(Reg::maxIndex() + 1); |
| 350 | for (BasicBlock* block : m_code) |
| 351 | currentAllocationMap[block] = defaultCurrentAllocation; |
| 352 | |
| 353 | // The only things live that are in registers at the root blocks are |
| 354 | // the explicitly named registers that are live. |
| 355 | |
| 356 | for (unsigned i = m_code.numEntrypoints(); i--;) { |
| 357 | BasicBlock* entrypoint = m_code.entrypoint(i).block(); |
| 358 | for (Tmp tmp : liveness.liveAtHead(entrypoint)) { |
| 359 | if (tmp.isReg()) |
| 360 | currentAllocationMap[entrypoint][tmp.reg()] = tmp; |
| 361 | } |
| 362 | } |
| 363 | } |
| 364 | |
| 365 | // And now, we generate code. |
| 366 | GenerationContext context; |
| 367 | context.code = &m_code; |
| 368 | context.blockLabels.resize(m_code.size()); |
| 369 | for (BasicBlock* block : m_code) |
| 370 | context.blockLabels[block] = Box<CCallHelpers::Label>::create(); |
| 371 | IndexMap<BasicBlock*, CCallHelpers::JumpList> blockJumps(m_code.size()); |
| 372 | |
| 373 | auto link = [&] (CCallHelpers::Jump jump, BasicBlock* target) { |
| 374 | if (context.blockLabels[target]->isSet()) { |
| 375 | jump.linkTo(*context.blockLabels[target], m_jit); |
| 376 | return; |
| 377 | } |
| 378 | |
| 379 | blockJumps[target].append(jump); |
| 380 | }; |
| 381 | |
| 382 | Disassembler* disassembler = m_code.disassembler(); |
| 383 | |
| 384 | m_globalInstIndex = 0; |
| 385 | |
| 386 | for (BasicBlock* block : m_code) { |
| 387 | context.currentBlock = block; |
| 388 | context.indexInBlock = UINT_MAX; |
| 389 | blockJumps[block].link(m_jit); |
| 390 | CCallHelpers::Label label = m_jit->label(); |
| 391 | *context.blockLabels[block] = label; |
| 392 | |
| 393 | if (disassembler) |
| 394 | disassembler->startBlock(block, *m_jit); |
| 395 | |
| 396 | if (Optional<unsigned> entrypointIndex = m_code.entrypointIndex(block)) { |
| 397 | ASSERT(m_code.isEntrypoint(block)); |
| 398 | if (disassembler) |
| 399 | disassembler->startEntrypoint(*m_jit); |
| 400 | |
| 401 | m_code.prologueGeneratorForEntrypoint(*entrypointIndex)->run(*m_jit, m_code); |
| 402 | |
| 403 | if (disassembler) |
| 404 | disassembler->endEntrypoint(*m_jit); |
| 405 | } else |
| 406 | ASSERT(!m_code.isEntrypoint(block)); |
| 407 | |
| 408 | auto startLabel = m_jit->labelIgnoringWatchpoints(); |
| 409 | |
| 410 | { |
| 411 | auto iter = m_blocksAfterTerminalPatchForSpilling.find(block); |
| 412 | if (iter != m_blocksAfterTerminalPatchForSpilling.end()) { |
| 413 | auto& data = iter->value; |
| 414 | data.jump = m_jit->jump(); |
| 415 | data.continueLabel = m_jit->label(); |
| 416 | } |
| 417 | } |
| 418 | |
| 419 | forEachBank([&] (Bank bank) { |
| 420 | #if !ASSERT_DISABLED |
| 421 | // By default, everything is spilled at block boundaries. We do this after we process each block |
| 422 | // so we don't have to walk all Tmps, since #Tmps >> #Available regs. Instead, we walk the register file at |
| 423 | // each block boundary and clear entries in this map. |
| 424 | for (Tmp tmp : m_allTmps[bank]) |
| 425 | ASSERT(m_map[tmp].reg == Reg()); |
| 426 | #endif |
| 427 | |
| 428 | RegisterSet availableRegisters; |
| 429 | for (Reg reg : m_registers[bank]) |
| 430 | availableRegisters.set(reg); |
| 431 | m_availableRegs[bank] = WTFMove(availableRegisters); |
| 432 | }); |
| 433 | |
| 434 | IndexMap<Reg, Tmp>& currentAllocation = currentAllocationMap[block]; |
| 435 | m_currentAllocation = ¤tAllocation; |
| 436 | |
| 437 | for (unsigned i = 0; i < currentAllocation.size(); ++i) { |
| 438 | Tmp tmp = currentAllocation[i]; |
| 439 | if (!tmp) |
| 440 | continue; |
| 441 | Reg reg = Reg::fromIndex(i); |
| 442 | m_map[tmp].reg = reg; |
| 443 | m_availableRegs[tmp.bank()].clear(reg); |
| 444 | } |
| 445 | |
| 446 | bool isReplayingSameInst = false; |
| 447 | for (size_t instIndex = 0; instIndex < block->size(); ++instIndex) { |
| 448 | checkConsistency(); |
| 449 | |
| 450 | if (instIndex && !isReplayingSameInst) |
| 451 | startLabel = m_jit->labelIgnoringWatchpoints(); |
| 452 | |
| 453 | context.indexInBlock = instIndex; |
| 454 | |
| 455 | Inst& inst = block->at(instIndex); |
| 456 | |
| 457 | m_didAlreadyFreeDeadSlots = false; |
| 458 | |
| 459 | m_namedUsedRegs = RegisterSet(); |
| 460 | m_namedDefdRegs = RegisterSet(); |
| 461 | |
| 462 | bool needsToGenerate = ([&] () -> bool { |
| 463 | // FIXME: We should consider trying to figure out if we can also elide Mov32s |
| 464 | if (!(inst.kind.opcode == Move || inst.kind.opcode == MoveDouble)) |
| 465 | return true; |
| 466 | |
| 467 | ASSERT(inst.args.size() >= 2); |
| 468 | Arg source = inst.args[0]; |
| 469 | Arg dest = inst.args[1]; |
| 470 | if (!source.isTmp() || !dest.isTmp()) |
| 471 | return true; |
| 472 | |
| 473 | // FIXME: We don't track where the last use of a reg is globally so we don't know where we can elide them. |
| 474 | ASSERT(source.isReg() || m_liveRangeEnd[source.tmp()] >= m_globalInstIndex); |
| 475 | if (source.isReg() || m_liveRangeEnd[source.tmp()] != m_globalInstIndex) |
| 476 | return true; |
| 477 | |
| 478 | // If we are doing a self move at the end of the temps liveness we can trivially elide the move. |
| 479 | if (source == dest) |
| 480 | return false; |
| 481 | |
| 482 | Reg sourceReg = m_map[source.tmp()].reg; |
| 483 | // If the value is not already materialized into a register we may still move it into one so let the normal generation code run. |
| 484 | if (!sourceReg) |
| 485 | return true; |
| 486 | |
| 487 | ASSERT(m_currentAllocation->at(sourceReg) == source.tmp()); |
| 488 | |
| 489 | if (dest.isReg() && dest.reg() != sourceReg) |
| 490 | return true; |
| 491 | |
| 492 | if (Reg oldReg = m_map[dest.tmp()].reg) |
| 493 | release(dest.tmp(), oldReg); |
| 494 | |
| 495 | m_map[dest.tmp()].reg = sourceReg; |
| 496 | m_currentAllocation->at(sourceReg) = dest.tmp(); |
| 497 | m_map[source.tmp()].reg = Reg(); |
| 498 | return false; |
| 499 | })(); |
| 500 | checkConsistency(); |
| 501 | |
| 502 | inst.forEachArg([&] (Arg& arg, Arg::Role role, Bank, Width) { |
| 503 | if (!arg.isTmp()) |
| 504 | return; |
| 505 | |
| 506 | Tmp tmp = arg.tmp(); |
| 507 | if (tmp.isReg() && isDisallowedRegister(tmp.reg())) |
| 508 | return; |
| 509 | |
| 510 | if (tmp.isReg()) { |
| 511 | if (Arg::isAnyUse(role)) |
| 512 | m_namedUsedRegs.set(tmp.reg()); |
| 513 | if (Arg::isAnyDef(role)) |
| 514 | m_namedDefdRegs.set(tmp.reg()); |
| 515 | } |
| 516 | |
| 517 | // We convert any cold uses that are already in the stack to just point to |
| 518 | // the canonical stack location. |
| 519 | if (!Arg::isColdUse(role)) |
| 520 | return; |
| 521 | |
| 522 | if (!inst.admitsStack(arg)) |
| 523 | return; |
| 524 | |
| 525 | auto& entry = m_map[tmp]; |
| 526 | if (!entry.reg) { |
| 527 | // We're a cold use, and our current location is already on the stack. Just use that. |
| 528 | arg = Arg::addr(Tmp(GPRInfo::callFrameRegister), entry.spillSlot->offsetFromFP()); |
| 529 | } |
| 530 | }); |
| 531 | |
| 532 | RegisterSet clobberedRegisters; |
| 533 | { |
| 534 | Inst* nextInst = block->get(instIndex + 1); |
| 535 | if (inst.kind.opcode == Patch || (nextInst && nextInst->kind.opcode == Patch)) { |
| 536 | if (inst.kind.opcode == Patch) |
| 537 | clobberedRegisters.merge(inst.extraClobberedRegs()); |
| 538 | if (nextInst && nextInst->kind.opcode == Patch) |
| 539 | clobberedRegisters.merge(nextInst->extraEarlyClobberedRegs()); |
| 540 | |
| 541 | clobberedRegisters.filter(m_allowedRegisters); |
| 542 | clobberedRegisters.exclude(m_namedDefdRegs); |
| 543 | |
| 544 | m_namedDefdRegs.merge(clobberedRegisters); |
| 545 | } |
| 546 | } |
| 547 | |
| 548 | auto allocNamed = [&] (const RegisterSet& named, bool isDef) { |
| 549 | for (Reg reg : named) { |
| 550 | if (Tmp occupyingTmp = currentAllocation[reg]) { |
| 551 | if (occupyingTmp == Tmp(reg)) |
| 552 | continue; |
| 553 | } |
| 554 | |
| 555 | freeDeadTmpsIfNeeded(); // We don't want to spill a dead tmp. |
| 556 | alloc(Tmp(reg), reg, isDef); |
| 557 | } |
| 558 | }; |
| 559 | |
| 560 | allocNamed(m_namedUsedRegs, false); // Must come before the defd registers since we may use and def the same register. |
| 561 | allocNamed(m_namedDefdRegs, true); |
| 562 | |
| 563 | if (needsToGenerate) { |
| 564 | auto tryAllocate = [&] { |
| 565 | Vector<Tmp*, 8> usesToAlloc; |
| 566 | Vector<Tmp*, 8> defsToAlloc; |
| 567 | |
| 568 | inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) { |
| 569 | if (tmp.isReg()) |
| 570 | return; |
| 571 | |
| 572 | // We treat Use+Def as a use. |
| 573 | if (Arg::isAnyUse(role)) |
| 574 | usesToAlloc.append(&tmp); |
| 575 | else if (Arg::isAnyDef(role)) |
| 576 | defsToAlloc.append(&tmp); |
| 577 | }); |
| 578 | |
| 579 | auto tryAllocateTmps = [&] (auto& vector, bool isDef) { |
| 580 | bool success = true; |
| 581 | for (Tmp* tmp : vector) |
| 582 | success &= assignTmp(*tmp, tmp->bank(), isDef); |
| 583 | return success; |
| 584 | }; |
| 585 | |
| 586 | // We first handle uses, then defs. We want to be able to tell the register allocator |
| 587 | // which tmps need to be loaded from memory into their assigned register. Those such |
| 588 | // tmps are uses. Defs don't need to be reloaded since we're defining them. However, |
| 589 | // some tmps may both be used and defd. So we handle uses first since forEachTmp could |
| 590 | // walk uses/defs in any order. |
| 591 | bool success = true; |
| 592 | success &= tryAllocateTmps(usesToAlloc, false); |
| 593 | success &= tryAllocateTmps(defsToAlloc, true); |
| 594 | return success; |
| 595 | }; |
| 596 | |
| 597 | // We first allocate trying to give any Tmp a register. If that makes us exhaust the |
| 598 | // available registers, we convert anything that accepts stack to be a stack addr |
| 599 | // instead. This can happen for programs Insts that take in many args, but most |
| 600 | // args can just be stack values. |
| 601 | bool success = tryAllocate(); |
| 602 | if (!success) { |
| 603 | RELEASE_ASSERT(!isReplayingSameInst); // We should only need to do the below at most once per inst. |
| 604 | |
| 605 | // We need to capture the register state before we start spilling things |
| 606 | // since we may have multiple arguments that are the same register. |
| 607 | IndexMap<Reg, Tmp> allocationSnapshot = currentAllocation; |
| 608 | |
| 609 | // We rewind this Inst to be in its previous state, however, if any arg admits stack, |
| 610 | // we move to providing that arg in stack form. This will allow us to fully allocate |
| 611 | // this inst when we rewind. |
| 612 | inst.forEachArg([&] (Arg& arg, Arg::Role, Bank, Width) { |
| 613 | if (!arg.isTmp()) |
| 614 | return; |
| 615 | |
| 616 | Tmp tmp = arg.tmp(); |
| 617 | if (tmp.isReg() && isDisallowedRegister(tmp.reg())) |
| 618 | return; |
| 619 | |
| 620 | if (tmp.isReg()) { |
| 621 | Tmp originalTmp = allocationSnapshot[tmp.reg()]; |
| 622 | if (originalTmp.isReg()) { |
| 623 | ASSERT(tmp.reg() == originalTmp.reg()); |
| 624 | // This means this Inst referred to this reg directly. We leave these as is. |
| 625 | return; |
| 626 | } |
| 627 | tmp = originalTmp; |
| 628 | } |
| 629 | |
| 630 | if (!inst.admitsStack(arg)) { |
| 631 | arg = tmp; |
| 632 | return; |
| 633 | } |
| 634 | |
| 635 | auto& entry = m_map[tmp]; |
| 636 | if (Reg reg = entry.reg) |
| 637 | spill(tmp, reg); |
| 638 | |
| 639 | arg = Arg::addr(Tmp(GPRInfo::callFrameRegister), entry.spillSlot->offsetFromFP()); |
| 640 | }); |
| 641 | |
| 642 | --instIndex; |
| 643 | isReplayingSameInst = true; |
| 644 | continue; |
| 645 | } |
| 646 | |
| 647 | isReplayingSameInst = false; |
| 648 | } |
| 649 | |
| 650 | if (m_code.needsUsedRegisters() && inst.kind.opcode == Patch) { |
| 651 | freeDeadTmpsIfNeeded(); |
| 652 | RegisterSet registerSet; |
| 653 | for (size_t i = 0; i < currentAllocation.size(); ++i) { |
| 654 | if (currentAllocation[i]) |
| 655 | registerSet.set(Reg::fromIndex(i)); |
| 656 | } |
| 657 | inst.reportUsedRegisters(registerSet); |
| 658 | } |
| 659 | |
| 660 | if (inst.isTerminal() && block->numSuccessors()) { |
| 661 | // By default, we spill everything between block boundaries. However, we have a small |
| 662 | // heuristic to pass along register state. We should eventually make this better. |
| 663 | // What we do now is if we have a successor with a single predecessor (us), and we |
| 664 | // haven't yet generated code for it, we give it our register state. If all our successors |
| 665 | // can take on our register state, we don't flush at the end of this block. |
| 666 | |
| 667 | bool everySuccessorGetsOurRegisterState = true; |
| 668 | for (unsigned i = 0; i < block->numSuccessors(); ++i) { |
| 669 | BasicBlock* successor = block->successorBlock(i); |
| 670 | if (successor->numPredecessors() == 1 && !context.blockLabels[successor]->isSet()) |
| 671 | currentAllocationMap[successor] = currentAllocation; |
| 672 | else |
| 673 | everySuccessorGetsOurRegisterState = false; |
| 674 | } |
| 675 | if (!everySuccessorGetsOurRegisterState) { |
| 676 | for (Tmp tmp : liveness.liveAtTail(block)) { |
| 677 | if (tmp.isReg() && isDisallowedRegister(tmp.reg())) |
| 678 | continue; |
| 679 | if (Reg reg = m_map[tmp].reg) |
| 680 | flush(tmp, reg); |
| 681 | } |
| 682 | } |
| 683 | } |
| 684 | |
| 685 | if (!inst.isTerminal()) { |
| 686 | CCallHelpers::Jump jump; |
| 687 | if (needsToGenerate) |
| 688 | jump = inst.generate(*m_jit, context); |
| 689 | ASSERT_UNUSED(jump, !jump.isSet()); |
| 690 | |
| 691 | for (Reg reg : clobberedRegisters) { |
| 692 | Tmp tmp(reg); |
| 693 | ASSERT(currentAllocation[reg] == tmp); |
| 694 | m_availableRegs[tmp.bank()].set(reg); |
| 695 | m_currentAllocation->at(reg) = Tmp(); |
| 696 | m_map[tmp].reg = Reg(); |
| 697 | } |
| 698 | } else { |
| 699 | ASSERT(needsToGenerate); |
| 700 | if (inst.kind.opcode == Jump && block->successorBlock(0) == m_code.findNextBlock(block)) |
| 701 | needsToGenerate = false; |
| 702 | |
| 703 | if (isReturn(inst.kind.opcode)) { |
| 704 | needsToGenerate = false; |
| 705 | |
| 706 | // We currently don't represent the full epilogue in Air, so we need to |
| 707 | // have this override. |
| 708 | if (m_code.frameSize()) { |
| 709 | m_jit->emitRestore(m_code.calleeSaveRegisterAtOffsetList()); |
| 710 | m_jit->emitFunctionEpilogue(); |
| 711 | } else |
| 712 | m_jit->emitFunctionEpilogueWithEmptyFrame(); |
| 713 | m_jit->ret(); |
| 714 | } |
| 715 | |
| 716 | if (needsToGenerate) { |
| 717 | CCallHelpers::Jump jump = inst.generate(*m_jit, context); |
| 718 | |
| 719 | // The jump won't be set for patchpoints. It won't be set for Oops because then it won't have |
| 720 | // any successors. |
| 721 | if (jump.isSet()) { |
| 722 | switch (block->numSuccessors()) { |
| 723 | case 1: |
| 724 | link(jump, block->successorBlock(0)); |
| 725 | break; |
| 726 | case 2: |
| 727 | link(jump, block->successorBlock(0)); |
| 728 | if (block->successorBlock(1) != m_code.findNextBlock(block)) |
| 729 | link(m_jit->jump(), block->successorBlock(1)); |
| 730 | break; |
| 731 | default: |
| 732 | RELEASE_ASSERT_NOT_REACHED(); |
| 733 | break; |
| 734 | } |
| 735 | } |
| 736 | } |
| 737 | } |
| 738 | |
| 739 | auto endLabel = m_jit->labelIgnoringWatchpoints(); |
| 740 | if (disassembler) |
| 741 | disassembler->addInst(&inst, startLabel, endLabel); |
| 742 | |
| 743 | ++m_globalInstIndex; |
| 744 | } |
| 745 | |
| 746 | // Registers usually get spilled at block boundaries. We do it this way since we don't |
| 747 | // want to iterate the entire TmpMap, since usually #Tmps >> #Regs. We may not actually spill |
| 748 | // all registers, but at the top of this loop we handle that case by pre-populating register |
| 749 | // state. Here, we just clear this map. After this loop, this map should contain only |
| 750 | // null entries. |
| 751 | for (size_t i = 0; i < currentAllocation.size(); ++i) { |
| 752 | if (Tmp tmp = currentAllocation[i]) |
| 753 | m_map[tmp].reg = Reg(); |
| 754 | } |
| 755 | } |
| 756 | |
| 757 | for (auto& entry : m_blocksAfterTerminalPatchForSpilling) { |
| 758 | entry.value.jump.linkTo(m_jit->label(), m_jit); |
| 759 | const HashMap<Tmp, Arg*>& spills = entry.value.defdTmps; |
| 760 | for (auto& entry : spills) { |
| 761 | Arg* arg = entry.value; |
| 762 | if (!arg->isTmp()) |
| 763 | continue; |
| 764 | Tmp originalTmp = entry.key; |
| 765 | Tmp currentTmp = arg->tmp(); |
| 766 | ASSERT_WITH_MESSAGE(currentTmp.isReg(), "We already did register allocation so we should have assigned this Tmp to a register." ); |
| 767 | flush(originalTmp, currentTmp.reg()); |
| 768 | } |
| 769 | m_jit->jump().linkTo(entry.value.continueLabel, m_jit); |
| 770 | } |
| 771 | |
| 772 | context.currentBlock = nullptr; |
| 773 | context.indexInBlock = UINT_MAX; |
| 774 | |
| 775 | Vector<CCallHelpers::Label> entrypointLabels(m_code.numEntrypoints()); |
| 776 | for (unsigned i = m_code.numEntrypoints(); i--;) |
| 777 | entrypointLabels[i] = *context.blockLabels[m_code.entrypoint(i).block()]; |
| 778 | m_code.setEntrypointLabels(WTFMove(entrypointLabels)); |
| 779 | |
| 780 | if (disassembler) |
| 781 | disassembler->startLatePath(*m_jit); |
| 782 | |
| 783 | // FIXME: Make late paths have Origins: https://bugs.webkit.org/show_bug.cgi?id=153689 |
| 784 | for (auto& latePath : context.latePaths) |
| 785 | latePath->run(*m_jit, context); |
| 786 | |
| 787 | if (disassembler) |
| 788 | disassembler->endLatePath(*m_jit); |
| 789 | } |
| 790 | |
| 791 | } } } // namespace JSC::B3::Air |
| 792 | |
| 793 | #endif // ENABLE(B3_JIT) |
| 794 | |