From 05ec5bd6713b831bd0c3a45c3ec0593773c444d4 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Wed, 19 Mar 2025 12:07:22 +0100 Subject: [PATCH 01/24] add missing conditional branches to emitjmps.h --- src/coreclr/jit/emitjmps.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index 6c9861c91a1b17..21ffc0bddaed41 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -54,10 +54,17 @@ JMP_SMALL(eq , ne , beq ) // EQ JMP_SMALL(ne , eq , bne ) // NE #elif defined(TARGET_RISCV64) -// TODO-RISCV64: adding other condition branches -JMP_SMALL(jmp , jmp , j ) +// jump reverse instruction +JMP_SMALL(jmp , jmp , j ) // always jump JMP_SMALL(eq , ne , beq ) // EQ JMP_SMALL(ne , eq , bne ) // NE +JMP_SMALL(lt , ge , blt ) // LT +JMP_SMALL(ge , lt , bge ) // GE +JMP_SMALL(ltu , geu , bltu ) // LTU +JMP_SMALL(geu , ltu , bgeu ) // GEU +// C extension +JMP_SMALL(eqz , nez , beqz ) // EQZ +JMP_SMALL(nez , eqz , bnez ) // NEZ #else #error Unsupported or unset target architecture From 19bf289a90ba303599793a24e371c5305757aa6a Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Wed, 19 Mar 2025 12:13:02 +0100 Subject: [PATCH 02/24] add jmp instructions checks and reversing methods --- src/coreclr/jit/emitriscv64.cpp | 48 ++++++++++++++++++++++++++++----- src/coreclr/jit/emitriscv64.h | 27 +++++++++++++++++++ 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index a9360794bc4f65..6b0e5367ddd422 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -50,6 +50,26 @@ const emitJumpKind emitReverseJumpKinds[] = { return emitJumpKindInstructions[jumpKind]; } +/***************************************************************************** + * Look up the (conditional) jump kind for an instruction. 
+ */ + +/*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) +{ + assert(emitter::isCondJumpInstruction(ins)); + + for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++) + { + if (ins == emitJumpKindInstructions[i]) + { + emitJumpKind ret = (emitJumpKind)i; + assert(EJ_NONE < ret && ret < EJ_COUNT); + return ret; + } + } + unreached(); +} + /***************************************************************************** * Reverse the conditional jump */ @@ -60,6 +80,23 @@ const emitJumpKind emitReverseJumpKinds[] = { return emitReverseJumpKinds[jumpKind]; } +/***************************************************************************** + * Reverse the conditional jump instruction + */ + +/*static*/ instruction emitter::emitReverseJumpIns(instruction ins) +{ + assert(emitter::isCondJumpInstruction(ins)); + + return emitJumpKindToIns( + emitReverseJumpKind( + emitInsToJumpKind( + ins + ) + ) + ); +} + /***************************************************************************** * * Return the allocated size (in bytes) of the given instruction descriptor. @@ -1176,15 +1213,14 @@ void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNu void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) { - assert(dst != nullptr); - // - // INS_OPTS_J: placeholders. 1-ins: if the dst outof-range will be replaced by INS_OPTS_JALR. 
- // jal/j/jalr/bnez/beqz/beq/bne/blt/bge/bltu/bgeu dst + assert(isCondJumpInstruction(ins) || isJumpInstruction(ins)); - assert(dst->HasFlag(BBF_HAS_LABEL)); + if (dst != nullptr) + { + assert(dst->HasFlag(BBF_HAS_LABEL)); + } instrDescJmp* id = emitNewInstrJmp(); - assert((INS_jal <= ins) && (ins <= INS_bgeu)); id->idIns(ins); id->idReg1((regNumber)(instrCount & 0x1f)); id->idReg2((regNumber)((instrCount >> 5) & 0x1f)); diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h index bcc87538f3b18d..0670dbba0c840e 100644 --- a/src/coreclr/jit/emitriscv64.h +++ b/src/coreclr/jit/emitriscv64.h @@ -265,6 +265,31 @@ inline static bool isFloatReg(regNumber reg) return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST); } +inline static bool isCondJumpInstruction(instruction ins) +{ + switch (ins) + { + case INS_beq: + case INS_bne: + case INS_blt: + case INS_bge: + case INS_bltu: + case INS_bgeu: + // C extension + case INS_beqz: + case INS_bnez: + return true; + default: + return false; + } + unreached(); +} + +inline static bool isJumpInstruction(instruction ins) +{ + return ins == INS_j || ins == INS_jal || ins == INS_jalr; +} + /************************************************************************/ /* Output target-independent instructions */ /************************************************************************/ @@ -276,6 +301,8 @@ void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0); /************************************************************************/ public: +inline static instruction emitReverseJumpIns(instruction ins); + void emitIns(instruction ins); void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); From 72d9f01984ae9b655b81cc568a0df5712f791308 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Wed, 19 Mar 2025 14:29:19 +0100 Subject: [PATCH 03/24] simplify instruction encoding for SBJ types --- src/coreclr/jit/emitriscv64.cpp | 56 +++++++++++++++------------------ 1 file changed, 
26 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 6b0e5367ddd422..89d3c29fe753b1 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -2338,9 +2338,6 @@ static inline void assertCodeLength(size_t code, uint8_t size) /*static*/ emitter::code_t emitter::insEncodeSTypeInstr( unsigned opcode, unsigned funct3, unsigned rs1, unsigned rs2, unsigned imm12) { - static constexpr unsigned kLoMask = 0x1f; // 0b00011111 - static constexpr unsigned kHiMask = 0x7f; // 0b01111111 - assertCodeLength(opcode, 7); assertCodeLength(funct3, 3); assertCodeLength(rs1, 5); @@ -2348,10 +2345,15 @@ static inline void assertCodeLength(size_t code, uint8_t size) // This assert may be triggered by the untrimmed signed integers. Please refer to the TrimSigned helpers assertCodeLength(imm12, 12); - unsigned imm12Lo = imm12 & kLoMask; - unsigned imm12Hi = (imm12 >> 5) & kHiMask; + code_t code = opcode; + + code |= (imm12 & 0xFE0) << 20; // imm[11:5] -> 31:25 + code |= rs2 << 20; // rs2 -> 24:20 + code |= rs1 << 15; // rs1 -> 19:15 + code |= funct3 << 12; // funct3 -> 14:12 + code |= (imm12 & 0x1F) << 7; // imm[4:0] -> 11:7 - return opcode | (imm12Lo << 7) | (funct3 << 12) | (rs1 << 15) | (rs2 << 20) | (imm12Hi << 25); + return code; } /***************************************************************************** @@ -2389,26 +2391,24 @@ static inline void assertCodeLength(size_t code, uint8_t size) /*static*/ emitter::code_t emitter::insEncodeBTypeInstr( unsigned opcode, unsigned funct3, unsigned rs1, unsigned rs2, unsigned imm13) { - static constexpr unsigned kLoSectionMask = 0x0f; // 0b00001111 - static constexpr unsigned kHiSectionMask = 0x3f; // 0b00111111 - static constexpr unsigned kBitMask = 0x01; - assertCodeLength(opcode, 7); assertCodeLength(funct3, 3); assertCodeLength(rs1, 5); assertCodeLength(rs2, 5); // This assert may be triggered by the untrimmed signed integers. 
Please refer to the TrimSigned helpers assertCodeLength(imm13, 13); - assert((imm13 & 0x01) == 0); - unsigned imm12 = imm13 >> 1; - unsigned imm12LoSection = imm12 & kLoSectionMask; - unsigned imm12LoBit = (imm12 >> 10) & kBitMask; - unsigned imm12HiSection = (imm12 >> 4) & kHiSectionMask; - unsigned imm12HiBit = (imm12 >> 11) & kBitMask; + code_t code = opcode; + + code |= (imm13 & 0x1000) << 19; // imm[12] -> 31 + code |= (imm13 & 0x7E0) << 20; // imm[10:5] -> 30:25 + code |= (imm13 & 0x1E) << 7; // imm[4:1] -> 11:8 + code |= (imm13 & 0x800) >> 4; // imm[11] -> 7 + code |= funct3 << 12; // funct3 -> 14:12 + code |= rs1 << 15; // rs1 -> 19:15 + code |= rs2 << 20; // rs2 -> 24:20 - return opcode | (imm12LoBit << 7) | (imm12LoSection << 8) | (funct3 << 12) | (rs1 << 15) | (rs2 << 20) | - (imm12HiSection << 25) | (imm12HiBit << 31); + return code; } /***************************************************************************** @@ -2424,24 +2424,20 @@ static inline void assertCodeLength(size_t code, uint8_t size) /*static*/ emitter::code_t emitter::insEncodeJTypeInstr(unsigned opcode, unsigned rd, unsigned imm21) { - static constexpr unsigned kHiSectionMask = 0x3ff; // 0b1111111111 - static constexpr unsigned kLoSectionMask = 0xff; // 0b11111111 - static constexpr unsigned kBitMask = 0x01; - assertCodeLength(opcode, 7); assertCodeLength(rd, 5); // This assert may be triggered by the untrimmed signed integers. 
Please refer to the TrimSigned helpers assertCodeLength(imm21, 21); - assert((imm21 & 0x01) == 0); - unsigned imm20 = imm21 >> 1; - unsigned imm20HiSection = imm20 & kHiSectionMask; - unsigned imm20HiBit = (imm20 >> 19) & kBitMask; - unsigned imm20LoSection = (imm20 >> 11) & kLoSectionMask; - unsigned imm20LoBit = (imm20 >> 10) & kBitMask; + code_t code = opcode; - return opcode | (rd << 7) | (imm20LoSection << 12) | (imm20LoBit << 20) | (imm20HiSection << 21) | - (imm20HiBit << 31); + code |= (imm21 & 0x100000) << 11; // imm[20] -> 31 + code |= (imm21 & 0x7FE) << 20; // imm[10:1] -> 30:21 + code |= (imm21 & 0x800) << 9; // imm[11] -> 20 + code |= imm21 & 0xFF000; // imm[19:12] -> 19:12 + code |= rd << 7; + + return code; } static constexpr unsigned kInstructionOpcodeMask = 0x7f; From af61908ea2eea77345e3a839de83f6708b656089 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Thu, 20 Mar 2025 14:27:49 +0100 Subject: [PATCH 04/24] change emitIns_J interface --- src/coreclr/jit/codegenriscv64.cpp | 73 +++++++++++++++++++----------- src/coreclr/jit/emitriscv64.cpp | 44 ++++++++++++------ src/coreclr/jit/emitriscv64.h | 2 +- 3 files changed, 78 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index f20d7cf2395186..8acc8a06bf25ba 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -2453,7 +2453,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) e->emitIns_R_R_R(is4 ? INS_lr_w : INS_lr_d, size, target, loc, REG_R0); // load original value e->emitIns_J_cond_la(INS_bne, fail, target, comparand); // fail if doesn’t match e->emitIns_R_R_R(is4 ? 
INS_sc_w : INS_sc_d, size, storeErr, loc, val); // try to update - e->emitIns_J(INS_bnez, retry, storeErr); // retry if update failed + e->emitIns_J(INS_bnez, retry, 0, storeErr); // retry if update failed genDefineTempLabel(fail); gcInfo.gcMarkRegSetNpt(locOp->gtGetRegMask()); @@ -3510,12 +3510,14 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) emitter* emit = GetEmitter(); instruction ins = INS_invalid; - int regs = 0; GenCondition cond = tree->gtCondition; - emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); - regNumber regOp1 = op1->GetRegNum(); + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + regNumber regOp1 = op1->GetRegNum(); + regNumber regOp2 = REG_ZERO; + regNumber tmpRegOp1 = REG_ZERO; + regNumber tmpRegOp2 = REG_ZERO; if (op2->isContainedIntOrIImmed()) { @@ -3527,7 +3529,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) { case EA_4BYTE: { - regNumber tmpRegOp1 = rsGetRsvdReg(); + tmpRegOp1 = rsGetRsvdReg(); assert(regOp1 != tmpRegOp1); if (cond.IsUnsigned()) { @@ -3569,13 +3571,13 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) instGen_Set_Reg_To_Imm(attr, REG_RA, imm, INS_FLAGS_DONT_CARE DEBUGARG(con->gtTargetHandle) DEBUGARG(con->gtFlags)); regSet.verifyRegUsed(REG_RA); - regs = (int)REG_RA << 5; + regOp1 = REG_RA; } else { if (cmpSize == EA_4BYTE) { - regNumber tmpRegOp1 = rsGetRsvdReg(); + tmpRegOp1 = rsGetRsvdReg(); assert(regOp1 != tmpRegOp1); if (cond.IsUnsigned()) { @@ -3593,31 +3595,45 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) switch (cond.GetCode()) { case GenCondition::EQ: - regs |= ((int)regOp1); ins = INS_beq; break; case GenCondition::NE: - regs |= ((int)regOp1); ins = INS_bne; break; case GenCondition::UGE: case GenCondition::SGE: - regs |= ((int)regOp1); ins = cond.IsUnsigned() ? INS_bgeu : INS_bge; break; case GenCondition::UGT: case GenCondition::SGT: - regs = imm ? 
((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); + if (imm) + { + regOp2 = regOp1; + regOp1 = REG_RA; + } + else + { + regOp2 = regOp1; + regOp1 = REG_ZERO; + } ins = cond.IsUnsigned() ? INS_bltu : INS_blt; break; case GenCondition::ULT: case GenCondition::SLT: - regs |= ((int)regOp1); ins = cond.IsUnsigned() ? INS_bltu : INS_blt; break; case GenCondition::ULE: case GenCondition::SLE: - regs = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); + if (imm) + { + regOp2 = regOp1; + regOp1 = REG_RA; + } + else + { + regOp2 = regOp1; + regOp1 = REG_ZERO; + } ins = cond.IsUnsigned() ? INS_bgeu : INS_bge; break; default: @@ -3627,11 +3643,11 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) } else { - regNumber regOp2 = op2->GetRegNum(); + regOp2 = op2->GetRegNum(); if (cmpSize == EA_4BYTE) { - regNumber tmpRegOp1 = REG_RA; - regNumber tmpRegOp2 = rsGetRsvdReg(); + tmpRegOp1 = REG_RA; + tmpRegOp2 = rsGetRsvdReg(); assert(regOp1 != tmpRegOp2); assert(regOp2 != tmpRegOp2); @@ -3655,31 +3671,37 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) switch (cond.GetCode()) { case GenCondition::EQ: - regs = (((int)regOp1) << 5) | (int)regOp2; + tmpRegOp1 = regOp1; + regOp1 = regOp2; + regOp2 = tmpRegOp1; ins = INS_beq; break; case GenCondition::NE: - regs = (((int)regOp1) << 5) | (int)regOp2; + tmpRegOp1 = regOp1; + regOp1 = regOp2; + regOp2 = tmpRegOp1; ins = INS_bne; break; case GenCondition::UGE: case GenCondition::SGE: - regs = ((int)regOp1 | ((int)regOp2 << 5)); ins = cond.IsUnsigned() ? INS_bgeu : INS_bge; break; case GenCondition::UGT: case GenCondition::SGT: - regs = (((int)regOp1) << 5) | (int)regOp2; + tmpRegOp1 = regOp1; + regOp1 = regOp2; + regOp2 = tmpRegOp1; ins = cond.IsUnsigned() ? INS_bltu : INS_blt; break; case GenCondition::ULT: case GenCondition::SLT: - regs = ((int)regOp1 | ((int)regOp2 << 5)); ins = cond.IsUnsigned() ? 
INS_bltu : INS_blt; break; case GenCondition::ULE: case GenCondition::SLE: - regs = (((int)regOp1) << 5) | (int)regOp2; + tmpRegOp1 = regOp1; + regOp1 = regOp2; + regOp2 = tmpRegOp1; ins = cond.IsUnsigned() ? INS_bgeu : INS_bge; break; default: @@ -3688,9 +3710,8 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) } } assert(ins != INS_invalid); - assert(regs != 0); - emit->emitIns_J(ins, compiler->compCurBB->GetTrueTarget(), regs); // 5-bits; + emit->emitIns_J(ins, compiler->compCurBB->GetTrueTarget(), 0, regOp1, regOp2); // 5-bits; // If we cannot fall into the false target, emit a jump to it BasicBlock* falseTarget = compiler->compCurBB->GetFalseTarget(); @@ -5847,7 +5868,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // tempReg = tempReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, tempReg, -8); // if (tempReg != dstReg) goto loop; - GetEmitter()->emitIns_J(INS_bne, loop, (int)tempReg | ((int)dstReg << 5)); + GetEmitter()->emitIns_J(INS_bne, loop, 0, tempReg, dstReg); GetEmitter()->emitEnableGC(); gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); @@ -6700,7 +6721,7 @@ void CodeGen::genJumpToThrowHlpBlk_la( noway_assert(excpRaisingBlock != nullptr); // Jump to the exception-throwing block on error. 
- emit->emitIns_J(ins, excpRaisingBlock, (int)reg1 | ((int)reg2 << 5)); // 5-bits; + emit->emitIns_J(ins, excpRaisingBlock, 0, reg1, reg2); // 5-bits; } else { diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 89d3c29fe753b1..a0a64d2eb3fb73 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -777,7 +777,7 @@ void emitter::emitIns_R_R_I( code |= reg2 << 15; // rs1 code |= (((imm >> 5) & 0x7f) << 25) | ((imm & 0x1f) << 7); // imm } - else if (INS_beq <= ins && INS_bgeu >= ins) + else if (isCondJumpInstruction(ins)) { assert(isGeneralRegister(reg1)); assert(isGeneralRegister(reg2)); @@ -1206,12 +1206,7 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu appendToCurIG(id); } -void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) -{ - NYI_RISCV64("emitIns_J_R-----unimplemented/unused on RISCV64 yet----"); -} - -void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) +void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount, regNumber reg1, regNumber reg2) { assert(isCondJumpInstruction(ins) || isJumpInstruction(ins)); @@ -1220,14 +1215,35 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) assert(dst->HasFlag(BBF_HAS_LABEL)); } + if (!isCondJumpInstruction(ins) && ins != INS_jalr) + { + + } + else if (ins == INS_jalr) + { + + } + else + { + + } + instrDescJmp* id = emitNewInstrJmp(); id->idIns(ins); - id->idReg1((regNumber)(instrCount & 0x1f)); - id->idReg2((regNumber)((instrCount >> 5) & 0x1f)); + id->idReg1(reg1); + id->idReg2(reg2); id->idInsOpt(INS_OPTS_J); emitCounts_INS_OPTS_J++; - id->idAddr()->iiaBBlabel = dst; + + if (dst != nullptr) + { + id->idAddr()->iiaBBlabel = dst; + } + else + { + id->idAddr()->iiaSetInstrCount(instrCount); + } if (emitComp->opts.compReloc) { @@ -1236,18 +1252,18 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) 
id->idjShort = false; - // TODO-RISCV64: maybe deleted this. - id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); + // long jumps only possible by using auipc+jalr + id->idjKeepLong = false; #ifdef DEBUG if (emitComp->opts.compLongAddress) // Force long branches id->idjKeepLong = 1; #endif // DEBUG - /* Record the jump's IG and offset within it */ + // Record the jump's IG and offset within it id->idjIG = emitCurIG; id->idjOffs = emitCurIGsize; - /* Append this jump to this IG's jump list */ + // Append this jump to this IG's jump list id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h index 0670dbba0c840e..483eb6fe6339a7 100644 --- a/src/coreclr/jit/emitriscv64.h +++ b/src/coreclr/jit/emitriscv64.h @@ -294,7 +294,7 @@ inline static bool isJumpInstruction(instruction ins) /* Output target-independent instructions */ /************************************************************************/ -void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0); +void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0, regNumber reg1 = REG_ZERO, regNumber reg2 = REG_ZERO); /************************************************************************/ /* The public entry points to output instructions */ From a458af2e8c432fe1c394aa6b26229fa933acaa59 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Tue, 25 Mar 2025 13:37:21 +0100 Subject: [PATCH 05/24] restore regs manipulation in emitIns_J --- src/coreclr/jit/codegenriscv64.cpp | 73 +++++++++++------------------- src/coreclr/jit/emitriscv64.cpp | 24 ++-------- src/coreclr/jit/emitriscv64.h | 2 +- 3 files changed, 31 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 8acc8a06bf25ba..10e08c8af8273c 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -2453,7 +2453,7 @@ void 
CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) e->emitIns_R_R_R(is4 ? INS_lr_w : INS_lr_d, size, target, loc, REG_R0); // load original value e->emitIns_J_cond_la(INS_bne, fail, target, comparand); // fail if doesn’t match e->emitIns_R_R_R(is4 ? INS_sc_w : INS_sc_d, size, storeErr, loc, val); // try to update - e->emitIns_J(INS_bnez, retry, 0, storeErr); // retry if update failed + e->emitIns_J(INS_bnez, retry, storeErr); // retry if update failed genDefineTempLabel(fail); gcInfo.gcMarkRegSetNpt(locOp->gtGetRegMask()); @@ -3510,14 +3510,12 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) emitter* emit = GetEmitter(); instruction ins = INS_invalid; + int regs = 0; GenCondition cond = tree->gtCondition; - emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); - regNumber regOp1 = op1->GetRegNum(); - regNumber regOp2 = REG_ZERO; - regNumber tmpRegOp1 = REG_ZERO; - regNumber tmpRegOp2 = REG_ZERO; + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + regNumber regOp1 = op1->GetRegNum(); if (op2->isContainedIntOrIImmed()) { @@ -3529,7 +3527,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) { case EA_4BYTE: { - tmpRegOp1 = rsGetRsvdReg(); + regNumber tmpRegOp1 = rsGetRsvdReg(); assert(regOp1 != tmpRegOp1); if (cond.IsUnsigned()) { @@ -3571,13 +3569,13 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) instGen_Set_Reg_To_Imm(attr, REG_RA, imm, INS_FLAGS_DONT_CARE DEBUGARG(con->gtTargetHandle) DEBUGARG(con->gtFlags)); regSet.verifyRegUsed(REG_RA); - regOp1 = REG_RA; + regs = (int)REG_RA << 5; } else { if (cmpSize == EA_4BYTE) { - tmpRegOp1 = rsGetRsvdReg(); + regNumber tmpRegOp1 = rsGetRsvdReg(); assert(regOp1 != tmpRegOp1); if (cond.IsUnsigned()) { @@ -3595,45 +3593,31 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) switch (cond.GetCode()) { case GenCondition::EQ: + regs |= ((int)regOp1); ins = INS_beq; break; case GenCondition::NE: + regs |= ((int)regOp1); ins = INS_bne; break; case GenCondition::UGE: case GenCondition::SGE: + regs |= 
((int)regOp1); ins = cond.IsUnsigned() ? INS_bgeu : INS_bge; break; case GenCondition::UGT: case GenCondition::SGT: - if (imm) - { - regOp2 = regOp1; - regOp1 = REG_RA; - } - else - { - regOp2 = regOp1; - regOp1 = REG_ZERO; - } + regs = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); ins = cond.IsUnsigned() ? INS_bltu : INS_blt; break; case GenCondition::ULT: case GenCondition::SLT: + regs |= ((int)regOp1); ins = cond.IsUnsigned() ? INS_bltu : INS_blt; break; case GenCondition::ULE: case GenCondition::SLE: - if (imm) - { - regOp2 = regOp1; - regOp1 = REG_RA; - } - else - { - regOp2 = regOp1; - regOp1 = REG_ZERO; - } + regs = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); ins = cond.IsUnsigned() ? INS_bgeu : INS_bge; break; default: @@ -3643,11 +3627,11 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) } else { - regOp2 = op2->GetRegNum(); + regNumber regOp2 = op2->GetRegNum(); if (cmpSize == EA_4BYTE) { - tmpRegOp1 = REG_RA; - tmpRegOp2 = rsGetRsvdReg(); + regNumber tmpRegOp1 = REG_RA; + regNumber tmpRegOp2 = rsGetRsvdReg(); assert(regOp1 != tmpRegOp2); assert(regOp2 != tmpRegOp2); @@ -3671,37 +3655,31 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) switch (cond.GetCode()) { case GenCondition::EQ: - tmpRegOp1 = regOp1; - regOp1 = regOp2; - regOp2 = tmpRegOp1; + regs = (((int)regOp1) << 5) | (int)regOp2; ins = INS_beq; break; case GenCondition::NE: - tmpRegOp1 = regOp1; - regOp1 = regOp2; - regOp2 = tmpRegOp1; + regs = (((int)regOp1) << 5) | (int)regOp2; ins = INS_bne; break; case GenCondition::UGE: case GenCondition::SGE: + regs = ((int)regOp1 | ((int)regOp2 << 5)); ins = cond.IsUnsigned() ? INS_bgeu : INS_bge; break; case GenCondition::UGT: case GenCondition::SGT: - tmpRegOp1 = regOp1; - regOp1 = regOp2; - regOp2 = tmpRegOp1; + regs = (((int)regOp1) << 5) | (int)regOp2; ins = cond.IsUnsigned() ? 
INS_bltu : INS_blt; break; case GenCondition::ULT: case GenCondition::SLT: + regs = ((int)regOp1 | ((int)regOp2 << 5)); ins = cond.IsUnsigned() ? INS_bltu : INS_blt; break; case GenCondition::ULE: case GenCondition::SLE: - tmpRegOp1 = regOp1; - regOp1 = regOp2; - regOp2 = tmpRegOp1; + regs = (((int)regOp1) << 5) | (int)regOp2; ins = cond.IsUnsigned() ? INS_bgeu : INS_bge; break; default: @@ -3710,8 +3688,9 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) } } assert(ins != INS_invalid); + assert(regs != 0); - emit->emitIns_J(ins, compiler->compCurBB->GetTrueTarget(), 0, regOp1, regOp2); // 5-bits; + emit->emitIns_J(ins, compiler->compCurBB->GetTrueTarget(), regs); // 5-bits; // If we cannot fall into the false target, emit a jump to it BasicBlock* falseTarget = compiler->compCurBB->GetFalseTarget(); @@ -5868,7 +5847,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // tempReg = tempReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, tempReg, -8); // if (tempReg != dstReg) goto loop; - GetEmitter()->emitIns_J(INS_bne, loop, 0, tempReg, dstReg); + GetEmitter()->emitIns_J(INS_bne, loop, tempReg | (dstReg << 5)); GetEmitter()->emitEnableGC(); gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); @@ -6721,7 +6700,7 @@ void CodeGen::genJumpToThrowHlpBlk_la( noway_assert(excpRaisingBlock != nullptr); // Jump to the exception-throwing block on error. 
- emit->emitIns_J(ins, excpRaisingBlock, 0, reg1, reg2); // 5-bits; + emit->emitIns_J(ins, excpRaisingBlock, reg1 | (reg2 << 5)); // 5-bits; } else { diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index a0a64d2eb3fb73..e1e8abf3a68d90 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -789,7 +789,6 @@ void emitter::emitIns_R_R_I( code |= ((imm >> 1) & 0xf) << 8; code |= ((imm >> 5) & 0x3f) << 25; code |= ((imm >> 12) & 0x1) << 31; - // TODO-RISCV64: Move jump logic to emitIns_J id->idAddr()->iiaSetInstrCount(static_cast(imm / sizeof(code_t))); } else if (ins == INS_csrrs || ins == INS_csrrw || ins == INS_csrrc) @@ -1206,7 +1205,7 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu appendToCurIG(id); } -void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount, regNumber reg1, regNumber reg2) +void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) { assert(isCondJumpInstruction(ins) || isJumpInstruction(ins)); @@ -1215,23 +1214,10 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount, regNum assert(dst->HasFlag(BBF_HAS_LABEL)); } - if (!isCondJumpInstruction(ins) && ins != INS_jalr) - { - - } - else if (ins == INS_jalr) - { - - } - else - { - - } - instrDescJmp* id = emitNewInstrJmp(); id->idIns(ins); - id->idReg1(reg1); - id->idReg2(reg2); + id->idReg1((regNumber)(instrCount & 0x1f)); + id->idReg2((regNumber)((instrCount >> 5) & 0x1f)); id->idInsOpt(INS_OPTS_J); emitCounts_INS_OPTS_J++; @@ -1252,8 +1238,6 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount, regNum id->idjShort = false; - // long jumps only possible by using auipc+jalr - id->idjKeepLong = false; #ifdef DEBUG if (emitComp->opts.compLongAddress) // Force long branches id->idjKeepLong = 1; @@ -2817,7 +2801,7 @@ unsigned emitter::emitOutput_BTypeInstr(BYTE* dst, instruction ins, regNumber rs unsigned 
emitter::emitOutput_BTypeInstr_InvertComparation( BYTE* dst, instruction ins, regNumber rs1, regNumber rs2, unsigned imm13) const { - unsigned insCode = emitInsCode(ins) ^ 0x1000; + unsigned insCode = emitInsCode(emitReverseJumpIns(ins)); #ifdef DEBUG emitOutput_BTypeInstr_SanityCheck(ins, rs1, rs2); #endif // DEBUG diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h index 483eb6fe6339a7..0670dbba0c840e 100644 --- a/src/coreclr/jit/emitriscv64.h +++ b/src/coreclr/jit/emitriscv64.h @@ -294,7 +294,7 @@ inline static bool isJumpInstruction(instruction ins) /* Output target-independent instructions */ /************************************************************************/ -void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0, regNumber reg1 = REG_ZERO, regNumber reg2 = REG_ZERO); +void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0); /************************************************************************/ /* The public entry points to output instructions */ From a1099be335eef5bcc96860fac9d7101e84443363 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Tue, 25 Mar 2025 15:02:43 +0100 Subject: [PATCH 06/24] simplify generated loop in genZeroInitFrameUsingBlockInit --- src/coreclr/jit/codegenriscv64.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 10e08c8af8273c..dafdf0eceb3584 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -902,15 +902,15 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // argument reg instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); - // TODO-RISCV64: maybe optimize further + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + 
padding); GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rCnt, rCnt, -1); - // bne rCnt, zero, -4 * 4 - ssize_t imm = -16; GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); - GetEmitter()->emitIns_R_R_I(INS_bne, EA_PTRSIZE, rCnt, REG_R0, imm); + GetEmitter()->emitIns_J(INS_bnez, loop, rCnt); uCntBytes %= REGSIZE_BYTES * 2; } From e5b6bdd25126f44c50544e1d42cbdd131b4beffd Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Tue, 25 Mar 2025 16:06:25 +0100 Subject: [PATCH 07/24] optimize generated loop in genZeroInitFrameUsingBlockInit --- src/coreclr/jit/codegenriscv64.cpp | 74 ++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index dafdf0eceb3584..b84d0acb1e90a0 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -886,7 +886,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // When it is 10 or greater, we will emit a loop containing a sd instruction. // In both of these cases the sd instruction will write two zeros to memory // and we will use a single str instruction at the end whenever we have an odd count. - if (uCntSlots >= 10) + if (uCntSlots >= 12) useLoop = true; if (useLoop) @@ -900,19 +900,75 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu noway_assert(uCntSlots >= 2); assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming // argument reg - instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); + + // + // ; base isa (+C extension) + // addi rCnt, zero, uCntSlots / 2 + // Loop: + // sd zero, (8 + padding)rAddr ; store first elem of block + // sd zero, (0 + padding)rAddr ; store second elem of block + // addi rCnt, rCnt, -1 + // addi rAddr, rAddr, 2 * REGSIZE_BYTES ; go to the next block + // bnez rCnt, Loop ; Anything left? 
+ // + // TODO-RISCV64: maybe use V or cmo extension? + // + // ; V-extension: + // + // vsetvli t0, zero, e64, m1, ta, ma ; SEW=64b LMUL=1 + // vmv.v.i v0, 0 ; fill v0 with zeros + // Loop: + // vsd v0, (padding)rAddr ; store 2 * 8 = 16 bytes of zero + // addi rAddr, rAddr, 2 * REGSIZE_BYTES ; advance by block size (16 bytes) + // addi rCnt, rCnt, -1 + // bnez rCnt, Loop + // + // ; cmo-extension: + // + // addi rCnt, zero, uCntSlots + // addi rAddr, rAddr, padding + // Loop: + // cbo.zero rAddr ; Store zeros to the full set of bytes + // ; corresponding to a cache block + // addi rAddr, rAddr, REGSIZE_BYTES + padding + // addi rCnt, rCnt, -1 + // bnez rCnt, Loop + // BasicBlock* loop = genCreateTempLabel(); - genDefineTempLabel(loop); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rCnt, rCnt, -1); + if (uCntSlots % 4 == 0) + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 4); + + genDefineTempLabel(loop); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); - GetEmitter()->emitIns_J(INS_bnez, loop, rCnt); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 16 + padding); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 24 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rCnt, rCnt, -1); + + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES + 3 * padding); + GetEmitter()->emitIns_J(INS_bnez, loop, rCnt); + + uCntBytes %= REGSIZE_BYTES * 4; + } + else + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); - uCntBytes %= REGSIZE_BYTES * 2; + genDefineTempLabel(loop); + + GetEmitter()->emitIns_R_R_I(INS_sd, 
EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rCnt, rCnt, -1); + + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); + GetEmitter()->emitIns_J(INS_bnez, loop, rCnt); + + uCntBytes %= REGSIZE_BYTES * 2; + } } else { From b048e51a7cfd72b40d87686abcb0c37a24dce624 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Wed, 26 Mar 2025 16:47:25 +0100 Subject: [PATCH 08/24] correct loop gen in genZeroInitFrameUsingBlockInit and add support for bnez/beqz mnemonics --- src/coreclr/jit/codegenriscv64.cpp | 45 ++---------------------------- src/coreclr/jit/emitjmps.h | 5 ++-- src/coreclr/jit/emitriscv64.cpp | 4 +++ src/coreclr/jit/emitriscv64.h | 1 - 4 files changed, 9 insertions(+), 46 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index b84d0acb1e90a0..571783c8626b2c 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -901,48 +901,10 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming // argument reg - // - // ; base isa (+C extension) - // addi rCnt, zero, uCntSlots / 2 - // Loop: - // sd zero, (8 + padding)rAddr ; store first elem of block - // sd zero, (0 + padding)rAddr ; store second elem of block - // addi rCnt, rCnt, -1 - // addi rAddr, rAddr, 2 * REGSIZE_BYTES ; go to the next block - // bnez rCnt, Loop ; Anything left? - // - // TODO-RISCV64: maybe use V or cmo extension? 
- // - // ; V-extension: - // - // vsetvli t0, zero, e64, m1, ta, ma ; SEW=64b LMUL=1 - // vmv.v.i v0, 0 ; fill v0 with zeros - // Loop: - // vsd v0, (padding)rAddr ; store 2 * 8 = 16 bytes of zero - // addi rAddr, rAddr, 2 * REGSIZE_BYTES ; advance by block size (16 bytes) - // addi rCnt, rCnt, -1 - // bnez rCnt, Loop - // - // ; cmo-extension: - // - // addi rCnt, zero, uCntSlots - // addi rAddr, rAddr, padding - // Loop: - // cbo.zero rAddr ; Store zeros to the full set of bytes - // ; corresponding to a cache block - // addi rAddr, rAddr, REGSIZE_BYTES + padding - // addi rCnt, rCnt, -1 - // bnez rCnt, Loop - // - - BasicBlock* loop = genCreateTempLabel(); - if (uCntSlots % 4 == 0) { instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 4); - genDefineTempLabel(loop); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 16 + padding); @@ -950,7 +912,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rCnt, rCnt, -1); GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES + 3 * padding); - GetEmitter()->emitIns_J(INS_bnez, loop, rCnt); + GetEmitter()->emitIns_R_I(INS_bnez, EA_PTRSIZE, rCnt, -6 << 2); uCntBytes %= REGSIZE_BYTES * 4; } @@ -958,14 +920,13 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu { instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); - genDefineTempLabel(loop); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rCnt, rCnt, -1); GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); - GetEmitter()->emitIns_J(INS_bnez, loop, 
rCnt); + + GetEmitter()->emitIns_R_I(INS_bnez, EA_PTRSIZE, rCnt, -4 << 2); uCntBytes %= REGSIZE_BYTES * 2; } diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index 21ffc0bddaed41..9762ee6f8fc0ce 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -62,9 +62,8 @@ JMP_SMALL(lt , ge , blt ) // LT JMP_SMALL(ge , lt , bge ) // GE JMP_SMALL(ltu , geu , bltu ) // LTU JMP_SMALL(geu , ltu , bgeu ) // GEU -// C extension -JMP_SMALL(eqz , nez , beqz ) // EQZ -JMP_SMALL(nez , eqz , bnez ) // NEZ +JMP_SMALL(eqz , nez , beqz ) // EQZ +JMP_SMALL(nez , eqz , bnez ) // NEZ #else #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index e1e8abf3a68d90..1ee0ef62c6d7fc 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -591,6 +591,9 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t code |= ((imm >> 1) & 0x3ff) << 21; code |= ((imm >> 20) & 0x1) << 31; break; + case INS_bnez: + case INS_beqz: + return emitIns_R_R_I(ins, attr, reg, REG_ZERO, imm, opt); default: NO_WAY("illegal ins within emitIns_R_I!"); break; @@ -601,6 +604,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t id->idIns(ins); id->idReg1(reg); id->idAddr()->iiaSetInstrEncode(code); + id->idCodeSize(4); appendToCurIG(id); diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h index 0670dbba0c840e..9aab3155636864 100644 --- a/src/coreclr/jit/emitriscv64.h +++ b/src/coreclr/jit/emitriscv64.h @@ -275,7 +275,6 @@ inline static bool isCondJumpInstruction(instruction ins) case INS_bge: case INS_bltu: case INS_bgeu: - // C extension case INS_beqz: case INS_bnez: return true; From 12e9c887696f70843009651d51c7283308a2a467 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Thu, 27 Mar 2025 13:07:04 +0100 Subject: [PATCH 09/24] resolve merge conflicts --- src/coreclr/jit/codegenriscv64.cpp | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 571783c8626b2c..2ff61757924837 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1162,8 +1162,7 @@ void CodeGen::genCodeForIncSaturate(GenTree* tree) emitAttr attr = emitActualTypeSize(tree); GetEmitter()->emitIns_R_R_I(INS_addi, attr, targetReg, operandReg, 1); - // bne targetReg, zero, 2 * 4 - GetEmitter()->emitIns_R_R_I(INS_bne, attr, targetReg, REG_R0, 8); + GetEmitter()->emitIns_R_I(INS_bnez, attr, targetReg, 2 << 2); GetEmitter()->emitIns_R_R_I(INS_xori, attr, targetReg, targetReg, -1); genProduceReg(tree); @@ -1689,6 +1688,8 @@ void CodeGen::genLclHeap(GenTree* tree) // and localloc size is a multiple of STACK_ALIGN. // Loop: + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -16); emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 8); @@ -1701,8 +1702,7 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_R_R_I(INS_addi, emitActualTypeSize(type), regCnt, regCnt, -16); - // goto Loop - emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, -4 << 2); + emit->emitIns_J(INS_bnez, loop, regCnt); lastTouchDelta = 0; } From ce1043e54e8037a4280c45de9bf0690232378878 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Fri, 28 Mar 2025 09:36:31 +0100 Subject: [PATCH 10/24] add label to tickling pages --- src/coreclr/jit/codegenriscv64.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 2ff61757924837..ccde3bf24a8770 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1758,11 +1758,13 @@ void CodeGen::genLclHeap(GenTree* tree) regSet.verifyRegUsed(rPageSize); emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, REG_SPBASE, 0); + BasicBlock* 
tickleLoop = genCreateTempLabel(); + genDefineTempLabel(tickleLoop); // tickle the page - this triggers a page fault when on the guard page emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, tempReg, 0); emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tempReg, tempReg, rPageSize); - emit->emitIns_R_R_I(INS_bgeu, EA_PTRSIZE, tempReg, regCnt, -2 << 2); + emit->emitIns_J(INS_bgeu, tickleLoop, tempReg | (regCnt << 5)); // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, // we're going to assume the worst and probe. From 54e16bb854c22a563a0657ce5ec5f8f5c750ac07 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Fri, 28 Mar 2025 10:52:08 +0100 Subject: [PATCH 11/24] apply format.patch --- src/coreclr/jit/emitriscv64.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 1ee0ef62c6d7fc..759b27fab24f91 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -88,13 +88,7 @@ const emitJumpKind emitReverseJumpKinds[] = { { assert(emitter::isCondJumpInstruction(ins)); - return emitJumpKindToIns( - emitReverseJumpKind( - emitInsToJumpKind( - ins - ) - ) - ); + return emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins))); } /***************************************************************************** From 2ca6b868f799b4fc771833c6db3a27fc0b6fc2b8 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Fri, 28 Mar 2025 14:05:21 +0100 Subject: [PATCH 12/24] replace loop counter with pointer-deepened loop --- src/coreclr/jit/codegenriscv64.cpp | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index ccde3bf24a8770..871d371492e883 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -839,7 +839,7 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) 
void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed) { regNumber rAddr; - regNumber rCnt = REG_NA; // Invalid + regNumber rTarget = REG_NA; // Invalid regMaskTP regMask; regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers @@ -880,13 +880,11 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu uCntBytes -= 4; } - unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use. - - // When uCntSlots is 9 or less, we will emit a sequence of sd instructions inline. - // When it is 10 or greater, we will emit a loop containing a sd instruction. + // When uCntBytes is 11 * REGSIZE_BYTES or less, we will emit a sequence of sd instructions inline. + // When it is 12 * REGSIZE_BYTES or greater, we will emit a loop containing a sd instruction. // In both of these cases the sd instruction will write two zeros to memory // and we will use a single str instruction at the end whenever we have an odd count. 
- if (uCntSlots >= 12) + if (uCntBytes >= 12 * REGSIZE_BYTES) useLoop = true; if (useLoop) @@ -894,39 +892,36 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // We pick the next lowest register number for rCnt noway_assert(availMask != RBM_NONE); regMask = genFindLowestBit(availMask); - rCnt = genRegNumFromMask(regMask); + rTarget = genRegNumFromMask(regMask); availMask &= ~regMask; - noway_assert(uCntSlots >= 2); - assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming + noway_assert(uCntBytes >= 2 * REGSIZE_BYTES); + assert((genRegMask(rTarget) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rTarget is not a live incoming // argument reg - if (uCntSlots % 4 == 0) + if (uCntBytes % (4 * REGSIZE_BYTES) == 0) { - instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 4); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rTarget, (ssize_t)uCntBytes); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 16 + padding); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 24 + padding); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rCnt, rCnt, -1); GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES + 3 * padding); - GetEmitter()->emitIns_R_I(INS_bnez, EA_PTRSIZE, rCnt, -6 << 2); + GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rCnt, -5 << 2); uCntBytes %= REGSIZE_BYTES * 4; } else { - instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rTarget, (ssize_t)uCntBytes); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rCnt, rCnt, -1); GetEmitter()->emitIns_R_R_I(INS_addi, 
EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); - - GetEmitter()->emitIns_R_I(INS_bnez, EA_PTRSIZE, rCnt, -4 << 2); + GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rCnt, -3 << 2); uCntBytes %= REGSIZE_BYTES * 2; } From 80d2de4577dedc595c613543982bd57a92d7b7f5 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Fri, 28 Mar 2025 14:22:55 +0100 Subject: [PATCH 13/24] apply format.patch --- src/coreclr/jit/codegenriscv64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 871d371492e883..c3d6c2324c8434 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -897,7 +897,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu noway_assert(uCntBytes >= 2 * REGSIZE_BYTES); assert((genRegMask(rTarget) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rTarget is not a live incoming - // argument reg + // argument reg if (uCntBytes % (4 * REGSIZE_BYTES) == 0) { From 44f989e33ab24a8d4e27c8765b96af8cea9454dd Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Fri, 28 Mar 2025 14:46:42 +0100 Subject: [PATCH 14/24] fix wrong reg name --- src/coreclr/jit/codegenriscv64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index c3d6c2324c8434..24ccf9a7c05205 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -909,7 +909,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 24 + padding); GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES + 3 * padding); - GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rCnt, -5 << 2); + GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rTarget, -5 << 2); uCntBytes %= REGSIZE_BYTES * 4; } @@ 
-921,7 +921,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); - GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rCnt, -3 << 2); + GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rTarget, -3 << 2); uCntBytes %= REGSIZE_BYTES * 2; } From 87f1832152cd99f02403b6c6e055bdb0ccf23d5a Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Tue, 1 Apr 2025 14:41:42 +0200 Subject: [PATCH 15/24] fix loop for odd reg slots --- src/coreclr/jit/codegenriscv64.cpp | 103 +++++++++++------------------ 1 file changed, 39 insertions(+), 64 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 24ccf9a7c05205..1b5d79b414b4e0 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -839,7 +839,6 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed) { regNumber rAddr; - regNumber rTarget = REG_NA; // Invalid regMaskTP regMask; regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers @@ -863,101 +862,77 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu else { // Load immediate into the InitReg register - instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, static_cast<ssize_t>(untrLclLo)); GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg); *pInitRegZeroed = false; } - bool useLoop = false; - unsigned uCntBytes = untrLclHi - untrLclLo; - assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes. 
+ unsigned uLclBytes = untrLclHi - untrLclLo; + assert(uLclBytes % 4 == 0); // The smallest stack slot is always 4 bytes. unsigned int padding = untrLclLo & 0x7; if (padding) { assert(padding == 4); GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, 0); - uCntBytes -= 4; + uLclBytes -= 4; + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, padding); } - // When uCntBytes is 11 * REGSIZE_BYTES or less, we will emit a sequence of sd instructions inline. - // When it is 12 * REGSIZE_BYTES or greater, we will emit a loop containing a sd instruction. - // In both of these cases the sd instruction will write two zeros to memory - // and we will use a single str instruction at the end whenever we have an odd count. - if (uCntBytes >= 12 * REGSIZE_BYTES) - useLoop = true; + unsigned uRegSlots = uLclBytes / REGSIZE_BYTES; - if (useLoop) + if (uRegSlots >= 12) { - // We pick the next lowest register number for rCnt + // ensure loop will have even number of slots + // if there is odd number of slots, the last one will be handled later + ssize_t uLoopBytes = (uRegSlots & ~1) * REGSIZE_BYTES; + + regNumber rEndAddr; noway_assert(availMask != RBM_NONE); - regMask = genFindLowestBit(availMask); - rTarget = genRegNumFromMask(regMask); + regMask = genFindLowestBit(availMask); + rEndAddr = genRegNumFromMask(regMask); availMask &= ~regMask; - noway_assert(uCntBytes >= 2 * REGSIZE_BYTES); - assert((genRegMask(rTarget) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rTarget is not a live incoming - // argument reg + // rEndAddr is not a live incoming argument reg + assert((genRegMask(rEndAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); - if (uCntBytes % (4 * REGSIZE_BYTES) == 0) - { - instGen_Set_Reg_To_Imm(EA_PTRSIZE, rTarget, (ssize_t)uCntBytes); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rEndAddr, rAddr, uLoopBytes); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, 
REG_R0, rAddr, 8 + padding); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 16 + padding); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 24 + padding); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, REGSIZE_BYTES); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES + 3 * padding); - GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rTarget, -5 << 2); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rEndAddr, -3 << 2); - uCntBytes %= REGSIZE_BYTES * 4; - } - else - { - instGen_Set_Reg_To_Imm(EA_PTRSIZE, rTarget, (ssize_t)uCntBytes); - - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); - - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); - GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rTarget, -3 << 2); - - uCntBytes %= REGSIZE_BYTES * 2; - } + uLclBytes %= REGSIZE_BYTES * 2; } else { - while (uCntBytes >= REGSIZE_BYTES * 2) + while (uLclBytes >= REGSIZE_BYTES * 2) { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); - uCntBytes -= REGSIZE_BYTES * 2; - padding = 0; + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, REGSIZE_BYTES); + + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); + + uLclBytes -= REGSIZE_BYTES * 2; } } - if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number) + if (uLclBytes >= 
REGSIZE_BYTES) { - if ((uCntBytes - REGSIZE_BYTES) == 0) - { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding); - } - else - { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, REGSIZE_BYTES); - } - uCntBytes -= REGSIZE_BYTES; + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, REGSIZE_BYTES); + uLclBytes -= REGSIZE_BYTES; } - if (uCntBytes > 0) + + if (uLclBytes > 0) { - assert(uCntBytes == sizeof(int)); - GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, padding); - uCntBytes -= sizeof(int); + assert(uLclBytes == 4); + GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, 0); + uLclBytes -= 4; } - noway_assert(uCntBytes == 0); + noway_assert(uLclBytes == 0); } void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock) From 67383554ec36a51954fd9e6950f0fc5bb6ed17ee Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Wed, 2 Apr 2025 14:21:43 +0200 Subject: [PATCH 16/24] add some comments --- src/coreclr/jit/codegenriscv64.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 1b5d79b414b4e0..255978566463ca 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -883,8 +883,8 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu if (uRegSlots >= 12) { - // ensure loop will have even number of slots - // if there is odd number of slots, the last one will be handled later + // we make sure that the loop will have an even number of slots, + // if there is an odd number of slots, the last one will be handled later ssize_t uLoopBytes = (uRegSlots & ~1) * REGSIZE_BYTES; regNumber rEndAddr; @@ -919,6 +919,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu } } 
+ // check and zero the last register-sized stack slot (odd number) if (uLclBytes >= REGSIZE_BYTES) { GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); From c278cb48209cb355ddb1477d725c0593e011e76a Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Thu, 3 Apr 2025 12:37:15 +0200 Subject: [PATCH 17/24] replace old comment style with a new one --- src/coreclr/jit/emitriscv64.cpp | 55 +++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 759b27fab24f91..5c574ff0a569c5 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -40,20 +40,33 @@ const emitJumpKind emitReverseJumpKinds[] = { #include "emitjmps.h" }; -/***************************************************************************** - * Look up the instruction for a jump kind - */ - +//------------------------------------------------------------------------ +// emitJumpKindToIns: convert jump kind to instruction +// +// Arguments: +// jumpKind - jump kind to convert +// +// Return Value: +// instruction - a jump instruction +// /*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind) { assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions)); return emitJumpKindInstructions[jumpKind]; } -/***************************************************************************** - * Look up the (conditional) jump kind for an instruction. 
- */ - +//------------------------------------------------------------------------ +// emitInsToJumpKind: convert conditional jump instruction to jump kind +// +// Arguments: +// ins - conditional jump instruction +// +// Assumptions: +// Instruction is defined in emitJumpKindInstructions +// +// Return Value: +// emitJumpKind - jump kind +// /*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) { assert(emitter::isCondJumpInstruction(ins)); @@ -70,20 +83,30 @@ const emitJumpKind emitReverseJumpKinds[] = { unreached(); } -/***************************************************************************** - * Reverse the conditional jump - */ - +//------------------------------------------------------------------------ +// emitReverseJumpKind: reverse the jump kind +// +// Arguments: +// jumpKind - jump kind to reverse +// +// Return Value: +// emitJumpKind - an opposite jump kind +// /*static*/ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind) { assert(jumpKind < EJ_COUNT); return emitReverseJumpKinds[jumpKind]; } -/***************************************************************************** - * Reverse the conditional jump instruction - */ - +//------------------------------------------------------------------------ +// emitReverseJumpIns: reverse the conditional jump instruction +// +// Arguments: +// ins - conditional jump instruction to reverse +// +// Return Value: +// instruction - an opposite conditional jump instruction +// /*static*/ instruction emitter::emitReverseJumpIns(instruction ins) { assert(emitter::isCondJumpInstruction(ins)); From 62896e04f6491974dd1746b71eebe378871f67ef Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Thu, 3 Apr 2025 15:46:33 +0200 Subject: [PATCH 18/24] optimize loop --- src/coreclr/jit/codegenriscv64.cpp | 40 ++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 
255978566463ca..e30ff0d573b100 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -883,10 +883,6 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu if (uRegSlots >= 12) { - // we make sure that the loop will have an even number of slots, - // if there is an odd number of slots, the last one will be handled later - ssize_t uLoopBytes = (uRegSlots & ~1) * REGSIZE_BYTES; - regNumber rEndAddr; noway_assert(availMask != RBM_NONE); regMask = genFindLowestBit(availMask); @@ -896,15 +892,39 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // rEndAddr is not a live incoming argument reg assert((genRegMask(rEndAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rEndAddr, rAddr, uLoopBytes); + // we make sure that the loop will have an even number of slots, + // if there is an odd number of slots, the last one will be handled later - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, REGSIZE_BYTES); + ssize_t uLoopBytes = (uRegSlots & ~3) * REGSIZE_BYTES; + + if (uLoopBytes > 0) + { + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rEndAddr, rAddr, uLoopBytes); + + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 2 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 3 * REGSIZE_BYTES); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); - GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rEndAddr, -3 << 2); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rEndAddr, -5 << 2); - uLclBytes %= REGSIZE_BYTES * 2; + 
GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, rEndAddr, REG_ZERO); + + uLclBytes -= uLoopBytes; + } + + uLoopBytes = (uRegSlots & ~1) * REGSIZE_BYTES - uLoopBytes; + + if (uLoopBytes > 0) + { + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, REGSIZE_BYTES); + + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, uLoopBytes); + + uLclBytes -= uLoopBytes; + } } else { From e78cafbfeb32fae1ec6d0da63bce12bd86a5b986 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Fri, 4 Apr 2025 16:43:45 +0200 Subject: [PATCH 19/24] remove addr incrementation in no-loop case --- src/coreclr/jit/codegenriscv64.cpp | 50 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index e30ff0d573b100..ae541ad4e21f71 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -876,7 +876,6 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu assert(padding == 4); GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, 0); uLclBytes -= 4; - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, padding); } unsigned uRegSlots = uLclBytes / REGSIZE_BYTES; @@ -896,20 +895,20 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // if there is an odd number of slots, the last one will be handled later ssize_t uLoopBytes = (uRegSlots & ~3) * REGSIZE_BYTES; + + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rEndAddr, rAddr, uLoopBytes); if (uLoopBytes > 0) { - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rEndAddr, rAddr, uLoopBytes); - - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, REGSIZE_BYTES); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 2 * REGSIZE_BYTES); - 
GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 3 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + 2 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + 3 * REGSIZE_BYTES); GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES); GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rEndAddr, -5 << 2); - GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, rEndAddr, REG_ZERO); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rEndAddr, 0); uLclBytes -= uLoopBytes; } @@ -918,39 +917,40 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu if (uLoopBytes > 0) { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rEndAddr, padding); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rEndAddr, padding + REGSIZE_BYTES); GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, uLoopBytes); uLclBytes -= uLoopBytes; } + + // check and zero the last register-sized stack slot (odd number) + if (uLclBytes >= REGSIZE_BYTES) + { + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, padding + uLoopBytes); + uLclBytes -= REGSIZE_BYTES; + } } else { - while (uLclBytes >= REGSIZE_BYTES * 2) - { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, REGSIZE_BYTES); + ssize_t uCnt = 0; - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); - - uLclBytes -= REGSIZE_BYTES * 2; + while (uCnt < 
uLclBytes) + { + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt); + uCnt += REGSIZE_BYTES; } - } + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, uCnt); - // check and zero the last register-sized stack slot (odd number) - if (uLclBytes >= REGSIZE_BYTES) - { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, 0); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, REGSIZE_BYTES); - uLclBytes -= REGSIZE_BYTES; + uLclBytes -= uCnt; } if (uLclBytes > 0) { assert(uLclBytes == 4); - GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, 0); + GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, padding); uLclBytes -= 4; } noway_assert(uLclBytes == 0); From 848c521ea963fff364c19f15f100f5184814832a Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Fri, 4 Apr 2025 17:06:13 +0200 Subject: [PATCH 20/24] restrict addr incrementation --- src/coreclr/jit/codegenriscv64.cpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index ae541ad4e21f71..cfdb4202583176 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -879,6 +879,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu } unsigned uRegSlots = uLclBytes / REGSIZE_BYTES; + ssize_t uCnt = 0; if (uRegSlots >= 12) { @@ -908,8 +909,9 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES); GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rEndAddr, -5 << 2); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rEndAddr, 0); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rEndAddr, -uLoopBytes); + uCnt += uLoopBytes; uLclBytes -= uLoopBytes; } @@ -917,40 +919,36 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, 
int untrLclLo, regNu if (uLoopBytes > 0) { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rEndAddr, padding); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rEndAddr, padding + REGSIZE_BYTES); - - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, uLoopBytes); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt + REGSIZE_BYTES); + uCnt += uLoopBytes; // += 2 * REGSIZE_BYTES uLclBytes -= uLoopBytes; } // check and zero the last register-sized stack slot (odd number) if (uLclBytes >= REGSIZE_BYTES) { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, padding + uLoopBytes); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt); + + uCnt += uLoopBytes; uLclBytes -= REGSIZE_BYTES; } } else { - ssize_t uCnt = 0; - while (uCnt < uLclBytes) { GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt); uCnt += REGSIZE_BYTES; } - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, uCnt); - uLclBytes -= uCnt; } if (uLclBytes > 0) { assert(uLclBytes == 4); - GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, padding); + GetEmitter()->emitIns_R_R_I(INS_sw, EA_4BYTE, REG_R0, rAddr, padding + uCnt); uLclBytes -= 4; } noway_assert(uLclBytes == 0); From ddf99012153a9a6e328c942f4796a272a769795c Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Mon, 7 Apr 2025 08:57:33 +0200 Subject: [PATCH 21/24] apply format.patch --- src/coreclr/jit/codegenriscv64.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index cfdb4202583176..52eae96f8a2bb1 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -879,7 +879,7 @@ void 
CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu } unsigned uRegSlots = uLclBytes / REGSIZE_BYTES; - ssize_t uCnt = 0; + ssize_t uCnt = 0; if (uRegSlots >= 12) { @@ -896,7 +896,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // if there is an odd number of slots, the last one will be handled later ssize_t uLoopBytes = (uRegSlots & ~3) * REGSIZE_BYTES; - + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rEndAddr, rAddr, uLoopBytes); if (uLoopBytes > 0) @@ -911,7 +911,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rEndAddr, -uLoopBytes); - uCnt += uLoopBytes; + uCnt += uLoopBytes; uLclBytes -= uLoopBytes; } @@ -922,7 +922,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt); GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt + REGSIZE_BYTES); - uCnt += uLoopBytes; // += 2 * REGSIZE_BYTES + uCnt += uLoopBytes; // += 2 * REGSIZE_BYTES uLclBytes -= uLoopBytes; } @@ -931,7 +931,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu { GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt); - uCnt += uLoopBytes; + uCnt += uLoopBytes; uLclBytes -= REGSIZE_BYTES; } } From fe1558731eb72f4e7346b697931d2f9406ce7957 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Mon, 7 Apr 2025 10:48:00 +0200 Subject: [PATCH 22/24] adjust uCnt --- src/coreclr/jit/codegenriscv64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 52eae96f8a2bb1..f00fa718c0df8f 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -931,7 +931,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, 
regNu { GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt); - uCnt += uLoopBytes; + uCnt += REGSIZE_BYTES; uLclBytes -= REGSIZE_BYTES; } } From a7ac979da53e159b370d50cf4c5354df8ae75a8e Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Mon, 7 Apr 2025 16:05:52 +0200 Subject: [PATCH 23/24] avoid incrementing rAddr outside of the loop --- src/coreclr/jit/codegenriscv64.cpp | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 54d066ca90995d..be2aaa2d5bc56a 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -878,8 +878,9 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu uLclBytes -= 4; } - unsigned uRegSlots = uLclBytes / REGSIZE_BYTES; - ssize_t uCnt = 0; + unsigned uRegSlots = uLclBytes / REGSIZE_BYTES; + ssize_t uCnt = 0; + ssize_t uLoopBytes = 0; if (uRegSlots >= 12) { @@ -895,7 +896,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // we make sure that the loop will have an even number of slots, // if there is an odd number of slots, the last one will be handled later - ssize_t uLoopBytes = (uRegSlots & ~3) * REGSIZE_BYTES; + uLoopBytes = (uRegSlots & ~3) * REGSIZE_BYTES; GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rEndAddr, rAddr, uLoopBytes); @@ -909,28 +910,14 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES); GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rEndAddr, -5 << 2); - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rEndAddr, -uLoopBytes); - - uCnt += uLoopBytes; - uLclBytes -= uLoopBytes; - } - - uLoopBytes = (uRegSlots & ~1) * REGSIZE_BYTES - uLoopBytes; - - if (uLoopBytes > 0) - { - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding 
+ uCnt); - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt + REGSIZE_BYTES); - - uCnt += uLoopBytes; // += 2 * REGSIZE_BYTES + uCnt = -REGSIZE_BYTES; uLclBytes -= uLoopBytes; } - // check and zero the last register-sized stack slot (odd number) - if (uLclBytes >= REGSIZE_BYTES) + // handle remainder from uLoopBytes mod 4; r = {0,1,2,3} + while (uLclBytes >= REGSIZE_BYTES) { GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt); - uCnt += REGSIZE_BYTES; uLclBytes -= REGSIZE_BYTES; } From 3bbf6556dbad10b9acc69a53799b03a7e1bfa90a Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Thu, 10 Apr 2025 16:54:49 +0200 Subject: [PATCH 24/24] fix segfault in some tests that use padding in init frames --- src/coreclr/jit/codegenriscv64.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 779afe5ac03700..21061834bf7289 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -910,11 +910,12 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES); GetEmitter()->emitIns_R_R_I(INS_blt, EA_PTRSIZE, rAddr, rEndAddr, -5 << 2); - uCnt = -REGSIZE_BYTES; + // uCnt = 0; rAddr = (uRegSlots // 4 + 4) * REGSIZE_BYTES + uCnt = -4 * REGSIZE_BYTES; uLclBytes -= uLoopBytes; } - // handle remainder from uLoopBytes mod 4; r = {0,1,2,3} + // handle remainder from uRegSlots mod 4; r = {0,1,2,3} while (uLclBytes >= REGSIZE_BYTES) { GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt); @@ -929,6 +930,8 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + uCnt); uCnt += REGSIZE_BYTES; } + // rAddr = 0; uCnt = (uRegSlots + 1) * REGSIZE_BYTES + uCnt -= REGSIZE_BYTES; 
uLclBytes -= uCnt; }