Skip to content

Instantly share code, notes, and snippets.

@arichardson
Last active August 19, 2025 17:07
Show Gist options
  • Select an option

  • Save arichardson/74c5809ff08384662240065357f117f4 to your computer and use it in GitHub Desktop.

Select an option

Save arichardson/74c5809ff08384662240065357f117f4 to your computer and use it in GitHub Desktop.
"Fake" CHERI LLVM support for shift-add instruction (static code size experiment)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td
index 8b0bfd05d277..e45c7eabccdf 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td
@@ -410,6 +410,20 @@ def : InstAlias<"csetboundsimm $cd, $cs1, $imm",
(CSetBoundsImm GPCR:$cd, GPCR:$cs1, uimm12:$imm), 0>;
}
+let Predicates = [HasCheri, HasStdExtZba] in { // Capability shift-add defs, available only when both CHERI and Zba are enabled.
+def CSH1ADD : Cheri_rr<0x31, "csh1add", GPCR, GPCR, GPR>; // Selected for cptradd cap, (index << 1); patterns pass the GPR index first, the GPCR capability second.
+def CSH2ADD : Cheri_rr<0x32, "csh2add", GPCR, GPCR, GPR>; // Selected for cptradd cap, (index << 2).
+def CSH3ADD : Cheri_rr<0x33, "csh3add", GPCR, GPCR, GPR>; // Selected for cptradd cap, (index << 3).
+def CSH4ADD : Cheri_rr<0x34, "csh4add", GPCR, GPCR, GPR>; // Selected for cptradd cap, (index << 4). NOTE(review): 0x31-0x34 assumed unused by other Cheri_rr defs - confirm.
+}
+let Predicates = [HasCheri, HasStdExtZba, IsRV64] in { // RV64-only ".uw" forms: selected when the index is masked to its low 32 bits before scaling.
+def CSH1ADD_UW : Cheri_rr<0x35, "csh1add.uw", GPCR, GPCR, GPR>; // Selected for cptradd cap, ((index & 0xFFFFFFFF) << 1).
+def CSH2ADD_UW : Cheri_rr<0x36, "csh2add.uw", GPCR, GPCR, GPR>; // Selected for cptradd cap, ((index & 0xFFFFFFFF) << 2).
+def CSH3ADD_UW : Cheri_rr<0x37, "csh3add.uw", GPCR, GPCR, GPR>; // Selected for cptradd cap, ((index & 0xFFFFFFFF) << 3).
+def CSH4ADD_UW : Cheri_rr<0x38, "csh4add.uw", GPCR, GPCR, GPR>; // Selected for cptradd cap, ((index & 0xFFFFFFFF) << 4).
+def CADD_UW : Cheri_rr<0x39, "cadd.uw", GPCR, GPCR, GPR>; // Unshifted form. Mnemonic carries the "c" prefix like its siblings so it does not reuse Zba's integer "add.uw".
+}
+
//===----------------------------------------------------------------------===//
// Pointer-Arithmetic Instructions
//===----------------------------------------------------------------------===//
@@ -1503,6 +1517,85 @@ def : Pat<(load (XLenVT GPR:$rs1)),
let Predicates = [HasCheri, IsCapMode, HasStdExtD, IsRV64] in
def : Pat<(load (XLenVT GPR:$rs1)), (FMV_D_X (LD_DDC (XLenVT GPR:$rs1)))>;
+
+let Predicates = [HasCheri, HasStdExtZba] in { // Fold a scaled integer index into the capability pointer add ($rs2 is the capability, $rs1 the index).
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (shl GPR:$rs1, (XLenVT 1)))), // $rs2 + ($rs1 << 1)
+ (CSH1ADD GPR:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (shl GPR:$rs1, (XLenVT 2)))), // $rs2 + ($rs1 << 2)
+ (CSH2ADD GPR:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (shl GPR:$rs1, (XLenVT 3)))), // $rs2 + ($rs1 << 3)
+ (CSH3ADD GPR:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (shl GPR:$rs1, (XLenVT 4)))), // $rs2 + ($rs1 << 4)
+ (CSH4ADD GPR:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, sh1add_op:$rs1), // Other shifted-index shapes are matched through the sh*add_op ComplexPatterns.
+ (CSH1ADD sh1add_op:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, sh2add_op:$rs1),
+ (CSH2ADD sh2add_op:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, sh3add_op:$rs1),
+ (CSH3ADD sh3add_op:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, sh4add_op:$rs1),
+ (CSH4ADD sh4add_op:$rs1, GPCR:$rs2)>;
+
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 6))), // Constant multiplies: inner SHnADD x, x builds (2^n + 1) * x, outer CSHmADD scales by 2^m. 6 = 3 << 1
+ (CSH1ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 10))), // 10 = 5 << 1
+ (CSH1ADD (SH2ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 18))), // 18 = 9 << 1
+ (CSH1ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 34))), // 34 = 17 << 1
+ (CSH1ADD (SH4ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 12))), // 12 = 3 << 2
+ (CSH2ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 20))), // 20 = 5 << 2
+ (CSH2ADD (SH2ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 36))), // 36 = 9 << 2
+ (CSH2ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 68))), // 68 = 17 << 2
+ (CSH2ADD (SH4ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 24))), // 24 = 3 << 3
+ (CSH3ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 40))), // 40 = 5 << 3
+ (CSH3ADD (SH2ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 72))), // 72 = 9 << 3
+ (CSH3ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 136))), // 136 = 17 << 3
+ (CSH3ADD (SH4ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 48))), // 48 = 3 << 4
+ (CSH4ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 80))), // 80 = 5 << 4
+ (CSH4ADD (SH2ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 144))), // 144 = 9 << 4
+ (CSH4ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (mul_oneuse GPR:$rs1, (XLenVT 272))), // 272 = 17 << 4
+ (CSH4ADD (SH4ADD GPR:$rs1, GPR:$rs1), GPCR:$rs2)>;
+}
+let Predicates = [HasCheri, HasStdExtZba, IsRV64] in { // RV64 only: the index is masked to its low 32 bits before being scaled and added to the capability.
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)))), (CSH1ADD_UW GPR:$rs1, GPCR:$rs2)>; // Shift-of-mask form: ($rs1 & 0xFFFFFFFF) << N.
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)))), (CSH2ADD_UW GPR:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)))), (CSH3ADD_UW GPR:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 4)))), (CSH4ADD_UW GPR:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF))), (CSH1ADD_UW GPR:$rs1, GPCR:$rs2)>; // Mask-of-shift form: the mask is (0xFFFFFFFF << N) with the low N bits also set.
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF))), (CSH2ADD_UW GPR:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF))), (CSH3ADD_UW GPR:$rs1, GPCR:$rs2)>;
+def : Pat<(cptradd GPCR:$rs2, (XLenVT (and (shl GPR:$rs1, (i64 4)), 0xFFFFFFFFF))), (CSH4ADD_UW GPR:$rs1, GPCR:$rs2)>;
+
+def : Pat<(CLenVT (cptradd GPCR:$rs2, sh1add_uw_op:$rs1)), // Other masked-and-shifted shapes are matched through the sh*add_uw_op ComplexPatterns.
+ (CSH1ADD_UW sh1add_uw_op:$rs1, GPCR:$rs2)>;
+def : Pat<(CLenVT (cptradd GPCR:$rs2, sh2add_uw_op:$rs1)),
+ (CSH2ADD_UW sh2add_uw_op:$rs1, GPCR:$rs2)>;
+def : Pat<(CLenVT (cptradd GPCR:$rs2, sh3add_uw_op:$rs1)),
+ (CSH3ADD_UW sh3add_uw_op:$rs1, GPCR:$rs2)>;
+def : Pat<(CLenVT (cptradd GPCR:$rs2, sh4add_uw_op:$rs1)),
+ (CSH4ADD_UW sh4add_uw_op:$rs1, GPCR:$rs2)>;
+def : Pat<(CLenVT (cptradd GPCR:$rs2, (XLenVT (and GPR:$rs1, 0xFFFFFFFF)))), // Unshifted index masked to 32 bits.
+ (CADD_UW GPR:$rs1, GPCR:$rs2)>;
+// Disabled: use SRLI to clear the LSBs and CSHXADD_UW to mask and shift. NOTE(review): the result operand must be GPCR:$rs2 to match the capability bound in the source pattern.
+// def : Pat<(cptradd GPCR:$rs2, (XLenVT (and GPR:$rs1, (XLenVT 0x1FFFFFFFE)))), (CSH1ADD_UW (SRLI GPR:$rs1, 1), GPCR:$rs2)>;
+// def : Pat<(cptradd GPCR:$rs2, (XLenVT (and GPR:$rs1, (XLenVT 0x3FFFFFFFC)))), (CSH2ADD_UW (SRLI GPR:$rs1, 2), GPCR:$rs2)>;
+// def : Pat<(cptradd GPCR:$rs2, (XLenVT (and GPR:$rs1, (XLenVT 0x7FFFFFFF8)))), (CSH3ADD_UW (SRLI GPR:$rs1, 3), GPCR:$rs2)>;
+// def : Pat<(cptradd GPCR:$rs2, (XLenVT (and GPR:$rs1, (XLenVT 0xFFFFFFFF0)))), (CSH4ADD_UW (SRLI GPR:$rs1, 4), GPCR:$rs2)>;
+}
+
/// DDC-relative stores
let Predicates = [HasCheri, IsCapMode] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index caeedfa652e4..66f9c7ef0c5c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -253,10 +253,12 @@ def Shifted32OnesMask : PatLeaf<(imm), [{
def sh1add_op : ComplexPattern<XLenVT, 1, "selectSHXADDOp<1>", [], [], 6>; // One ComplexPattern per shift amount; the template argument presumably is the shift (selector defined outside this view).
def sh2add_op : ComplexPattern<XLenVT, 1, "selectSHXADDOp<2>", [], [], 6>;
def sh3add_op : ComplexPattern<XLenVT, 1, "selectSHXADDOp<3>", [], [], 6>;
+def sh4add_op : ComplexPattern<XLenVT, 1, "selectSHXADDOp<4>", [], [], 6>; // New shift-by-4 variant, used by the SH4ADD and CSH4ADD patterns.
def sh1add_uw_op : ComplexPattern<XLenVT, 1, "selectSHXADD_UWOp<1>", [], [], 6>; // Same scheme for the ".uw" instructions.
def sh2add_uw_op : ComplexPattern<XLenVT, 1, "selectSHXADD_UWOp<2>", [], [], 6>;
def sh3add_uw_op : ComplexPattern<XLenVT, 1, "selectSHXADD_UWOp<3>", [], [], 6>;
+def sh4add_uw_op : ComplexPattern<XLenVT, 1, "selectSHXADD_UWOp<4>", [], [], 6>; // New shift-by-4 variant, used by the SH4ADD_UW and CSH4ADD_UW patterns.
//===----------------------------------------------------------------------===//
// Instruction class templates
@@ -307,6 +309,8 @@ def SH2ADD : ALU_rr<0b0010000, 0b100, "sh2add">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
+def SH4ADD : ALU_rr<0b0010000, 0b111, "sh4add">, // Experimental shift-by-4 add: same funct7 as sh2add/sh3add, with funct3 0b111.
+ Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>; // Reuses the scheduling classes of the existing shNadd instructions.
} // Predicates = [HasStdExtZba]
let Predicates = [HasStdExtZba, IsRV64] in {
@@ -320,6 +324,8 @@ def SH2ADD_UW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">,
Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
def SH3ADD_UW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">,
Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
+def SH4ADD_UW : ALUW_rr<0b0010000, 0b111, "sh4add.uw">, // Experimental shift-by-4 ".uw" add: same funct7 as sh2add.uw/sh3add.uw, with funct3 0b111.
+ Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>; // Reuses the scheduling classes of the existing shNadd.uw instructions.
} // Predicates = [HasStdExtZba, IsRV64]
let Predicates = [HasStdExtZbbOrZbkb] in {
@@ -662,6 +668,8 @@ def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), non_imm12:$rs2),
(SH2ADD GPR:$rs1, GPR:$rs2)>;
def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), non_imm12:$rs2),
(SH3ADD GPR:$rs1, GPR:$rs2)>;
+def : Pat<(add (shl GPR:$rs1, (XLenVT 4)), non_imm12:$rs2), // ($rs1 << 4) + $rs2, mirroring the shift-by-2 and shift-by-3 patterns.
+ (SH4ADD GPR:$rs1, GPR:$rs2)>;
// More complex cases use a ComplexPattern.
def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2),
@@ -670,6 +678,8 @@ def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
(SH2ADD sh2add_op:$rs1, GPR:$rs2)>;
def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
(SH3ADD sh3add_op:$rs1, GPR:$rs2)>;
+def : Pat<(add sh4add_op:$rs1, non_imm12:$rs2), // Shift-by-4 operand matched through the sh4add_op ComplexPattern.
+ (SH4ADD sh4add_op:$rs1, GPR:$rs2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
(SH1ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
@@ -677,18 +687,32 @@ def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2),
(SH1ADD (SH2ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 18)), GPR:$rs2), // Inner SHnADD x, x builds (2^n + 1) * x, outer SHmADD scales by 2^m and adds $rs2. 18 = 9 << 1
(SH1ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 34)), GPR:$rs2), // 34 = 17 << 1
+ (SH1ADD (SH4ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 12)), GPR:$rs2), // 12 = 3 << 2
(SH2ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 20)), GPR:$rs2), // 20 = 5 << 2
(SH2ADD (SH2ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 36)), GPR:$rs2), // 36 = 9 << 2
(SH2ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 68)), GPR:$rs2), // 68 = 17 << 2
+ (SH2ADD (SH4ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 24)), GPR:$rs2), // 24 = 3 << 3
(SH3ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2), // 40 = 5 << 3
(SH3ADD (SH2ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2), // 72 = 9 << 3
(SH3ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 136)), GPR:$rs2), // 136 = 17 << 3
+ (SH3ADD (SH4ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 48)), GPR:$rs2), // 48 = 3 << 4
+ (SH4ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 80)), GPR:$rs2), // 80 = 5 << 4
+ (SH4ADD (SH2ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 144)), GPR:$rs2), // 144 = 9 << 4
+ (SH4ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 272)), GPR:$rs2), // 272 = 17 << 4
+ (SH4ADD (SH4ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy4:$i),
(SH2ADD (ADDI (XLenVT X0), (SimmShiftRightBy2XForm CSImm12MulBy4:$i)),
@@ -750,6 +774,9 @@ def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)),
(SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)),
(SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 4)), non_imm12:$rs2)), // (($rs1 & 0xFFFFFFFF) << 4) + $rs2
+ (SH4ADD_UW GPR:$rs1, GPR:$rs2)>;
+
def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)),
(SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
@@ -757,6 +784,8 @@ def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2))
(SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)),
(SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 4)), 0xFFFFFFFFF), non_imm12:$rs2)), // Mask-of-shift spelling of the same value: 0xFFFFFFFFF is (0xFFFFFFFF << 4) with the low 4 bits also set.
+ (SH4ADD_UW GPR:$rs1, GPR:$rs2)>;
// More complex cases use a ComplexPattern.
def : Pat<(i64 (add sh1add_uw_op:$rs1, non_imm12:$rs2)),
@@ -765,6 +794,8 @@ def : Pat<(i64 (add sh2add_uw_op:$rs1, non_imm12:$rs2)),
(SH2ADD_UW sh2add_uw_op:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add sh3add_uw_op:$rs1, non_imm12:$rs2)),
(SH3ADD_UW sh3add_uw_op:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (add sh4add_uw_op:$rs1, non_imm12:$rs2)), // Shift-by-4 ".uw" operand matched through the sh4add_uw_op ComplexPattern.
+ (SH4ADD_UW sh4add_uw_op:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), non_imm12:$rs2)),
(SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
@@ -772,6 +803,8 @@ def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFC), non_imm12:$rs2)),
(SH2ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>;
def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFF8), non_imm12:$rs2)),
(SH3ADD (SRLIW GPR:$rs1, 3), GPR:$rs2)>;
+def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFF0), non_imm12:$rs2)), // Mask keeps bits 31:4; emitted as SRLIW by 4 followed by SH4ADD shifting back by 4.
+ (SH4ADD (SRLIW GPR:$rs1, 4), GPR:$rs2)>;
// Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
def : Pat<(i64 (add (and GPR:$rs1, 0x1FFFFFFFE), non_imm12:$rs2)),
@@ -780,6 +813,8 @@ def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)),
(SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>;
def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)),
(SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>;
+def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF0), non_imm12:$rs2)), // Mask keeps bits 35:4; emitted as SRLI by 4 followed by SH4ADD_UW, which supplies the 32-bit mask and the shift.
+ (SH4ADD_UW (SRLI GPR:$rs1, 4), GPR:$rs2)>;
def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i)),
(SH1ADD (SLLI_UW GPR:$r, (TrailingZeros C3LeftShiftUW:$i)),
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment