[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] 04/07: RISC-V Support
From: |
Ludovic Courtès |
Subject: |
[Guile-commits] 04/07: RISC-V Support |
Date: |
Tue, 31 Jan 2023 10:13:07 -0500 (EST) |
civodul pushed a commit to branch wip-lightening-riscv
in repository guile.
commit 2806dd865f56ea96067785b2b705ab8e2c92c13f
Author: Ekaitz Zarraga <ekaitz@elenq.tech>
AuthorDate: Sun May 9 16:39:03 2021 +0200
RISC-V Support
---
lightening.am | 6 +-
lightening.h | 2 +
lightening/endian.h | 2 +
lightening/lightening.c | 33 +
lightening/riscv-cpu.c | 2464 +++++++++++++++++++++++++++++++++++++++++++++++
lightening/riscv-fpu.c | 858 +++++++++++++++++
lightening/riscv.c | 327 +++++++
lightening/riscv.h | 194 ++++
8 files changed, 3885 insertions(+), 1 deletion(-)
diff --git a/lightening.am b/lightening.am
index 2c9089ead..ba55f2c7f 100644
--- a/lightening.am
+++ b/lightening.am
@@ -40,6 +40,7 @@ lightening_extra_files = \
$(lightening)/lightening/mips.h \
$(lightening)/lightening/ppc.h \
$(lightening)/lightening/x86.h \
+ $(lightening)/lightening/riscv.h \
\
$(lightening)/lightening/aarch64.c \
$(lightening)/lightening/aarch64-cpu.c \
@@ -55,4 +56,7 @@ lightening_extra_files = \
$(lightening)/lightening/ppc-fpu.c \
$(lightening)/lightening/x86.c \
$(lightening)/lightening/x86-cpu.c \
- $(lightening)/lightening/x86-sse.c
+ $(lightening)/lightening/x86-sse.c \
+ $(lightening)/lightening/riscv.c \
+ $(lightening)/lightening/riscv-cpu.c \
+ $(lightening)/lightening/riscv-fpu.c
diff --git a/lightening.h b/lightening.h
index efa5dfdf1..fd39a6406 100644
--- a/lightening.h
+++ b/lightening.h
@@ -77,6 +77,8 @@ jit_same_fprs (jit_fpr_t a, jit_fpr_t b)
# include "lightening/aarch64.h"
#elif defined(__s390__) || defined(__s390x__)
# include "lightening/s390.h"
+#elif defined(__riscv__) || defined(__riscv)
+# include "lightening/riscv.h"
#endif
enum jit_reloc_kind
diff --git a/lightening/endian.h b/lightening/endian.h
index 3b34a1518..e3689a117 100644
--- a/lightening/endian.h
+++ b/lightening/endian.h
@@ -38,6 +38,8 @@
# else
# define __WORDSIZE 64
# endif
+# elif defined(__riscv_xlen)
+# define __WORDSIZE __riscv_xlen /* riscv */
# else /* From FreeBSD 9.1
stdint.h */
# if defined(UINTPTR_MAX) && defined(UINT64_MAX) && \
(UINTPTR_MAX == UINT64_MAX)
diff --git a/lightening/lightening.c b/lightening/lightening.c
index ad990eb01..593429dcd 100644
--- a/lightening/lightening.c
+++ b/lightening/lightening.c
@@ -269,6 +269,22 @@ get_temp_gpr(jit_state_t *_jit)
#ifdef JIT_TMP1
case 1:
return JIT_TMP1;
+#endif
+#ifdef JIT_TMP2
+ case 2:
+ return JIT_TMP2;
+#endif
+#ifdef JIT_TMP3
+ case 3:
+ return JIT_TMP3;
+#endif
+#ifdef JIT_TMP4
+ case 4:
+ return JIT_TMP4;
+#endif
+#ifdef JIT_TMP5
+ case 5:
+ return JIT_TMP5;
#endif
default:
abort();
@@ -559,6 +575,8 @@ jit_emit_addr(jit_state_t *j)
# include "aarch64.c"
#elif defined(__s390__) || defined(__s390x__)
# include "s390.c"
+#elif defined(__riscv__) || defined(__riscv)
+# include "riscv.c"
#endif
#define JIT_IMPL_0(stem, ret) \
@@ -1156,6 +1174,9 @@ static const jit_gpr_t user_callee_save_gprs[] = {
#endif
#ifdef JIT_V9
, JIT_V9
+#endif
+#ifdef JIT_V10
+ , JIT_V10
#endif
};
@@ -1184,6 +1205,18 @@ static const jit_fpr_t user_callee_save_fprs[] = {
#ifdef JIT_VF7
, JIT_VF7
#endif
+#ifdef JIT_VF8
+ , JIT_VF8
+#endif
+#ifdef JIT_VF9
+ , JIT_VF9
+#endif
+#ifdef JIT_VF10
+ , JIT_VF10
+#endif
+#ifdef JIT_VF11
+ , JIT_VF11
+#endif
};
#define ARRAY_SIZE(X) (sizeof (X)/sizeof ((X)[0]))
diff --git a/lightening/riscv-cpu.c b/lightening/riscv-cpu.c
new file mode 100644
index 000000000..37c252a78
--- /dev/null
+++ b/lightening/riscv-cpu.c
@@ -0,0 +1,2464 @@
+/*
+ * Copyright (C) 2012-2021 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ * Ekaitz Zarraga <ekaitz@elenq.tech>
+ */
+/* NOTE(review): presumably the stack frame size in bytes; the code that
+ * consumes this constant is outside this part of the file -- confirm. */
+#define stack_framesize (200 + 64)
+/*
+ * simmN_p(im): non-zero when `im` lies in the range of an N-bit
+ * two's-complement signed integer.  `im` is evaluated twice, so only pass
+ * expressions without side effects.
+ */
+#define simm6_p(im) ((im) >= -32 && (im) <= 31)
+#define simm12_p(im) ((im) >= -2048 && (im) <= 2047)
+#define simm20_p(im) ((im) >= -524288 && (im) <= 524287)
+#define simm32_p(im) ((im) >= -2147483648LL && (im) <= 2147483647LL)
+
+typedef union { /* One 32-bit instruction word (`w`) overlaid with one
+                 * bitfield struct per encoding format; the field widths of
+                 * every struct add up to 32.  NOTE(review): the encoders
+                 * assume the compiler allocates bitfields starting at the
+                 * least significant bit -- confirm for the target ABI. */
+ struct {
+ uint32_t opcode : 7;
+ uint32_t rd : 5;
+ uint32_t funct3 : 3;
+ uint32_t rs1 : 5;
+ uint32_t rs2 : 5;
+ uint32_t funct7 : 7;
+ } R; /* filled in by Rtype() */
+ struct {
+ uint32_t opcode : 7;
+ uint32_t rd : 5;
+ uint32_t funct3 : 3;
+ uint32_t rs1 : 5;
+ uint32_t rs2 : 5;
+ uint32_t rl : 1;
+ uint32_t aq : 1;
+ uint32_t funct5 : 5;
+ } R4; /* filled in by R4type(); R layout with funct7 split into rl/aq/funct5 */
+ struct {
+ uint32_t opcode : 7;
+ uint32_t rd : 5;
+ uint32_t funct3 : 3;
+ uint32_t rs1 : 5;
+ uint32_t imm11_0 : 12;
+ } I; /* filled in by Itype(); one contiguous 12-bit immediate */
+#if __WORDSIZE == 64
+ struct {
+ uint32_t opcode : 7;
+ uint32_t rd : 5;
+ uint32_t funct3 : 3;
+ uint32_t rs1 : 5;
+ uint32_t shamt : 6;
+ uint32_t imm6_0 : 6;
+ } IS; /* filled in by IStype(); 6-bit shift amount plus 6 upper bits */
+#endif
+ struct {
+ uint32_t opcode : 7;
+ uint32_t imm4_0 : 5;
+ uint32_t funct3 : 3;
+ uint32_t rs1 : 5;
+ uint32_t rs2 : 5;
+ uint32_t imm11_5 : 7;
+ } S; /* filled in by Stype(); 12-bit immediate split into 5 + 7 bits */
+ struct {
+ uint32_t opcode : 7;
+ uint32_t imm11 : 1;
+ uint32_t imm4_1 : 4;
+ uint32_t funct3 : 3;
+ uint32_t rs1 : 5;
+ uint32_t rs2 : 5;
+ uint32_t imm10_5 : 6;
+ uint32_t imm12 : 1;
+ } B; /* filled in by Btype(); offset bits 12..1 scattered, bit 0 not stored */
+ struct {
+ uint32_t opcode : 7;
+ uint32_t rd : 5;
+ uint32_t imm31_12 : 20;
+ } U; /* filled in by Utype(); 20-bit immediate */
+ struct {
+ uint32_t opcode : 7;
+ uint32_t rd : 5;
+ uint32_t imm19_12 : 8;
+ uint32_t imm11 : 1;
+ uint32_t imm10_1 : 10;
+ uint32_t imm20 : 1;
+ } J; /* filled in by Jtype(); offset bits 20..1 scattered, bit 0 not stored */
+ uint32_t w; /* the assembled instruction word returned by every encoder */
+} instr_t;
+
+
+// TODO: Compressed instruction support
+
+/*
+ * Assemble an instruction word using the R layout of instr_t.
+ *
+ * op   - major opcode, 7 bits
+ * rd   - destination register number, 5 bits
+ * fct  - funct3 selector, 3 bits
+ * rs1  - first source register number, 5 bits
+ * rs2  - second source register number, 5 bits
+ * fct2 - funct7 selector, 7 bits
+ *
+ * Every argument is asserted to fit its field.  Returns the 32-bit word.
+ */
+static uint32_t
+Rtype(int32_t op, int32_t rd, int32_t fct, int32_t rs1, int32_t rs2,
+      int32_t fct2)
+{
+  instr_t insn;
+
+  assert(!(op & ~0x7f));
+  assert(!(rd & ~0x1f));
+  assert(!(fct & ~0x07));
+  assert(!(rs1 & ~0x1f));
+  assert(!(rs2 & ~0x1f));
+  assert(!(fct2 & ~0x7f));
+
+  /* The six fields cover all 32 bits, so insn.w is fully defined. */
+  insn.R.funct7 = fct2;
+  insn.R.rs2 = rs2;
+  insn.R.rs1 = rs1;
+  insn.R.funct3 = fct;
+  insn.R.rd = rd;
+  insn.R.opcode = op;
+
+  return insn.w;
+}
+
+/*
+ * Assemble an instruction word using the R4 layout of instr_t (the R
+ * layout with funct7 split into rl, aq and funct5).
+ *
+ * op, rd, fct, rs1, rs2 - as for Rtype()
+ * aq, rl                - single-bit flags, each asserted to be 0 or 1
+ * fct5                  - 5-bit function selector
+ *
+ * Returns the 32-bit word.
+ */
+static uint32_t
+R4type(int32_t op, int32_t rd, int32_t fct, int32_t rs1, int32_t rs2,
+       int32_t aq, int32_t rl, int32_t fct5)
+{
+  instr_t insn;
+
+  assert(!(op & ~0x7f));
+  assert(!(rd & ~0x1f));
+  assert(!(fct & ~0x07));
+  assert(!(rs1 & ~0x1f));
+  assert(!(rs2 & ~0x1f));
+  assert(!(fct5 & ~0x1f));
+  assert(!(aq & ~0x01));
+  assert(!(rl & ~0x01));
+
+  /* All eight fields together cover the whole word. */
+  insn.R4.funct5 = fct5;
+  insn.R4.aq = aq;
+  insn.R4.rl = rl;
+  insn.R4.rs2 = rs2;
+  insn.R4.rs1 = rs1;
+  insn.R4.funct3 = fct;
+  insn.R4.rd = rd;
+  insn.R4.opcode = op;
+
+  return insn.w;
+}
+
+/*
+ * Assemble an instruction word using the I layout of instr_t.
+ *
+ * op  - major opcode, 7 bits
+ * rd  - destination register number, 5 bits
+ * fct - funct3 selector, 3 bits
+ * rs1 - source register number, 5 bits
+ * imm - immediate, asserted to satisfy simm12_p (-2048..2047); the
+ *       bitfield store keeps its low 12 bits
+ *
+ * Returns the 32-bit word.
+ */
+static uint32_t
+Itype(int32_t op, int32_t rd, int32_t fct, int32_t rs1, int32_t imm)
+{
+  instr_t insn;
+
+  assert(!(op & ~0x7f));
+  assert(!(rd & ~0x1f));
+  assert(!(fct & ~0x07));
+  assert(!(rs1 & ~0x1f));
+  assert(simm12_p(imm));
+
+  insn.I.imm11_0 = imm;
+  insn.I.rs1 = rs1;
+  insn.I.funct3 = fct;
+  insn.I.rd = rd;
+  insn.I.opcode = op;
+
+  return insn.w;
+}
+
+#if __WORDSIZE == 64
+/*
+ * Assemble an instruction word using the IS layout of instr_t, which is
+ * only declared when __WORDSIZE is 64.
+ *
+ * op, rd, fct, rs1 - as for Itype()
+ * sh               - shift amount, asserted to fit in 6 bits (0..63)
+ * imm              - value for the 6 bits above the shift amount,
+ *                    asserted to satisfy simm6_p
+ *
+ * Returns the 32-bit word.
+ */
+static uint32_t
+IStype(int32_t op, int32_t rd, int32_t fct, int32_t rs1, int32_t sh,
+       int32_t imm)
+{
+  instr_t insn;
+
+  assert(!(op & ~0x7f));
+  assert(!(rd & ~0x1f));
+  assert(!(fct & ~0x07));
+  assert(!(rs1 & ~0x1f));
+  assert(!(sh & ~0x3f));
+  assert(simm6_p(imm));
+
+  insn.IS.imm6_0 = imm;
+  insn.IS.shamt = sh;
+  insn.IS.rs1 = rs1;
+  insn.IS.funct3 = fct;
+  insn.IS.rd = rd;
+  insn.IS.opcode = op;
+
+  return insn.w;
+}
+#endif
+
+/*
+ * Assemble an instruction word using the S layout of instr_t.
+ *
+ * op  - major opcode, 7 bits
+ * fct - funct3 selector, 3 bits
+ * rs1 - first register number, 5 bits
+ * rs2 - second register number, 5 bits
+ * imm - immediate, asserted to satisfy simm12_p; stored as two pieces
+ *
+ * Returns the 32-bit word.
+ */
+static uint32_t
+Stype(int32_t op, int32_t fct, int32_t rs1, int32_t rs2, int32_t imm)
+{
+  instr_t insn;
+
+  assert(!(op & ~0x7f));
+  assert(!(fct & ~0x07));
+  assert(!(rs1 & ~0x1f));
+  assert(!(rs2 & ~0x1f));
+  assert(simm12_p(imm));
+
+  /* Split the immediate: bits 4..0 sit low in the word, bits 11..5 high. */
+  const int32_t low5 = imm & 0x1f;
+  const int32_t high7 = (imm >> 5) & 0x7f;
+
+  insn.S.opcode = op;
+  insn.S.funct3 = fct;
+  insn.S.rs1 = rs1;
+  insn.S.rs2 = rs2;
+  insn.S.imm4_0 = low5;
+  insn.S.imm11_5 = high7;
+
+  return insn.w;
+}
+
+/*
+ * Assemble an instruction word using the B layout of instr_t.
+ *
+ * op  - major opcode, 7 bits
+ * fct - funct3 selector, 3 bits
+ * rs1 - first register number, 5 bits
+ * rs2 - second register number, 5 bits
+ * imm - byte offset; must be even and lie in -4096..4094
+ *
+ * The B layout stores offset bits 12..1 (bit 0 is implied zero), i.e. a
+ * 13-bit signed value.  The previous check used simm12_p(), which rejected
+ * the valid offsets whose magnitude needs bit 12; the range below matches
+ * what the fields can hold, in the same way Jtype() checks its 21-bit
+ * range.
+ *
+ * Returns the 32-bit word.
+ */
+static uint32_t
+Btype(int32_t op, int32_t fct, int32_t rs1, int32_t rs2, int32_t imm)
+{
+  instr_t i;
+  assert(!(op & ~0x7f));
+  assert(!(fct & ~0x07));
+  assert(!(rs1 & ~0x1f));
+  assert(!(rs2 & ~0x1f));
+  assert(!(imm & 1) && imm <= 4094 && imm >= -4096);
+  i.B.opcode = op;
+  i.B.imm11 = (imm >> 11) & 0x1;
+  i.B.imm4_1 = (imm >> 1) & 0xf;
+  i.B.funct3 = fct;
+  i.B.rs1 = rs1;
+  i.B.rs2 = rs2;
+  i.B.imm10_5 = (imm >> 5) & 0x3f;
+  i.B.imm12 = (imm >> 12) & 0x1;
+  return i.w;
+}
+
+/*
+ * Assemble an instruction word using the U layout of instr_t.
+ *
+ * op  - major opcode, 7 bits
+ * rd  - destination register number, 5 bits
+ * imm - value for the 20-bit immediate field, asserted to satisfy
+ *       simm20_p; the bitfield store keeps its low 20 bits
+ *
+ * Returns the 32-bit word.
+ */
+static uint32_t
+Utype(int32_t op, int32_t rd, int32_t imm)
+{
+  instr_t insn;
+
+  assert(!(op & ~0x7f));
+  assert(!(rd & ~0x1f));
+  assert(simm20_p(imm));
+
+  insn.U.imm31_12 = imm;
+  insn.U.rd = rd;
+  insn.U.opcode = op;
+
+  return insn.w;
+}
+
+/*
+ * Assemble an instruction word using the J layout of instr_t.
+ *
+ * op  - major opcode, 7 bits
+ * rd  - destination register number, 5 bits
+ * imm - byte offset; asserted to be even and within -1048576..1048575
+ *
+ * Offset bits 20..1 are scattered over four fields; bit 0 is not stored.
+ * Returns the 32-bit word.
+ */
+static uint32_t
+Jtype(int32_t op, int32_t rd, int32_t imm)
+{
+  instr_t insn;
+
+  assert(!(op & ~0x7f));
+  assert(!(rd & ~0x1f));
+  assert(!(imm & 1) && imm <= 1048575 && imm >= -1048576);
+
+  const int32_t bits10_1 = (imm >> 1) & 0x3ff;
+  const int32_t bit11 = (imm >> 11) & 0x1;
+  const int32_t bits19_12 = (imm >> 12) & 0xff;
+  const int32_t bit20 = (imm >> 20) & 0x1;
+
+  insn.J.opcode = op;
+  insn.J.rd = rd;
+  insn.J.imm19_12 = bits19_12;
+  insn.J.imm11 = bit11;
+  insn.J.imm10_1 = bits10_1;
+  insn.J.imm20 = bit20;
+
+  return insn.w;
+}
+
+/*
+ * RV32I Base Instruction Set
+ *
+ * Each macro expands to a call to one of the *type() encoders above and
+ * therefore evaluates to a 32-bit instruction word; nothing is emitted
+ * here.  The first encoder argument is the major opcode in decimal,
+ * register arguments are plain register numbers.
+ */
+#define _LUI(rd, imm) Utype(55, rd, imm)
+#define _AUIPC(rd, imm) Utype(23, rd, imm)
+#define _JAL(rd, imm) Jtype(111, rd, imm)
+#define _JALR(rd, rs1, imm) Itype(103, rd, 0, rs1, imm)
+#define _BEQ(rs1, rs2, imm) Btype(99, 0, rs1, rs2, imm)
+#define _BNE(rs1, rs2, imm) Btype(99, 1, rs1, rs2, imm)
+#define _BLT(rs1, rs2, imm) Btype(99, 4, rs1, rs2, imm)
+#define _BGE(rs1, rs2, imm) Btype(99, 5, rs1, rs2, imm)
+#define _BLTU(rs1, rs2, imm) Btype(99, 6, rs1, rs2, imm)
+#define _BGEU(rs1, rs2, imm) Btype(99, 7, rs1, rs2, imm)
+#define _LB(rd, rs1, imm) Itype(3, rd, 0, rs1, imm)
+#define _LH(rd, rs1, imm) Itype(3, rd, 1, rs1, imm)
+#define _LW(rd, rs1, imm) Itype(3, rd, 2, rs1, imm)
+#define _LBU(rd, rs1, imm) Itype(3, rd, 4, rs1, imm)
+#define _LHU(rd, rs1, imm) Itype(3, rd, 5, rs1, imm)
+#define _SB(rs1, rs2, imm) Stype(35, 0, rs1, rs2, imm)
+#define _SH(rs1, rs2, imm) Stype(35, 1, rs1, rs2, imm)
+#define _SW(rs1, rs2, imm) Stype(35, 2, rs1, rs2, imm)
+#define _ADDI(rd, rs1, imm) Itype(19, rd, 0, rs1, imm)
+#define _SLTI(rd, rs1, imm) Itype(19, rd, 2, rs1, imm)
+#define _SLTIU(rd, rs1, imm) Itype(19, rd, 3, rs1, imm)
+#define _XORI(rd, rs1, imm) Itype(19, rd, 4, rs1, imm)
+#define _ORI(rd, rs1, imm) Itype(19, rd, 6, rs1, imm)
+#define _ANDI(rd, rs1, imm) Itype(19, rd, 7, rs1, imm)
+#if __WORDSIZE == 32 /* 32-bit shifts-by-immediate: the amount goes in Rtype's rs2 slot (5 bits) */
+# define _SLLI(rd, rs1, imm) Rtype(19, rd, 1, rs1, imm, 0)
+# define _SRLI(rd, rs1, imm) Rtype(19, rd, 5, rs1, imm, 0)
+# define _SRAI(rd, rs1, imm) Rtype(19, rd, 5, rs1, imm, 32)
+#endif
+#define _ADD(rd, rs1, rs2) Rtype(51, rd, 0, rs1, rs2, 0)
+#define _SUB(rd, rs1, rs2) Rtype(51, rd, 0, rs1, rs2, 32)
+#define _SLL(rd, rs1, rs2) Rtype(51, rd, 1, rs1, rs2, 0)
+#define _SLT(rd, rs1, rs2) Rtype(51, rd, 2, rs1, rs2, 0)
+#define _SLTU(rd, rs1, rs2) Rtype(51, rd, 3, rs1, rs2, 0)
+#define _XOR(rd, rs1, rs2) Rtype(51, rd, 4, rs1, rs2, 0)
+#define _SRL(rd, rs1, rs2) Rtype(51, rd, 5, rs1, rs2, 0)
+#define _SRA(rd, rs1, rs2) Rtype(51, rd, 5, rs1, rs2, 32)
+#define _OR(rd, rs1, rs2) Rtype(51, rd, 6, rs1, rs2, 0)
+#define _AND(rd, rs1, rs2) Rtype(51, rd, 7, rs1, rs2, 0)
+#define _FENCE(imm) Itype( 15, 0, 0, 0, imm)
+#define _FENCE_I(imm) Itype( 15, 0, 1, 0, imm)
+#define _ECALL() Itype(115, 0, 0, 0, 0)
+#define _EBREAK() Itype(115, 0, 0, 0, 1)
+#define _CSRRW(rd, rs1, csr) Itype(115, rd, 1, rs1, csr) /* NOTE(review): Itype() asserts simm12_p(csr), so csr values >= 0x800 would trip it */
+#define _CSRRS(rd, rs1, csr) Itype(115, rd, 2, rs1, csr)
+#define _CSRRC(rd, rs1, csr) Itype(115, rd, 3, rs1, csr)
+#define _CSRRWI(rd, zimm, csr) Itype(115, rd, 5, zimm, csr)
+#define _CSRRSI(rd, zimm, csr) Itype(115, rd, 6, zimm, csr)
+#define _CSRRCI(rd, zimm, csr) Itype(115, rd, 7, zimm, csr)
+/*
+ * RV64I Base Instruction Set (in addition to RV32I)
+ *
+ * Like the RV32I macros, each of these evaluates to a 32-bit instruction
+ * word built by one of the *type() encoders.
+ *
+ * _SRLIW and _SRAIW use funct3 = 5 (0b101), the same right-shift selector
+ * as _SRLI/_SRAI/_SRLW/_SRAW; they previously used 3, which is not a shift
+ * encoding.  The second source operand of the register-register *W forms
+ * is named rs2 because it lands in Rtype()'s rs2 field.
+ */
+#define _LWU(rd, rs1, imm) Itype(3, rd, 6, rs1, imm)
+#define _LD(rd, rs1, imm) Itype(3, rd, 3, rs1, imm)
+#define _SD(rs1, rs2, imm) Stype(35, 3, rs1, rs2, imm)
+#if __WORDSIZE == 64
+/* 64-bit shifts-by-immediate carry a 6-bit amount, hence IStype(). */
+# define _SLLI(rd, rs1, sh) IStype(19, rd, 1, rs1, sh, 0)
+# define _SRLI(rd, rs1, sh) IStype(19, rd, 5, rs1, sh, 0)
+# define _SRAI(rd, rs1, sh) IStype(19, rd, 5, rs1, sh, 16)
+#endif
+#define _ADDIW(rd, rs1, imm) Itype(27, rd, 0, rs1, imm)
+#define _SLLIW(rd, rs1, imm) Rtype(27, rd, 1, rs1, imm, 0)
+#define _SRLIW(rd, rs1, imm) Rtype(27, rd, 5, rs1, imm, 0)
+#define _SRAIW(rd, rs1, imm) Rtype(27, rd, 5, rs1, imm, 32)
+#define _ADDW(rd, rs1, rs2) Rtype(59, rd, 0, rs1, rs2, 0)
+#define _SUBW(rd, rs1, rs2) Rtype(59, rd, 0, rs1, rs2, 32)
+#define _SLLW(rd, rs1, rs2) Rtype(59, rd, 1, rs1, rs2, 0)
+#define _SRLW(rd, rs1, rs2) Rtype(59, rd, 5, rs1, rs2, 0)
+#define _SRAW(rd, rs1, rs2) Rtype(59, rd, 5, rs1, rs2, 32)
+/*
+ * RV32M Standard Extension
+ *
+ * All of these go through Rtype() with funct7 = 1; funct3 selects the
+ * operation.  Opcode 51 is shared with the base register-register ALU
+ * macros, opcode 59 with the *W forms.
+ */
+#define _MUL(rd, rs1, rs2) Rtype(51, rd, 0, rs1, rs2, 1)
+#define _MULH(rd, rs1, rs2) Rtype(51, rd, 1, rs1, rs2, 1)
+#define _MULHSU(rd, rs1, rs2) Rtype(51, rd, 2, rs1, rs2, 1)
+#define _MULHU(rd, rs1, rs2) Rtype(51, rd, 3, rs1, rs2, 1)
+#define _DIV(rd, rs1, rs2) Rtype(51, rd, 4, rs1, rs2, 1)
+#define _DIVU(rd, rs1, rs2) Rtype(51, rd, 5, rs1, rs2, 1)
+#define _REM(rd, rs1, rs2) Rtype(51, rd, 6, rs1, rs2, 1)
+#define _REMU(rd, rs1, rs2) Rtype(51, rd, 7, rs1, rs2, 1)
+/*
+ * RV64M Standard Extension (in addition to RV32M)
+ */
+#define _MULW(rd, rs1, rs2) Rtype(59, rd, 0, rs1, rs2, 1)
+#define _DIVW(rd, rs1, rs2) Rtype(59, rd, 4, rs1, rs2, 1)
+#define _DIVUW(rd, rs1, rs2) Rtype(59, rd, 5, rs1, rs2, 1)
+#define _REMW(rd, rs1, rs2) Rtype(59, rd, 6, rs1, rs2, 1)
+#define _REMUW(rd, rs1, rs2) Rtype(59, rd, 7, rs1, rs2, 1)
+/*
+ * RV32A Standard Extension
+ *
+ * Every macro evaluates to a 32-bit instruction word built by R4type()
+ * with opcode 47; funct3 is 2 for the word (_W) forms and 3 for the
+ * doubleword (_D) forms, and the last argument is the funct5 selector.
+ *
+ * The two trailing macro parameters are forwarded, in order, to R4type()'s
+ * `aq` and `rl` parameters, so they are named (aq, rl) here.  They used to
+ * be spelled (rl, aq) while still being forwarded in that same position,
+ * i.e. the names were swapped relative to the bits they set; the expansion
+ * itself is unchanged.
+ * NOTE(review): callers that pass different values for the two flags
+ * should be checked against this order.
+ */
+#define _LR_W(rd, rs1, aq, rl) \
+  R4type(47, rd, 2, rs1, 0, aq, rl, 2)
+#define _SC_W(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 2, rs1, rs2, aq, rl, 3)
+#define _AMOSWAP_W(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 2, rs1, rs2, aq, rl, 1)
+#define _AMOADD_W(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 2, rs1, rs2, aq, rl, 0)
+#define _AMOXOR_W(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 2, rs1, rs2, aq, rl, 4)
+#define _AMOAND_W(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 2, rs1, rs2, aq, rl, 12)
+#define _AMOOR_W(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 2, rs1, rs2, aq, rl, 8)
+#define _AMOMIN_W(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 2, rs1, rs2, aq, rl, 16)
+#define _AMOMAX_W(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 2, rs1, rs2, aq, rl, 20)
+#define _AMOMINU_W(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 2, rs1, rs2, aq, rl, 24)
+#define _AMOMAXU_W(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 2, rs1, rs2, aq, rl, 28)
+/*
+ * RV64A Standard Extension (in addition to RV32A)
+ */
+#define _LR_D(rd, rs1, aq, rl) \
+  R4type(47, rd, 3, rs1, 0, aq, rl, 2)
+#define _SC_D(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 3, rs1, rs2, aq, rl, 3)
+#define _AMOSWAP_D(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 3, rs1, rs2, aq, rl, 1)
+#define _AMOADD_D(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 3, rs1, rs2, aq, rl, 0)
+#define _AMOXOR_D(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 3, rs1, rs2, aq, rl, 4)
+#define _AMOAND_D(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 3, rs1, rs2, aq, rl, 12)
+#define _AMOOR_D(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 3, rs1, rs2, aq, rl, 8)
+#define _AMOMIN_D(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 3, rs1, rs2, aq, rl, 16)
+#define _AMOMAX_D(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 3, rs1, rs2, aq, rl, 20)
+#define _AMOMINU_D(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 3, rs1, rs2, aq, rl, 24)
+#define _AMOMAXU_D(rd, rs1, rs2, aq, rl) \
+  R4type(47, rd, 3, rs1, rs2, aq, rl, 28)
+/*
+ * Pseudo Instructions
+ *
+ * Convenience spellings that expand to one real instruction macro each:
+ * _NOP adds 0 to _ZERO into _ZERO, _MV adds 0, _NOT xors with -1, _NEG and
+ * _NEGW subtract from _ZERO, _SEXT_W is _ADDIW with 0, and _RET is a _JALR
+ * through _RA that writes its link value to _ZERO.
+ */
+#define _NOP() _ADDI((jit_gpr_regno(_ZERO)),\
+ (jit_gpr_regno(_ZERO)), 0)
+#define _MV(r0, r1) _ADDI(r0, r1, 0)
+#define _NOT(r0, r1) _XORI(r0, r1, -1)
+#define _NEG(r0, r1) _SUB(r0, (jit_gpr_regno(_ZERO)), r1)
+#define _NEGW(r0, r1) _SUBW(r0, (jit_gpr_regno(_ZERO)), r1)
+#define _SEXT_W(r0, r1) _ADDIW(r0, r1, 0)
+#define _RET() _JALR((jit_gpr_regno(_ZERO)),\
+ (jit_gpr_regno(_RA)), 0)
+
+
+
+// Shorthand used throughout this file for emit_u32_with_pool(jit, inst)
+#define em_wp(jit, inst) emit_u32_with_pool(jit, inst)
+
+/*
+ * JIT INSTRUCTIONS
+ */
+
+/* Binary ALU operations
+ *
+ * Forward declarations so the definitions below can call each other in any
+ * order.  Names ending in `r` take register numbers for every operand;
+ * names ending in `i` take an immediate as the last operand.  The `_u`
+ * variants of div/rem are implemented with the unsigned DIVU/REMU
+ * instructions.
+ * NOTE(review): rshi and rshi_u declare their immediate as int32_t while
+ * lshi uses jit_word_t.
+ */
+static void addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+static void subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+static void muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+
+static void divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+static void remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+
+static void andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void rshi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0);
+static void rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0);
+
+
+// Four operand ALU operations
+//
+// Forward declarations of the multiply and divide forms that take two
+// extra operands besides r0 and r1: `r` variants take two more register
+// numbers, `i` variants a register number and an immediate.  The wrapped
+// continuation lines carry their patch marker again so each prototype is a
+// well-formed added line.
+static void qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+                  int32_t r3);
+static void qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+                    int32_t r3);
+static void qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+                  jit_word_t i0);
+static void qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+                    jit_word_t i0);
+
+static void qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+                  int32_t r3);
+static void qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+                    int32_t r3);
+static void qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+                  jit_word_t i0);
+static void qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+                    jit_word_t i0);
+
+
+// Unary ALU operations (forward declarations; both operands are register numbers)
+static void negr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void comr(jit_state_t *_jit, int32_t r0, int32_t r1);
+
+
+/* Transfer operations
+ *
+ * Register/immediate moves, the helpers that return a jit_reloc_t for
+ * later patching, and the extr_* family.  extr_i and extr_ui are only
+ * declared when __WORDSIZE is 64.
+ */
+static void movr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void movi(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+
+static uint64_t patch_load_from_pool(uint64_t instrs, uint32_t off);
+static jit_reloc_t emit_load_from_pool(jit_state_t *_jit, uint64_t insts);
+static jit_reloc_t mov_addr(jit_state_t *_jit, int32_t r0);
+static jit_reloc_t movi_from_pool(jit_state_t *_jit, int32_t r0);
+
+static void extr_c(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void extr_s(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void extr_us(jit_state_t *_jit, int32_t r0, int32_t r1);
+
+# if __WORDSIZE == 64
+static void extr_i(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1);
+#endif
+
+
+/* Branch instructions
+ *
+ * Forward declarations.  Every branch emitter returns a jit_reloc_t; the
+ * `r` forms compare two registers, the `i` forms a register against an
+ * immediate.  patch_cc_jump takes an instruction word plus an offset and
+ * returns an instruction word.
+ */
+static uint32_t patch_cc_jump(uint32_t inst, int32_t offset);
+static jit_reloc_t emit_cc_jump(jit_state_t *_jit, uint32_t inst);
+
+static jit_reloc_t bltr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t blti(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bler(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t blei(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bler_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t beqr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bger(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bger_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bgtr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bner(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+
+static jit_reloc_t bmsr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bmcr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t boaddr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bosubr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+
+
+// Store operations
+//
+// Forward declarations of the four store families (str_*, sti_*, stxr_*,
+// stxi_*).  Each family has _c, _s and _i members plus an _l member that
+// is only declared when __WORDSIZE is 64.  The 64-bit-only str_* entry
+// used to be a second declaration of str_i; it now declares str_l, in
+// line with sti_l, stxr_l and stxi_l.
+static void str_c(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void str_uc(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void str_s(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void str_i(jit_state_t *_jit, int32_t r0, int32_t r1);
+#if __WORDSIZE == 64
+static void str_l(jit_state_t *_jit, int32_t r0, int32_t r1);
+#endif
+
+static void sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0);
+static void sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0);
+static void sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0);
+#if __WORDSIZE == 64
+static void sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0);
+#endif
+
+static void stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+#if __WORDSIZE == 64
+static void stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+#endif
+
+static void stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+static void stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+static void stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+#if __WORDSIZE == 64
+static void stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+#endif
+
+
+/* Load operations
+ *
+ * Forward declarations of the four load families (ldr_*, ldi_*, ldxr_*,
+ * ldxi_*).  Each family has _c, _uc, _s, _us and _i members; the _ui and
+ * _l members are only declared when __WORDSIZE is 64.
+ */
+static void ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1);
+# if __WORDSIZE == 64
+static void ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1);
+# endif
+
+static void ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+# if __WORDSIZE == 64
+static void ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+# endif
+
+static void ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+# if __WORDSIZE == 64
+static void ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+#endif
+
+static void ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+# if __WORDSIZE == 64
+static void ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void ldxi_l(jit_state_t *_jit,int32_t r0,int32_t r1,jit_word_t i0);
+#endif
+
+
+// Argument management (forward declarations; pushr/popr are left commented out, retval_ui/retval_l are 64-bit only)
+//static void pushr(jit_state_t *_jit, int32_t r0);
+//static void popr(jit_state_t *_jit, int32_t r0);
+static void ret(jit_state_t *_jit);
+static void retr(jit_state_t *_jit, int32_t r0);
+static void reti(jit_state_t *_jit, jit_word_t i0);
+static void retval_c(jit_state_t *_jit, int32_t r0);
+static void retval_uc(jit_state_t *_jit, int32_t r0);
+static void retval_s(jit_state_t *_jit, int32_t r0);
+static void retval_us(jit_state_t *_jit, int32_t r0);
+static void retval_i(jit_state_t *_jit, int32_t r0);
+# if __WORDSIZE == 64
+static void retval_ui(jit_state_t *_jit, int32_t r0);
+static void retval_l(jit_state_t *_jit, int32_t r0);
+#endif
+
+/* Jump and return
+ *
+ * Forward declarations.  patch_jump takes an instruction word plus an
+ * offset and returns an instruction word; emit_jump and jmp return a
+ * jit_reloc_t, the remaining functions return nothing.
+ */
+static uint32_t patch_jump(uint32_t inst, int32_t offset);
+static jit_reloc_t emit_jump(jit_state_t *_jit, uint32_t inst);
+
+static void callr(jit_state_t *_jit, int32_t r0);
+static void calli(jit_state_t *_jit, jit_word_t i0);
+static void jmpi_with_link(jit_state_t *_jit, jit_word_t i0);
+static void pop_link_register(jit_state_t *_jit);
+static void push_link_register(jit_state_t *_jit);
+static void jmpr(jit_state_t *_jit, int32_t r0);
+static void jmpi(jit_state_t *_jit, jit_word_t i0);
+static jit_reloc_t jmp(jit_state_t *_jit);
+
+
+// Atomic operations (forward declarations; dst/loc/val/expected/desired are all register numbers)
+static void ldr_atomic(jit_state_t *_jit, int32_t dst, int32_t loc);
+static void str_atomic(jit_state_t *_jit, int32_t loc, int32_t val);
+static void swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc,
+ int32_t val);
+static void cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc,
+ int32_t expected, int32_t desired);
+
+// Byte swapping operations (bswapr_ul is only declared when __WORDSIZE is 64)
+static void bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1);
+# if __WORDSIZE == 64
+static void
+bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1);
+#endif
+
+// Others: no-op padding, memory fence and breakpoint emitters
+static void nop(jit_state_t *_jit, int32_t im);
+static void mfence(jit_state_t *_jit);
+static void breakpoint(jit_state_t *_jit);
+
+
+
+/*
+ * Binary ALU operations
+ */
+/* Emit one ADD instruction with rd = r0, rs1 = r1, rs2 = r2. */
+static void
+addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _ADD(r0, r1, r2);
+
+  em_wp(_jit, insn);
+}
+/*
+ * Emit code for r0 = r1 + i0.
+ *
+ * When i0 satisfies simm12_p a single ADDI is emitted.  Otherwise i0 is
+ * loaded into a temporary register with movi() and added with addr(); the
+ * temporary is released before returning.
+ */
+static void
+addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    int32_t scratch_no = jit_gpr_regno(scratch);
+
+    movi(_jit, scratch_no, i0);
+    addr(_jit, r0, r1, scratch_no);
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  em_wp(_jit, _ADDI(r0, r1, i0));
+}
+
+/*
+ * Emit code for r0 = r1 + r2 and set the JIT_CARRY register with SLTU to
+ * (sum < r1), i.e. 1 when the unsigned addition wrapped.
+ *
+ * When r0 and r1 are the same register the sum is first built in a
+ * temporary so that r1 is still intact for the comparison, then moved
+ * into r0.
+ */
+static void
+addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  // TODO: Not sure if this is correct
+  int32_t carry_no = jit_gpr_regno(JIT_CARRY);
+
+  if (r0 != r1) {
+    addr(_jit, r0, r1, r2);
+    em_wp(_jit, _SLTU(carry_no, r0, r1));
+    return;
+  }
+
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  int32_t sum_no = jit_gpr_regno(scratch);
+
+  addr(_jit, sum_no, r1, r2);
+  em_wp(_jit, _SLTU(carry_no, sum_no, r1));
+  movr(_jit, r0, sum_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Immediate form of addcr(): emit code for r0 = r1 + i0 and set the
+ * JIT_CARRY register with SLTU to (sum < r1).
+ *
+ * When r0 and r1 are the same register the sum goes through a temporary so
+ * r1 is still available for the comparison.
+ */
+static void
+addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t carry_no = jit_gpr_regno(JIT_CARRY);
+
+  if (r0 != r1) {
+    addi(_jit, r0, r1, i0);
+    em_wp(_jit, _SLTU(carry_no, r0, r1));
+    return;
+  }
+
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  int32_t sum_no = jit_gpr_regno(scratch);
+
+  addi(_jit, sum_no, r1, i0);
+  em_wp(_jit, _SLTU(carry_no, sum_no, r1));
+  movr(_jit, r0, sum_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Emit code for r0 = r1 + r2 + (incoming JIT_CARRY).
+ *
+ * The incoming carry is copied to a temporary, r1 + r2 is formed with
+ * addcr(), and the saved carry is then added with a second addcr().
+ * NOTE(review): the second addcr() rewrites JIT_CARRY, so a carry produced
+ * by the first addition appears to be lost -- confirm the intended
+ * carry-out semantics.
+ */
+static void
+addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t saved = get_temp_gpr(_jit);
+  int32_t carry_in = jit_gpr_regno(saved);
+
+  movr(_jit, carry_in, jit_gpr_regno(JIT_CARRY));
+  addcr(_jit, r0, r1, r2);
+  addcr(_jit, r0, r0, carry_in);
+  unget_temp_gpr(_jit);
+}
+/*
+ * Immediate form of addxr(): emit code for r0 = r1 + i0 + (incoming
+ * JIT_CARRY).  The incoming carry is saved in a temporary, r1 + i0 is
+ * formed with addci(), and the saved carry is added with addcr().
+ * NOTE(review): as in addxr(), the last addcr() rewrites JIT_CARRY.
+ */
+static void
+addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t saved = get_temp_gpr(_jit);
+  int32_t carry_in = jit_gpr_regno(saved);
+
+  movr(_jit, carry_in, jit_gpr_regno(JIT_CARRY));
+  addci(_jit, r0, r1, i0);
+  addcr(_jit, r0, r0, carry_in);
+  unget_temp_gpr(_jit);
+}
+
+/* Emit one SUB instruction with rd = r0, rs1 = r1, rs2 = r2. */
+static void
+subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _SUB(r0, r1, r2);
+
+  em_wp(_jit, insn);
+}
+
+/*
+ * Emit code for r0 = r1 - i0 by delegating to addi() with the negated
+ * immediate.
+ * NOTE(review): negating the most negative jit_word_t overflows.
+ */
+static void
+subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t negated = -i0;
+
+  addi(_jit, r0, r1, negated);
+}
+
+/*
+ * Emit code for r0 = r1 - r2 and set the JIT_CARRY register with SLTU to
+ * (r1 < difference), i.e. 1 when the unsigned subtraction borrowed.
+ *
+ * When r0 and r1 are the same register the difference is first built in a
+ * temporary so that r1 is still intact for the comparison.
+ *
+ * The branch for distinct registers used to call addr(), which produced
+ * r1 + r2 instead of r1 - r2; it now calls subr() like the aliased branch.
+ */
+static void
+subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t t0;
+  if (r0 == r1) {
+    t0 = get_temp_gpr(_jit);
+    subr(_jit, jit_gpr_regno(t0), r1, r2);
+    em_wp(_jit, _SLTU(jit_gpr_regno(JIT_CARRY), r1, jit_gpr_regno(t0)));
+    movr(_jit, r0, jit_gpr_regno(t0));
+    unget_temp_gpr(_jit);
+  }
+  else {
+    subr(_jit, r0, r1, r2);
+    em_wp(_jit, _SLTU(jit_gpr_regno(JIT_CARRY), r1, r0));
+  }
+}
+
+/*
+ * Immediate form of subcr(): emit code for r0 = r1 - i0 and set the
+ * JIT_CARRY register with SLTU to (r1 < difference).
+ *
+ * When r0 and r1 are the same register the difference goes through a
+ * temporary so r1 is still available for the comparison.
+ *
+ * The branch for distinct registers used to call addi(), which produced
+ * r1 + i0 instead of r1 - i0; it now calls subi() like the aliased branch.
+ */
+static void
+subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t t0;
+  if (r0 == r1) {
+    t0 = get_temp_gpr(_jit);
+    subi(_jit, jit_gpr_regno(t0), r1, i0);
+    em_wp(_jit, _SLTU(jit_gpr_regno(JIT_CARRY), r1, jit_gpr_regno(t0)));
+    movr(_jit, r0, jit_gpr_regno(t0));
+    unget_temp_gpr(_jit);
+  }
+  else {
+    subi(_jit, r0, r1, i0);
+    em_wp(_jit, _SLTU(jit_gpr_regno(JIT_CARRY), r1, r0));
+  }
+}
+
+/*
+ * Emit code for r0 = r1 - r2 - (incoming JIT_CARRY).
+ *
+ * The incoming carry is copied to a temporary, r1 - r2 is formed with
+ * subcr(), and the saved carry is then subtracted with a second subcr().
+ * NOTE(review): the second subcr() rewrites JIT_CARRY, so a borrow
+ * produced by the first step appears to be lost -- confirm.
+ */
+static void
+subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t saved = get_temp_gpr(_jit);
+  int32_t carry_in = jit_gpr_regno(saved);
+
+  movr(_jit, carry_in, jit_gpr_regno(JIT_CARRY));
+  subcr(_jit, r0, r1, r2);
+  subcr(_jit, r0, r0, carry_in);
+  unget_temp_gpr(_jit);
+}
+// subxi: immediate form of subtract with borrow-in.  The current JIT_CARRY
+// value is saved in a scratch register, r1 - i0 is computed with subci, and
+// the saved value is subtracted with subcr.
+static void
+subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t borrow_in = jit_gpr_regno(scratch);
+  movr(_jit, borrow_in, jit_gpr_regno(JIT_CARRY));
+  subci(_jit, r0, r1, i0);
+  subcr(_jit, r0, r0, borrow_in);
+  unget_temp_gpr(_jit);
+}
+
+// muli: multiply r1 by an immediate; i0 is loaded into a scratch register
+// and the register form mulr does the work.
+static void
+muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  mulr(_jit, r0, r1, imm);
+  unget_temp_gpr(_jit);
+}
+// mulr: emit a single MUL instruction with operands (r0, r1, r2).
+static void
+mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _MUL(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+// divr: emit a single DIV (signed divide) instruction with operands
+// (r0, r1, r2).
+static void
+divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _DIV(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+// divi: signed divide of r1 by an immediate; i0 is loaded into a scratch
+// register and divr is used.
+static void
+divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  divr(_jit, r0, r1, imm);
+  unget_temp_gpr(_jit);
+}
+
+// divr_u: emit a single DIVU (unsigned divide) instruction with operands
+// (r0, r1, r2).
+static void
+divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _DIVU(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+// divi_u: unsigned divide of r1 by an immediate; i0 is loaded into a
+// scratch register and divr_u is used.
+static void
+divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  divr_u(_jit, r0, r1, imm);
+  unget_temp_gpr(_jit);
+}
+
+// remi: signed remainder of r1 by an immediate; i0 is loaded into a scratch
+// register and remr is used.
+static void
+remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  remr(_jit, r0, r1, imm);
+  unget_temp_gpr(_jit);
+}
+// remr: emit a single REM (signed remainder) instruction with operands
+// (r0, r1, r2).
+static void
+remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _REM(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+// remi_u: unsigned remainder of r1 by an immediate; i0 is loaded into a
+// scratch register and remr_u is used.
+static void
+remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  remr_u(_jit, r0, r1, imm);
+  unget_temp_gpr(_jit);
+}
+// remr_u: emit a single REMU (unsigned remainder) instruction with operands
+// (r0, r1, r2).
+static void
+remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _REMU(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+// andr: emit a single AND instruction with operands (r0, r1, r2).
+static void
+andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _AND(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+// andi: bitwise AND of r1 with an immediate.  ANDI is used when i0 passes
+// simm12_p; otherwise i0 is loaded into a scratch register and AND is used.
+static void
+andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t imm = jit_gpr_regno(scratch);
+    movi(_jit, imm, i0);
+    em_wp(_jit, _AND(r0, r1, imm));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _ANDI(r0, r1, i0));
+}
+
+// orr: emit a single OR instruction with operands (r0, r1, r2).
+static void
+orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _OR(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+// ori: bitwise OR of r1 with an immediate.  ORI is used when i0 passes
+// simm12_p; otherwise i0 is loaded into a scratch register and orr is used.
+static void
+ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t imm = jit_gpr_regno(scratch);
+    movi(_jit, imm, i0);
+    orr(_jit, r0, r1, imm);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _ORI(r0, r1, i0));
+}
+
+// xorr: emit a single XOR instruction with operands (r0, r1, r2).
+static void
+xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _XOR(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+// xori: bitwise XOR of r1 with an immediate.  XORI is used when i0 passes
+// simm12_p; otherwise i0 is loaded into a scratch register and xorr is used.
+static void
+xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t imm = jit_gpr_regno(scratch);
+    movi(_jit, imm, i0);
+    xorr(_jit, r0, r1, imm);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _XORI(r0, r1, i0));
+}
+
+// lshr: left shift by register; emits a single SLL with operands
+// (r0, r1, r2).
+static void
+lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _SLL(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+// lshi: left shift of r1 by an immediate.  SLLI is used when i0 passes
+// simm12_p; otherwise i0 is loaded into a scratch register and lshr is used.
+// NOTE(review): simm12_p accepts values that are not valid shift amounts;
+// confirm callers only pass in-range shifts.
+static void
+lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t amount = jit_gpr_regno(scratch);
+    movi(_jit, amount, i0);
+    lshr(_jit, r0, r1, amount);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _SLLI(r0, r1, i0));
+}
+
+// rshr: arithmetic right shift by register; emits a single SRA with
+// operands (r0, r1, r2).
+static void
+rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _SRA(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+// rshi: arithmetic right shift of r1 by an immediate.  SRAI is used when i0
+// passes simm12_p; otherwise i0 is loaded into a scratch register and rshr
+// is used.
+static void
+rshi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t amount = jit_gpr_regno(scratch);
+    movi(_jit, amount, i0);
+    rshr(_jit, r0, r1, amount);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _SRAI(r0, r1, i0));
+}
+
+// rshr_u: logical right shift by register; emits a single SRL with operands
+// (r0, r1, r2).
+static void
+rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _SRL(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+// rshi_u: logical right shift of r1 by an immediate.  SRLI is used when i0
+// passes simm12_p; otherwise i0 is loaded into a scratch register and
+// rshr_u is used.
+static void
+rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t amount = jit_gpr_regno(scratch);
+    movi(_jit, amount, i0);
+    rshr_u(_jit, r0, r1, amount);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _SRLI(r0, r1, i0));
+}
+
+
+/*
+ * Four operand ALU operations
+ */
+// iqmulr: full-width multiply of r2 by r3.  MUL writes one half of the
+// product to r0 and MULH (sign != 0) or MULHU (sign == 0) writes the other
+// half to r1.
+static void
+iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3,
+       jit_bool_t sign)
+{
+  if (r0 == r2 || r0 == r3) {
+    // r0 aliases an operand: compute MUL into a scratch register so the
+    // operand is still intact when MULH/MULHU reads it.
+    jit_gpr_t t0 = get_temp_gpr(_jit);
+    em_wp(_jit, _MUL(jit_gpr_regno(t0), r2, r3));
+    if (sign)
+      em_wp(_jit, _MULH(r1, r2, r3));
+    else
+      em_wp(_jit, _MULHU(r1, r2, r3));
+    movr(_jit, r0, jit_gpr_regno(t0));
+    unget_temp_gpr(_jit);
+    // Fix: the original fell through here and emitted the sequence below a
+    // second time, reading an operand that r0 had already overwritten.
+    return;
+  }
+  em_wp(_jit, _MUL(r0, r2, r3));
+  if (sign)
+    em_wp(_jit, _MULH(r1, r2, r3));
+  else
+    em_wp(_jit, _MULHU(r1, r2, r3));
+}
+
+// qmulr: signed full-width multiply; forwards to iqmulr with sign = 1.
+static void
+qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  const jit_bool_t is_signed = 1;
+  iqmulr(_jit, r0, r1, r2, r3, is_signed);
+}
+
+// qmulr_u: unsigned full-width multiply; forwards to iqmulr with sign = 0.
+static void
+qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  const jit_bool_t is_signed = 0;
+  iqmulr(_jit, r0, r1, r2, r3, is_signed);
+}
+
+// qmuli: signed full-width multiply of r2 by an immediate; i0 is loaded
+// into a scratch register and passed to iqmulr as the second operand.
+static void
+qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  iqmulr(_jit, r0, r1, r2, imm, 1);
+  unget_temp_gpr(_jit);
+}
+
+// qmuli_u: unsigned full-width multiply of r2 by an immediate; i0 is loaded
+// into a scratch register and passed to iqmulr as the second operand.
+static void
+qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  iqmulr(_jit, r0, r1, r2, imm, 0);
+  unget_temp_gpr(_jit);
+}
+
+// iqdivr: combined divide/remainder of r2 by r3.  DIV/REM are used when
+// sign != 0 and DIVU/REMU otherwise; the DIV* result goes to r0 and the
+// REM* result goes to r1.
+static void
+iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3,
+       jit_bool_t sign)
+{
+  if (r0 == r2 || r0 == r3) {
+    // r0 aliases an operand: compute the quotient into a scratch register
+    // so the operand is still intact when REM/REMU reads it.
+    jit_gpr_t t0 = get_temp_gpr(_jit);
+    if (sign) {
+      em_wp(_jit, _DIV(jit_gpr_regno(t0), r2, r3));
+      em_wp(_jit, _REM(r1, r2, r3));
+    } else {
+      em_wp(_jit, _DIVU(jit_gpr_regno(t0), r2, r3));
+      em_wp(_jit, _REMU(r1, r2, r3));
+    }
+    movr(_jit, r0, jit_gpr_regno(t0));
+    unget_temp_gpr(_jit);
+    // Fix: the original fell through here and emitted the sequence below a
+    // second time, reading an operand that r0 had already overwritten.
+    return;
+  }
+  if (sign) {
+    em_wp(_jit, _DIV(r0, r2, r3));
+    em_wp(_jit, _REM(r1, r2, r3));
+  } else {
+    em_wp(_jit, _DIVU(r0, r2, r3));
+    em_wp(_jit, _REMU(r1, r2, r3));
+  }
+}
+
+// qdivr: signed divide/remainder; forwards to iqdivr with sign = 1.
+static void
+qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  const jit_bool_t is_signed = 1;
+  iqdivr(_jit, r0, r1, r2, r3, is_signed);
+}
+
+// qdivr_u: unsigned divide/remainder; forwards to iqdivr with sign = 0.
+static void
+qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  const jit_bool_t is_signed = 0;
+  iqdivr(_jit, r0, r1, r2, r3, is_signed);
+}
+
+// qdivi: signed divide/remainder of r2 by an immediate; i0 is loaded into a
+// scratch register and passed to iqdivr as the divisor.
+static void
+qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  iqdivr(_jit, r0, r1, r2, imm, 1);
+  unget_temp_gpr(_jit);
+}
+
+// qdivi_u: unsigned divide/remainder of r2 by an immediate; i0 is loaded
+// into a scratch register and passed to iqdivr as the divisor.
+static void
+qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  iqdivr(_jit, r0, r1, r2, imm, 0);
+  unget_temp_gpr(_jit);
+}
+
+
+/*
+ * Unary ALU operations
+ */
+// negr: emit the NEG pseudo-instruction with operands (r0, r1).
+static void
+negr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _NEG(r0, r1);
+  em_wp(_jit, insn);
+}
+
+// comr: emit the NOT pseudo-instruction with operands (r0, r1).
+static void
+comr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _NOT(r0, r1);
+  em_wp(_jit, insn);
+}
+
+
+/*
+ * Branch instructions
+ */
+// Return a copy of the branch instruction word `inst` with `offset`
+// scattered into its B-format immediate fields: bit 11, bits 4:1, bits 10:5
+// and bit 12.  Bit 0 of `offset` is not stored.
+static uint32_t
+patch_cc_jump(uint32_t inst, int32_t offset){
+ instr_t i;
+ i.w = inst;
+ i.B.imm11 = (offset >> 11) & 0x1;
+ i.B.imm4_1 = (offset >> 1) & 0xf;
+ i.B.imm10_5 = (offset >> 5) & 0x3f;
+ i.B.imm12 = (offset >> 12) & 0x1;
+ return i.w;
+}
+
+// Emit the conditional branch `inst` and return a JIT_RELOC_JCC_WITH_VENEER
+// relocation created at the current PC.  The loop repeats until
+// add_pending_literal() accepts the relocation; only then is the
+// instruction written.
+// NOTE(review): presumably a rejected add_pending_literal() moves the PC
+// (e.g. by flushing the literal pool), which is why pc_base is re-read on
+// every iteration — confirm.
+static jit_reloc_t
+emit_cc_jump(jit_state_t *_jit, uint32_t inst)
+{
+ while (1) {
+ uint8_t *pc_base = _jit->pc.uc; // Offset is from current PC
+ // NOTE(review): if jit_address() returns the current PC this is always 0
+ // and the real target is filled in when the relocation is patched — confirm.
+ int32_t off = (uint8_t*)jit_address(_jit) - pc_base;
+ jit_reloc_t ret =
+ jit_reloc (_jit, JIT_RELOC_JCC_WITH_VENEER, 0, _jit->pc.uc, pc_base, 0);
+ // Width handed to add_pending_literal is cc_jump_width - 1 = 11.
+ uint8_t cc_jump_width = 12;
+ if (add_pending_literal(_jit, ret, cc_jump_width - 1)) {
+ em_wp(_jit, patch_cc_jump(inst, off));
+ return ret;
+ }
+ }
+}
+
+// bltr: branch if r0 < r1 (signed); emits BLT with a zero offset through
+// emit_cc_jump and returns the relocation.
+static jit_reloc_t
+bltr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t branch = _BLT(r0, r1, 0);
+  return emit_cc_jump(_jit, branch);
+}
+
+// blti: immediate form of bltr; i0 is loaded into a scratch register first.
+static jit_reloc_t
+blti(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bltr(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bltr_u: branch if r0 < r1 (unsigned); emits BLTU with a zero offset
+// through emit_cc_jump and returns the relocation.
+static jit_reloc_t
+bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t branch = _BLTU(r0, r1, 0);
+  return emit_cc_jump(_jit, branch);
+}
+
+// blti_u: immediate form of bltr_u; i0 is loaded into a scratch register
+// first.
+static jit_reloc_t
+blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bltr_u(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bler: branch if r0 <= r1 (signed), expressed as BGE with the operands
+// swapped (r1 >= r0).
+static jit_reloc_t
+bler(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t branch = _BGE(r1, r0, 0);
+  return emit_cc_jump(_jit, branch);
+}
+
+// blei: immediate form of bler; i0 is loaded into a scratch register first.
+static jit_reloc_t
+blei(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bler(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bler_u: branch if r0 <= r1 (unsigned), expressed as BGEU with the
+// operands swapped (r1 >= r0).
+static jit_reloc_t
+bler_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t branch = _BGEU(r1, r0, 0);
+  return emit_cc_jump(_jit, branch);
+}
+
+// blei_u: immediate form of bler_u; i0 is loaded into a scratch register
+// first.
+static jit_reloc_t
+blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bler_u(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// beqr: branch if r0 == r1; emits BEQ with a zero offset through
+// emit_cc_jump and returns the relocation.
+static jit_reloc_t
+beqr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t branch = _BEQ(r0, r1, 0);
+  return emit_cc_jump(_jit, branch);
+}
+
+// beqi: immediate form of beqr; i0 is loaded into a scratch register first.
+static jit_reloc_t
+beqi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = beqr(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bger: branch if r0 >= r1 (signed); emits BGE with a zero offset through
+// emit_cc_jump and returns the relocation.
+static jit_reloc_t
+bger(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t branch = _BGE(r0, r1, 0);
+  return emit_cc_jump(_jit, branch);
+}
+
+// bgei: immediate form of bger; i0 is loaded into a scratch register first.
+static jit_reloc_t
+bgei(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bger(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bger_u: branch if r0 >= r1 (unsigned); emits BGEU with a zero offset
+// through emit_cc_jump and returns the relocation.
+static jit_reloc_t
+bger_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t branch = _BGEU(r0, r1, 0);
+  return emit_cc_jump(_jit, branch);
+}
+
+// bgei_u: immediate form of bger_u; i0 is loaded into a scratch register
+// first.
+static jit_reloc_t
+bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bger_u(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bgtr: branch if r0 > r1 (signed), expressed as bltr with the operands
+// swapped.
+static jit_reloc_t
+bgtr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t reloc = bltr(_jit, r1, r0);
+  return reloc;
+}
+
+// bgti: immediate form of bgtr; i0 is loaded into a scratch register first.
+static jit_reloc_t
+bgti(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bgtr(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bgtr_u: branch if r0 > r1 (unsigned), expressed as bltr_u with the
+// operands swapped.
+static jit_reloc_t
+bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t reloc = bltr_u(_jit, r1, r0);
+  return reloc;
+}
+
+// bgti_u: immediate form of bgtr_u; i0 is loaded into a scratch register
+// first.
+static jit_reloc_t
+bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bgtr_u(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bner: branch if r0 != r1; emits BNE with a zero offset through
+// emit_cc_jump and returns the relocation.
+static jit_reloc_t
+bner(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t branch = _BNE(r0, r1, 0);
+  return emit_cc_jump(_jit, branch);
+}
+
+// bnei: immediate form of bner; i0 is loaded into a scratch register first.
+static jit_reloc_t
+bnei(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bner(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bmsr: branch if (r0 & r1) is non-zero.  The AND result goes to a scratch
+// register which is then compared against the zero register.
+static jit_reloc_t
+bmsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t masked = jit_gpr_regno(scratch);
+  andr(_jit, masked, r0, r1);
+  jit_reloc_t reloc = bner(_jit, masked, jit_gpr_regno(_ZERO));
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bmsi: branch if (r0 & i0) is non-zero.  The AND result goes to a scratch
+// register which is then compared against the zero register.
+static jit_reloc_t
+bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t masked = jit_gpr_regno(scratch);
+  andi(_jit, masked, r0, i0);
+  jit_reloc_t reloc = bner(_jit, masked, jit_gpr_regno(_ZERO));
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bmcr: branch if (r0 & r1) is zero.  The AND result goes to a scratch
+// register which is then compared against the zero register.
+static jit_reloc_t
+bmcr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t masked = jit_gpr_regno(scratch);
+  andr(_jit, masked, r0, r1);
+  jit_reloc_t reloc = beqr(_jit, masked, jit_gpr_regno(_ZERO));
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bmci: branch if (r0 & i0) is zero.  The AND result goes to a scratch
+// register which is then compared against the zero register.
+static jit_reloc_t
+bmci(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t masked = jit_gpr_regno(scratch);
+  andi(_jit, masked, r0, i0);
+  jit_reloc_t reloc = beqr(_jit, masked, jit_gpr_regno(_ZERO));
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// boaddr: r0 += r1, branching when the signed addition overflowed.
+// There is no overflow flag, so overflow is detected in software: the
+// branch is taken when "r1 is negative" differs from "sum < old r0".
+// Three scratch registers are needed.
+static jit_reloc_t
+boaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t sum_tmp = get_temp_gpr(_jit);
+  jit_gpr_t neg_tmp = get_temp_gpr(_jit);
+  jit_gpr_t less_tmp = get_temp_gpr(_jit);
+  const int32_t sum = jit_gpr_regno(sum_tmp);
+  const int32_t addend_neg = jit_gpr_regno(neg_tmp);
+  const int32_t sum_less = jit_gpr_regno(less_tmp);
+  addr(_jit, sum, r0, r1);
+  em_wp(_jit, _SLTI(addend_neg, r1, 0));
+  em_wp(_jit, _SLT(sum_less, sum, r0));
+  movr(_jit, r0, sum);
+  jit_reloc_t reloc = bner(_jit, addend_neg, sum_less);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// boaddi: immediate form of boaddr; i0 is loaded into a scratch register
+// first.
+static jit_reloc_t
+boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = boaddr(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// boaddr_u: r0 += r1, branching when the unsigned addition carried out.
+// The carry is computed with SLTU as "sum < old r0" and the branch is taken
+// when that flag is non-zero.
+static jit_reloc_t
+boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t sum_tmp = get_temp_gpr(_jit);
+  jit_gpr_t carry_tmp = get_temp_gpr(_jit);
+  const int32_t sum = jit_gpr_regno(sum_tmp);
+  const int32_t carry = jit_gpr_regno(carry_tmp);
+  addr(_jit, sum, r0, r1);
+  em_wp(_jit, _SLTU(carry, sum, r0));
+  movr(_jit, r0, sum);
+  jit_reloc_t reloc = bnei(_jit, carry, 0);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// boaddi_u: immediate form of boaddr_u; i0 is loaded into a scratch
+// register first.
+static jit_reloc_t
+boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = boaddr_u(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bxaddr: r0 += r1, branching when the signed addition did NOT overflow.
+// The branch is taken when "r1 is negative" equals "sum < old r0".
+static jit_reloc_t
+bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t sum_tmp = get_temp_gpr(_jit);
+  jit_gpr_t neg_tmp = get_temp_gpr(_jit);
+  jit_gpr_t less_tmp = get_temp_gpr(_jit);
+  const int32_t sum = jit_gpr_regno(sum_tmp);
+  const int32_t addend_neg = jit_gpr_regno(neg_tmp);
+  const int32_t sum_less = jit_gpr_regno(less_tmp);
+  addr(_jit, sum, r0, r1);
+  em_wp(_jit, _SLTI(addend_neg, r1, 0));
+  em_wp(_jit, _SLT(sum_less, sum, r0));
+  movr(_jit, r0, sum);
+  jit_reloc_t reloc = beqr(_jit, addend_neg, sum_less);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bxaddi: immediate form of bxaddr; i0 is loaded into a scratch register
+// first.
+static jit_reloc_t
+bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bxaddr(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bxaddr_u: r0 += r1, branching when the unsigned addition did NOT carry.
+// The carry is computed with SLTU as "sum < old r0" and the branch is taken
+// when that flag is zero.
+static jit_reloc_t
+bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t sum_tmp = get_temp_gpr(_jit);
+  jit_gpr_t carry_tmp = get_temp_gpr(_jit);
+  const int32_t sum = jit_gpr_regno(sum_tmp);
+  const int32_t carry = jit_gpr_regno(carry_tmp);
+  addr(_jit, sum, r0, r1);
+  em_wp(_jit, _SLTU(carry, sum, r0));
+  movr(_jit, r0, sum);
+  jit_reloc_t reloc = beqi(_jit, carry, 0);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bxaddi_u: immediate form of bxaddr_u; i0 is loaded into a scratch
+// register first.
+static jit_reloc_t
+bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bxaddr_u(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bosubr: r0 -= r1, branching when the signed subtraction overflowed.
+// The branch is taken when "r1 is negative" differs from
+// "old r0 < difference".
+static jit_reloc_t
+bosubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t diff_tmp = get_temp_gpr(_jit);
+  jit_gpr_t neg_tmp = get_temp_gpr(_jit);
+  jit_gpr_t less_tmp = get_temp_gpr(_jit);
+  const int32_t diff = jit_gpr_regno(diff_tmp);
+  const int32_t subtrahend_neg = jit_gpr_regno(neg_tmp);
+  const int32_t grew = jit_gpr_regno(less_tmp);
+  subr(_jit, diff, r0, r1);
+  em_wp(_jit, _SLTI(subtrahend_neg, r1, 0));
+  em_wp(_jit, _SLT(grew, r0, diff));
+  movr(_jit, r0, diff);
+  jit_reloc_t reloc = bner(_jit, subtrahend_neg, grew);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bosubi: immediate form of bosubr; i0 is loaded into a scratch register
+// first.
+static jit_reloc_t
+bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bosubr(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bosubr_u: r0 -= r1, branching when the unsigned subtraction borrowed.
+// The borrow is computed with SLTU as "old r0 < difference" and the branch
+// is taken when that flag equals 1.
+static jit_reloc_t
+bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t diff_tmp = get_temp_gpr(_jit);
+  jit_gpr_t borrow_tmp = get_temp_gpr(_jit);
+  const int32_t diff = jit_gpr_regno(diff_tmp);
+  const int32_t borrow = jit_gpr_regno(borrow_tmp);
+  subr(_jit, diff, r0, r1);
+  em_wp(_jit, _SLTU(borrow, r0, diff));
+  movr(_jit, r0, diff);
+  jit_reloc_t reloc = beqi(_jit, borrow, 1);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bosubi_u: immediate form of bosubr_u; i0 is loaded into a scratch
+// register first.
+static jit_reloc_t
+bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bosubr_u(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bxsubr: r0 -= r1, branching when the signed subtraction did NOT overflow.
+// The branch is taken when "r1 is negative" equals "old r0 < difference".
+static jit_reloc_t
+bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t diff_tmp = get_temp_gpr(_jit);
+  jit_gpr_t neg_tmp = get_temp_gpr(_jit);
+  jit_gpr_t less_tmp = get_temp_gpr(_jit);
+  const int32_t diff = jit_gpr_regno(diff_tmp);
+  const int32_t subtrahend_neg = jit_gpr_regno(neg_tmp);
+  const int32_t grew = jit_gpr_regno(less_tmp);
+  subr(_jit, diff, r0, r1);
+  em_wp(_jit, _SLTI(subtrahend_neg, r1, 0));
+  em_wp(_jit, _SLT(grew, r0, diff));
+  movr(_jit, r0, diff);
+  jit_reloc_t reloc = beqr(_jit, subtrahend_neg, grew);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bxsubi: immediate form of bxsubr; i0 is loaded into a scratch register
+// first.
+static jit_reloc_t
+bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bxsubr(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bxsubr_u: r0 -= r1, branching when the unsigned subtraction did NOT
+// borrow.  The borrow is computed with SLTU as "old r0 < difference" and
+// the branch is taken when that flag equals 0.
+static jit_reloc_t
+bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t diff_tmp = get_temp_gpr(_jit);
+  jit_gpr_t borrow_tmp = get_temp_gpr(_jit);
+  const int32_t diff = jit_gpr_regno(diff_tmp);
+  const int32_t borrow = jit_gpr_regno(borrow_tmp);
+  subr(_jit, diff, r0, r1);
+  em_wp(_jit, _SLTU(borrow, r0, diff));
+  movr(_jit, r0, diff);
+  jit_reloc_t reloc = beqi(_jit, borrow, 0);
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+// bxsubi_u: immediate form of bxsubr_u; i0 is loaded into a scratch
+// register first.
+static jit_reloc_t
+bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t imm = jit_gpr_regno(scratch);
+  movi(_jit, imm, i0);
+  jit_reloc_t reloc = bxsubr_u(_jit, r0, imm);
+  unget_temp_gpr(_jit);
+  return reloc;
+}
+
+
+/*
+ * Transfer operations
+ */
+// movr: copy r1 into r0 with MV; nothing is emitted when both operands are
+// the same register.
+static void
+movr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 == r1)
+    return;
+  em_wp(_jit, _MV(r0, r1));
+}
+
+// movi: load the immediate i0 into register r0.
+// Values accepted by simm32_p are built with at most LUI + ADDI; anything
+// else takes the long path, which assembles the value 11 bits at a time.
+static void
+movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+ int32_t srcreg = jit_gpr_regno(_ZERO);
+ if (simm32_p(i0)){
+
+ // hi: upper 20 bits, rounded by +0x800 to compensate for the low part
+ // being treated as a signed 12-bit quantity.
+ // lo: low 12 bits of i0, sign-extended by the shift pair.
+ // NOTE(review): on RV64, LUI sign-extends its 32-bit result, so for
+ // 0x7FFFF800..0x7FFFFFFF (hi == 0x80000) this sequence looks like it
+ // produces a negative value — confirm.
+ int64_t hi = ((i0 + 0x800) >> 12) & 0xFFFFF;
+ int64_t lo = (int32_t)i0<<20>>20;
+
+ if(hi){
+ em_wp(_jit, _LUI(r0, hi));
+ srcreg = r0;
+ }
+
+ // The ADDI is skipped only when LUI was emitted and lo is zero.
+ if(lo || hi == 0){
+ em_wp(_jit, _ADDI(r0, srcreg, lo));
+ }
+
+ } else {
+ // 64 bits: load in various steps
+ // lui, addi, slli, addi, slli, addi, slli, addi
+ // Decomposition: i0 == (hh<<44) + (hl<<33) + (lh<<22) + (lm<<11) + ll,
+ // where hh is i0>>44 and the other four pieces are 11 bits each.
+ int64_t hh = (i0>>44);
+ int64_t hl = (i0>>33) - (hh<<11);
+ int64_t lh = (i0>>22) - ((hh<<22) + (hl<<11));
+ int64_t lm = (i0>>11) - ((hh<<33) + (hl<<22) + (lh<<11));
+ int64_t ll = i0 - ((hh<<44) + (hl<<33) + (lh<<22) + (lm<<11));
+
+
+ // LUI puts hh at bits 31:12; the shift pair (left 32, logical right 33)
+ // drops the sign extension and leaves hh at bits 30:11, i.e. hh << 11.
+ em_wp(_jit, _LUI(r0, hh));
+ em_wp(_jit, _SLLI(r0, r0, 32));
+ em_wp(_jit, _SRLI(r0, r0, 33));
+ em_wp(_jit, _ADDI(r0, r0, hl));
+
+ em_wp(_jit, _SLLI(r0, r0, 11));
+ em_wp(_jit, _ADDI(r0, r0, lh));
+
+ em_wp(_jit, _SLLI(r0, r0, 11));
+ em_wp(_jit, _ADDI(r0, r0, lm));
+
+ em_wp(_jit, _SLLI(r0, r0, 11));
+ em_wp(_jit, _ADDI(r0, r0, ll));
+ }
+}
+
+// Two consecutive instruction words (AUIPC followed by a load) viewed
+// either as separate instr_t values or as one 64-bit quantity, so the pair
+// can be patched and emitted together.
+typedef union{
+ struct{
+ instr_t auipc;
+ instr_t load; // `ld` in RV64 and `lw` in RV32
+ } inst;
+ uint64_t l;
+} load_from_pool_t;
+
+// Return a copy of the AUIPC + load pair `instrs` with `off` written into
+// the AUIPC U-format immediate and the load's I-format immediate.
+// NOTE(review): imm31_12 is assigned the masked but unshifted value
+// (off & 0xFFFFF000); if that field is 20 bits wide, `off >> 12` looks like
+// what was intended.  The low part is also not adjusted for the load
+// immediate being sign-extended — confirm against the instr_t definition.
+static uint64_t
+patch_load_from_pool(uint64_t instrs, uint32_t off){
+ load_from_pool_t i;
+ i.l = instrs;
+ i.inst.auipc.U.imm31_12 = off & 0xFFFFF000;
+ i.inst.load.I.imm11_0 = off & 0x00000FFF;
+ return i.l;
+}
+
+// Emit the 64-bit AUIPC + load pair `insts` and return a
+// JIT_RELOC_LOAD_FROM_POOL relocation created at the current PC.  The loop
+// repeats until add_pending_literal() accepts the relocation; only then is
+// the pair written with emit_u64.
+static jit_reloc_t
+emit_load_from_pool(jit_state_t *_jit, uint64_t insts)
+{
+ while (1) {
+ uint8_t *pc_base = _jit->pc.uc; // Offset is from current PC
+ // pc_base was just read from _jit->pc.uc, so off is 0 here.
+ // NOTE(review): presumably the real offset is filled in when the
+ // relocation is patched — confirm.
+ int32_t off = (_jit->pc.uc - pc_base);
+ jit_reloc_t ret =
+ jit_reloc (_jit, JIT_RELOC_LOAD_FROM_POOL, 0, _jit->pc.uc, pc_base, 0);
+ uint8_t load_from_pool_width = 32;
+ if (add_pending_literal(_jit, ret, load_from_pool_width)) {
+ emit_u64(_jit, patch_load_from_pool(insts, off));
+ return ret;
+ }
+ }
+}
+// movi_from_pool: build an AUIPC + load pair that targets r0 (LD on 64-bit
+// builds, LW on 32-bit builds), both with zero immediates, and emit it
+// through emit_load_from_pool.  Returns the resulting relocation.
+static jit_reloc_t
+movi_from_pool(jit_state_t *_jit, int32_t r0)
+{
+  load_from_pool_t pair;
+  pair.inst.auipc.w = _AUIPC(r0, 0);
+#if __WORDSIZE == 64
+  pair.inst.load.w = _LD(r0, r0, 0);
+#elif __WORDSIZE == 32
+  pair.inst.load.w = _LW(r0, r0, 0);
+#endif
+  return emit_load_from_pool(_jit, pair.l);
+}
+// mov_addr: thin wrapper around movi_from_pool; returns its relocation.
+static jit_reloc_t
+mov_addr(jit_state_t *_jit, int32_t r0)
+{
+  jit_reloc_t reloc = movi_from_pool(_jit, r0);
+  return reloc;
+}
+
+
+// extr_c: sign-extend the low 8 bits of r1 into r0 by shifting left and
+// then arithmetically right by (__WORDSIZE - 8).
+static void
+extr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int shift = __WORDSIZE - 8;
+  lshi(_jit, r0, r1, shift);
+  rshi(_jit, r0, r0, shift);
+}
+
+// extr_uc: zero-extend the low 8 bits of r1 into r0 by shifting left and
+// then logically right by (__WORDSIZE - 8).
+static void
+extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int shift = __WORDSIZE - 8;
+  lshi(_jit, r0, r1, shift);
+  rshi_u(_jit, r0, r0, shift);
+}
+
+// extr_s: sign-extend the low 16 bits of r1 into r0 by shifting left and
+// then arithmetically right by (__WORDSIZE - 16).
+static void
+extr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int shift = __WORDSIZE - 16;
+  lshi(_jit, r0, r1, shift);
+  rshi(_jit, r0, r0, shift);
+}
+
+// extr_us: zero-extend the low 16 bits of r1 into r0 by shifting left and
+// then logically right by (__WORDSIZE - 16).
+static void
+extr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int shift = __WORDSIZE - 16;
+  lshi(_jit, r0, r1, shift);
+  rshi_u(_jit, r0, r0, shift);
+}
+
+# if __WORDSIZE == 64
+// extr_i: sign-extend the low 32 bits of r1 into r0 by shifting left and
+// then arithmetically right by (__WORDSIZE - 32).
+static void
+extr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int shift = __WORDSIZE - 32;
+  lshi(_jit, r0, r1, shift);
+  rshi(_jit, r0, r0, shift);
+}
+// extr_ui: zero-extend the low 32 bits of r1 into r0 by shifting left and
+// then logically right by (__WORDSIZE - 32).
+static void
+extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int shift = __WORDSIZE - 32;
+  lshi(_jit, r0, r1, shift);
+  rshi_u(_jit, r0, r0, shift);
+}
+#endif
+
+/*
+ * Store operations
+ */
+// str_c: byte store; emits SB with operands (r0, r1) and a zero offset.
+static void
+str_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _SB(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+// str_uc: byte store; emits the same SB instruction as str_c.
+static void
+str_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _SB(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+// str_s: halfword store; emits SH with operands (r0, r1) and a zero offset.
+static void
+str_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _SH(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+// str_i: word store; emits SW with operands (r0, r1) and a zero offset.
+static void
+str_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _SW(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+#if __WORDSIZE == 64
+// str_l: doubleword store; emits SD with operands (r0, r1) and a zero
+// offset.
+static void
+str_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _SD(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+#endif
+
+// sti_c: byte store to an absolute address; i0 is loaded into a scratch
+// register which is passed to str_c together with r0.
+static void
+sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  str_c(_jit, address, r0);
+  unget_temp_gpr(_jit);
+}
+
+// sti_s: halfword store to an absolute address; i0 is loaded into a scratch
+// register which is passed to str_s together with r0.
+static void
+sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  str_s(_jit, address, r0);
+  unget_temp_gpr(_jit);
+}
+
+// sti_i: word store to an absolute address; i0 is loaded into a scratch
+// register which is passed to str_i together with r0.
+static void
+sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  str_i(_jit, address, r0);
+  unget_temp_gpr(_jit);
+}
+
+#if __WORDSIZE == 64
+// sti_l: doubleword store to an absolute address; i0 is loaded into a
+// scratch register which is passed to str_l together with r0.
+static void
+sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  str_l(_jit, address, r0);
+  unget_temp_gpr(_jit);
+}
+#endif
+
+// stxr_c: indexed byte store; r0 + r1 is computed into a scratch register
+// which is passed to str_c together with r2.
+static void
+stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r0, r1);
+  str_c(_jit, address, r2);
+  unget_temp_gpr(_jit);
+}
+
+// stxr_s: indexed halfword store; r0 + r1 is computed into a scratch
+// register which is passed to str_s together with r2.
+static void
+stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r0, r1);
+  str_s(_jit, address, r2);
+  unget_temp_gpr(_jit);
+}
+
+// stxr_i: indexed word store; r0 + r1 is computed into a scratch register
+// which is passed to str_i together with r2.
+static void
+stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r0, r1);
+  str_i(_jit, address, r2);
+  unget_temp_gpr(_jit);
+}
+
+# if __WORDSIZE == 64
+// stxr_l: indexed doubleword store; r0 + r1 is computed into a scratch
+// register which is passed to str_l together with r2.
+static void
+stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r0, r1);
+  str_l(_jit, address, r2);
+  unget_temp_gpr(_jit);
+}
+#endif
+
+// stxi_c: byte store with an immediate displacement.  When i0 passes
+// simm12_p a single SB with that offset is emitted; otherwise r0 + i0 is
+// computed into a scratch register and str_c is used.
+static void
+stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t address = jit_gpr_regno(scratch);
+    addi(_jit, address, r0, i0);
+    str_c(_jit, address, r1);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _SB(r0, r1, i0));
+}
+
+
+// stxi_s: halfword store with an immediate displacement.  When i0 passes
+// simm12_p a single SH with that offset is emitted; otherwise r0 + i0 is
+// computed into a scratch register and str_s is used.
+static void
+stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t address = jit_gpr_regno(scratch);
+    addi(_jit, address, r0, i0);
+    str_s(_jit, address, r1);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _SH(r0, r1, i0));
+}
+
+
+// stxi_i: word store with an immediate displacement.  When i0 passes
+// simm12_p a single SW with that offset is emitted; otherwise r0 + i0 is
+// computed into a scratch register and str_i is used.
+static void
+stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t address = jit_gpr_regno(scratch);
+    addi(_jit, address, r0, i0);
+    str_i(_jit, address, r1);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _SW(r0, r1, i0));
+}
+
+# if __WORDSIZE == 64
+// stxi_l: doubleword store with an immediate displacement.  When i0 passes
+// simm12_p a single SD with that offset is emitted; otherwise r0 + i0 is
+// computed into a scratch register and str_l is used.
+static void
+stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t address = jit_gpr_regno(scratch);
+    addi(_jit, address, r0, i0);
+    str_l(_jit, address, r1);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _SD(r0, r1, i0));
+}
+# endif
+
+
+/*
+ * Load operations
+ */
+// ldr_c: signed byte load; emits LB with operands (r0, r1) and a zero
+// offset.
+static void
+ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _LB(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+
+// ldr_uc: unsigned byte load; emits LBU with operands (r0, r1) and a zero
+// offset.
+static void
+ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _LBU(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+
+// ldr_s: signed halfword load; emits LH with operands (r0, r1) and a zero
+// offset.
+static void
+ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _LH(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+
+// ldr_us: unsigned halfword load; emits LHU with operands (r0, r1) and a
+// zero offset.
+static void
+ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _LHU(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+
+// ldr_i: signed word load; emits LW with operands (r0, r1) and a zero
+// offset.
+static void
+ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _LW(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+
+# if __WORDSIZE == 64
+// ldr_ui: unsigned word load; emits LWU with operands (r0, r1) and a zero
+// offset.
+static void
+ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _LWU(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+
+// ldr_l: doubleword load; emits LD with operands (r0, r1) and a zero
+// offset.
+static void
+ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _LD(r0, r1, 0);
+  em_wp(_jit, insn);
+}
+# endif
+
+
+// ldi_c: signed byte load from an absolute address; i0 is loaded into a
+// scratch register and ldr_c is used.
+static void
+ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  ldr_c(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+
+// ldi_uc: unsigned byte load from an absolute address; i0 is loaded into a
+// scratch register and ldr_uc is used.
+static void
+ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  ldr_uc(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+
+// ldi_s: signed halfword load from an absolute address; i0 is loaded into a
+// scratch register and ldr_s is used.
+static void
+ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  ldr_s(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+
+// ldi_us: unsigned halfword load from an absolute address; i0 is loaded
+// into a scratch register and ldr_us is used.
+static void
+ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  ldr_us(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+
+
+// ldi_i: signed word load from an absolute address; i0 is loaded into a
+// scratch register and ldr_i is used.
+static void
+ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  ldr_i(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+
+# if __WORDSIZE == 64
+// ldi_ui: unsigned word load from an absolute address; i0 is loaded into a
+// scratch register and ldr_ui is used.
+static void
+ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  ldr_ui(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+
+// ldi_l: doubleword load from an absolute address; i0 is loaded into a
+// scratch register and ldr_l is used.
+static void
+ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  movi(_jit, address, i0);
+  ldr_l(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+#endif
+
+
+
+
+// ldxr_c: indexed signed byte load; r1 + r2 is computed into a scratch
+// register and ldr_c loads from it into r0.
+static void
+ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r1, r2);
+  ldr_c(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+// ldxr_uc: indexed unsigned byte load; r1 + r2 is computed into a scratch
+// register and ldr_uc loads from it into r0.
+static void
+ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r1, r2);
+  ldr_uc(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+// ldxr_s: indexed signed halfword load; r1 + r2 is computed into a scratch
+// register and ldr_s loads from it into r0.
+static void
+ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r1, r2);
+  ldr_s(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+// ldxr_us: indexed unsigned halfword load; r1 + r2 is computed into a
+// scratch register and ldr_us loads from it into r0.
+static void
+ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r1, r2);
+  ldr_us(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+// ldxr_i: indexed signed word load; r1 + r2 is computed into a scratch
+// register and ldr_i loads from it into r0.
+static void
+ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r1, r2);
+  ldr_i(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+# if __WORDSIZE == 64
+// ldxr_ui: indexed unsigned word load; r1 + r2 is computed into a scratch
+// register and ldr_ui loads from it into r0.
+static void
+ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r1, r2);
+  ldr_ui(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+// ldxr_l: indexed doubleword load; r1 + r2 is computed into a scratch
+// register and ldr_l loads from it into r0.
+static void
+ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t address = jit_gpr_regno(scratch);
+  addr(_jit, address, r1, r2);
+  ldr_l(_jit, r0, address);
+  unget_temp_gpr(_jit);
+}
+#endif
+
+
+
+
+// ldxi_c: signed byte load with an immediate displacement.  When i0 passes
+// simm12_p a single LB with that offset is emitted; otherwise r1 + i0 is
+// computed into a scratch register and ldr_c is used.
+static void
+ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (simm12_p(i0))
+    // Fix: the short form used to emit LD (doubleword) instead of LB, unlike
+    // the fallback path below.
+    em_wp(_jit, _LB(r0, r1, i0));
+  else {
+    jit_gpr_t t0 = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(t0), r1, i0);
+    ldr_c(_jit, r0, jit_gpr_regno(t0));
+    unget_temp_gpr(_jit);
+  }
+}
+// ldxi_uc: unsigned byte load with an immediate displacement.  When i0
+// passes simm12_p a single LBU with that offset is emitted; otherwise
+// r1 + i0 is computed into a scratch register and ldr_uc is used.
+static void
+ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (simm12_p(i0))
+    // Fix: the short form used to emit LD (doubleword) instead of LBU,
+    // unlike the fallback path below.
+    em_wp(_jit, _LBU(r0, r1, i0));
+  else {
+    jit_gpr_t t0 = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(t0), r1, i0);
+    ldr_uc(_jit, r0, jit_gpr_regno(t0));
+    unget_temp_gpr(_jit);
+  }
+}
+// ldxi_us: unsigned halfword load with an immediate displacement.  When i0
+// passes simm12_p a single LHU with that offset is emitted; otherwise
+// r1 + i0 is computed into a scratch register and ldr_us is used.
+static void
+ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (simm12_p(i0))
+    // Fix: the short form used to emit LD (doubleword) instead of LHU,
+    // unlike the fallback path below.
+    em_wp(_jit, _LHU(r0, r1, i0));
+  else {
+    jit_gpr_t t0 = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(t0), r1, i0);
+    ldr_us(_jit, r0, jit_gpr_regno(t0));
+    unget_temp_gpr(_jit);
+  }
+}
+// ldxi_s: signed halfword load with an immediate displacement.  When i0
+// passes simm12_p a single LH with that offset is emitted; otherwise
+// r1 + i0 is computed into a scratch register and ldr_s is used.
+static void
+ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (simm12_p(i0))
+    // Fix: the short form used to emit LD (doubleword) instead of LH, unlike
+    // the fallback path below.
+    em_wp(_jit, _LH(r0, r1, i0));
+  else {
+    jit_gpr_t t0 = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(t0), r1, i0);
+    ldr_s(_jit, r0, jit_gpr_regno(t0));
+    unget_temp_gpr(_jit);
+  }
+}
+// ldxi_i: signed word load with an immediate displacement.  When i0 passes
+// simm12_p a single LW with that offset is emitted; otherwise r1 + i0 is
+// computed into a scratch register and ldr_i is used.
+static void
+ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (simm12_p(i0))
+    // Fix: the short form used to emit LD (doubleword) instead of LW, unlike
+    // the fallback path below.
+    em_wp(_jit, _LW(r0, r1, i0));
+  else {
+    jit_gpr_t t0 = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(t0), r1, i0);
+    ldr_i(_jit, r0, jit_gpr_regno(t0));
+    unget_temp_gpr(_jit);
+  }
+}
+# if __WORDSIZE == 64
+// ldxi_ui: unsigned word load with an immediate displacement.  When i0
+// passes simm12_p a single LWU with that offset is emitted; otherwise
+// r1 + i0 is computed into a scratch register and ldr_ui is used.
+static void
+ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (simm12_p(i0))
+    // Fix: the short form used to emit LD (doubleword) instead of LWU,
+    // unlike the fallback path below.
+    em_wp(_jit, _LWU(r0, r1, i0));
+  else {
+    jit_gpr_t t0 = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(t0), r1, i0);
+    ldr_ui(_jit, r0, jit_gpr_regno(t0));
+    unget_temp_gpr(_jit);
+  }
+}
+// ldxi_l: doubleword load with an immediate displacement.  When i0 passes
+// simm12_p a single LD with that offset is emitted; otherwise r1 + i0 is
+// computed into a scratch register and ldr_l is used.
+static void
+ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!simm12_p(i0)) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t address = jit_gpr_regno(scratch);
+    addi(_jit, address, r1, i0);
+    ldr_l(_jit, r0, address);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _LD(r0, r1, i0));
+}
+#endif
+
+
+/*
+ * Argument management
+ */
+
+// static void
+// pushr(jit_state_t *_jit, int32_t r0)
+// {
+// #if __WORDSIZE == 64
+// addi(jit_gpr_regno(_SP), -8);
+// em_wp(_SD(r0, jit_gpr_regno(_SP), 0));
+// #elif __WORDSIZE == 32
+// addi(jit_gpr_regno(_SP), -4);
+// em_wp(_SW(r0, jit_gpr_regno(_SP), 0));
+// #endif
+// }
+// static void
+// popr(jit_state_t *_jit, int32_t r0)
+// {
+// #if __WORDSIZE == 64
+// em_wp(_jit, _LD(r0, jit_gpr_regno(_SP), 0));
+// addi(jit_gpr_regno(_SP), 8);
+// #elif __WORDSIZE == 32
+// em_wp(_jit, _LW(r0, jit_gpr_regno(_SP), 0));
+// addi(jit_gpr_regno(_SP), 4);
+// #endif
+// }
+
+/* Emit the RET instruction. */
+static void
+ret(jit_state_t *_jit)
+{
+ em_wp(_jit, _RET());
+}
+
+/* Copy r0 into A0 and return. */
+static void
+retr(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_A0);
+
+  movr(_jit, result_reg, r0);
+  ret(_jit);
+}
+
+/* Load the immediate i0 into A0 and return. */
+static void
+reti(jit_state_t *_jit, jit_word_t i0)
+{
+  const int32_t result_reg = jit_gpr_regno(_A0);
+
+  movi(_jit, result_reg, i0);
+  ret(_jit);
+}
+
+/* Fetch a call result from A0 into r0 through extr_c. */
+static void
+retval_c(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_A0);
+
+  extr_c(_jit, r0, result_reg);
+}
+
+/* Fetch a call result from A0 into r0 through extr_uc. */
+static void
+retval_uc(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_A0);
+
+  extr_uc(_jit, r0, result_reg);
+}
+
+/* Fetch a call result from A0 into r0 through extr_s. */
+static void
+retval_s(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_A0);
+
+  extr_s(_jit, r0, result_reg);
+}
+
+/* Fetch a call result from A0 into r0 through extr_us. */
+static void
+retval_us(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_A0);
+
+  extr_us(_jit, r0, result_reg);
+}
+
+/* Fetch a call result from A0 into r0 through extr_i. */
+static void
+retval_i(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_A0);
+
+  extr_i(_jit, r0, result_reg);
+}
+
+# if __WORDSIZE == 64
+/* Fetch a call result from A0 into r0 through extr_ui (64-bit builds). */
+static void
+retval_ui(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_A0);
+
+  extr_ui(_jit, r0, result_reg);
+}
+
+/* Fetch a full-width call result: plain register move from A0 into r0. */
+static void
+retval_l(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_A0);
+
+  movr(_jit, r0, result_reg);
+}
+#endif
+
+/*
+ * Jump and return instructions
+ */
+/*
+ * Return the instruction word `inst` with the J-format immediate fields
+ * replaced by the corresponding bits of `offset`.  Bit 0 of `offset` is not
+ * stored in any field.
+ */
+static uint32_t
+patch_jump(uint32_t inst, int32_t offset)
+{
+  instr_t patched;
+
+  patched.w = inst;
+  /* Fields filled from the low bits upwards; each is independent. */
+  patched.J.imm10_1  = (offset >> 1)  & 0x3ff;
+  patched.J.imm11    = (offset >> 11) & 0x1;
+  patched.J.imm19_12 = (offset >> 12) & 0xff;
+  patched.J.imm20    = (offset >> 20) & 0x1;
+
+  return patched.w;
+}
+/*
+ * Emit the jump instruction `inst` and return a JIT_RELOC_JMP_WITH_VENEER
+ * relocation describing it, so that its target can be patched later.
+ *
+ * The body retries until add_pending_literal() accepts the relocation.
+ * NOTE(review): presumably a rejected attempt means the literal pool had to
+ * be flushed and the PC moved, which is why pc_base and the relocation are
+ * recomputed on every iteration -- confirm against add_pending_literal().
+ */
+static jit_reloc_t
+emit_jump(jit_state_t *_jit, uint32_t inst)
+{
+ while (1) {
+ uint8_t *pc_base = _jit->pc.uc; // Offset is from current PC
+ int32_t off = (uint8_t*)jit_address(_jit) - pc_base;
+ jit_reloc_t ret =
+ jit_reloc (_jit, JIT_RELOC_JMP_WITH_VENEER, 0, _jit->pc.uc, pc_base, 0);
+ // Width handed to add_pending_literal() is jump_width - 1.
+ uint8_t jump_width = 20;
+ if (add_pending_literal(_jit, ret, jump_width - 1)) {
+ // Placeholder displacement; the real target is filled in via the reloc.
+ em_wp(_jit, patch_jump(inst, off));
+ return ret;
+ }
+ }
+}
+
+/* Call the address held in r0: JALR with RA as the link register. */
+static void
+callr(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t link_reg = jit_gpr_regno(_RA);
+
+  em_wp(_jit, _JALR(link_reg, r0, 0));
+}
+
+/*
+ * Call the absolute address i0.  When the distance from the current PC
+ * passes simm20_p() a single PC-relative JAL (linking in RA) is emitted;
+ * otherwise the address is materialised in a temporary and called through
+ * callr().
+ */
+static void
+calli(jit_state_t *_jit, jit_word_t i0)
+{
+  const jit_word_t here = (jit_word_t)(_jit->pc.uc);
+  const jit_word_t displacement = i0 - here;
+
+  if (simm20_p(displacement)) {
+    em_wp(_jit, _JAL(jit_gpr_regno(_RA), displacement));
+    return;
+  }
+
+  const int32_t target = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, target, i0);
+  callr(_jit, target);
+  unget_temp_gpr(_jit);
+}
+
+/* Jump to the absolute address i0 while recording a return address; this is
+ * exactly calli(). */
+static void
+jmpi_with_link(jit_state_t *_jit, jit_word_t i0)
+{
+ calli(_jit, i0);
+}
+
+/* Intentionally emits nothing: the call helpers in this file leave the
+ * return address in the RA register rather than on the stack. */
+static void
+pop_link_register(jit_state_t *_jit)
+{
+}
+
+/* Intentionally emits nothing: the call helpers in this file leave the
+ * return address in the RA register rather than on the stack. */
+static void
+push_link_register(jit_state_t *_jit)
+{
+}
+
+/* Jump to the address held in r0: JALR whose link is discarded into ZERO. */
+static void
+jmpr(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t discard = jit_gpr_regno(_ZERO);
+
+  em_wp(_jit, _JALR(discard, r0, 0));
+}
+
+/*
+ * Jump to the absolute address i0.  When the distance from the current PC
+ * passes simm20_p() a single PC-relative JAL (link discarded into ZERO) is
+ * emitted; otherwise the address is materialised in a temporary and reached
+ * through jmpr().
+ */
+static void
+jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+  const jit_word_t here = (jit_word_t)(_jit->pc.uc);
+  const jit_word_t displacement = i0 - here;
+
+  if (simm20_p(displacement)) {
+    em_wp(_jit, _JAL(jit_gpr_regno(_ZERO), displacement));
+    return;
+  }
+
+  const int32_t target = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, target, i0);
+  jmpr(_jit, target);
+  unget_temp_gpr(_jit);
+}
+
+/* Emit an unconditional JAL (link discarded into ZERO) with a zero
+ * displacement and return the relocation emit_jump() created for it. */
+static jit_reloc_t
+jmp(jit_state_t *_jit)
+{
+ return emit_jump(_jit, _JAL(jit_gpr_regno(_ZERO), 0));
+}
+
+
+
+/*
+ * Atomic operations
+ */
+
+static void
+ldr_atomic(jit_state_t *_jit, int32_t dst, int32_t loc)
+{
+ em_wp(_jit, _FENCE(0xFF));
+ ldr_i(_jit, dst, loc);
+ em_wp(_jit, _FENCE(0xFF));
+}
+
+static void
+str_atomic(jit_state_t *_jit, int32_t loc, int32_t val)
+{
+ em_wp(_jit, _FENCE(0xFF));
+ str_i(_jit, loc, val);
+ em_wp(_jit, _FENCE(0xFF));
+}
+
+/*
+ * Emit a single AMOSWAP (doubleword form on 64-bit builds, word form on
+ * 32-bit builds) with operands dst, loc, val.
+ * NOTE(review): the two trailing 1s are presumably the aq/rl ordering bits;
+ * confirm against the _AMOSWAP_* macro definitions.
+ */
+static void
+swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t val)
+{
+#if __WORDSIZE == 64
+ em_wp(_jit, _AMOSWAP_D(dst, loc, val, 1, 1));
+#elif __WORDSIZE == 32
+ em_wp(_jit, _AMOSWAP_W(dst, loc, val, 1, 1));
+#endif
+}
+
+/*
+ * Compare-and-swap built from an LR/SC retry loop.
+ *
+ * The value read from `loc` by LR is compared with `expected`; on a mismatch
+ * the store is skipped.  Otherwise SC attempts to write `desired`, and a
+ * non-zero SC status sends control back to `retry`.  In every case `dst`
+ * ends up holding the value LR observed.  Two temporaries are taken and
+ * both are released at the end.
+ */
+static void
+cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t expected,
+ int32_t desired)
+{
+ int32_t t0 = jit_gpr_regno(get_temp_gpr(_jit));
+ int32_t t1 = jit_gpr_regno(get_temp_gpr(_jit));
+
+ // Address the SC-failure branch jumps back to.
+ void *retry = jit_address(_jit);
+
+#if __WORDSIZE == 64
+ em_wp(_jit, _LR_D(t0, loc, 0,0));
+#elif __WORDSIZE == 32
+ em_wp(_jit, _LR_W(t0, loc, 0,0));
+#endif
+
+ // Loaded value differs from `expected`: skip the store entirely.
+ jit_reloc_t fail = bner(_jit, t0, expected);
+
+#if __WORDSIZE == 64
+ em_wp(_jit, _SC_D(t1, desired, loc, 0,0));
+#elif __WORDSIZE == 32
+ em_wp(_jit, _SC_W(t1, desired, loc, 0,0));
+#endif
+
+ // t1 != 0 after SC: go back and redo the LR/SC pair.
+ jit_patch_there(_jit, bner(_jit, t1, jit_gpr_regno(_ZERO)), retry);
+
+ jit_patch_here(_jit, fail);
+ em_wp(_jit, _FENCE(0xFF));
+ movr(_jit, dst, t0);
+
+ unget_temp_gpr(_jit);
+ unget_temp_gpr(_jit);
+}
+
+
+/*
+ * Byte swapping operations
+ * RISC-V Doesn't provide them by default.
+ * There's a B extension (Standard Extension for Bit Manipulation) draft, but
+ * it's not official yet:
+ * https://github.com/riscv/riscv-bitmanip
+ * Meanwhile, we need to implement them in software.
+ */
+/*
+ * Software byte swap: write into r0 the low `size` bytes of r1 in reversed
+ * order, with all higher bits of r0 cleared.
+ *
+ * r0 is used as the accumulator, so when r0 and r1 name the same register
+ * the source is first copied to a second temporary; otherwise the initial
+ * mask would destroy the bytes that are still to be read.
+ */
+static void
+bswapr_uany(jit_state_t *_jit, int32_t r0, int32_t r1, size_t size)
+{
+  int32_t t0 = jit_gpr_regno(get_temp_gpr(_jit));
+  int32_t src = r1;
+  int aliased = (r0 == r1);
+
+  if (aliased) {
+    src = jit_gpr_regno(get_temp_gpr(_jit));
+    movr(_jit, src, r1);
+  }
+
+  /* Lowest source byte first; each later byte is shifted in below it. */
+  andi(_jit, r0, src, 0xFF);
+  for (size_t i = 1; i < size; i++) {
+    lshi(_jit, r0, r0, 8);
+    rshi(_jit, t0, src, 8 * i);
+    andi(_jit, t0, t0, 0xFF);
+    orr(_jit, r0, r0, t0);
+  }
+
+  /* One unget per get_temp_gpr above. */
+  if (aliased)
+    unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+}
+
+/* Byte-swap the low 2 bytes of r1 into r0 via bswapr_uany(). */
+static void
+bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ bswapr_uany(_jit, r0, r1, 2);
+}
+
+/* Byte-swap the low 4 bytes of r1 into r0 via bswapr_uany(). */
+static void
+bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ bswapr_uany(_jit, r0, r1, 4);
+}
+
+# if __WORDSIZE == 64
+/* Byte-swap all 8 bytes of r1 into r0 via bswapr_uany() (64-bit builds). */
+static void
+bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ bswapr_uany(_jit, r0, r1, 8);
+}
+#endif
+
+
+
+/*
+ * Others
+ * TODO
+ */
+/*
+ * Emit `im` bytes of padding as NOP instructions, one per 4 bytes.
+ * `im` must be a multiple of 4; the assertion fires otherwise.
+ */
+static void
+nop(jit_state_t *_jit, int32_t im)
+{
+  while (im > 0) {
+    em_wp(_jit, _NOP());
+    im -= 4;
+  }
+  assert(im == 0);
+}
+/*
+ * Emit a memory fence.  Uses the same FENCE(0xFF) encoding that
+ * ldr_atomic(), str_atomic() and cas_atomic() place around their accesses,
+ * so a requested fence is no longer silently dropped.
+ */
+static void
+mfence(jit_state_t *_jit)
+{
+  em_wp(_jit, _FENCE(0xFF));
+}
+
+/* Emit an EBREAK instruction. */
+static void
+breakpoint(jit_state_t *_jit)
+{
+ em_wp(_jit, _EBREAK());
+}
diff --git a/lightening/riscv-fpu.c b/lightening/riscv-fpu.c
new file mode 100644
index 000000000..315ed8d14
--- /dev/null
+++ b/lightening/riscv-fpu.c
@@ -0,0 +1,858 @@
+/*
+ * RV32F Standard Extension
+ *
+ * Single-precision encodings.  Each macro takes register numbers (and, where
+ * present, an immediate or `rm` value) and expands to one of the
+ * Itype/Stype/Rtype/R4type instruction builders.  Macros without an `rm`
+ * parameter pass a fixed constant in that builder position.
+ * NOTE(review): `rm` is presumably the rounding-mode field -- confirm
+ * against the builder definitions.
+ */
+#define _FLW(rd, rs1, im) Itype(7, rd, 2, rs1, im)
+#define _FSW(rs1, rs2, imm) Stype(39, 2, rs1, rs2, imm)
+#define _FMADD_S(rd, rs1, rs2, rs3) R4type(67, rd, 0, rs1, rs2, 0, rs3)
+#define _FMSUB_S(rd, rs1, rs2, rs3) R4type(71, rd, 0, rs1, rs2, 0, rs3)
+#define _FNMSUB_S(rd, rs1, rs2, rs3) R4type(75, rd, 0, rs1, rs2, 0, rs3)
+#define _FNMADD_S(rd, rs1, rs2, rs3) R4type(79, rd, 0, rs1, rs2, 0, rs3)
+#define _FADD_S(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 0)
+#define _FSUB_S(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 4)
+#define _FMUL_S(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 8)
+#define _FDIV_S(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 12)
+#define _FSQRT_S(rd, rs1) Rtype(83, rd, 0, rs1, 0, 44)
+#define _FSGNJ_S(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 16)
+#define _FSGNJN_S(rd, rs1, rs2) Rtype(83, rd, 1, rs1, rs2, 16)
+#define _FSGNJX_S(rd, rs1, rs2) Rtype(83, rd, 2, rs1, rs2, 16)
+#define _FMIN_S(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 20)
+#define _FMAX_S(rd, rs1, rs2) Rtype(83, rd, 1, rs1, rs2, 20)
+#define _FCVT_W_S(rd, rs1, rm) Rtype(83, rd, rm, rs1, 0, 96)
+#define _FCVT_WU_S(rd, rs1, rm) Rtype(83, rd, rm, rs1, 1, 96)
+#define _FMV_X_W(rd, rs1) Rtype(83, rd, 0, rs1, 0, 112)
+#define _FEQ_S(rd, rs1, rs2) Rtype(83, rd, 2, rs1, rs2, 80)
+#define _FLT_S(rd, rs1, rs2) Rtype(83, rd, 1, rs1, rs2, 80)
+#define _FLE_S(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 80)
+#define _FCLASS_S(rd, rs1) Rtype(83, rd, 1, rs1, 0, 112)
+#define _FCVT_S_W(rd, rs1, rm) Rtype(83, rd, rm, rs1, 0, 104)
+#define _FCVT_S_WU(rd, rs1, rm) Rtype(83, rd, rm, rs1, 1, 104)
+#define _FMV_W_X(rd, rs1) Rtype(83, rd, 0, rs1, 0, 120)
+/*
+ * RV64F Standard Extension (in addition to RV32F)
+ *
+ * Conversions between single precision and the L/LU integer forms.  They
+ * differ from the W/WU conversions above only in the fifth Rtype argument
+ * (2 or 3 instead of 0 or 1).
+ */
+#define _FCVT_L_S(rd, rs1, rm) Rtype(83, rd, rm, rs1, 2, 96)
+#define _FCVT_LU_S(rd, rs1, rm) Rtype(83, rd, rm, rs1, 3, 96)
+#define _FCVT_S_L(rd, rs1, rm) Rtype(83, rd, rm, rs1, 2, 104)
+#define _FCVT_S_LU(rd, rs1, rm) Rtype(83, rd, rm, rs1, 3, 104)
+/*
+ * RV32D Standard Extension
+ *
+ * Double-precision counterparts of the RV32F macros.  For the arithmetic,
+ * sign-injection, min/max, compare and integer-conversion macros the last
+ * Rtype argument is the single-precision value plus one; the R4type macros
+ * pass 1 instead of 0 in their sixth argument.
+ */
+#define _FLD(rd, rs1, im) Itype(7, rd, 3, rs1, im)
+#define _FSD(rs1, rs2, imm) Stype(39, 3, rs1, rs2, imm)
+#define _FMADD_D(rd, rs1, rs2, rs3) R4type(67, rd, 0, rs1, rs2, 1, rs3)
+#define _FMSUB_D(rd, rs1, rs2, rs3) R4type(71, rd, 0, rs1, rs2, 1, rs3)
+#define _FNMSUB_D(rd, rs1, rs2, rs3) R4type(75, rd, 0, rs1, rs2, 1, rs3)
+#define _FNMADD_D(rd, rs1, rs2, rs3) R4type(79, rd, 0, rs1, rs2, 1, rs3)
+#define _FADD_D(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 1)
+#define _FSUB_D(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 5)
+#define _FMUL_D(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 9)
+#define _FDIV_D(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 13)
+#define _FSQRT_D(rd, rs1) Rtype(83, rd, 0, rs1, 0, 45)
+#define _FSGNJ_D(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 17)
+#define _FSGNJN_D(rd, rs1, rs2) Rtype(83, rd, 1, rs1, rs2, 17)
+#define _FSGNJX_D(rd, rs1, rs2) Rtype(83, rd, 2, rs1, rs2, 17)
+#define _FMIN_D(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 21)
+#define _FMAX_D(rd, rs1, rs2) Rtype(83, rd, 1, rs1, rs2, 21)
+#define _FCVT_S_D(rd, rs1, rm) Rtype(83, rd, rm, rs1, 1, 32)
+#define _FCVT_D_S(rd, rs1, rm) Rtype(83, rd, rm, rs1, 0, 33)
+#define _FEQ_D(rd, rs1, rs2) Rtype(83, rd, 2, rs1, rs2, 81)
+#define _FLT_D(rd, rs1, rs2) Rtype(83, rd, 1, rs1, rs2, 81)
+#define _FLE_D(rd, rs1, rs2) Rtype(83, rd, 0, rs1, rs2, 81)
+#define _FCLASS_D(rd, rs1) Rtype(83, rd, 1, rs1, 0, 113)
+#define _FCVT_W_D(rd, rs1, rm) Rtype(83, rd, rm, rs1, 0, 97)
+#define _FCVT_WU_D(rd, rs1, rm) Rtype(83, rd, rm, rs1, 1, 97)
+#define _FCVT_D_W(rd, rs1, rm) Rtype(83, rd, rm, rs1, 0, 105)
+#define _FCVT_D_WU(rd, rs1, rm) Rtype(83, rd, rm, rs1, 1, 105)
+/*
+ * RV64D Standard Extension (in addition to RV32D)
+ *
+ * Double-precision conversions to/from the L/LU integer forms, plus the
+ * FMV.X.D / FMV.D.X moves between the integer and floating-point register
+ * files.
+ */
+#define _FCVT_L_D(rd, rs1, rm) Rtype(83, rd, rm, rs1, 2, 97)
+#define _FCVT_LU_D(rd, rs1, rm) Rtype(83, rd, rm, rs1, 3, 97)
+#define _FMV_X_D(rd, rs1) Rtype(83, rd, 0, rs1, 0, 113)
+#define _FCVT_D_L(rd, rs1, rm) Rtype(83, rd, rm, rs1, 2, 105)
+#define _FCVT_D_LU(rd, rs1, rm) Rtype(83, rd, rm, rs1, 3, 105)
+#define _FMV_D_X(rd, rs1) Rtype(83, rd, 0, rs1, 0, 121)
+/*
+ * Pseudo instructions
+ *
+ * Move, absolute value and negate are expressed as sign-injection
+ * instructions whose two source operands are the same register.
+ */
+#define _FMV_S(r0, r1) _FSGNJ_S(r0, r1, r1)
+#define _FABS_S(r0, r1) _FSGNJX_S(r0, r1, r1)
+#define _FNEG_S(r0, r1) _FSGNJN_S(r0, r1, r1)
+#define _FMV_D(r0, r1) _FSGNJ_D(r0, r1, r1)
+#define _FABS_D(r0, r1) _FSGNJX_D(r0, r1, r1)
+#define _FNEG_D(r0, r1) _FSGNJN_D(r0, r1, r1)
+
+// Forward declarations for the floating-point helpers defined further down
+// in this file.  The `_f` suffix marks the single-precision variant and
+// `_d` the double-precision one.
+
+// Binary ALU operations
+static void addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+
+// Unary ALU operations
+static void sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void negr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void negr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void absr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void absr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+
+// Transfer operations
+static void movr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void movr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+
+// Argument management
+static void retr_f(jit_state_t *_jit, int32_t u);
+static void retr_d(jit_state_t *_jit, int32_t u);
+
+// Load operations (r0 is the destination FPR; the address comes from r1,
+// i0, r1 + r2 or r1 + i0 respectively)
+static void ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+// Store operations (address operands come first, the stored FPR last)
+static void str_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void str_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0);
+static void stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+static void sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0);
+static void stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+
+// Branch instructions
+// Each helper emits a comparison plus a conditional branch and returns the
+// relocation for that branch.  The `bun*` forms are also taken when the
+// operands are unordered.
+static jit_reloc_t bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bler_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bger_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bner_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bler_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bger_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bner_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1);
+
+/*
+ * Binary ALU operations
+ */
+/* Single-precision add: emit FADD.S with rd = r0, rs1 = r1, rs2 = r2. */
+static void
+addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _FADD_S(r0, r1, r2));
+}
+/* Double-precision add: emit FADD.D with rd = r0, rs1 = r1, rs2 = r2. */
+static void
+addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _FADD_D(r0, r1, r2));
+}
+/* Single-precision subtract: emit FSUB.S with rd = r0, rs1 = r1, rs2 = r2. */
+static void
+subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _FSUB_S(r0, r1, r2));
+}
+/* Double-precision subtract: emit FSUB.D with rd = r0, rs1 = r1, rs2 = r2. */
+static void
+subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _FSUB_D(r0, r1, r2));
+}
+/* Single-precision multiply: emit FMUL.S with rd = r0, rs1 = r1, rs2 = r2. */
+static void
+mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _FMUL_S(r0, r1, r2));
+}
+/* Double-precision multiply: emit FMUL.D with rd = r0, rs1 = r1, rs2 = r2. */
+static void
+mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _FMUL_D(r0, r1, r2));
+}
+/* Single-precision divide: emit FDIV.S with rd = r0, rs1 = r1, rs2 = r2. */
+static void
+divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _FDIV_S(r0, r1, r2));
+}
+/* Double-precision divide: emit FDIV.D with rd = r0, rs1 = r1, rs2 = r2. */
+static void
+divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _FDIV_D(r0, r1, r2));
+}
+
+/*
+ * Unary ALU operations
+ */
+/* Single-precision square root: emit FSQRT.S with rd = r0, rs1 = r1. */
+static void
+sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FSQRT_S(r0, r1));
+}
+/* Double-precision square root: emit FSQRT.D with rd = r0, rs1 = r1. */
+static void
+sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FSQRT_D(r0, r1));
+}
+/* Single-precision negate: the FNEG.S pseudo-instruction (FSGNJN.S r0, r1, r1). */
+static void
+negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FNEG_S(r0, r1));
+}
+/* Double-precision negate: the FNEG.D pseudo-instruction (FSGNJN.D r0, r1, r1). */
+static void
+negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FNEG_D(r0, r1));
+}
+/* Single-precision absolute value: the FABS.S pseudo-instruction
+ * (FSGNJX.S r0, r1, r1). */
+static void
+absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FABS_S(r0, r1));
+}
+
+/* Double-precision absolute value: the FABS.D pseudo-instruction
+ * (FSGNJX.D r0, r1, r1). */
+static void
+absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FABS_D(r0, r1));
+}
+
+
+/*
+ * Load operations
+ */
+/* Load a float into FPR r0 from the address in GPR r1 (FLW, offset 0). */
+static void
+ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FLW(r0, r1, 0));
+}
+/* Load a double into FPR r0 from the address in GPR r1 (FLD, offset 0). */
+static void
+ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FLD(r0, r1, 0));
+}
+/* Load a float into FPR r0 from the absolute address i0, which is first
+ * materialised in a temporary GPR. */
+static void
+ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, base, i0);
+  ldr_f(_jit, r0, base);
+  unget_temp_gpr(_jit);
+}
+/* Load a float into FPR r0 from the address r1 + r2; the sum is formed in a
+ * temporary GPR. */
+static void
+ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+  addr(_jit, base, r1, r2);
+  ldr_f(_jit, r0, base);
+  unget_temp_gpr(_jit);
+}
+/*
+ * Load a float into FPR r0 from r1 + i0.  A displacement accepted by
+ * simm12_p() is encoded in FLW itself; otherwise the address is computed
+ * into a temporary GPR first.
+ */
+static void
+ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (simm12_p(i0)) {
+    em_wp(_jit, _FLW(r0, r1, i0));
+    return;
+  }
+
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+  addi(_jit, base, r1, i0);
+  ldr_f(_jit, r0, base);
+  unget_temp_gpr(_jit);
+}
+/* Load a double into FPR r0 from the absolute address i0, which is first
+ * materialised in a temporary GPR. */
+static void
+ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, base, i0);
+  ldr_d(_jit, r0, base);
+  unget_temp_gpr(_jit);
+}
+
+/* Load a double into FPR r0 from the address r1 + r2; the sum is formed in
+ * a temporary GPR. */
+static void
+ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+  addr(_jit, base, r1, r2);
+  ldr_d(_jit, r0, base);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Load a double into FPR r0 from r1 + i0.  A displacement accepted by
+ * simm12_p() is encoded in FLD itself; otherwise the address is computed
+ * into a temporary GPR first.
+ */
+static void
+ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (simm12_p(i0)) {
+    em_wp(_jit, _FLD(r0, r1, i0));
+    return;
+  }
+
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+  addi(_jit, base, r1, i0);
+  ldr_d(_jit, r0, base);
+  unget_temp_gpr(_jit);
+}
+
+
+
+/*
+ * Store operations
+ */
+/* Store float: emit FSW with r0 and r1 as its two register operands and a
+ * zero offset (r0 is the address register, as in sti_f/stxr_f). */
+static void
+str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FSW(r0, r1, 0));
+}
+/* Store double: emit FSD with r0 and r1 as its two register operands and a
+ * zero offset (r0 is the address register, as in sti_d/stxr_d). */
+static void
+str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FSD(r0, r1, 0));
+}
+/* Store the float in FPR r0 to the absolute address i0, which is first
+ * materialised in a temporary GPR. */
+static void
+sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, base, i0);
+  str_f(_jit, base, r0);
+  unget_temp_gpr(_jit);
+}
+/* Store the float in FPR r2 to the address r0 + r1; the sum is formed in a
+ * temporary GPR. */
+static void
+stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+  addr(_jit, base, r0, r1);
+  str_f(_jit, base, r2);
+  unget_temp_gpr(_jit);
+}
+/*
+ * Store the float in FPR r1 to r0 + i0.  A displacement accepted by
+ * simm12_p() is encoded in FSW itself; otherwise the address is computed
+ * into a temporary GPR first.
+ */
+static void
+stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (simm12_p(i0)) {
+    em_wp(_jit, _FSW(r0, r1, i0));
+    return;
+  }
+
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+  addi(_jit, base, r0, i0);
+  str_f(_jit, base, r1);
+  unget_temp_gpr(_jit);
+}
+/* Store the double in FPR r0 to the absolute address i0, which is first
+ * materialised in a temporary GPR. */
+static void
+sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, base, i0);
+  str_d(_jit, base, r0);
+  unget_temp_gpr(_jit);
+}
+/* Store the double in FPR r2 to the address r0 + r1; the sum is formed in a
+ * temporary GPR. */
+static void
+stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+  addr(_jit, base, r0, r1);
+  str_d(_jit, base, r2);
+  unget_temp_gpr(_jit);
+}
+/*
+ * Store the double in FPR r1 to r0 + i0.  A displacement accepted by
+ * simm12_p() is encoded in FSD itself; otherwise the address is computed
+ * into a temporary GPR first.
+ */
+static void
+stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (simm12_p(i0)) {
+    em_wp(_jit, _FSD(r0, r1, i0));
+    return;
+  }
+
+  const int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+  addi(_jit, base, r0, i0);
+  str_d(_jit, base, r1);
+  unget_temp_gpr(_jit);
+}
+
+
+/*
+ * Transfer operations
+ */
+/* Copy the float in FPR r1 to FPR r0; emits nothing when both name the same
+ * register. */
+static void
+movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 == r1)
+    return;
+
+  em_wp(_jit, _FMV_S(r0, r1));
+}
+
+/* Copy the double in FPR r1 to FPR r0; emits nothing when both name the
+ * same register. */
+static void
+movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 == r1)
+    return;
+
+  em_wp(_jit, _FMV_D(r0, r1));
+}
+/* Convert the float in FPR r1 to an integer in GPR r0 with FCVT.W.S.  The
+ * rm argument 1 selects round-towards-zero in the RISC-V rounding-mode
+ * encoding. */
+static void
+truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FCVT_W_S(r0, r1, 1));
+}
+/* Convert the double in FPR r1 to an integer in GPR r0 with FCVT.W.D.  The
+ * rm argument 1 selects round-towards-zero in the RISC-V rounding-mode
+ * encoding. */
+static void
+truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FCVT_W_D(r0, r1, 1));
+}
+/* Convert the float in FPR r1 to an integer in GPR r0 with FCVT.L.S (rm = 1,
+ * round-towards-zero).  The macro belongs to the RV64F group above and this
+ * function is not guarded by __WORDSIZE. */
+static void
+truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FCVT_L_S(r0, r1, 1));
+}
+/* Convert the double in FPR r1 to an integer in GPR r0 with FCVT.L.D (rm = 1,
+ * round-towards-zero).  The macro belongs to the RV64D group above and this
+ * function is not guarded by __WORDSIZE. */
+static void
+truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FCVT_L_D(r0, r1, 1));
+}
+
+/* Convert the word-sized integer in GPR r1 to a float in FPR r0: FCVT.S.L on
+ * 64-bit builds, FCVT.S.W on 32-bit builds, both with rm = 0. */
+static void
+extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+#if __WORDSIZE == 64
+ em_wp(_jit, _FCVT_S_L(r0, r1, 0));
+#elif __WORDSIZE == 32
+ em_wp(_jit, _FCVT_S_W(r0, r1, 0));
+#endif
+}
+/* Convert the word-sized integer in GPR r1 to a double in FPR r0: FCVT.D.L
+ * on 64-bit builds, FCVT.D.W on 32-bit builds, both with rm = 0. */
+static void
+extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+#if __WORDSIZE == 64
+ em_wp(_jit, _FCVT_D_L(r0, r1, 0));
+#elif __WORDSIZE == 32
+ em_wp(_jit, _FCVT_D_W(r0, r1, 0));
+#endif
+}
+
+/* Widen the float in FPR r1 to a double in FPR r0 (FCVT.D.S, rm = 0). */
+static void
+extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FCVT_D_S(r0, r1, 0));
+}
+/* Narrow the double in FPR r1 to a float in FPR r0 (FCVT.S.D, rm = 0). */
+static void
+extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _FCVT_S_D(r0, r1, 0));
+}
+
+/*
+ * Load the float constant i0 into FPR r0: its bit pattern is materialised
+ * in a temporary GPR and transferred with FMV.W.X.
+ */
+static void
+movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
+{
+  union { int32_t i; jit_float32_t f; } bits = { .f = i0 };
+  const int32_t scratch = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, scratch, bits.i);
+  em_wp(_jit, _FMV_W_X(r0, scratch));
+  unget_temp_gpr(_jit);
+}
+/*
+ * Load the double constant i0 into FPR r0: its 64-bit pattern is
+ * materialised in a temporary GPR and transferred with FMV.D.X.
+ *
+ * TODO: How to move a 64 bit value from a 32 bit X register?
+ * ATM only works on RV64
+ */
+static void
+movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
+{
+  union { int64_t i; jit_float64_t f; } bits = { .f = i0 };
+  const int32_t scratch = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, scratch, bits.i);
+  em_wp(_jit, _FMV_D_X(r0, scratch));
+  unget_temp_gpr(_jit);
+}
+
+
+/*
+ * Argument management
+ */
+/*
+ * Fetch a float call result: copy FA0 into r0.  The destination is r0 and
+ * the source is FA0, the same direction the integer retval_* helpers use
+ * with A0.
+ */
+static void
+retval_f(jit_state_t *_jit, int32_t r0)
+{
+  movr_f(_jit, r0, jit_fpr_regno(_FA0));
+}
+
+/*
+ * Fetch a double call result: copy FA0 into r0.  The destination is r0 and
+ * the source is FA0, the same direction the integer retval_* helpers use
+ * with A0.
+ */
+static void
+retval_d(jit_state_t *_jit, int32_t r0)
+{
+  movr_d(_jit, r0, jit_fpr_regno(_FA0));
+}
+
+/* Copy the float in FPR u into FA0 and return. */
+static void
+retr_f(jit_state_t *_jit, int32_t u)
+{
+  const int32_t result_reg = jit_fpr_regno(_FA0);
+
+  movr_f(_jit, result_reg, u);
+  ret(_jit);
+}
+
+/* Copy the double in FPR u into FA0 and return. */
+static void
+retr_d(jit_state_t *_jit, int32_t u)
+{
+  const int32_t result_reg = jit_fpr_regno(_FA0);
+
+  movr_d(_jit, result_reg, u);
+  ret(_jit);
+}
+
+
+/*
+ * Branch instructions
+ */
+
+/* Branch if r0 < r1 (single precision): FLT.S writes a flag into a
+ * temporary GPR and the branch is taken when that flag is non-zero. */
+static jit_reloc_t
+bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLT_S(flag, r0, r1));
+  jit_reloc_t taken = bner(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 <= r1 (single precision): FLE.S writes a flag into a
+ * temporary GPR and the branch is taken when that flag is non-zero. */
+static jit_reloc_t
+bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLE_S(flag, r0, r1));
+  jit_reloc_t taken = bner(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 == r1 (single precision): FEQ.S writes a flag into a
+ * temporary GPR and the branch is taken when that flag is non-zero. */
+static jit_reloc_t
+beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FEQ_S(flag, r0, r1));
+  jit_reloc_t taken = bner(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 >= r1: evaluated as r1 <= r0 by swapping the operands of
+ * bler_f(). */
+static jit_reloc_t
+bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bler_f(_jit, r1, r0);
+}
+
+/* Branch if r0 > r1: evaluated as r1 < r0 by swapping the operands of
+ * bltr_f(). */
+static jit_reloc_t
+bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bltr_f(_jit, r1, r0);
+}
+
+/* Branch if r0 != r1 (single precision): taken when the FEQ.S flag is
+ * zero. */
+static jit_reloc_t
+bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FEQ_S(flag, r0, r1));
+  jit_reloc_t taken = beqr(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 < r1 or unordered (single precision): taken when the flag of
+ * FLE.S(r1, r0) is zero. */
+static jit_reloc_t
+bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLE_S(flag, r1, r0));
+  jit_reloc_t taken = beqr(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 <= r1 or unordered (single precision): taken when the flag
+ * of FLT.S(r1, r0) is zero. */
+static jit_reloc_t
+bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLT_S(flag, r1, r0));
+  jit_reloc_t taken = beqr(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/*
+ * Branch if r0 == r1 or unordered (single precision): taken when neither
+ * FLT.S(r0, r1) nor FLT.S(r1, r0) sets its flag.  Two temporaries are taken
+ * and both are released before returning.
+ */
+static jit_reloc_t
+buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  int32_t t0 = jit_gpr_regno(get_temp_gpr(_jit));
+  int32_t t1 = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLT_S(t0, r0, r1));
+  em_wp(_jit, _FLT_S(t1, r1, r0));
+  orr(_jit, t0, t0, t1);
+  jit_reloc_t ret = beqr(_jit, t0, jit_gpr_regno(_ZERO));
+
+  /* One unget per get_temp_gpr above. */
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return ret;
+}
+
+/* Branch if r0 >= r1 or unordered (single precision): taken when the flag
+ * of FLT.S(r0, r1) is zero. */
+static jit_reloc_t
+bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLT_S(flag, r0, r1));
+  jit_reloc_t taken = beqr(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 > r1 or unordered (single precision): taken when the flag of
+ * FLE.S(r0, r1) is zero. */
+static jit_reloc_t
+bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLE_S(flag, r0, r1));
+  jit_reloc_t taken = beqr(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/*
+ * Branch if r0 < r1 or r0 > r1 (single precision): taken when either
+ * FLT.S(r1, r0) or FLT.S(r0, r1) sets its flag.  Two temporaries are taken
+ * and both are released before returning.
+ */
+static jit_reloc_t
+bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  int32_t t0 = jit_gpr_regno(get_temp_gpr(_jit));
+  int32_t t1 = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLT_S(t0, r1, r0));
+  em_wp(_jit, _FLT_S(t1, r0, r1));
+  orr(_jit, t0, t0, t1);
+  jit_reloc_t ret = bner(_jit, t0, jit_gpr_regno(_ZERO));
+
+  /* One unget per get_temp_gpr above. */
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return ret;
+}
+
+/*
+ * Branch if r0 and r1 are ordered (single precision): each operand is
+ * compared with itself using FEQ.S and the branch is taken when both flags
+ * are set.  Two temporaries are taken and both are released before
+ * returning.
+ */
+static jit_reloc_t
+bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  int32_t t0 = jit_gpr_regno(get_temp_gpr(_jit));
+  int32_t t1 = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FEQ_S(t0, r0, r0));
+  em_wp(_jit, _FEQ_S(t1, r1, r1));
+  andr(_jit, t0, t0, t1);
+  jit_reloc_t ret = bner(_jit, t0, jit_gpr_regno(_ZERO));
+
+  /* One unget per get_temp_gpr above. */
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return ret;
+}
+
+/*
+ * Branch if r0 and r1 are unordered (single precision): each operand is
+ * compared with itself using FEQ.S and the branch is taken when the AND of
+ * the two flags is zero.  Two temporaries are taken and both are released
+ * before returning.
+ */
+static jit_reloc_t
+bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  int32_t t0 = jit_gpr_regno(get_temp_gpr(_jit));
+  int32_t t1 = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FEQ_S(t0, r1, r1));
+  em_wp(_jit, _FEQ_S(t1, r0, r0));
+  andr(_jit, t0, t0, t1);
+  jit_reloc_t ret = beqr(_jit, t0, jit_gpr_regno(_ZERO));
+
+  /* One unget per get_temp_gpr above. */
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return ret;
+}
+
+/* Branch if r0 < r1 (double precision): FLT.D writes a flag into a
+ * temporary GPR and the branch is taken when that flag is non-zero. */
+static jit_reloc_t
+bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLT_D(flag, r0, r1));
+  jit_reloc_t taken = bner(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 <= r1 (double precision): FLE.D writes a flag into a
+ * temporary GPR and the branch is taken when that flag is non-zero. */
+static jit_reloc_t
+bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLE_D(flag, r0, r1));
+  jit_reloc_t taken = bner(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 == r1 (double precision): FEQ.D writes a flag into a
+ * temporary GPR and the branch is taken when that flag is non-zero. */
+static jit_reloc_t
+beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FEQ_D(flag, r0, r1));
+  jit_reloc_t taken = bner(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 >= r1: evaluated as r1 <= r0 by swapping the operands of
+ * bler_d(). */
+static jit_reloc_t
+bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bler_d(_jit, r1, r0);
+}
+
+/* Branch if r0 > r1: evaluated as r1 < r0 by swapping the operands of
+ * bltr_d(). */
+static jit_reloc_t
+bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bltr_d(_jit, r1, r0);
+}
+
+/* Branch if r0 != r1 (double precision): taken when the FEQ.D flag is
+ * zero. */
+static jit_reloc_t
+bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FEQ_D(flag, r0, r1));
+  jit_reloc_t taken = beqr(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 < r1 or unordered (double precision): taken when the flag of
+ * FLE.D(r1, r0) is zero. */
+static jit_reloc_t
+bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLE_D(flag, r1, r0));
+  jit_reloc_t taken = beqr(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 <= r1 or unordered (double precision): taken when the flag
+ * of FLT.D(r1, r0) is zero. */
+static jit_reloc_t
+bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLT_D(flag, r1, r0));
+  jit_reloc_t taken = beqr(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/*
+ * Branch if r0 == r1 or unordered (double precision): taken when neither
+ * FLT.D(r0, r1) nor FLT.D(r1, r0) sets its flag.  Two temporaries are taken
+ * and both are released before returning.
+ */
+static jit_reloc_t
+buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  int32_t t0 = jit_gpr_regno(get_temp_gpr(_jit));
+  int32_t t1 = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLT_D(t0, r0, r1));
+  em_wp(_jit, _FLT_D(t1, r1, r0));
+  orr(_jit, t0, t0, t1);
+  jit_reloc_t ret = beqr(_jit, t0, jit_gpr_regno(_ZERO));
+
+  /* One unget per get_temp_gpr above. */
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return ret;
+}
+
+/* Branch if r0 >= r1 or unordered (double precision): taken when the flag
+ * of FLT.D(r0, r1) is zero. */
+static jit_reloc_t
+bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLT_D(flag, r0, r1));
+  jit_reloc_t taken = beqr(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/* Branch if r0 > r1 or unordered (double precision): taken when the flag of
+ * FLE.D(r0, r1) is zero. */
+static jit_reloc_t
+bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t zero = jit_gpr_regno(_ZERO);
+  const int32_t flag = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLE_D(flag, r0, r1));
+  jit_reloc_t taken = beqr(_jit, flag, zero);
+  unget_temp_gpr(_jit);
+
+  return taken;
+}
+
+/*
+ * Branch if r0 < r1 or r0 > r1 (double precision): taken when either
+ * FLT.D(r1, r0) or FLT.D(r0, r1) sets its flag.  Two temporaries are taken
+ * and both are released before returning.
+ */
+static jit_reloc_t
+bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  int32_t t0 = jit_gpr_regno(get_temp_gpr(_jit));
+  int32_t t1 = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FLT_D(t0, r1, r0));
+  em_wp(_jit, _FLT_D(t1, r0, r1));
+  orr(_jit, t0, t0, t1);
+  jit_reloc_t ret = bner(_jit, t0, jit_gpr_regno(_ZERO));
+
+  /* One unget per get_temp_gpr above. */
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return ret;
+}
+
+/*
+ * Branch if r0 and r1 are ordered (double precision): each operand is
+ * compared with itself using FEQ.D and the branch is taken when both flags
+ * are set.  Two temporaries are taken and both are released before
+ * returning.
+ */
+static jit_reloc_t
+bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  int32_t t0 = jit_gpr_regno(get_temp_gpr(_jit));
+  int32_t t1 = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FEQ_D(t0, r0, r0));
+  em_wp(_jit, _FEQ_D(t1, r1, r1));
+  andr(_jit, t0, t0, t1);
+  jit_reloc_t ret = bner(_jit, t0, jit_gpr_regno(_ZERO));
+
+  /* One unget per get_temp_gpr above. */
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return ret;
+}
+
+/*
+ * Branch if r0 and r1 are unordered (double precision): each operand is
+ * compared with itself using FEQ.D and the branch is taken when the AND of
+ * the two flags is zero.  Two temporaries are taken and both are released
+ * before returning.
+ */
+static jit_reloc_t
+bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  int32_t t0 = jit_gpr_regno(get_temp_gpr(_jit));
+  int32_t t1 = jit_gpr_regno(get_temp_gpr(_jit));
+
+  em_wp(_jit, _FEQ_D(t0, r1, r1));
+  em_wp(_jit, _FEQ_D(t1, r0, r0));
+  andr(_jit, t0, t0, t1);
+  jit_reloc_t ret = beqr(_jit, t0, jit_gpr_regno(_ZERO));
+
+  /* One unget per get_temp_gpr above. */
+  unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+  return ret;
+}
diff --git a/lightening/riscv.c b/lightening/riscv.c
new file mode 100644
index 000000000..808192fae
--- /dev/null
+++ b/lightening/riscv.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2012-2021 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Ekaitz Zarraga <ekaitz@elenq.tech>
+ */
+
+#include "riscv-cpu.c"
+#include "riscv-fpu.c"
+
+// Registers handed out by next_abi_arg for integer and floating-point
+// arguments, in allocation order.
+static const jit_gpr_t abi_gpr_args[] = {
+  _A0, _A1, _A2, _A3, _A4, _A5, _A6, _A7
+};
+static const jit_fpr_t abi_fpr_args[] = {
+  _FA0, _FA1, _FA2, _FA3, _FA4, _FA5, _FA6, _FA7
+};
+// Number of entries in each of the two tables above.
+static const int abi_gpr_arg_count = sizeof abi_gpr_args / sizeof abi_gpr_args[0];
+static const int abi_fpr_arg_count = sizeof abi_fpr_args / sizeof abi_fpr_args[0];
+
+// Cursor used to assign a location (register or stack slot) to each
+// argument of a call, one argument per next_abi_arg() call.
+struct abi_arg_iterator
+{
+ // Argument descriptors being walked and how many there are.
+ const jit_operand_t *args;
+ size_t argc;
+
+ // Index of the next argument to place.
+ size_t arg_idx;
+ // Next unused slot in abi_gpr_args / abi_fpr_args.
+ size_t gpr_idx;
+ size_t fpr_idx;
+ // NOTE(review): not referenced by the RISC-V code visible here;
+ // presumably inherited from another backend -- confirm before removing.
+ uint32_t vfp_used_registers;
+ // Bytes of stack consumed so far by arguments that did not fit in
+ // registers.
+ size_t stack_size;
+ // NOTE(review): not referenced by the RISC-V code visible here.
+ size_t stack_padding;
+};
+
+// System page size in bytes; set by jit_get_cpu() and used by jit_flush()
+// to round the flushed range out to page boundaries.
+static size_t page_size;
+
+// Record the system page size and report success.  No CPU feature
+// detection is done yet (see the FIXME below), so this always returns 1.
+// NOTE(review): the sysconf() result is stored without checking for -1.
+jit_bool_t
+jit_get_cpu(void)
+{
+ page_size = sysconf(_SC_PAGE_SIZE);
+ // FIXME check version, extensions, hardware fp support
+ //
+ // Compiler macros that could drive that check:
+ // --------------------------------------------
+ // __riscv: defined for any RISC-V target. Older versions of the GCC
+ // toolchain defined __riscv__.
+ //
+ // __riscv_xlen: 32 for RV32 and 64 for RV64.
+ //
+ // __riscv_float_abi_soft, __riscv_float_abi_single,
+ // __riscv_float_abi_double: exactly one of these three is defined,
+ // depending on the target ABI.
+ //
+ // __riscv_cmodel_medlow, __riscv_cmodel_medany: exactly one of these two
+ // is defined, depending on the target code model.
+ //
+ // __riscv_mul: defined when targeting the 'M' ISA extension.
+ //
+ // __riscv_muldiv: defined when targeting the 'M' ISA extension and -mno-div
+ // has not been used.
+ //
+ // __riscv_div: defined when targeting the 'M' ISA extension and -mno-div has
+ // not been used.
+ //
+ // __riscv_atomic: defined when targeting the 'A' ISA extension.
+ //
+ // __riscv_flen: 32 when targeting the 'F' ISA extension (but not 'D') and 64
+ // when targeting 'FD'.
+ //
+ // __riscv_fdiv: defined when targeting the 'F' or 'D' ISA extensions and
+ // -mno-fdiv has not been used.
+ //
+ // __riscv_fsqrt: defined when targeting the 'F' or 'D' ISA extensions and
+ // -mno-fdiv has not been used.
+ //
+ // __riscv_compressed: defined when targeting the 'C' ISA extension.
+ return 1;
+}
+
+// Per-state initialisation hook.  Nothing is set up here for RISC-V;
+// _jit is unused and the function always returns 1.
+jit_bool_t
+jit_init(jit_state_t *_jit)
+{
+ return 1;
+}
+
+// Size in bytes of the frame assumed to exist on entry; this backend
+// reports none.
+static size_t
+jit_initial_frame_size (void)
+{
+  const size_t no_frame = 0;
+
+  return no_frame;
+}
+
+// Point ITER at the start of ARGS (ARGC entries) with every counter and
+// the accumulated stack size cleared.
+static void
+reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+                       const jit_operand_t *args)
+{
+  // Members not named in the initializer are zero-initialized.
+  *iter = (struct abi_arg_iterator) { .args = args, .argc = argc };
+}
+
+/*
+ * Assign a location to the next argument and store it in *ARG.
+ *
+ * Integer-class arguments take the next free entry of abi_gpr_args and
+ * floating-point arguments the next free entry of abi_fpr_args.  Anything
+ * that does not get a register is placed at [JIT_SP + stack_size] and the
+ * running stack size grows by one machine word.
+ *
+ * NOTE(review): a floating-point argument that finds no free FPR goes
+ * straight to the stack here, even if GPRs remain -- confirm against the
+ * RISC-V calling convention.
+ */
+static void
+next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg)
+{
+  ASSERT(iter->arg_idx < iter->argc);
+  enum jit_operand_abi abi = iter->args[iter->arg_idx].abi;
+  iter->arg_idx++;
+
+  if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) {
+    jit_gpr_t reg = abi_gpr_args[iter->gpr_idx];
+    iter->gpr_idx++;
+    *arg = jit_operand_gpr (abi, reg);
+  } else if (is_fpr_arg(abi) && iter->fpr_idx < abi_fpr_arg_count) {
+    jit_fpr_t reg = abi_fpr_args[iter->fpr_idx];
+    iter->fpr_idx++;
+    *arg = jit_operand_fpr (abi, reg);
+  } else {
+    // Out of registers of the right class: spill to the stack.
+    *arg = jit_operand_mem (abi, JIT_SP, iter->stack_size);
+#if __WORDSIZE == 32
+    iter->stack_size += 4;
+#elif __WORDSIZE == 64
+    iter->stack_size += 8;
+#endif
+  }
+}
+
+// Flush the instruction cache for the code in [fptr, tptr), widened
+// outwards to whole pages using the page size recorded by jit_get_cpu().
+static void
+jit_flush(void *fptr, void *tptr)
+{
+  // Round the start down and the end up to a page boundary.
+  jit_word_t first = (jit_word_t)fptr & -page_size;
+  jit_word_t last = (((jit_word_t)tptr) + page_size - 1) & -page_size;
+
+  __clear_cache((void *)first, (void *)last);
+}
+
+// Stack alignment, in bytes, that generated code maintains.
+// NOTE: See: https://github.com/riscv/riscv-gcc/issues/61
+// NOTE(review): the standard RISC-V psABI asks for a 16-byte aligned
+// stack pointer -- confirm that 8 is sufficient for calls into C code.
+static inline size_t
+jit_stack_alignment(void)
+{
+  enum { stack_alignment_bytes = 8 };
+
+  return stack_alignment_bytes;
+}
+
+// Hook for shortening an already-emitted relocation.  Intentionally a
+// no-op in this backend: none of the arguments are used.
+static void
+jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, jit_pointer_t addr)
+{
+}
+
+// Turn a pointer to emitted code into a callable function pointer.  No
+// adjustment is made in this backend; PTR is returned unchanged.
+static void*
+bless_function_pointer(void *ptr)
+{
+ return ptr;
+}
+
+
+/*
+ * Veneers
+ */
+// In-memory view of a veneer as written by emit_veneer(): three
+// instructions followed by the absolute target address they load and
+// jump to.
+// NOTE(review): emit_veneer() emits the address immediately after the
+// three 32-bit instructions (byte offset 12).  With a uint64_t member the
+// compiler will presumably insert 4 bytes of padding and place `address`
+// at offset 16 -- confirm that this layout matches the emitted bytes on
+// RV64.
+struct veneer{
+ instr_t auipc;
+ instr_t load; // `ld` in RV64 and `lw` in RV32
+ instr_t jalr;
+#if __WORDSIZE == 64
+ uint64_t address;
+#elif __WORDSIZE == 32
+ uint32_t address;
+#endif
+};
+
+/*
+ * Emit a veneer that jumps to the absolute address TARGET.
+ *
+ * Layout (RV64 shown; RV32 uses `lw` and a 32-bit literal):
+ *
+ *     auipc tmp, 0          ; tmp = address of this instruction
+ *     ld    tmp, 12(tmp)    ; load the literal stored 12 bytes further on
+ *     jalr  zero, 0(tmp)    ; jump, discarding the return address
+ *     <TARGET literal>
+ */
+static void
+emit_veneer(jit_state_t *_jit, jit_pointer_t target)
+{
+  const int32_t scratch = jit_gpr_regno(get_temp_gpr(_jit));
+
+  emit_u32(_jit, _AUIPC(scratch, 0));
+#if __WORDSIZE == 64
+  emit_u32(_jit, _LD(scratch, scratch, 12));
+#elif __WORDSIZE == 32
+  emit_u32(_jit, _LW(scratch, scratch, 12));
+#endif
+  emit_u32(_jit, _JALR(jit_gpr_regno(_ZERO), scratch, 0));
+
+  // The literal the load above reads.
+#if __WORDSIZE == 64
+  emit_u64(_jit, (uint64_t) target);
+#elif __WORDSIZE == 32
+  emit_u32(_jit, (uint32_t) target);
+#endif
+
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Retarget the veneer that starts at LOC so that it jumps to ADDR.
+ *
+ * emit_veneer() writes three 32-bit instructions followed directly by the
+ * address literal, and its load reads from byte offset 12.  The literal is
+ * therefore overwritten at exactly that offset with memcpy(), instead of
+ * through `struct veneer`, whose 64-bit `address` member would normally be
+ * padded out to offset 16 and is not naturally aligned at offset 12.
+ */
+static void
+patch_veneer(uint32_t *loc, jit_pointer_t addr)
+{
+  uint8_t *literal = (uint8_t *) loc + 3 * sizeof(uint32_t);
+#if __WORDSIZE == 64
+  uint64_t value = (uint64_t) addr;
+  memcpy(literal, &value, sizeof value);
+#elif __WORDSIZE == 32
+  uint32_t value = (uint32_t) addr;
+  memcpy(literal, &value, sizeof value);
+#else
+  (void) literal;
+  (void) addr;
+#endif
+}
+
+
+/*
+ * Conditional jumps
+ */
+// Store the byte offset V into the immediate fields of the B-type
+// (conditional branch) instruction at LOC.  Bit 0 of V is not encoded.
+static void
+patch_jcc_offset(uint32_t *loc, ptrdiff_t v)
+{
+  instr_t *insn = (instr_t *) loc;
+
+  insn->B.imm4_1 = (v >> 1) & 0xf;
+  insn->B.imm10_5 = (v >> 5) & 0x3f;
+  insn->B.imm11 = (v >> 11) & 0x1;
+  insn->B.imm12 = (v >> 12) & 0x1;
+}
+// Point the conditional branch at LOC at its veneer.  The encoding is the
+// same as for a direct branch, so this simply forwards to
+// patch_jcc_offset().
+static void
+patch_veneer_jcc_offset(uint32_t *loc, ptrdiff_t offset){
+ patch_jcc_offset(loc, offset);
+}
+
+/*
+ * Decode the signed byte offset stored in the B-type (conditional branch)
+ * instruction at LOC.
+ *
+ * The immediate is 13 bits wide (bits 12..1, bit 0 implicitly zero) and
+ * its sign bit is imm12, so sign extension must fill bits 31..12 only.
+ * Bit 11 comes from the imm11 field and must not be touched by the sign
+ * extension, otherwise offsets in [-4096, -2050] decode incorrectly.
+ */
+static int32_t
+read_jcc_offset(uint32_t *loc)
+{
+  instr_t i;
+  i.w = *loc;
+
+  // Reassemble the scattered immediate; masking makes the result
+  // independent of whether the bit-fields are signed.
+  int32_t offset = ((i.B.imm12 & 0x1) << 12)
+                 | ((i.B.imm11 & 0x1) << 11)
+                 | ((i.B.imm10_5 & 0x3f) << 5)
+                 | ((i.B.imm4_1 & 0xf) << 1);
+
+  // Sign-extend from bit 12 without shifting into the sign bit.
+  return (offset ^ 0x1000) - 0x1000;
+}
+// Return non-zero when OFFSET (in bytes) is even and lies within
+// [-0x1000, 0xFFF], the reach of a conditional branch.  FLAGS is unused.
+static int
+offset_in_jcc_range(ptrdiff_t offset, int flags)
+{
+  const ptrdiff_t lowest = -0x1000;
+  const ptrdiff_t highest = 0xFFF;
+
+  // Odd offsets cannot be encoded.
+  if ((offset & 1) != 0)
+    return 0;
+
+  return !(offset < lowest || offset > highest);
+}
+
+/*
+ * Unconditional jumps
+ */
+/*
+ * Decode the signed byte offset stored in the J-type (unconditional jump)
+ * instruction at LOC.
+ *
+ * The immediate is 21 bits wide (bits 20..1, bit 0 implicitly zero) and
+ * its sign bit is imm20, so sign extension must fill bits 31..20 only.
+ * Bit 19 belongs to the imm19_12 field and must not be touched by the
+ * sign extension, otherwise large negative offsets decode incorrectly.
+ */
+static int32_t read_jmp_offset(uint32_t *loc)
+{
+  instr_t i;
+  i.w = *loc;
+
+  // Reassemble the scattered immediate; masking makes the result
+  // independent of whether the bit-fields are signed.
+  int32_t offset = ((i.J.imm20 & 0x1) << 20)
+                 | ((i.J.imm19_12 & 0xff) << 12)
+                 | ((i.J.imm11 & 0x1) << 11)
+                 | ((i.J.imm10_1 & 0x3ff) << 1);
+
+  // Sign-extend from bit 20 without shifting into the sign bit.
+  return (offset ^ 0x100000) - 0x100000;
+}
+// Return non-zero when OFFSET (in bytes) is even and lies within
+// [-0x100000, 0xFFFFF], the reach of an unconditional jump.  FLAGS is
+// unused.
+static int
+offset_in_jmp_range(ptrdiff_t offset, int flags)
+{
+  const ptrdiff_t lowest = -0x100000;
+  const ptrdiff_t highest = 0xFFFFF;
+
+  // Odd offsets cannot be encoded.
+  if ((offset & 1) != 0)
+    return 0;
+
+  return !(offset < lowest || offset > highest);
+}
+
+// Store the byte offset V into the immediate fields of the J-type
+// (unconditional jump) instruction at LOC.  Bit 0 of V is not encoded.
+static void
+patch_jmp_offset(uint32_t *loc, ptrdiff_t v)
+{
+  instr_t *insn = (instr_t *) loc;
+
+  insn->J.imm10_1 = (v >> 1) & 0x3ff;
+  insn->J.imm11 = (v >> 11) & 0x1;
+  insn->J.imm19_12 = (v >> 12) & 0xff;
+  insn->J.imm20 = (v >> 20) & 0x1;
+}
+
+// Point the unconditional jump at LOC at its veneer.  The encoding is the
+// same as for a direct jump, so this simply forwards to
+// patch_jmp_offset().
+static void
+patch_veneer_jmp_offset(uint32_t *loc, ptrdiff_t offset)
+{
+ patch_jmp_offset(loc, offset);
+}
+
+
+/*
+ * Jumps around the veneer
+ */
+/*
+ * Make the jump previously emitted at LOC by jmp_without_veneer() land on
+ * the current emission point.
+ *
+ * patch_jmp_offset() encodes a byte offset, so the distance is measured
+ * in bytes.  Subtracting the uint32_t pointers directly would yield a
+ * count of 32-bit words and make the jump cover a quarter of the
+ * intended distance.
+ */
+static void
+patch_jmp_without_veneer(jit_state_t *_jit, uint32_t *loc)
+{
+  const uint8_t *here = (const uint8_t *) _jit->pc.ui;
+  const uint8_t *from = (const uint8_t *) loc;
+
+  patch_jmp_offset(loc, here - from);
+}
+// Emit a `jal zero, 0` placeholder at the current position and return
+// its address so that patch_jmp_without_veneer() can fill in the offset
+// later.
+static uint32_t*
+jmp_without_veneer(jit_state_t *_jit)
+{
+  uint32_t *const placeholder = _jit->pc.ui;
+
+  emit_u32(_jit, _JAL(jit_gpr_regno(_ZERO), 0));
+  return placeholder;
+}
+
+
+/*
+ * Load from pool offset
+ */
+/*
+ * Store the byte offset V into the auipc + load pair at LOC.
+ *
+ * The load's 12-bit immediate is sign-extended by the hardware, so V is
+ * split as V == hi20 * 4096 + lo12 with lo12 in [-2048, 2047]; adding
+ * 0x800 before shifting rounds hi20 so that the (possibly negative) lo12
+ * brings the sum back to V.  A plain `v >> 12` / `v & 0xfff` split is off
+ * by 4096 whenever bit 11 of V is set.
+ */
+static void
+patch_load_from_pool_offset(uint32_t *loc, int32_t v)
+{
+  load_from_pool_t *i = (load_from_pool_t *) loc;
+  // 64-bit arithmetic avoids signed overflow in `v + 0x800`.
+  int64_t hi20 = ((int64_t) v + 0x800) >> 12;
+  int64_t lo12 = (int64_t) v - hi20 * 0x1000;
+
+  i->inst.auipc.U.imm31_12 = (int32_t) hi20;
+  i->inst.load.I.imm11_0 = (int32_t) lo12;
+}
+
+/*
+ * Read back the byte offset encoded in the auipc + load pair at LOC, i.e.
+ * the inverse of patch_load_from_pool_offset(): the 20-bit upper part is
+ * scaled by 4096 and the sign-extended 12-bit lower part is added to it.
+ */
+static int32_t
+read_load_from_pool_offset(uint32_t *loc)
+{
+  load_from_pool_t *i = (load_from_pool_t*) loc;
+  // Mask first so the result does not depend on bit-field signedness,
+  // then sign-extend each part from its top bit.
+  int32_t hi20 = i->inst.auipc.U.imm31_12 & 0xfffff;
+  int32_t lo12 = i->inst.load.I.imm11_0 & 0xfff;
+
+  hi20 = (hi20 ^ 0x80000) - 0x80000;
+  lo12 = (lo12 ^ 0x800) - 0x800;
+  return hi20 * 0x1000 + lo12;
+}
+
diff --git a/lightening/riscv.h b/lightening/riscv.h
new file mode 100644
index 000000000..653d74bf9
--- /dev/null
+++ b/lightening/riscv.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2012-2021 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Ekaitz Zarraga <ekaitz@elenq.tech>
+ */
+
+#ifndef _jit_riscv_h
+#define _jit_riscv_h
+
+// This backend emits literal pools.
+#define JIT_NEEDS_LITERAL_POOL 1
+
+// Integer (x) registers, by ABI name; the number is the x-register index.
+// Special registers
+#define _RA JIT_GPR(1) // Return address
+#define _SP JIT_GPR(2) // Stack pointer
+#define _GP JIT_GPR(3) // Global pointer
+#define _TP JIT_GPR(4) // Thread pointer
+#define _FP JIT_GPR(8) // Frame pointer
+#define _ZERO JIT_GPR(0) // Always zero
+// Argument passing
+#define _A0 JIT_GPR(10)
+#define _A1 JIT_GPR(11)
+#define _A2 JIT_GPR(12)
+#define _A3 JIT_GPR(13)
+#define _A4 JIT_GPR(14)
+#define _A5 JIT_GPR(15)
+#define _A6 JIT_GPR(16)
+#define _A7 JIT_GPR(17)
+// Saved registers
+#define _S0 _FP // S0 is the frame pointer normally
+#define _S1 JIT_GPR(9)
+#define _S2 JIT_GPR(18)
+#define _S3 JIT_GPR(19)
+#define _S4 JIT_GPR(20)
+#define _S5 JIT_GPR(21)
+#define _S6 JIT_GPR(22)
+#define _S7 JIT_GPR(23)
+#define _S8 JIT_GPR(24)
+#define _S9 JIT_GPR(25)
+#define _S10 JIT_GPR(26)
+#define _S11 JIT_GPR(27)
+// Temporaries
+#define _T0 JIT_GPR(5)
+#define _T1 JIT_GPR(6)
+#define _T2 JIT_GPR(7)
+#define _T3 JIT_GPR(28)
+#define _T4 JIT_GPR(29)
+#define _T5 JIT_GPR(30)
+#define _T6 JIT_GPR(31)
+
+// Floating-point (f) registers, by ABI name; the number is the f-register
+// index.
+// Temporaries
+#define _FT0 JIT_FPR(0)
+#define _FT1 JIT_FPR(1)
+#define _FT2 JIT_FPR(2)
+#define _FT3 JIT_FPR(3)
+#define _FT4 JIT_FPR(4)
+#define _FT5 JIT_FPR(5)
+#define _FT6 JIT_FPR(6)
+#define _FT7 JIT_FPR(7)
+#define _FT8 JIT_FPR(28)
+#define _FT9 JIT_FPR(29)
+#define _FT10 JIT_FPR(30)
+#define _FT11 JIT_FPR(31)
+// Saved registers
+#define _FS0 JIT_FPR(8)
+#define _FS1 JIT_FPR(9)
+#define _FS2 JIT_FPR(18)
+#define _FS3 JIT_FPR(19)
+#define _FS4 JIT_FPR(20)
+#define _FS5 JIT_FPR(21)
+#define _FS6 JIT_FPR(22)
+#define _FS7 JIT_FPR(23)
+#define _FS8 JIT_FPR(24)
+#define _FS9 JIT_FPR(25)
+#define _FS10 JIT_FPR(26)
+#define _FS11 JIT_FPR(27)
+// Argument passing
+#define _FA0 JIT_FPR(10)
+#define _FA1 JIT_FPR(11)
+#define _FA2 JIT_FPR(12)
+#define _FA3 JIT_FPR(13)
+#define _FA4 JIT_FPR(14)
+#define _FA5 JIT_FPR(15)
+#define _FA6 JIT_FPR(16)
+#define _FA7 JIT_FPR(17)
+
+
+// JIT Registers
+// ----------------------------------------------------------------------
+// Caller-save registers JIT_R${NUM}
+// Callee-save registers JIT_V${NUM}
+// Caller-save temporary registers JIT_TMP${NUM}
+// Caller-save floating point registers JIT_F${NUM}
+// Callee-save floating point registers JIT_VF${NUM}
+// Caller-save floating point temporary register JIT_FTMP
+
+// Caller-save registers (the integer argument registers)
+#define JIT_R0 _A0
+#define JIT_R1 _A1
+#define JIT_R2 _A2
+#define JIT_R3 _A3
+#define JIT_R4 _A4
+#define JIT_R5 _A5
+#define JIT_R6 _A6
+#define JIT_R7 _A7
+
+// Register used as a CARRY
+#define JIT_CARRY _T0
+// Caller-save temporaries
+#define JIT_TMP0 _T1
+#define JIT_TMP1 _T2
+#define JIT_TMP2 _T3
+
+#define JIT_TMP3 _T4
+#define JIT_TMP4 _T5
+#define JIT_TMP5 _T6
+
+// Callee-save registers (_S0 is left out: it is the frame pointer)
+#define JIT_V0 _S1
+#define JIT_V1 _S2
+#define JIT_V2 _S3
+#define JIT_V3 _S4
+#define JIT_V4 _S5
+#define JIT_V5 _S6
+#define JIT_V6 _S7
+#define JIT_V7 _S8
+#define JIT_V8 _S9
+#define JIT_V9 _S10
+#define JIT_V10 _S11
+
+
+// Callee-save floating point registers
+#define JIT_VF0 _FS0
+#define JIT_VF1 _FS1
+#define JIT_VF2 _FS2
+#define JIT_VF3 _FS3
+#define JIT_VF4 _FS4
+#define JIT_VF5 _FS5
+#define JIT_VF6 _FS6
+#define JIT_VF7 _FS7
+#define JIT_VF8 _FS8
+#define JIT_VF9 _FS9
+#define JIT_VF10 _FS10
+#define JIT_VF11 _FS11
+
+// Caller-save floating point registers (the FP argument registers)
+#define JIT_F0 _FA0
+#define JIT_F1 _FA1
+#define JIT_F2 _FA2
+#define JIT_F3 _FA3
+#define JIT_F4 _FA4
+#define JIT_F5 _FA5
+#define JIT_F6 _FA6
+#define JIT_F7 _FA7
+// NOTE: These are ABI temporaries, but they can be exposed as general
+// purpose registers because lightening.c supports only one floating point
+// temporary (JIT_FTMP).
+#define JIT_F8 _FT0
+#define JIT_F9 _FT1
+#define JIT_F10 _FT2
+#define JIT_F11 _FT3
+#define JIT_F12 _FT4
+#define JIT_F13 _FT5
+#define JIT_F14 _FT6
+#define JIT_F15 _FT7
+#define JIT_F16 _FT8
+#define JIT_F17 _FT9
+#define JIT_F18 _FT10
+
+// Floating point temporary register (the one ABI temporary held back)
+#define JIT_FTMP _FT11
+
+// Special purpose registers
+#define JIT_FP _FP
+#define JIT_LR _RA
+#define JIT_SP _SP
+
+// TODO: Make sure this is correct
+#define JIT_PLATFORM_CALLEE_SAVE_GPRS JIT_LR
+
+#endif
- [Guile-commits] branch wip-lightening-riscv created (now a88ebcc1e), Ludovic Courtès, 2023/01/31
- [Guile-commits] 01/07: Makefile: RISCV support and optional vars, Ludovic Courtès, 2023/01/31
- [Guile-commits] 07/07: Merge remote-tracking branch 'ekaitz/main' into wip-lightening-riscv, Ludovic Courtès, 2023/01/31
- [Guile-commits] 06/07: Fix CI, Ludovic Courtès, 2023/01/31
- [Guile-commits] 04/07: RISC-V Support,
Ludovic Courtès <=
- [Guile-commits] 02/07: Add tags to gitignore, Ludovic Courtès, 2023/01/31
- [Guile-commits] 03/07: Add editorconfig, Ludovic Courtès, 2023/01/31
- [Guile-commits] 05/07: Add RISCV to CI and makefile, Ludovic Courtès, 2023/01/31