lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Add bswapr_us, bswapr_ui, and bswapr_ul API functions


From: Paul Cercueil
Subject: [PATCH] Add bswapr_us, bswapr_ui, and bswapr_ul API functions
Date: Wed, 25 May 2022 19:18:20 +0100

The difference between bswapr and htonr/ntohr is that the operation
performed by bswap does not depend on the endianness of the host CPU.

On little-endian hosts, however, the new bswapr functions are
functionally equivalent to the htonr/ntohr ones. The implementations of
the latter have therefore been removed; htonr/ntohr now resolve to the
corresponding bswapr function on little-endian, or to the movr/extr
functions on big-endian.

Optional generic bswapr implementations are provided in lightning.c,
which can be used (or not) by the arch cpu.c files when there's nothing
better to do.

The bswapr implementations generally follow the previous htonr
implementations on little-endian systems, with the exception of the
IA-64 architecture, where the htonr implementation seemed sub-par and
a better (as yet untested) implementation has been added.

This change has been tested on MIPS, x86 and PowerPC.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
---
 check/.gitignore       |   1 +
 check/Makefile.am      |  13 ++--
 check/bswap.ok         |   1 +
 check/bswap.tst        | 154 +++++++++++++++++++++++++++++++++++++++++
 check/lightning.c      |  15 ++++
 doc/body.texi          |   8 +++
 include/lightning.h.in |  12 ++++
 lib/jit_aarch64-cpu.c  |  26 +++----
 lib/jit_aarch64-sz.c   |   3 +
 lib/jit_aarch64.c      |   3 +
 lib/jit_alpha-cpu.c    |  24 +++----
 lib/jit_alpha-sz.c     |   3 +
 lib/jit_alpha.c        |   3 +
 lib/jit_arm-cpu.c      |  28 ++------
 lib/jit_arm-sz.c       |   6 ++
 lib/jit_arm.c          |   2 +
 lib/jit_hppa-cpu.c     |   8 +--
 lib/jit_hppa-sz.c      |   3 +
 lib/jit_hppa.c         |   2 +
 lib/jit_ia64-cpu.c     |  72 +++++--------------
 lib/jit_ia64-sz.c      |  11 +--
 lib/jit_ia64.c         |   3 +
 lib/jit_mips-cpu.c     |  78 +++------------------
 lib/jit_mips-sz.c      |  13 +++-
 lib/jit_mips.c         |   5 ++
 lib/jit_names.c        |   2 +
 lib/jit_ppc-cpu.c      |  54 ++-------------
 lib/jit_ppc-sz.c       |  12 ++++
 lib/jit_ppc.c          |   5 ++
 lib/jit_riscv-cpu.c    |  62 +----------------
 lib/jit_riscv-sz.c     |   3 +
 lib/jit_riscv.c        |   3 +
 lib/jit_s390-cpu.c     |  10 +--
 lib/jit_s390-sz.c      |  10 ++-
 lib/jit_s390.c         |   5 ++
 lib/jit_sparc-cpu.c    |  10 ++-
 lib/jit_sparc-sz.c     |  10 ++-
 lib/jit_sparc.c        |   5 ++
 lib/jit_x86-cpu.c      |  18 ++---
 lib/jit_x86-sz.c       |  12 ++++
 lib/jit_x86.c          |   5 ++
 lib/lightning.c        |  70 +++++++++++++++++++
 42 files changed, 468 insertions(+), 325 deletions(-)
 create mode 100644 check/bswap.ok
 create mode 100644 check/bswap.tst

diff --git a/check/.gitignore b/check/.gitignore
index 470aeaa..a0047bb 100644
--- a/check/.gitignore
+++ b/check/.gitignore
@@ -4,6 +4,7 @@ nodata
 *.trs
 
 3to2
+bswap
 add
 align
 allocai
diff --git a/check/Makefile.am b/check/Makefile.am
index f1155d7..fc9f232 100644
--- a/check/Makefile.am
+++ b/check/Makefile.am
@@ -65,6 +65,7 @@ EXTRA_DIST =                          \
        ldstxi-c.tst    ldstxi-c.ok     \
        cvt.tst         cvt.ok          \
        hton.tst        hton.ok         \
+       bswap.tst       bswap.ok        \
        branch.tst      branch.ok       \
        alu.inc                         \
        alu_add.tst     alu_add.ok      \
@@ -117,7 +118,7 @@ base_TESTS =                                \
        ldstr ldsti                     \
        ldstxr ldstxi                   \
        ldstr-c ldstxr-c ldstxi-c       \
-       cvt hton branch                 \
+       cvt hton bswap branch           \
        alu_add alux_add                \
        alu_sub alux_sub alu_rsb        \
        alu_mul alu_div alu_rem         \
@@ -196,7 +197,7 @@ arm_TESTS =                                 \
        rpn.arm ldstr.arm ldsti.arm             \
        ldstxr.arm ldstxi.arm                   \
        ldstr-c.arm ldstxr-c.arm ldstxi-c.arm   \
-       cvt.arm hton.arm branch.arm             \
+       cvt.arm hton.arm bswap.arm branch.arm   \
        alu_add.arm alux_add.arm                \
        alu_sub.arm alux_sub.arm alu_rsb.arm    \
        alu_mul.arm alu_div.arm alu_rem.arm     \
@@ -222,7 +223,7 @@ swf_TESTS =                                 \
        rpn.swf ldstr.swf ldsti.swf             \
        ldstxr.swf ldstxi.swf                   \
        ldstr-c.swf ldstxr-c.swf ldstxi-c.swf   \
-       cvt.swf hton.swf branch.swf             \
+       cvt.swf hton.swf bswap.swf branch.swf   \
        alu_add.swf alux_add.swf                \
        alu_sub.swf alux_sub.swf alu_rsb.swf    \
        alu_mul.swf alu_div.swf alu_rem.swf     \
@@ -246,7 +247,7 @@ arm_swf_TESTS =                                             
        \
        rpn.arm.swf ldstr.arm.swf ldsti.arm.swf                 \
        ldstxr.arm.swf ldstxi.arm.swf                           \
        ldstr-c.arm.swf ldstxr-c.arm.swf ldstxi-c.arm.swf       \
-       cvt.arm.swf hton.arm.swf branch.arm.swf                 \
+       cvt.arm.swf hton.arm.swf bswap.arm.swf branch.arm.swf   \
        alu_add.arm.swf alux_add.arm.swf                        \
        alu_sub.arm.swf alux_sub.arm.swf alu_rsb.arm.swf        \
        alu_mul.arm.swf alu_div.arm.swf alu_rem.arm.swf         \
@@ -271,8 +272,8 @@ arm4_swf_TESTS =                                            
\
        rpn.arm4.swf ldstr.arm4.swf ldsti.arm4.swf              \
        ldstxr.arm4.swf ldstxi.arm4.swf                         \
        ldstr-c.arm4.swf ldstxr-c.arm4.swf ldstxi-c.arm4.swf    \
-       cvt.arm4.swf hton.arm4.swf branch.arm4.swf              \
-       alu_add.arm4.swf alux_add.arm4.swf                      \
+       cvt.arm4.swf hton.arm4.swf bswap.arm4.swf               \
+       branch.arm4.swf alu_add.arm4.swf alux_add.arm4.swf      \
        alu_sub.arm4.swf alux_sub.arm4.swf alu_rsb.arm4.swf     \
        alu_mul.arm4.swf alu_div.arm4.swf alu_rem.arm4.swf      \
        alu_and.arm4.swf alu_or.arm4.swf alu_xor.arm4.swf       \
diff --git a/check/bswap.ok b/check/bswap.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/bswap.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/bswap.tst b/check/bswap.tst
new file mode 100644
index 0000000..f123e95
--- /dev/null
+++ b/check/bswap.tst
@@ -0,0 +1,154 @@
+.data  16
+ok:
+.c     "ok\n"
+
+#define us12_i         0x1234
+#define us7f_i         0x7ff7
+#define us80_i         0x8008
+#define usff_i         0xffff
+#define ui12_i         0x01234567
+#define ui7f_i         0x7f7ff7f7
+#define ui80_i         0x80800808
+#define uiff_i         0xffffffff
+#define ul12_i         0x0123456789abcdef
+#define ul7f_i         0x7f7f7f7ff7f7f7f7
+#define ul80_i         0x8080808008080808
+#define ulff_i         0xffffffffffffffff
+
+#if __WORDSIZE == 32
+#  define xus12_i      0xffff1234
+#  define xus7f_i      0x10107ff7
+#  define xus80_i      0x81188008
+#  define xusff_i      0xeaaeffff
+#else
+#  define xus12_i      0xffffffffffff1234
+#  define xus7f_i      0x1010100101017ff7
+#  define xus80_i      0x8181811818818008
+#  define xusff_i      0xeaeaeaaeaeaeffff
+#  define xui12_i      0xffffffff01234567
+#  define xui7f_i      0x101001017f7ff7f7
+#  define xui80_i      0x8181181880800808
+#  define xuiff_i      0xeaeaaeaeffffffff
+#endif
+
+#  define us12_o       0x3412
+#  define us7f_o       0xf77f
+#  define us80_o       0x0880
+#  define usff_o       0xffff
+#  define ui12_o       0x67452301
+#  define ui7f_o       0xf7f77f7f
+#  define ui80_o       0x08088080
+#  define uiff_o       0xffffffff
+#  define ul12_o       0xefcdab8967452301
+#  define ul7f_o       0xf7f7f7f77f7f7f7f
+#  define ul80_o       0x0808080880808080
+#  define ulff_o       0xffffffffffffffff
+
+#define BSWAP4(I, O, T, R0, R1)                                \
+       movi %R0 I                                      \
+       bswapr_##T %R1 %R0                              \
+       beqi T##R0##R1##I %R1 O                         \
+       calli @abort                                    \
+T##R0##R1##I:
+
+#define BSWAP3(T, R0, R1)                              \
+       BSWAP4(T##12_i, T##12_o, T, R0, R1)             \
+       BSWAP4(x##T##12_i, T##12_o, T, R0, R1)          \
+       BSWAP4(T##7f_i, T##7f_o, T, R0, R1)             \
+       BSWAP4(x##T##7f_i, T##7f_o, T, R0, R1)          \
+       BSWAP4(T##80_i, T##80_o, T, R0, R1)             \
+       BSWAP4(x##T##80_i, T##80_o, T, R0, R1)          \
+       BSWAP4(T##ff_i, T##ff_o, T, R0, R1)             \
+       BSWAP4(x##T##ff_i, T##ff_o, T, R0, R1)
+
+#define BSWAP3x(T, R0, R1)                             \
+       BSWAP4(T##12_i, T##12_o, T, R0, R1)             \
+       BSWAP4(T##7f_i, T##7f_o, T, R0, R1)             \
+       BSWAP4(T##80_i, T##80_o, T, R0, R1)             \
+       BSWAP4(T##ff_i, T##ff_o, T, R0, R1)
+
+#define BSWAP2(T, V0, V1, V2, R0, R1, R2)              \
+       BSWAP3(T, V0, V0)                               \
+       BSWAP3(T, V0, V1)                               \
+       BSWAP3(T, V0, V2)                               \
+       BSWAP3(T, V0, R0)                               \
+       BSWAP3(T, V0, R1)                               \
+       BSWAP3(T, V0, R2)                               \
+
+#define BSWAP2x(T, V0, V1, V2, R0, R1, R2)             \
+       BSWAP3x(T, V0, V0)                              \
+       BSWAP3x(T, V0, V1)                              \
+       BSWAP3x(T, V0, V2)                              \
+       BSWAP3x(T, V0, R0)                              \
+       BSWAP3x(T, V0, R1)                              \
+       BSWAP3x(T, V0, R2)                              \
+
+#define BSWAP1(T, V0, V1, V2, R0, R1, R2)              \
+       BSWAP2(T, V0, V1, V2, R0, R1, R2)               \
+       BSWAP2(T, V1, V2, R0, R1, R2, V0)               \
+       BSWAP2(T, V2, R0, R1, R2, V0, V1)               \
+       BSWAP2(T, R0, R1, R2, V0, V1, V2)               \
+       BSWAP2(T, R1, R2, V0, V1, V2, R0)               \
+       BSWAP2(T, R2, V0, V1, V2, R0, R1)
+
+#define BSWAP1x(T, V0, V1, V2, R0, R1, R2)             \
+       BSWAP2x(T, V0, V1, V2, R0, R1, R2)              \
+       BSWAP2x(T, V1, V2, R0, R1, R2, V0)              \
+       BSWAP2x(T, V2, R0, R1, R2, V0, V1)              \
+       BSWAP2x(T, R0, R1, R2, V0, V1, V2)              \
+       BSWAP2x(T, R1, R2, V0, V1, V2, R0)              \
+       BSWAP2x(T, R2, V0, V1, V2, R0, R1)
+
+#if __WORDSIZE == 32
+#  define BSWAP(V0, V1, V2, R0, R1, R2)                        \
+       BSWAP1(us, V0, V1, V2, R0, R1, R2)              \
+       BSWAP1x(ui, V0, V1, V2, R0, R1, R2)
+#else
+#  define BSWAP(V0, V1, V2, R0, R1, R2)                        \
+       BSWAP1(us, V0, V1, V2, R0, R1, R2)              \
+       BSWAP1(ui, V0, V1, V2, R0, R1, R2)              \
+       BSWAP1x(ul, V0, V1, V2, R0, R1, R2)
+#endif
+
+.code
+       prolog
+       /* simple sequence for easier disassembly reading and encoding check */
+       movi %r0 us12_i
+       bswapr_us %r1 %r0
+       beqi us %r1 us12_o
+       calli @abort
+us:
+
+       movi %r0 xus12_i
+       bswapr_us %r1 %r0
+       beqi xus %r1 us12_o
+       calli @abort
+xus:
+       movi %r0 ui12_i
+       bswapr_ui %r1 %r0
+       beqi ui %r1 ui12_o
+       calli @abort
+ui:
+#if __WORDSIZE == 64
+       movi %r0 xui12_i
+       bswapr_ui %r1 %r0
+       beqi xui %r1 ui12_o
+       calli @abort
+xui:
+       movi %r0 ul12_i
+       bswapr_ul %r1 %r0
+       beqi ul %r1 ul12_o
+       calli @abort
+ul:
+#endif
+
+       BSWAP(v0, v1, v2, r0, r1, r2)
+
+       // just to know did not abort
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+
+       ret
+       epilog
diff --git a/check/lightning.c b/check/lightning.c
index 9798a5b..3cf3e70 100644
--- a/check/lightning.c
+++ b/check/lightning.c
@@ -328,6 +328,11 @@ static void htonr_ui(void);        static void 
ntohr_ui(void);
 static void htonr_ul(void);    static void ntohr_ul(void);
 #endif
 static void htonr(void);       static void ntohr(void);
+static void bswapr_us(void);   static void bswapr_ui(void);
+#if __WORDSIZE == 64
+static void bswapr_ul(void);
+#endif
+static void bswapr(void);
 static void movnr(void);       static void movzr(void);
 static void ldr_c(void);       static void ldi_c(void);
 static void ldr_uc(void);      static void ldi_uc(void);
@@ -643,6 +648,11 @@ static instr_t               instr_vector[] = {
     entry(htonr_ul),   entry(ntohr_ul),
 #endif
     entry(htonr),      entry(ntohr),
+    entry(bswapr_us),  entry(bswapr_ui),
+#if __WORDSIZE == 64
+    entry(bswapr_ul),
+#endif
+    entry(bswapr),
     entry(movnr),      entry(movzr),
     entry(ldr_c),      entry(ldi_c),
     entry(ldr_uc),     entry(ldi_uc),
@@ -1492,6 +1502,11 @@ entry_ir_ir(htonr_ui)            entry_ir_ir(ntohr_ui)
 entry_ir_ir(htonr_ul)          entry_ir_ir(ntohr_ul)
 #endif
 entry_ir_ir(htonr)             entry_ir_ir(ntohr)
+entry_ir_ir(bswapr_us)         entry_ir_ir(bswapr_ui)
+#if __WORDSIZE == 64
+entry_ir_ir(bswapr_ul)
+#endif
+entry_ir_ir(bswapr)
 entry_ir_ir_ir(movnr)          entry_ir_ir_ir(movzr)
 entry_ir_ir(ldr_c)             entry_ir_pm(ldi_c)
 entry_ir_ir(ldr_uc)            entry_ir_pm(ldi_uc)
diff --git a/doc/body.texi b/doc/body.texi
index 48bfb9e..c174fcf 100644
--- a/doc/body.texi
+++ b/doc/body.texi
@@ -372,6 +372,14 @@ htonr    _us _ui _ul @r{Host-to-network (big endian) order}
 ntohr    _us _ui _ul @r{Network-to-host order }
 @end example
 
+@code{bswapr} can be used to unconditionally byte-swap an operand.
+On little-endian architectures, @code{htonr} and @code{ntohr} resolve
+to this.
+The @code{_ul} variant is only available in 64-bit architectures.
+@example
+bswapr    _us _ui _ul  01 = byte_swap(02)
+@end example
+
 @item Load operations
 @code{ld} accepts two operands while @code{ldx} accepts three;
 in both cases, the last can be either a register or an immediate
diff --git a/include/lightning.h.in b/include/lightning.h.in
index 50c6ee8..2c540cb 100644
--- a/include/lightning.h.in
+++ b/include/lightning.h.in
@@ -896,6 +896,18 @@ typedef enum {
 #define jit_movr_d_w(u, v)     jit_new_node_ww(jit_code_movr_d_w, u, v)
 #define jit_movi_d_w(u, v)     jit_new_node_wd(jit_code_movi_d_w, u, v)
 
+#define jit_bswapr_us(u,v)     jit_new_node_ww(jit_code_bswapr_us,u,v)
+    jit_code_bswapr_us,
+#define jit_bswapr_ui(u,v)     jit_new_node_ww(jit_code_bswapr_ui,u,v)
+    jit_code_bswapr_ui,
+#define jit_bswapr_ul(u,v)     jit_new_node_ww(jit_code_bswapr_ul,u,v)
+    jit_code_bswapr_ul,
+#if __WORDSIZE == 32
+#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ui,u,v)
+#else
+#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ul,u,v)
+#endif
+
     jit_code_last_code
 } jit_code_t;
 
diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c
index 53698b0..7d2a99d 100644
--- a/lib/jit_aarch64-cpu.c
+++ b/lib/jit_aarch64-cpu.c
@@ -663,17 +663,11 @@ static void 
_stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define stxr_l(r0,r1,r2)             STR(r2,r1,r0)
 #  define stxi_l(i0,r0,r1)             _stxi_l(_jit,i0,r0,r1)
 static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#  if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define htonr_us(r0,r1)              _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define htonr_ui(r0,r1)              _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-#    define htonr_ul(r0,r1)            REV(r0,r1)
-#  else
-#    define htonr_us(r0,r1)            extr_us(r0,r1)
-#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
-#    define htonr_ul(r0,r1)            movr(r0,r1)
-#  endif
+#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ul(r0,r1)             REV(r0,r1)
 #  define extr_c(r0,r1)                        SXTB(r0,r1)
 #  define extr_uc(r0,r1)               UXTB(r0,r1)
 #  define extr_s(r0,r1)                        SXTH(r0,r1)
@@ -1461,21 +1455,19 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1, jit_word_t i0)
     }
 }
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
 static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    htonr_ul(r0, r1);
+    bswapr_ul(r0, r1);
     rshi_u(r0, r0, 48);
 }
 
 static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    htonr_ul(r0, r1);
+    bswapr_ul(r0, r1);
     rshi_u(r0, r0, 32);
 }
-#endif
 
 static void
 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
diff --git a/lib/jit_aarch64-sz.c b/lib/jit_aarch64-sz.c
index 33a0410..e1f6d96 100644
--- a/lib/jit_aarch64-sz.c
+++ b/lib/jit_aarch64-sz.c
@@ -401,4 +401,7 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    8, /* bswapr_us */
+    8, /* bswapr_ui */
+    4, /* bswapr_ul */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c
index 369408c..f0be046 100644
--- a/lib/jit_aarch64.c
+++ b/lib/jit_aarch64.c
@@ -1128,6 +1128,9 @@ _emit_code(jit_state_t *_jit)
                case_rr(hton, _us);
                case_rr(hton, _ui);
                case_rr(hton, _ul);
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
+               case_rr(bswap, _ul);
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c
index d8ca34a..2dd701d 100644
--- a/lib/jit_alpha-cpu.c
+++ b/lib/jit_alpha-cpu.c
@@ -626,18 +626,12 @@ static void 
_extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define extr_ui(r0,r1)               _extr_ui(_jit,r0,r1)
 static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-#  if __BYTE_ORDER == __LITTLE_ENDIAN
-#    define htonr_us(r0,r1)            _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-#    define htonr_ui(r0,r1)            _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-#    define htonr_ul(r0,r1)            _htonr_ul(_jit,r0,r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
-#  else
-#    define htonr_us(r0,r1)            extr_us(r0,r1)
-#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
-#    define htonr_ul(r0,r1)            movr(r0,r1)
-#  endif
+#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ul(r0,r1)             _bswapr_ul(_jit,r0,r1)
+static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define jmpr(r0)                     JMP(_R31_REGNO,r0,0)
 #  define jmpi(i0)                     _jmpi(_jit,i0)
 static void _jmpi(jit_state_t*, jit_word_t);
@@ -2475,7 +2469,7 @@ _extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1)
 }
 
 static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                t0;
     t0 = jit_get_reg(jit_class_gpr);
@@ -2487,7 +2481,7 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1)
 }
 
 static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                t0;
     jit_int32_t                t1;
@@ -2513,7 +2507,7 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1)
 }
 
 static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                t0;
     jit_int32_t                t1;
diff --git a/lib/jit_alpha-sz.c b/lib/jit_alpha-sz.c
index f37b748..ecfeba3 100644
--- a/lib/jit_alpha-sz.c
+++ b/lib/jit_alpha-sz.c
@@ -401,4 +401,7 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    16,        /* bswapr_us */
+    36,        /* bswapr_ui */
+    36,        /* bswapr_ul */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c
index 4957f1a..d7bb3ec 100644
--- a/lib/jit_alpha.c
+++ b/lib/jit_alpha.c
@@ -1086,6 +1086,9 @@ _emit_code(jit_state_t *_jit)
                case_rr(hton, _us);
                case_rr(hton, _ui);
                case_rr(hton, _ul);
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
+               case_rr(bswap, _ul);
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c
index c4a550d..14ba36b 100644
--- a/lib/jit_arm-cpu.c
+++ b/lib/jit_arm-cpu.c
@@ -1095,15 +1095,10 @@ static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
 static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define stxi_i(r0,r1,i0)             _stxi_i(_jit,r0,r1,i0)
 static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#  if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define htonr_us(r0,r1)              _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define htonr_ui(r0,r1)              _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-#  else
-#    define htonr_us(r0,r1)            extr_us(r0,r1)
-#    define htonr(r0,r1)               movr(r0,r1)
-#  endif
+#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1)
 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define extr_uc(r0,r1)               _extr_uc(_jit,r0,r1)
@@ -3609,11 +3604,9 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t 
r0, jit_int32_t r1)
     }
 }
 
-#  if __BYTE_ORDER == __LITTLE_ENDIAN
 static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    jit_int32_t                t0;
     if (jit_thumb_p()) {
        if ((r0|r1) < 8)
            T1_REV(r0, r1);
@@ -3627,20 +3620,14 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, 
jit_int32_t r1)
            rshi_u(r0, r0, 16);
        }
        else {
-           t0 = jit_get_reg(jit_class_gpr);
-           rshi(rn(t0), r1, 8);
-           andi(r0, r1, 0xff);
-           andi(rn(t0), rn(t0), 0xff);
-           lshi(r0, r0, 8);
-           orr(r0, r0, rn(t0));
-           jit_unget_reg(t0);
+               generic_bswapr_us(_jit, r0, r1);
        }
     }
 }
 
 /* inline glibc htonl (without register clobber) */
 static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_thumb_p()) {
@@ -3662,7 +3649,6 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1)
        }
     }
 }
-#endif
 
 static void
 _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c
index 008e6f9..293d306 100644
--- a/lib/jit_arm-sz.c
+++ b/lib/jit_arm-sz.c
@@ -402,6 +402,9 @@
     12,        /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    8, /* bswapr_us */
+    4, /* bswapr_ui */
+    0, /* bswapr_ul */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
 
@@ -808,5 +811,8 @@
     12,        /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    20,        /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_arm.c b/lib/jit_arm.c
index 051f84d..0fdd1a7 100644
--- a/lib/jit_arm.c
+++ b/lib/jit_arm.c
@@ -1498,6 +1498,8 @@ _emit_code(jit_state_t *_jit)
                case_wrr(stx, _i);
                case_rr(hton, _us);
                case_rr(hton, _ui);
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c
index e304ee0..6ca54f3 100644
--- a/lib/jit_hppa-cpu.c
+++ b/lib/jit_hppa-cpu.c
@@ -658,12 +658,8 @@ static void 
_movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #define extr_uc(r0,r1)         EXTRWR_U(r1,31,8,r0)
 #define extr_s(r0,r1)          EXTRWR(r1,31,16,r0)
 #define extr_us(r0,r1)         EXTRWR_U(r1,31,16,r0)
-#if __BYTE_ORDER == __BIG_ENDIAN
-#  define htonr_us(r0,r1)      extr_us(r0,r1)
-#  define htonr_ui(r0,r1)      movr(r0,r1)
-#else
-#  error need htonr implementation
-#endif
+#define bswapr_us(r0,r1)       generic_bswapr_us(_jit,r0,r1)
+#define bswapr_ui(r0,r1)       generic_bswapr_ui(_jit,r0,r1)
 #define addr(r0,r1,r2)         ADD(r1,r2,r0)
 #define addi(r0,r1,i0)         _addi(_jit,r0,r1,i0)
 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c
index 544926f..1bfb7e6 100644
--- a/lib/jit_hppa-sz.c
+++ b/lib/jit_hppa-sz.c
@@ -401,4 +401,7 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    36,        /* bswapr_us */
+    80,        /* bswapr_ui */
+    0, /* bswapr_ul */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c
index 8d22ede..2668842 100644
--- a/lib/jit_hppa.c
+++ b/lib/jit_hppa.c
@@ -1054,6 +1054,8 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _us);
                case_rr(hton, _us);
                case_rr(hton, _ui);
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c
index 9953875..63bb92d 100644
--- a/lib/jit_ia64-cpu.c
+++ b/lib/jit_ia64-cpu.c
@@ -1311,17 +1311,11 @@ static jit_word_t 
_movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define htonr_us(r0,r1)              _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define htonr_ui(r0,r1)              _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define htonr_ul(r0,r1)              MUX1(r0,r1,MUX_REV)
-#else
-#  define htonr_us(r0,r1)              extr_us(r0,r1)
-#  define htonr_ui(r0,r1)              extr_ui(r0,r1)
-#  define htonr_ul(r0,r1)              movr(r0,r1)
-#endif
+#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ul(r0,r1)             MUX1(r0,r1,MUX_REV)
 #define extr_c(r0,r1)                  SXT1(r0,r1)
 #define extr_uc(r0,r1)                 ZXT1(r0,r1)
 #define extr_s(r0,r1)                  SXT2(r0,r1)
@@ -3505,6 +3499,20 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1, jit_int32_t r2)
     patch_at(w, _jit->pc.w);
 }
 
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+       bswapr_ul(r0, r1);
+       rshi_u(r0, r0, 48);
+}
+
+static void
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+       bswapr_ul(r0, r1);
+       rshi_u(r0, r0, 32);
+}
+
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -3971,48 +3979,6 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     }
 }
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr);
-    rshi(rn(t0), r1, 8);
-    andi(r0, r1, 0xff);
-    andi(rn(t0), rn(t0), 0xff);
-    lshi(r0, r0, 8);
-    orr(r0, r0, rn(t0));
-    jit_unget_reg(t0);
-}
-
-static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_int32_t                t0;
-    jit_int32_t                t1;
-    jit_int32_t                t2;
-    t0 = jit_get_reg(jit_class_gpr);
-    t1 = jit_get_reg(jit_class_gpr);
-    t2 = jit_get_reg(jit_class_gpr);
-    rshi(rn(t0), r1, 24);
-    rshi(rn(t1), r1, 16);
-    rshi(rn(t2), r1,  8);
-    andi(rn(t0), rn(t0), 0xff);
-    andi(rn(t1), rn(t1), 0xff);
-    andi(rn(t2), rn(t2), 0xff);
-    andi(r0, r1, 0xff);
-    lshi(r0, r0, 24);
-    lshi(rn(t1), rn(t1), 8);
-    orr(r0, r0, rn(t0));
-    lshi(rn(t2), rn(t2), 16);
-    orr(r0, r0, rn(t1));
-    orr(r0, r0, rn(t2));
-    jit_unget_reg(t2);
-    jit_unget_reg(t1);
-    jit_unget_reg(t0);
-}
-#endif
-
 static void
 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
diff --git a/lib/jit_ia64-sz.c b/lib/jit_ia64-sz.c
index 7906c7b..c81b3ea 100644
--- a/lib/jit_ia64-sz.c
+++ b/lib/jit_ia64-sz.c
@@ -68,9 +68,9 @@
     16,        /* lshr */
     16,        /* lshi */
     16,        /* rshr */
-    16,        /* rshi */
+    32,        /* rshi */
     16,        /* rshr_u */
-    16,        /* rshi_u */
+    32,        /* rshi_u */
     16,        /* negr */
     16,        /* comr */
     32,        /* ltr */
@@ -103,8 +103,8 @@
     16,        /* extr_us */
     16,        /* extr_i */
     16,        /* extr_ui */
-    64,        /* htonr_us */
-    160,       /* htonr_ui */
+    48,        /* htonr_us */
+    48,        /* htonr_ui */
     16,        /* htonr_ul */
     16,        /* ldr_c */
     32,        /* ldi_c */
@@ -401,4 +401,7 @@
     0, /* movi_d_ww */
     16,        /* movr_d_w */
     32,        /* movi_d_w */
+    48,        /* bswapr_us */
+    48,        /* bswapr_ui */
+    16,        /* bswapr_ul */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c
index c388907..8b4cd00 100644
--- a/lib/jit_ia64.c
+++ b/lib/jit_ia64.c
@@ -1198,6 +1198,9 @@ _emit_code(jit_state_t *_jit)
                case_rr(hton, _us);
                case_rr(hton, _ui);
                case_rr(hton, _ul);
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
+               case_rr(bswap, _ul);
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 57b0c88..0625589 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -607,23 +607,12 @@ static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #    define stxi_l(i0,r0,r1)           _stxi_l(_jit,i0,r0,r1)
 static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  endif
-#  if __BYTE_ORDER == __LITTLE_ENDIAN
-#    define htonr_us(r0,r1)            _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-#    define htonr_ui(r0,r1)            _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-#    if __WORDSIZE == 64
-#      define htonr_ul(r0,r1)          _htonr_ul(_jit,r0,r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
-#    endif
-#  else
-#    define htonr_us(r0,r1)            extr_us(r0,r1)
-#    if __WORDSIZE == 32
-#      define htonr_ui(r0,r1)          movr(r0,r1)
-#    else
-#      define htonr_ui(r0,r1)          extr_ui(r0,r1)
-#      define htonr_ul(r0,r1)          movr(r0,r1)
-#    endif
+#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define bswapr_ul(r0,r1)           generic_bswapr_ul(_jit,r0,r1)
 #  endif
 #  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1)
 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
@@ -1765,33 +1754,20 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 }
 #endif
 
-#  if __BYTE_ORDER == __LITTLE_ENDIAN
 static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    jit_int32_t                t0;
-
     if (jit_mips2_p()) {
         extr_us(r0, r1);
         WSBH(r0, r0);
     } else {
-        t0 = jit_get_reg(jit_class_gpr);
-        rshi(rn(t0), r1, 8);
-        andi(r0, r1, 0xff);
-        andi(rn(t0), rn(t0), 0xff);
-        lshi(r0, r0, 8);
-        orr(r0, r0, rn(t0));
-        jit_unget_reg(t0);
+        generic_bswapr_us(_jit, r0, r1);
     }
 }
 
 static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    jit_int32_t                t0;
-    jit_int32_t                t1;
-    jit_int32_t                t2;
-
     if (jit_mips2_p()) {
         if (__WORDSIZE == 64) {
             SLL(r0, r1, 0);
@@ -1803,44 +1779,10 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
             ROTR(r0, r0, 16);
         }
     } else {
-        t0 = jit_get_reg(jit_class_gpr);
-        t1 = jit_get_reg(jit_class_gpr);
-        t2 = jit_get_reg(jit_class_gpr);
-
-        rshi(rn(t0), r1, 24);
-        rshi(rn(t1), r1, 16);
-        rshi(rn(t2), r1,  8);
-        andi(rn(t0), rn(t0), 0xff);
-        andi(rn(t1), rn(t1), 0xff);
-        andi(rn(t2), rn(t2), 0xff);
-        andi(r0, r1, 0xff);
-        lshi(r0, r0, 24);
-        lshi(rn(t1), rn(t1), 8);
-        orr(r0, r0, rn(t0));
-        lshi(rn(t2), rn(t2), 16);
-        orr(r0, r0, rn(t1));
-        orr(r0, r0, rn(t2));
-
-        jit_unget_reg(t2);
-        jit_unget_reg(t1);
-        jit_unget_reg(t0);
+        generic_bswapr_ui(_jit, r0, r1);
     }
 }
 
-static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    rshi_u(rn(reg), r1, 32);
-    htonr_ui(r0, r1);
-    htonr_ui(rn(reg), rn(reg));
-    lshi(r0, r0, 32);
-    orr(r0, r0, rn(reg));
-    jit_unget_reg(reg);
-}
-#  endif
-
 static void
 _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c
index da37848..b4642fa 100644
--- a/lib/jit_mips-sz.c
+++ b/lib/jit_mips-sz.c
@@ -1,7 +1,7 @@
 
 #if __WORDSIZE == 32
 #if NEW_ABI
-#define JIT_INSTR_MAX 44
+#define JIT_INSTR_MAX 52
     0, /* data */
     0, /* live */
     0, /* align */
@@ -402,6 +402,9 @@
     0, /* movi_d_ww */
     4, /* movr_d_w */
     12,        /* movi_d_w */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    0, /* bswapr_ul */
 #endif /* NEW_ABI */
 #endif /* __WORDSIZE */
 
@@ -808,11 +811,14 @@
     8, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    0, /* bswapr_ul */
 #endif /* NEW_ABI */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 44
+#define JIT_INSTR_MAX 116
     0, /* data */
     0, /* live */
     4, /* align */
@@ -1213,4 +1219,7 @@
     0, /* movi_d_ww */
     4, /* movr_d_w */
     12,        /* movi_d_w */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    116,       /* bswapr_ul */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index 5ffad2b..94fe797 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -1419,6 +1419,11 @@ _emit_code(jit_state_t *_jit)
                case_rr(hton, _ui);
 #if __WORDSIZE == 64
                case_rr(hton, _ul);
+#endif
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
+#if __WORDSIZE == 64
+               case_rr(bswap, _ul);
 #endif
                case_rr(ext, _c);
                case_rr(ext, _uc);
diff --git a/lib/jit_names.c b/lib/jit_names.c
index cc640e8..ebd3d56 100644
--- a/lib/jit_names.c
+++ b/lib/jit_names.c
@@ -228,4 +228,6 @@ static char *code_name[] = {
     "movr_f_w",                "movi_f_w",
     "movr_d_ww",       "movi_d_ww",
     "movr_d_w",                "movi_d_w",
+    "bswapr_us",
+    "bswapr_ui",               "bswapr_ul",
 };
diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c
index 0046a05..0517692 100644
--- a/lib/jit_ppc-cpu.c
+++ b/lib/jit_ppc-cpu.c
@@ -521,23 +521,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 #    define extr_i(r0,r1)              EXTSW(r0,r1)
 #    define extr_ui(r0,r1)             CLRLDI(r0,r1,32)
 #  endif
-#  if __BYTE_ORDER == __BIG_ENDIAN
-#    define htonr_us(r0,r1)            extr_us(r0,r1)
-#    if __WORDSIZE == 32
-#      define htonr_ui(r0,r1)          movr(r0,r1)
-#    else
-#      define htonr_ui(r0,r1)          extr_ui(r0,r1)
-#      define htonr_ul(r0,r1)          movr(r0,r1)
-#    endif
-#  else
-#    define htonr_us(r0,r1)            _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-#    define htonr_ui(r0,r1)            _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-#    if __WORDSIZE == 64
-#      define htonr_ul(r0,r1)          _htonr_ul(_jit,r0,r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
-#    endif
+#  define bswapr_us(r0,r1)             generic_bswapr_us(_jit,r0,r1)
+#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define bswapr_ul(r0,r1)           generic_bswapr_ul(_jit,r0,r1)
 #  endif
 #  define addr(r0,r1,r2)               ADD(r0,r1,r2)
 #  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
@@ -1158,22 +1146,8 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     return (word);
 }
 
-#  if __BYTE_ORDER == __LITTLE_ENDIAN
-static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr);
-    rshi(rn(t0), r1, 8);
-    andi(r0, r1, 0xff);
-    andi(rn(t0), rn(t0), 0xff);
-    lshi(r0, r0, 8);
-    orr(r0, r0, rn(t0));
-    jit_unget_reg(t0);
-}
-
 static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
@@ -1188,22 +1162,6 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(reg);
 }
 
-#    if __WORDSIZE == 64
-static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    rshi_u(rn(reg), r1, 32);
-    htonr_ui(r0, r1);
-    htonr_ui(rn(reg), rn(reg));
-    lshi(r0, r0, 32);
-    orr(r0, r0, rn(reg));
-    jit_unget_reg(reg);
-}
-#    endif
-#  endif
-
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c
index 817af11..0be7047 100644
--- a/lib/jit_ppc-sz.c
+++ b/lib/jit_ppc-sz.c
@@ -403,6 +403,9 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    20,        /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
 #endif /* _CALL_SYV */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
@@ -813,6 +816,9 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    20,        /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
 #endif /* _CALL_AIX */
 #endif /* __BYTEORDER */
 #endif /* __powerpc__ */
@@ -1222,6 +1228,9 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    20,        /* bswapr_us */
+    16,        /* bswapr_ui */
+    44,        /* bswapr_ul */
 #endif /* __BYTEORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
@@ -1630,6 +1639,9 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    20,        /* bswapr_us */
+    16,        /* bswapr_ui */
+    44,        /* bswapr_ul */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c
index d05d4b1..e94d1a5 100644
--- a/lib/jit_ppc.c
+++ b/lib/jit_ppc.c
@@ -1355,6 +1355,11 @@ _emit_code(jit_state_t *_jit)
                case_rr(hton, _ui);
 #  if __WORDSIZE == 64
                case_rr(hton, _ul);
+#  endif
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
+#  if __WORDSIZE == 64
+               case_rr(bswap, _ul);
 #  endif
                case_rr(neg,);
                case_rr(com,);
diff --git a/lib/jit_riscv-cpu.c b/lib/jit_riscv-cpu.c
index b65ca5c..9f029c0 100644
--- a/lib/jit_riscv-cpu.c
+++ b/lib/jit_riscv-cpu.c
@@ -434,12 +434,9 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define stxi_l(i0, r0, r1)           _stxi_l(_jit, i0, r0, r1)
 static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#  define htonr_us(r0, r1)             _htonr_us(_jit, r0, r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define htonr_ui(r0, r1)             _htonr_ui(_jit, r0, r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define htonr_ul(r0, r1)             _htonr_ul(_jit, r0, r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_us(r0, r1)            generic_bswapr_us(_jit, r0, r1)
+#  define bswapr_ui(r0, r1)            generic_bswapr_ui(_jit, r0, r1)
+#  define bswapr_ul(r0, r1)            generic_bswapr_ul(_jit, r0, r1)
 #  define extr_c(r0, r1)               _extr_c(_jit, r0, r1)
 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define extr_uc(r0, r1)              andi(r0, r1, 0xff)
@@ -1247,59 +1244,6 @@ DEFST(s, H)
 DEFST(i, W)
 DEFST(l, D)
 
-static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr);
-    rshi(rn(t0), r1, 8);
-    andi(r0, r1, 0xff);
-    andi(rn(t0), rn(t0), 0xff);
-    lshi(r0, r0, 8);
-    orr(r0, r0, rn(t0));
-    jit_unget_reg(t0);
-}
-
-static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_int32_t                t0;
-    jit_int32_t                t1;
-    jit_int32_t                t2;
-    t0 = jit_get_reg(jit_class_gpr);
-    t1 = jit_get_reg(jit_class_gpr);
-    t2 = jit_get_reg(jit_class_gpr);
-    rshi(rn(t0), r1, 24);
-    rshi(rn(t1), r1, 16);
-    rshi(rn(t2), r1,  8);
-    andi(rn(t0), rn(t0), 0xff);
-    andi(rn(t1), rn(t1), 0xff);
-    andi(rn(t2), rn(t2), 0xff);
-    andi(r0, r1, 0xff);
-    lshi(r0, r0, 24);
-    lshi(rn(t1), rn(t1), 8);
-    orr(r0, r0, rn(t0));
-    lshi(rn(t2), rn(t2), 16);
-    orr(r0, r0, rn(t1));
-    orr(r0, r0, rn(t2));
-    jit_unget_reg(t2);
-    jit_unget_reg(t1);
-    jit_unget_reg(t0);
-}
-
-static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr);
-    rshi_u(rn(t0), r1, 32);
-    htonr_ui(r0, r1);
-    htonr_ui(rn(t0), rn(t0));
-    lshi(r0, r0, 32);
-    orr(r0, r0, rn(t0));
-    jit_unget_reg(t0);
-}
-
 static void
 _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
diff --git a/lib/jit_riscv-sz.c b/lib/jit_riscv-sz.c
index 8c70fcc..c8908d8 100644
--- a/lib/jit_riscv-sz.c
+++ b/lib/jit_riscv-sz.c
@@ -400,4 +400,7 @@
     0, /* movi_d_ww */
     4, /* movr_d_w */
     16,        /* movi_d_w */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    116,       /* bswapr_ul */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_riscv.c b/lib/jit_riscv.c
index c2fcca4..1dc3c9e 100644
--- a/lib/jit_riscv.c
+++ b/lib/jit_riscv.c
@@ -1125,6 +1125,9 @@ _emit_code(jit_state_t *_jit)
                case_rr(hton, _us);
                case_rr(hton, _ui);
                case_rr(hton, _ul);
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
+               case_rr(bswap, _ul);
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c
index 4fc39a8..619ab15 100644
--- a/lib/jit_s390-cpu.c
+++ b/lib/jit_s390-cpu.c
@@ -966,6 +966,9 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define bswapr_us(r0, r1)            generic_bswapr_us(_jit, r0, r1)
+#  define bswapr_ui(r0, r1)            generic_bswapr_ui(_jit, r0, r1)
+#  define bswapr_ul(r0, r1)            generic_bswapr_ul(_jit, r0, r1)
 #  define movnr(r0,r1,r2)              _movnr(_jit,r0,r1,r2)
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
@@ -1083,13 +1086,6 @@ static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
 static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define htonr_us(r0,r1)              extr_us(r0,r1)
-#  if __WORDSIZE == 32
-#    define htonr_ui(r0,r1)            movr(r0,r1)
-#  else
-#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
-#    define htonr_ul(r0,r1)            movr(r0,r1)
-#  endif
 #  define extr_c(r0,r1)                        LGBR(r0,r1)
 #  define extr_uc(r0,r1)               LLGCR(r0,r1)
 #  define extr_s(r0,r1)                        LGHR(r0,r1)
diff --git a/lib/jit_s390-sz.c b/lib/jit_s390-sz.c
index 96e6b75..bb9071d 100644
--- a/lib/jit_s390-sz.c
+++ b/lib/jit_s390-sz.c
@@ -1,6 +1,6 @@
 
 #if __WORDSIZE == 32
-#define JIT_INSTR_MAX 104
+#define JIT_INSTR_MAX 128
     0, /* data */
     0, /* live */
     6, /* align */
@@ -401,10 +401,13 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    52,        /* bswapr_us */
+    128,       /* bswapr_ui */
+    0, /* bswapr_ul */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 104
+#define JIT_INSTR_MAX 344
     0, /* data */
     0, /* live */
     6, /* align */
@@ -805,4 +808,7 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    68,        /* bswapr_us */
+    160,       /* bswapr_ui */
+    344,       /* bswapr_ul */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_s390.c b/lib/jit_s390.c
index aecc08a..4b89bea 100644
--- a/lib/jit_s390.c
+++ b/lib/jit_s390.c
@@ -1151,6 +1151,11 @@ _emit_code(jit_state_t *_jit)
                case_rr(hton, _ui);
 #if __WORDSIZE == 64
                case_rr(hton, _ul);
+#endif
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
+#if __WORDSIZE == 64
+               case_rr(bswap, _ul);
 #endif
                case_rr(ext, _c);
                case_rr(ext, _uc);
diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c
index 2ac3f5d..90c3767 100644
--- a/lib/jit_sparc-cpu.c
+++ b/lib/jit_sparc-cpu.c
@@ -545,6 +545,9 @@ static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
 static void _movi(jit_state_t*, jit_int32_t, jit_word_t);
 #  define movi_p(r0, i0)               _movi_p(_jit, r0, i0)
 static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
+#  define bswapr_us(r0, r1)            generic_bswapr_us(_jit, r0, r1)
+#  define bswapr_ui(r0, r1)            generic_bswapr_ui(_jit, r0, r1)
+#  define bswapr_ul(r0, r1)            generic_bswapr_ul(_jit, r0, r1)
 #  define movnr(r0,r1,r2)              _movnr(_jit,r0,r1,r2)
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
@@ -673,7 +676,6 @@ static void _xori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #    define rshr_u(r0, r1, r2)         SRLX(r1, r2, r0)
 #    define rshi_u(r0, r1, i0)         SRLXI(r1, i0, r0)
 #  endif
-#  define htonr_us(r0,r1)              extr_us(r0,r1)
 #  define extr_c(r0,r1)                        _extr_c(_jit,r0,r1)
 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define extr_uc(r0,r1)               andi(r0, r1, 0xff)
@@ -681,11 +683,7 @@ static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define extr_us(r0,r1)               _extr_us(_jit,r0,r1)
 static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-#  if __WORDSIZE == 32
-#    define htonr_ui(r0,r1)            movr(r0,r1)
-#  else
-#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
-#    define htonr_ul(r0,r1)            movr(r0,r1)
+#  if __WORDSIZE == 64
 #    define extr_i(r0,r1)              _extr_i(_jit,r0,r1)
 static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t);
 #    define extr_ui(r0,r1)             _extr_ui(_jit,r0,r1)
diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c
index 4c905bf..5ec051d 100644
--- a/lib/jit_sparc-sz.c
+++ b/lib/jit_sparc-sz.c
@@ -1,5 +1,5 @@
 #if __WORDSIZE == 32
-#define JIT_INSTR_MAX 44
+#define JIT_INSTR_MAX 52
     0, /* data */
     0, /* live */
     0, /* align */
@@ -400,10 +400,13 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    0, /* bswapr_ul */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 64
+#define JIT_INSTR_MAX 116
     0, /* data */
     0, /* live */
     4, /* align */
@@ -804,4 +807,7 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    116,       /* bswapr_ul */
 #endif /* __WORDSIZE */
diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c
index 84ff48c..23d4442 100644
--- a/lib/jit_sparc.c
+++ b/lib/jit_sparc.c
@@ -1463,6 +1463,11 @@ _emit_code(jit_state_t *_jit)
                case_rr(hton, _ui);
 #if __WORDSIZE == 64
                case_rr(hton, _ul);
+#endif
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
+#if __WORDSIZE == 64
+               case_rr(bswap, _ul);
 #endif
                case_rr(ext, _c);
                case_rr(ext, _uc);
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index 6dcf672..81534f0 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -379,13 +379,13 @@ static void _movir(jit_state_t*,jit_int32_t,jit_int32_t);
 #    define movir_u(r0, r1)            _movir_u(_jit, r0, r1)
 static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t);
 #  endif
-#  define htonr_us(r0, r1)             _htonr_us(_jit, r0, r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define htonr_ui(r0, r1)             _htonr_ui(_jit, r0, r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_us(r0, r1)            _bswapr_us(_jit, r0, r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ui(r0, r1)            _bswapr_ui(_jit, r0, r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
 #  if __X64 && !__X64_32
-#define htonr_ul(r0, r1)               _htonr_ul(_jit, r0, r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#define bswapr_ul(r0, r1)              _bswapr_ul(_jit, r0, r1)
+static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
 #endif
 #  define extr_c(r0, r1)               _extr_c(_jit, r0, r1)
 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
@@ -2263,7 +2263,7 @@ _movir_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 #endif
 
 static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     extr_us(r0, r1);
     ic(0x66);
@@ -2274,7 +2274,7 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 }
 
 static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     movr(r0, r1);
     rex(0, 0, _NOREG, _NOREG, r0);
@@ -2284,7 +2284,7 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 
 #if __X64 && !__X64_32
 static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     movr(r0, r1);
     rex(0, 1, _NOREG, _NOREG, r0);
diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c
index 745f110..bd4b9a0 100644
--- a/lib/jit_x86-sz.c
+++ b/lib/jit_x86-sz.c
@@ -401,6 +401,9 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    7, /* bswapr_us */
+    4, /* bswapr_ui */
+    0, /* bswapr_ul */
 #endif
 
 #if __X64
@@ -806,6 +809,9 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    9, /* bswapr_us */
+    6, /* bswapr_ui */
+    6, /* bswapr_ul */
 #else
 
 #  if __X64_32
@@ -1210,6 +1216,9 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    9, /* bswapr_us */
+    6, /* bswapr_ui */
+    0, /* bswapr_ul */
 
 #  else
 #define JIT_INSTR_MAX 115
@@ -1613,6 +1622,9 @@
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    9, /* bswapr_us */
+    6, /* bswapr_ui */
+    6, /* bswapr_ul */
 #endif /* __CYGWIN__ || _WIN32 */
 #  endif /* __X64_32 */
 #endif /* __X64 */
diff --git a/lib/jit_x86.c b/lib/jit_x86.c
index 133ee39..e3e1383 100644
--- a/lib/jit_x86.c
+++ b/lib/jit_x86.c
@@ -1698,6 +1698,11 @@ _emit_code(jit_state_t *_jit)
                case_rr(hton, _ui);
 #if __X64 && !__X64_32
                case_rr(hton, _ul);
+#endif
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
+#if __X64 && !__X64_32
+               case_rr(bswap, _ul);
 #endif
                case_rr(ext, _c);
                case_rr(ext, _uc);
diff --git a/lib/lightning.c b/lib/lightning.c
index cc6887f..11f4069 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -1380,6 +1380,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_truncr_f_i:                       case jit_code_truncr_f_l:
        case jit_code_truncr_d_i:                       case jit_code_truncr_d_l:
        case jit_code_htonr_us: case jit_code_htonr_ui: case jit_code_htonr_ul:
+       case jit_code_bswapr_us:        case jit_code_bswapr_ui:        case jit_code_bswapr_ul:
        case jit_code_ldr_c:    case jit_code_ldr_uc:
        case jit_code_ldr_s:    case jit_code_ldr_us:   case jit_code_ldr_i:
        case jit_code_ldr_ui:   case jit_code_ldr_l:    case jit_code_negr_f:
@@ -3491,6 +3492,31 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
     }
 }
 
+#if __BYTE_ORDER == __LITTLE_ENDIAN    /* little-endian host: htonr is a plain byte swap */
+#  define htonr_us(r0,r1)              bswapr_us(r0,r1)
+#  define htonr_ui(r0,r1)              bswapr_ui(r0,r1)
+#  if __WORDSIZE == 64
+#    define htonr_ul(r0,r1)            bswapr_ul(r0,r1)
+#  endif
+#else  /* big-endian host: no swap, only a move or an unsigned extension */
+#  define htonr_us(r0,r1)              extr_us(r0,r1)
+#  if __WORDSIZE == 32
+#    define htonr_ui(r0,r1)            movr(r0,r1)
+#  else
+#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
+#    define htonr_ul(r0,r1)            movr(r0,r1)
+#  endif
+#endif
+
+static maybe_unused void    /* forward declarations: the definitions follow the arch includes below */
+generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+static maybe_unused void    /* maybe_unused: a back end may reference none of these helpers */
+generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#if __WORDSIZE == 64
+static maybe_unused void
+generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#endif
+
 #if defined(__i386__) || defined(__x86_64__)
 #  include "jit_x86.c"
 #elif defined(__mips__)
@@ -3514,3 +3540,47 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
 #elif defined(__riscv)
 #  include "jit_riscv.c"
 #endif
+
+static maybe_unused void
+generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t reg = jit_get_reg(jit_class_gpr);
+    /* 16-bit byte swap: emits r0 = ((r1 & 0xff) << 8) | ((r1 >> 8) & 0xff). */
+    rshi(rn(reg), r1, 8);            /* scratch = r1 >> 8; r1 is read here before r0 is written */
+    andi(r0, r1, 0xff);              /* r0 = lowest byte of r1 (last read of r1) */
+    andi(rn(reg), rn(reg), 0xff);    /* scratch = second byte of r1 */
+    lshi(r0, r0, 8);                 /* lowest byte moves up to bits 8..15 */
+    orr(r0, r0, rn(reg));            /* merge; every bit above 15 of r0 is zero */
+
+    jit_unget_reg(reg);
+}
+
+static maybe_unused void
+generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t reg = jit_get_reg(jit_class_gpr);
+    /* 32-bit byte swap built from two 16-bit swaps of the halves of r1. */
+    rshi(rn(reg), r1, 16);           /* scratch = r1 >> 16; taken before r0 is written */
+    bswapr_us(r0, r1);               /* r0 = swapped low half (back end's bswapr_us) */
+    bswapr_us(rn(reg), rn(reg));     /* scratch = swapped upper half; presumably bits above 15 are ignored */
+    lshi(r0, r0, 16);                /* swapped low half becomes the high half of the result */
+    orr(r0, r0, rn(reg));            /* merge both halves into r0 */
+
+    jit_unget_reg(reg);
+}
+
+#if __WORDSIZE == 64
+static maybe_unused void
+generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t reg = jit_get_reg(jit_class_gpr);
+    /* 64-bit byte swap built from two 32-bit swaps of the halves of r1. */
+    rshi_u(rn(reg), r1, 32);         /* scratch = upper 32 bits of r1; taken before r0 is written */
+    bswapr_ui(r0, r1);               /* r0 = swapped low half (back end's bswapr_ui) */
+    bswapr_ui(rn(reg), rn(reg));     /* scratch = swapped upper half */
+    lshi(r0, r0, 32);                /* swapped low half becomes the high half of the result */
+    orr(r0, r0, rn(reg));            /* merge both halves into r0 */
+
+    jit_unget_reg(reg);
+}
+#endif
-- 
2.35.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]