Re: [PATCH v1 4/4] x86-disas: add x86-mini disassembler implementation

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v1 4/4] x86-disas: add x86-mini disassembler implementation

From:	Michael Clark
Subject:	Re: [PATCH v1 4/4] x86-disas: add x86-mini disassembler implementation
Date:	Sun, 26 Jan 2025 10:46:21 +1300
User-agent:	Mozilla Thunderbird

to save time on reviews. I had a look over the patch in light
of QEMU conventions I neglected due to it being portable code.

this is what I have found so far:

- use g_malloc/g_malloc0/g_free instead of malloc/calloc/free.
- remove duplicate unused definition of ctz/clz/popcnt.
- remove some magic constants and replace with math.
- remove some unnecessary casts.

the only thing that forks the code is the allocator conversion
as most of the code is quite functional/combinatorial. I could
get away with layering a patch on top of the upstream code.

I noticed QEMU doesn't use alloca much. I prefer it for small
temporary function scope allocations because it is super cheap.

I was thinking. it would be nice to pipe the bytes to another
thread. that way the disassembler won't trash the cache and it
might run a little faster. because otherwise it will dominate
L1 cache and compete with the translator for cache. one could
use something like this pipe, with the addition of a sleeping
mutex. I am using it in a kernel with spinlocks but it would
not be too hard to add a sleeping mutex on qfull. it would be
faster than sockets because it's just a couple of atomic compare
swap and memcpy in the case that the queue is not full. one
could copy a whole translation block at a time. that limits
the cache footprint to stuff already in cache.

- https://github.com/michaeljclark/cpipe/

I will spin a v2 in a few weeks time. with this sort of thing:

diff --git a/disas/x86-core.c b/disas/x86-core.c
index d2704bf839f4..8a08f9bff4b3 100644
--- a/disas/x86-core.c
+++ b/disas/x86-core.c
@@ -27,6 +27,7 @@

 #include "disas/x86.h"
 #include "disas/x86-tables.inc"
+#include "qemu/osdep.h"

 #define array_size(arr) (sizeof(arr) / sizeof(arr[0]))

@@ -787,7 +788,7 @@ static int x86_opc_data_compare_alpha(const void*p1, const void *p2)


 static x86_table_idx x86_opc_table_index(size_t n)
 {
-    x86_table_idx tab = { n, malloc(sizeof(size_t) * n) };
+    x86_table_idx tab = { n, g_malloc(sizeof(size_t) * n) };
     for (size_t i = 0; i < tab.count; i++) tab.idx[i] = i;
     return tab;
 }

@@ -819,7 +820,7 @@ x86_table_idx x86_opc_table_filter(x86_table_idxtab, uint modes)

         const x86_opc_data *d = x86_opc_table + tab.idx[i];
         if (d->mode & modes) count++;
     }
-    x86_table_idx newtab = { count, malloc(sizeof(size_t) * count) };
+    x86_table_idx newtab = { count, g_malloc(sizeof(size_t) * count) };
     count = 0;
     for (size_t i = 0; i < tab.count; i++) {
         const x86_opc_data *d = x86_opc_table + tab.idx[i];

@@ -1079,24 +1080,25 @@ static void x86_build_accel_table(x86_acc_idx*idx, x86_acc_entry *acc)


 static x86_acc_idx *x86_table_build(uint modes)
 {
-    x86_acc_idx *idx = calloc(1, sizeof(x86_acc_idx));
+    x86_acc_idx *idx = g_malloc0(sizeof(x86_acc_idx));
     x86_table_idx tab = x86_opc_table_sorted(x86_opc_table_filter(
         x86_opc_table_identity(), modes), x86_sort_numeric);
-    ullong *modfun = (ullong *)calloc(2048, sizeof(ullong));
-    ullong *modmod = (ullong *)calloc(2048, sizeof(ullong));
+    size_t bmap_size = /*t,p,m*/ 512 * /*opcode*/ 256 >> /*bits*/ 3;
+    ullong *modfun = g_malloc0(bmap_size);
+    ullong *modmod = g_malloc0(bmap_size);
     x86_build_prefix_clashes(idx, tab, modfun, modmod);

x86_build_prefix_table(idx, tab, NULL, &idx->map_count, modfun,modmod);

-    idx->map = calloc(idx->map_count, sizeof(x86_opc_data));
+    idx->map = g_malloc0(idx->map_count * sizeof(x86_opc_data));
     x86_build_prefix_table(idx, tab, idx->map, NULL, modfun, modmod);
     qsort(idx->map, idx->map_count, sizeof(x86_opc_data),
         x86_opc_data_compare_build);
-    idx->page_offsets = calloc(512, sizeof(uchar));
+    idx->page_offsets = g_malloc0(/*t,p,m*/ 512);
     idx->acc_count = x86_build_accel_offsets(idx);
-    idx->acc = calloc(sizeof(x86_acc_entry), idx->acc_count);
+    idx->acc = g_malloc0(idx->acc_count * sizeof(x86_acc_entry));
     x86_build_accel_table(idx, idx->acc);
-    free(tab.idx);
-    free(modfun);
-    free(modmod);
+    g_free(tab.idx);
+    g_free(modfun);
+    g_free(modmod);
     return idx;
 }

@@ -2674,7 +2676,7 @@ err:

 x86_ctx *x86_ctx_create(uint mode)
 {
-    x86_ctx *ctx = malloc(sizeof(x86_ctx));
+    x86_ctx *ctx = g_malloc(sizeof(x86_ctx));
     ctx->mode = mode;
     ctx->idx = x86_table_build(mode);
     return ctx;
@@ -2682,8 +2684,8 @@ x86_ctx *x86_ctx_create(uint mode)

 void x86_ctx_destroy(x86_ctx *ctx)
 {
-    free(ctx->idx->page_offsets);
-    free(ctx->idx->map);
-    free(ctx->idx->acc);
-    free(ctx);
+    g_free(ctx->idx->page_offsets);
+    g_free(ctx->idx->map);
+    g_free(ctx->idx->acc);
+    g_free(ctx);
 }
diff --git a/disas/x86.h b/disas/x86.h
index 3e745c2438fa..caf04db31b56 100644
--- a/disas/x86.h
+++ b/disas/x86.h
@@ -42,102 +42,6 @@ typedef ushort u16;
 typedef uint u32;
 typedef ullong u64;

-static inline uint clz_u32(uint val) {
-    return val == 0 ? 32 : __builtin_clz(val);
-}
-static inline uint clz_u64(ullong val) {
-    return val == 0 ? 64 : __builtin_clzll(val);
-}
-static inline uint ctz_u32(uint val) {
-    return val == 0 ? 32 : __builtin_ctz(val);
-}
-static inline uint ctz_u64(ullong val) {
-    return val == 0 ? 64 : __builtin_ctzll(val);
-}
-static inline uint popcnt_u32(uint val) {
-    return __builtin_popcount(val);
-}
-static inline uint popcnt_u64(ullong val) {
-    return __builtin_popcountll(val);
-}
-static inline uint ispow2_u32(uint v) {
-    return v && !(v & (v - 1));
-}
-static inline uint ispow2_u64(ullong v) {
-    return v && !(v & (v - 1));
-}
-
-#define clz_ulong clz_u64
-#define ctz_ulong ctz_u64
-#define popcnt_ulong popcnt_u64
-#define ispow2_ulong ispow2_u64
-
-/* C11 generics for clz, ctz, popcnt, ispow2 */
-#define clz(X) _Generic((X), \
-uint : clz_u32, int : clz_u32, \
-ulong : clz_ulong, long : clz_ulong, \
-ullong : clz_u64, llong : clz_u64)(X)
-#define ctz(X) _Generic((X), \
-uint : ctz_u32, int : ctz_u32, \
-ulong : ctz_ulong, long : ctz_ulong, \
-ullong : ctz_u64, llong : ctz_u64)(X)
-#define popcnt(X) _Generic((X), \
-uint : popcnt_u32, int : popcnt_u32, \
-ulong : popcnt_ulong, long : popcnt_ulong, \
-ullong : popcnt_u64, llong : popcnt_u64)(X)
-#define ispow2(X) _Generic((X), \
-uint : ispow2_u32, int : ispow2_u32, \
-ulong : ispow2_ulong, long : ispow2_ulong, \
-ullong : ispow2_u64, llong : ispow2_u64)(X)
-
-static inline uint rupgtpow2_u32(uint x) {
-    return 1ull << (32 - clz(x - 1));
-}
-static inline uint rupgepow2_u32(uint x) {
-    return 1ull << (32 - clz(x));
-}
-static inline uint rdnlepow2_u32(uint x) {
-    return 1ull << (31 - clz(x - 1));
-}
-static inline uint rdnltpow2_u32(uint x) {
-    return 1ull << (31 - clz(x));
-}
-static inline ullong rupgtpow2_u64(ullong x) {
-    return 1ull << (64 - clz(x - 1));
-}
-static inline ullong rupgepow2_u64(ullong x) {
-    return 1ull << (64 - clz(x));
-}
-static inline ullong rdnlepow2_u64(ullong x) {
-    return 1ull << (63 - clz(x - 1));
-}
-static inline ullong rdnltpow2_u64(ullong x) {
-    return 1ull << (63 - clz(x));
-}
-
-#define rupgtpow2_ulong rupgtpow2_u64
-#define rupgepow2_ulong rupgepow2_u64
-#define rdnlepow2_ulong rdnlepow2_u64
-#define rdnltpow2_ulong rdnltpow2_u64
-
-/* C11 generics for roundpow2 */
-#define rupgtpow2(X) _Generic((X), \
-uint : rupgtpow2_u32, int : rupgtpow2_u32, \
-ulong : rupgtpow2_ulong, long : rupgtpow2_ulong, \
-ullong : rupgtpow2_u64, llong : rupgtpow2_u64)(X)
-#define rupgepow2(X) _Generic((X), \
-uint : rupgepow2_u32, int : rupgepow2_u32, \
-ulong : rupgepow2_ulong, long : rupgepow2_ulong, \
-ullong : rupgepow2_u64, llong : rupgepow2_u64)(X)
-#define rdnlepow2(X) _Generic((X), \
-uint : rdnlepow2_u32, int : rdnlepow2_u32, \
-ulong : rdnlepow2_ulong, long : rdnlepow2_ulong, \
-ullong : rdnlepow2_u64, llong : rdnlepow2_u64)(X)
-#define rdnltpow2(X) _Generic((X), \
-uint : rdnltpow2_u32, int : rdnltpow2_u32, \
-ulong : rdnltpow2_ulong, long : rdnltpow2_ulong, \
-ullong : rdnltpow2_u64, llong : rdnltpow2_u64)(X)
-
 /* endian helpers */
 static inline u16 be16(u16 v)
 {

[Prev in Thread]

Current Thread

[Next in Thread]

[PATCH v1 0/4] x86-disas: port x86-mini disassembler to QEMU, Michael Clark, 2025/01/23
- [PATCH v1 3/4] x86-disas: add x86-mini metadata tablegen script, Michael Clark, 2025/01/23
- [PATCH v1 1/4] x86-disas: add x86-mini instruction set metadata, Michael Clark, 2025/01/23
  - Re: [PATCH v1 1/4] x86-disas: add x86-mini instruction set metadata, Michael Clark, 2025/01/24
- [PATCH v1 4/4] x86-disas: add x86-mini disassembler implementation, Michael Clark, 2025/01/23
  - Re: [PATCH v1 4/4] x86-disas: add x86-mini disassembler implementation, Michael Clark <=
  - Re: [PATCH v1 4/4] x86-disas: add x86-mini disassembler implementation, Michael Clark, 2025/01/26
- [PATCH v1 2/4] x86-disas: add x86-mini metadata documentation, Michael Clark, 2025/01/23

Prev by Date: Re: [PATCH v5 3/4] audio: Add functions to initialize buffers
Next by Date: Re: [PULL 15/32] tests/functional/aarch64: add tests for FEAT_RME
Previous by thread: [PATCH v1 4/4] x86-disas: add x86-mini disassembler implementation
Next by thread: Re: [PATCH v1 4/4] x86-disas: add x86-mini disassembler implementation
Index(es):
- Date
- Thread