[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v3 02/16] target/arm: Create gen_gvec_{u,s}{rshr,rsra}
From: |
Peter Maydell |
Subject: |
Re: [PATCH v3 02/16] target/arm: Create gen_gvec_{u,s}{rshr,rsra} |
Date: |
Tue, 12 May 2020 14:09:20 +0100 |
On Fri, 8 May 2020 at 16:22, Richard Henderson
<address@hidden> wrote:
>
> Create vectorized versions of handle_shri_with_rndacc
> for shift+round and shift+round+accumulate. Add out-of-line
> helpers in preparation for longer vector lengths from SVE.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> target/arm/helper.h | 20 ++
> target/arm/translate.h | 9 +
> target/arm/translate-a64.c | 11 +-
> target/arm/translate.c | 461 +++++++++++++++++++++++++++++++++++--
> target/arm/vec_helper.c | 50 ++++
> 5 files changed, 525 insertions(+), 26 deletions(-)
> +void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
> + int64_t shift, uint32_t opr_sz, uint32_t max_sz)
> +{
> + static const TCGOpcode vecop_list[] = {
> + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
> + };
> + static const GVecGen2i ops[4] = {
> + { .fni8 = gen_srshr8_i64,
> + .fniv = gen_srshr_vec,
> + .fno = gen_helper_gvec_srshr_b,
> + .opt_opc = vecop_list,
> + .vece = MO_8 },
> + { .fni8 = gen_srshr16_i64,
> + .fniv = gen_srshr_vec,
> + .fno = gen_helper_gvec_srshr_h,
> + .opt_opc = vecop_list,
> + .vece = MO_16 },
> + { .fni4 = gen_srshr32_i32,
> + .fniv = gen_srshr_vec,
> + .fno = gen_helper_gvec_srshr_s,
> + .opt_opc = vecop_list,
> + .vece = MO_32 },
> + { .fni8 = gen_srshr64_i64,
> + .fniv = gen_srshr_vec,
> + .fno = gen_helper_gvec_srshr_d,
> + .prefer_i64 = TCG_TARGET_REG_BITS == 64,
> + .opt_opc = vecop_list,
> + .vece = MO_64 },
> + };
> +
> + /* tszimm encoding produces immediates in the range [1..esize] */
> + tcg_debug_assert(shift > 0);
This assert can be triggered:
#4 0x00000000004ee086 in gen_gvec_ursra (vece=3, rd_ofs=3112,
rm_ofs=3296, shift=-6, opr_sz=8,
max_sz=8) at
/home/petmay01/linaro/qemu-from-laptop/qemu/target/arm/translate.c:4413
#5 0x00000000004f17ad in disas_neon_data_insn (s=0x7fffffffe000,
insn=4089066426)
at /home/petmay01/linaro/qemu-from-laptop/qemu/target/arm/translate.c:5702
#6 0x0000000000510dfa in disas_arm_insn (s=0x7fffffffe000, insn=4089066426)
at /home/petmay01/linaro/qemu-from-laptop/qemu/target/arm/translate.c:10803
#7 0x0000000000511b6b in arm_tr_translate_insn
(dcbase=0x7fffffffe000, cpu=0xd04ea0)
at /home/petmay01/linaro/qemu-from-laptop/qemu/target/arm/translate.c:11277
#8 0x000000000045a833 in translator_loop (ops=0xc2ad40
<arm_translator_ops>, db=0x7fffffffe000,
cpu=0xd04ea0, tb=0x7fffe80e3540 <code_gen_buffer+931091>, max_insns=512)
at /home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/translator.c:102
#9 0x0000000000512303 in gen_intermediate_code (cpu=0xd04ea0,
tb=0x7fffe80e3540 <code_gen_buffer+931091>, max_insns=512)
at /home/petmay01/linaro/qemu-from-laptop/qemu/target/arm/translate.c:11568
#10 0x0000000000458c01 in tb_gen_code (cpu=0xd04ea0, pc=4268188000,
cs_base=0, flags=1196032,
cflags=-16777216) at
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/translate-all.c:1718
#11 0x0000000000455f73 in tb_find (cpu=0xd04ea0, last_tb=0x0,
tb_exit=0, cf_mask=0)
at /home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cpu-exec.c:407
#12 0x00000000004566d3 in cpu_exec (cpu=0xd04ea0)
at /home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cpu-exec.c:731
#13 0x000000000049a380 in cpu_loop (env=0xd0d180)
at /home/petmay01/linaro/qemu-from-laptop/qemu/linux-user/arm/cpu_loop.c:219
#14 0x0000000000465637 in main (argc=5, argv=0x7fffffffeb28,
envp=0x7fffffffeb58)
at /home/petmay01/linaro/qemu-from-laptop/qemu/linux-user/main.c:872
> + tcg_debug_assert(shift <= (8 << vece));
> +
> + if (shift == (8 << vece)) {
> + /*
> + * Shifts larger than the element size are architecturally valid.
> + * Signed results in all sign bits. With rounding, this produces
> + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
> + * I.e. always zero.
> + */
> + tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
> + } else {
> + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
> + }
> +}
> @@ -5269,6 +5685,28 @@ static int disas_neon_data_insn(DisasContext *s,
> uint32_t insn)
> }
> return 0;
>
> + case 2: /* VRSHR */
> + /* Right shift comes here negative. */
> + shift = -shift;
> + if (u) {
> + gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
> + vec_size, vec_size);
> + } else {
> + gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
> + vec_size, vec_size);
> + }
> + return 0;
> +
> + case 3: /* VRSRA */
> + if (u) {
> + gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
> + vec_size, vec_size);
> + } else {
> + gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
> + vec_size, vec_size);
> + }
> + return 0;
I think the VRSRA case needs the same "shift = -shift" as VRSHR.
thanks
-- PMM
- [PATCH v3 00/16] target/arm: partial vector cleanup, Richard Henderson, 2020/05/08
- [PATCH v3 01/16] target/arm: Create gen_gvec_[us]sra, Richard Henderson, 2020/05/08
- [PATCH v3 02/16] target/arm: Create gen_gvec_{u,s}{rshr,rsra}, Richard Henderson, 2020/05/08
- Re: [PATCH v3 02/16] target/arm: Create gen_gvec_{u,s}{rshr,rsra},
Peter Maydell <=
- Re: [PATCH v3 02/16] target/arm: Create gen_gvec_{u,s}{rshr,rsra}, Peter Maydell, 2020/05/12
- [PATCH v3 03/16] target/arm: Create gen_gvec_{sri,sli}, Richard Henderson, 2020/05/08
- [PATCH v3 04/16] target/arm: Remove unnecessary range check for VSHL, Richard Henderson, 2020/05/08
- [PATCH v3 05/16] target/arm: Tidy handle_vec_simd_shri, Richard Henderson, 2020/05/08