Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc

qemu-riscv

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc

From:	Richard Henderson
Subject:	Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc
Date:	Thu, 12 Sep 2019 11:27:20 -0400
User-agent:	Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.8.0

On 9/11/19 2:25 AM, liuzhiwei wrote:
>  #define VECTOR_HELPER(name) HELPER(glue(vector_, name))
> +#define SIGNBIT8    (1 << 7)
> +#define SIGNBIT16   (1 << 15)
> +#define SIGNBIT32   (1 << 31)
> +#define SIGNBIT64   ((uint64_t)1 << 63)

Perhaps make up your mind if you want signed or unsigned values?  Perhaps just
use or redefine INT<N>_MIN instead?

> +static int64_t extend_gpr(target_ulong reg)
> +{
> +    return sign_extend(reg, sizeof(target_ulong) * 8);
> +}

Note wrt usage:
+                extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]);

This is equivalent to "extend_rs1 = (target_long)env->gpr[rs1]".

I don't see how this helper function is helping, really.
Also, pass gprs by value, not by index.

> +static inline int vector_get_carry(CPURISCVState *env, int width, int lmul,
> +    int index)
> +{
> +    int mlen = width / lmul;
> +    int idx = (index * mlen) / 8;
> +    int pos = (index * mlen) % 8;
> +
> +    return (env->vfp.vreg[0].u8[idx] >> pos) & 0x1;
> +}

Any reason not to re-use vector_elem_mask?

> +static inline uint64_t u64xu64_lh(uint64_t a, uint64_t b)
> +{
> +    uint64_t hi_64, carry;
> +
> +    /* first get the whole product in {hi_64, lo_64} */
> +    uint64_t a_hi = a >> 32;
> +    uint64_t a_lo = (uint32_t)a;
> +    uint64_t b_hi = b >> 32;
> +    uint64_t b_lo = (uint32_t)b;
> +
> +    /*
> +     * a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
> +     *               = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
> +     *                 (a_lo * b_hi) << 32 + a_lo * b_lo
> +     *               = {hi_64, lo_64}
> +     * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
> +     *       = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
> +     * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
> +     *           (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
> +     */
> +
> +    carry =  ((uint64_t)(uint32_t)(a_hi * b_lo) +
> +              (uint64_t)(uint32_t)(a_lo * b_hi) +
> +              ((a_lo * b_lo) >> 32)) >> 32;
> +
> +    hi_64 = a_hi * b_hi +
> +            ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> +            carry;
> +
> +    return hi_64;
> +}

Use mulu64().

> +static inline int64_t s64xu64_lh(int64_t a, uint64_t b)
> +{
> +    uint64_t abs_a = a;
> +    uint64_t lo_64, hi_64;
> +
> +    if (a < 0) {
> +        abs_a =  ~a + 1;

 abs_a = -a

> +static inline int64_t s64xs64_lh(int64_t a, int64_t b)
> +{
> +    uint64_t abs_a = a, abs_b = b;
> +    uint64_t lo_64, hi_64;
> +
> +    if (a < 0) {
> +        abs_a =  ~a + 1;
> +    }
> +    if (b < 0) {
> +        abs_b = ~b + 1;
> +    }
> +    lo_64 = abs_a * abs_b;
> +    hi_64 = u64xu64_lh(abs_a, abs_b);
> +
> +    if ((a ^ b) & SIGNBIT64) {
> +        lo_64 = ~lo_64;
> +        hi_64 = ~hi_64;
> +        if (lo_64 == UINT64_MAX) {
> +            lo_64 = 0;
> +            hi_64 += 1;
> +        } else {
> +            lo_64 += 1;
> +        }
> +    }
> +    return hi_64;
> +}

Use muls64().


> +void VECTOR_HELPER(vadc_vvm)(CPURISCVState *env, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int i, j, vl;
> +    uint32_t lmul, width, src1, src2, dest, vlmax, carry;
> +
> +    vl    = env->vfp.vl;
> +    lmul  = vector_get_lmul(env);
> +    width   = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    vector_lmul_check_reg(env, lmul, rs1, false);
> +    vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src1 = rs1 + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / width));
> +        dest = rd + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;

Again, hoist.

> +        } else if (i < vl) {

I would think this too could be moved into the loop condition.

> +            switch (width) {
> +            case 8:
> +                carry = vector_get_carry(env, width, lmul, i);
> +                env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]
> +                    + env->vfp.vreg[src2].u8[j] + carry;
> +                break;
> +            case 16:
> +                carry = vector_get_carry(env, width, lmul, i);
> +                env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j]
> +                    + env->vfp.vreg[src2].u16[j] + carry;
> +                break;
> +            case 32:
> +                carry = vector_get_carry(env, width, lmul, i);
> +                env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j]
> +                    + env->vfp.vreg[src2].u32[j] + carry;
> +                break;
> +            case 64:
> +                carry = vector_get_carry(env, width, lmul, i);
> +                env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j]
> +                    + env->vfp.vreg[src2].u64[j] + carry;
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);

The tail clearing could then be done as a loop of its own, which would devolve
to memset on a little-endian host.


> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vadc_vxm)(CPURISCVState *env, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{

Watch the spacing between functions.
Pass gpr rs1 by value.

> +void VECTOR_HELPER(vadc_vim)(CPURISCVState *env, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
...
> +                env->vfp.vreg[dest].u8[j] = sign_extend(rs1, 5)

Pass the immediate as a sign-extended immediate to begin with, not as an
unsigned 5-bit field.

All of the rest of the helpers are about the same.

Consider creating a helper function that contains the basic outline of the
vector processing, and takes a (set of) function pointers that perform the
operation.  With optimization, compiler inlining should produce the same code
as you have here without having to replicate quite so much code for each
helper.  You can also fix a bug in the basic outline in one place instead of
hundreds.


r~

[Prev in Thread]

Current Thread

[Next in Thread]

[Qemu-riscv] [PATCH v2 06/17] RISC-V: add vector extension fault-only-first implementation, (continued)
- [Qemu-riscv] [PATCH v2 06/17] RISC-V: add vector extension fault-only-first implementation, liuzhiwei, 2019/09/11
  - Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 06/17] RISC-V: add vector extension fault-only-first implementation, Richard Henderson, 2019/09/12
- [Qemu-riscv] [PATCH v2 07/17] RISC-V: add vector extension atomic instructions, liuzhiwei, 2019/09/11
  - Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 07/17] RISC-V: add vector extension atomic instructions, Richard Henderson, 2019/09/12
- [Qemu-riscv] [PATCH v2 09/17] RISC-V: add vector extension integer instructions part2, bit/shift, liuzhiwei, 2019/09/11
  - Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 09/17] RISC-V: add vector extension integer instructions part2, bit/shift, Richard Henderson, 2019/09/12
- [Qemu-riscv] [PATCH v2 10/17] RISC-V: add vector extension integer instructions part3, cmp/min/max, liuzhiwei, 2019/09/11
- [Qemu-riscv] [PATCH v2 05/17] RISC-V: add vector extension load and store instructions, liuzhiwei, 2019/09/11
  - Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 05/17] RISC-V: add vector extension load and store instructions, Richard Henderson, 2019/09/12
- [Qemu-riscv] [PATCH v2 08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc, liuzhiwei, 2019/09/11
  - Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc, Richard Henderson <=
    - Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc, Richard Henderson, 2019/09/12
- [Qemu-riscv] [PATCH v2 16/17] RISC-V: add vector extension mask instructions, liuzhiwei, 2019/09/11
  - Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 16/17] RISC-V: add vector extension mask instructions, Richard Henderson, 2019/09/12
- [Qemu-riscv] [PATCH v2 17/17] RISC-V: add vector extension premutation instructions, liuzhiwei, 2019/09/11
  - Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 17/17] RISC-V: add vector extension premutation instructions, Richard Henderson, 2019/09/12
- [Qemu-riscv] [PATCH v2 15/17] RISC-V: add vector extension reduction instructions, liuzhiwei, 2019/09/11
  - Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 15/17] RISC-V: add vector extension reduction instructions, Richard Henderson, 2019/09/12
- [Qemu-riscv] [PATCH v2 13/17] RISC-V: add vector extension float instruction part1, add/sub/mul/div, liuzhiwei, 2019/09/11
- [Qemu-riscv] [PATCH v2 12/17] RISC-V: add vector extension fixed point instructions, liuzhiwei, 2019/09/11
- [Qemu-riscv] [PATCH v2 14/17] RISC-V: add vector extension float instructions part2, sqrt/cmp/cvt/others, liuzhiwei, 2019/09/11

Prev by Date: Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 01/17] RISC-V: add vfp field in CPURISCVState
Next by Date: Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc
Previous by thread: [Qemu-riscv] [PATCH v2 08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc
Next by thread: Re: [Qemu-riscv] [Qemu-devel] [PATCH v2 08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc
Index(es):
- Date
- Thread