Older Intel manuals (pre-2010) describe Z as undefined, but AMD and
newer Intel manuals describe Z as unchanged.
Signed-off-by: Richard Henderson <address@hidden>
---
target-i386/translate.c | 40 +++++++++++++++++++++++++++++++---------
1 file changed, 31 insertions(+), 9 deletions(-)
---
Clemens, your patch fails to fix flags computation for bts/btr/btc,
which should be done similarly to bt.
And to answer your question, no, QEMU does not make any assumptions
about undefined flags. We often set them to zero, just because that
is easier than any other setting.
r~
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 02625e3..032b0fd 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -6708,41 +6708,63 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
}
bt_op:
tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
+ tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
switch(op) {
case 0:
- tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]);
- tcg_gen_movi_tl(cpu_cc_dst, 0);
break;
case 1:
- tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
tcg_gen_movi_tl(cpu_tmp0, 1);
tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
break;
case 2:
- tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
tcg_gen_movi_tl(cpu_tmp0, 1);
tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
- tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
- tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
break;
default:
case 3:
- tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
tcg_gen_movi_tl(cpu_tmp0, 1);
tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
break;
}
- set_cc_op(s, CC_OP_SARB + ot);
if (op != 0) {
if (mod != 3) {
gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
} else {
gen_op_mov_reg_v(ot, rm, cpu_T[0]);
}
+ }
+
+ /* Delay all CC updates until after the store above. Note that
+ C is the result of the test, Z is unchanged, and the others
+ are all undefined. */
+ switch (s->cc_op) {
+ case CC_OP_MULB ... CC_OP_MULQ:
+ case CC_OP_ADDB ... CC_OP_ADDQ:
+ case CC_OP_ADCB ... CC_OP_ADCQ:
+ case CC_OP_SUBB ... CC_OP_SUBQ:
+ case CC_OP_SBBB ... CC_OP_SBBQ:
+ case CC_OP_LOGICB ... CC_OP_LOGICQ:
+ case CC_OP_INCB ... CC_OP_INCQ:
+ case CC_OP_DECB ... CC_OP_DECQ:
+ case CC_OP_SHLB ... CC_OP_SHLQ:
+ case CC_OP_SARB ... CC_OP_SARQ:
+ case CC_OP_BMILGB ... CC_OP_BMILGQ:
+ /* Z was going to be computed from the non-zero status of CC_DST.
+ We can get that same Z value (and the new C value) by leaving
+ CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
+ same width. */
tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
- tcg_gen_movi_tl(cpu_cc_dst, 0);
+ set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
+ break;
+ default:
+ /* Otherwise, generate EFLAGS and replace the C bit. */
+ gen_compute_eflags(s);
+ tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
+ ctz32(CC_C), 1);
+ break;
}
break;
case 0x1bc: /* bsf / tzcnt */