qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 27/27] accel/tcg: Precompute curr_cflags into cpu->tcg_cflags


From: Alex Bennée
Subject: Re: [PATCH 27/27] accel/tcg: Precompute curr_cflags into cpu->tcg_cflags
Date: Fri, 05 Mar 2021 17:12:21 +0000
User-agent: mu4e 1.5.8; emacs 28.0.50

Richard Henderson <richard.henderson@linaro.org> writes:

> The primary motivation is to remove a dozen insns along
> the fast-path in tb_lookup.  As a byproduct, this allows
> us to completely remove parallel_cpus.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  accel/tcg/tcg-accel-ops.h       |  1 +
>  include/exec/exec-all.h         |  7 +------
>  include/hw/core/cpu.h           |  2 ++
>  accel/tcg/cpu-exec.c            |  3 ---
>  accel/tcg/tcg-accel-ops-mttcg.c |  3 +--
>  accel/tcg/tcg-accel-ops-rr.c    |  2 +-
>  accel/tcg/tcg-accel-ops.c       |  8 ++++++++
>  accel/tcg/translate-all.c       |  4 ----
>  linux-user/main.c               |  1 +
>  linux-user/sh4/signal.c         |  8 +++++---
>  linux-user/syscall.c            | 18 ++++++++++--------
>  11 files changed, 30 insertions(+), 27 deletions(-)
>
> diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h
> index 48130006de..6a5fcef889 100644
> --- a/accel/tcg/tcg-accel-ops.h
> +++ b/accel/tcg/tcg-accel-ops.h
> @@ -17,5 +17,6 @@
>  void tcg_cpus_destroy(CPUState *cpu);
>  int tcg_cpus_exec(CPUState *cpu);
>  void tcg_handle_interrupt(CPUState *cpu, int mask);
> +void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
>  
>  #endif /* TCG_CPUS_H */
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 75f8c3981a..310f474540 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -510,8 +510,6 @@ struct TranslationBlock {
>      uintptr_t jmp_dest[2];
>  };
>  
> -extern bool parallel_cpus;
> -
>  /* Hide the qatomic_read to make code a little easier on the eyes */
>  static inline uint32_t tb_cflags(const TranslationBlock *tb)
>  {
> @@ -521,10 +519,7 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb)
>  /* current cflags for hashing/comparison */
>  static inline uint32_t curr_cflags(CPUState *cpu)
>  {
> -    uint32_t cflags = deposit32(0, CF_CLUSTER_SHIFT, 8, cpu->cluster_index);
> -    cflags |= parallel_cpus ? CF_PARALLEL : 0;
> -    cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
> -    return cflags;
> +    return cpu->tcg_cflags;
>  }
>  
>  /* TranslationBlock invalidate API */
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index c005d3dc2d..c68bc3ba8a 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -282,6 +282,7 @@ struct qemu_work_item;
>   *   to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will
>   *   be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER
>   *   QOM parent.
> + * @tcg_cflags: Pre-computed cflags for this cpu.
>   * @nr_cores: Number of cores within this CPU package.
>   * @nr_threads: Number of threads within this CPU.
>   * @running: #true if CPU is currently running (lockless).
> @@ -412,6 +413,7 @@ struct CPUState {
>      /* TODO Move common fields from CPUArchState here. */
>      int cpu_index;
>      int cluster_index;
> +    uint32_t tcg_cflags;
>      uint32_t halted;
>      uint32_t can_do_io;
>      int32_t exception_index;
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 931da96c2b..bdfa036ac8 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -267,8 +267,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
>              mmap_unlock();
>          }
>  
> -        /* Since we got here, we know that parallel_cpus must be true.  */
> -        parallel_cpus = false;
>          cpu_exec_enter(cpu);
>          /* execute the generated code */
>          trace_exec_tb(tb, pc);
> @@ -296,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
>       * the execution.
>       */
>      g_assert(cpu_in_exclusive_context(cpu));
> -    parallel_cpus = true;
>      cpu->running = false;
>      end_exclusive();

I don't see where we generate non-parallel-aware code. Do we not care
about it anymore? Anyway, just an observation:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

-- 
Alex Bennée



reply via email to

[Prev in Thread] Current Thread [Next in Thread]