qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH for-4.0 v8 6/7] qemu_thread_create: propagate th


From: Fei Li
Subject: Re: [Qemu-devel] [PATCH for-4.0 v8 6/7] qemu_thread_create: propagate the error to callers to handle
Date: Wed, 19 Dec 2018 20:14:41 +0800
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.9.1



On 12/19/2018 06:10 PM, Markus Armbruster wrote:
Fei Li <address@hidden> writes:

On 12/13/2018 03:26 PM, Markus Armbruster wrote:
There's a question for David Gibson inline.  Please search for /ppc/.

Fei Li <address@hidden> writes:

Make qemu_thread_create() return a Boolean to indicate if it succeeds
rather than failing with an error. And add an Error parameter to hold
the error message and let the callers handle it.
The "rather than failing with an error" is misleading.  Before the
patch, we report to stderr and abort().  What about:

      qemu-thread: Make qemu_thread_create() handle errors properly

      qemu_thread_create() abort()s on error.  Not nice.  Give it a
      return value and an Error ** argument, so it can return success /
      failure.
A nice commit-amend! Thanks!
Still missing from the commit message then: how you update the callers.
Yes, agree. I think the-how should also be noted here, like
- propagating the err to callers whose call trace already have the
Error paramater;
- just add an &error_abort for qemu_thread_create() and make it a
"TODO: xxx";
Let's see below.

Cc: Markus Armbruster <address@hidden>
Cc: Daniel P. Berrangé <address@hidden>
Cc: Dr. David Alan Gilbert <address@hidden>
Signed-off-by: Fei Li <address@hidden>
---
   cpus.c                      | 45 ++++++++++++++++++++++++-------------
   dump.c                      |  6 +++--
   hw/misc/edu.c               |  6 +++--
   hw/ppc/spapr_hcall.c        | 10 +++++++--
   hw/rdma/rdma_backend.c      |  4 +++-
   hw/usb/ccid-card-emulated.c | 16 ++++++++++----
   include/qemu/thread.h       |  4 ++--
   io/task.c                   |  3 ++-
   iothread.c                  | 16 +++++++++-----
   migration/migration.c       | 54 
+++++++++++++++++++++++++++++----------------
   migration/postcopy-ram.c    | 14 ++++++++++--
   migration/ram.c             | 40 ++++++++++++++++++++++++---------
   migration/savevm.c          | 11 ++++++---
   tests/atomic_add-bench.c    |  3 ++-
   tests/iothread.c            |  2 +-
   tests/qht-bench.c           |  3 ++-
   tests/rcutorture.c          |  3 ++-
   tests/test-aio.c            |  2 +-
   tests/test-rcu-list.c       |  3 ++-
   ui/vnc-jobs.c               | 17 +++++++++-----
   ui/vnc-jobs.h               |  2 +-
   ui/vnc.c                    |  4 +++-
   util/compatfd.c             | 12 ++++++++--
   util/oslib-posix.c          | 17 ++++++++++----
   util/qemu-thread-posix.c    | 24 +++++++++++++-------
   util/qemu-thread-win32.c    | 16 ++++++++++----
   util/rcu.c                  |  3 ++-
   util/thread-pool.c          |  4 +++-
   28 files changed, 243 insertions(+), 101 deletions(-)

diff --git a/cpus.c b/cpus.c
index 7b091bda53..e8450e518a 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1961,15 +1961,20 @@ static void qemu_tcg_init_vcpu(CPUState *cpu, Error 
**errp)
               snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                    cpu->cpu_index);
   -            qemu_thread_create(cpu->thread, thread_name,
qemu_tcg_cpu_thread_fn,
-                               cpu, QEMU_THREAD_JOINABLE);
+            if (!qemu_thread_create(cpu->thread, thread_name,
+                                    qemu_tcg_cpu_thread_fn, cpu,
+                                    QEMU_THREAD_JOINABLE, errp)) {
+                return;
+            }
             } else {
               /* share a single thread for all cpus with TCG */
               snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
-            qemu_thread_create(cpu->thread, thread_name,
-                               qemu_tcg_rr_cpu_thread_fn,
-                               cpu, QEMU_THREAD_JOINABLE);
+            if (!qemu_thread_create(cpu->thread, thread_name,
+                                    qemu_tcg_rr_cpu_thread_fn, cpu,
+                                    QEMU_THREAD_JOINABLE, errp)) {
+                return;
+            }
                 single_tcg_halt_cond = cpu->halt_cond;
               single_tcg_cpu_thread = cpu->thread;
This is a caller that sets an error on failure.  You make it set an
error on qemu_thread_create() failure.  Makes sense.
Thanks for the comment!
@@ -1997,8 +2002,10 @@ static void qemu_hax_start_vcpu(CPUState *cpu, Error 
**errp)
         snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
                cpu->cpu_index);
-    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
-                       cpu, QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
+                            cpu, QEMU_THREAD_JOINABLE, errp)) {
+        return;
+    }
   #ifdef _WIN32
       cpu->hThread = qemu_thread_get_handle(cpu->thread);
   #endif
Likewise.  I'll stop commenting on this pattern now.

@@ -2013,8 +2020,10 @@ static void qemu_kvm_start_vcpu(CPUState *cpu, Error 
**errp)
       qemu_cond_init(cpu->halt_cond);
       snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
                cpu->cpu_index);
-    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
-                       cpu, QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
+                            cpu, QEMU_THREAD_JOINABLE, errp)) {
+        /* keep 'if' here in case there is further error handling logic */
+    }
   }
     static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
@@ -2031,8 +2040,10 @@ static void qemu_hvf_start_vcpu(CPUState *cpu, Error 
**errp)
         snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
                cpu->cpu_index);
-    qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
-                       cpu, QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
+                            cpu, QEMU_THREAD_JOINABLE, errp)) {
+        /* keep 'if' here in case there is further error handling logic */
+    }
   }
     static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
@@ -2044,8 +2055,10 @@ static void qemu_whpx_start_vcpu(CPUState *cpu, Error 
**errp)
       qemu_cond_init(cpu->halt_cond);
       snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
                cpu->cpu_index);
-    qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
-                       cpu, QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
+                            cpu, QEMU_THREAD_JOINABLE, errp)) {
+        return;
+    }
   #ifdef _WIN32
       cpu->hThread = qemu_thread_get_handle(cpu->thread);
   #endif
@@ -2060,8 +2073,10 @@ static void qemu_dummy_start_vcpu(CPUState *cpu, Error 
**errp)
       qemu_cond_init(cpu->halt_cond);
       snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
                cpu->cpu_index);
-    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
-                       QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn,
+                            cpu, QEMU_THREAD_JOINABLE, errp)) {
+        /* keep 'if' here in case there is further error handling logic */
+    }
   }
     bool qemu_init_vcpu(CPUState *cpu, Error **errp)
diff --git a/dump.c b/dump.c
index 4ec94c5e25..1f003aff9a 100644
--- a/dump.c
+++ b/dump.c
@@ -2020,8 +2020,10 @@ void qmp_dump_guest_memory(bool paging, const char *file,
       if (detach_p) {
           /* detached dump */
           s->detached = true;
-        qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
-                           s, QEMU_THREAD_DETACHED);
+        if (!qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
+                                s, QEMU_THREAD_DETACHED, errp)) {
+            /* keep 'if' here in case there is further error handling logic */
+        }
       } else {
           /* sync dump */
           dump_process(s, errp);
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index cdcf550dd7..6684c60a96 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -355,8 +355,10 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
         qemu_mutex_init(&edu->thr_mutex);
       qemu_cond_init(&edu->thr_cond);
-    qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
-                       edu, QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
+                            edu, QEMU_THREAD_JOINABLE, errp)) {
+        return;
+    }
         memory_region_init_io(&edu->mmio, OBJECT(edu),
&edu_mmio_ops, edu,
                       "edu-mmio", 1 * MiB);
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index ae913d070f..7c16ade04a 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -478,6 +478,7 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
       sPAPRPendingHPT *pending = spapr->pending_hpt;
       uint64_t current_ram_size;
       int rc;
+    Error *local_err = NULL;
         if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
           return H_AUTHORITY;
@@ -538,8 +539,13 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
       pending->shift = shift;
       pending->ret = H_HARDWARE;
   -    qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
-                       hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
+    if (!qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
+                            hpt_prepare_thread, pending,
+                            QEMU_THREAD_DETACHED, &local_err)) {
+        error_reportf_err(local_err, "failed to create hpt_prepare_thread: ");
+        g_free(pending);
+        return H_RESOURCE;
+    }
         spapr->pending_hpt = pending;
This is a caller that returns an error code on failure.  You change it
to report the error, then return failure.  The return failure part looks
fine.  Whether reporting the error is appropriate I can't say for sure.
No other failure mode reports anything.  David, what do you think?
Just as David explains. :)
Fei Li, you could pass &error_abort to side-step this question for now.

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index d7a4bbd91f..53a2bd0d85 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -164,8 +164,10 @@ static void start_comp_thread(RdmaBackendDev *backend_dev)
       snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
                ibv_get_device_name(backend_dev->ib_dev));
       backend_dev->comp_thread.run = true;
+    /* FIXME: let the further caller handle the error instead of abort() here 
*/
       qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
-                       comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
+                       comp_handler_thread, backend_dev,
+                       QEMU_THREAD_DETACHED, &error_abort);
   }
This is a caller that can't return failure.  You pass &error_abort.  No
behavioral change.
Actually, yes..The reason why I did not do some change is that I am
not quite
sure about how to fix for the rdma device, esp. setting certain value
for the
dev->regs_data[idx] when it fails.
I recommend to split this patch.  First part adds the Error ** parameter
to qemu_thread_create(), passing &error_abort everywhere.  No functional
change.  Subsequent patches then improve on &error_abort.  This way,
each improvement patch can be cc'ed to just that part's maintainer(s).
Parts you don't want to touch you simply leave at &error_abort.  Makes
sense?
Yes, I think this makes sense, much clearer. :) But I am a little worried about
whether too many subsequent improvement patches (some of them are quite
small changes) are acceptable.
BTW, referring to the split, I think the previous "[2/7] qemu_init_vcpu: add a
new Error parameter to propagate" should be merged into the later
improvement for qemu_xxx_init_vcpu. What do you think?
I think I'd mark the spot TODO, not FIXME.  Matter of taste, I guess.
Sounds good, thanks!
   void rdma_backend_register_comp_handler(void (*handler)(int status,
diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c
index 25976ed84f..c6783f124a 100644
--- a/hw/usb/ccid-card-emulated.c
+++ b/hw/usb/ccid-card-emulated.c
@@ -33,6 +33,7 @@
   #include "qemu/main-loop.h"
   #include "ccid.h"
   #include "qapi/error.h"
+#include "qemu/error-report.h"
     #define DPRINTF(card, lvl, fmt, ...) \
   do {\
@@ -544,10 +545,17 @@ static void emulated_realize(CCIDCardState *base, Error 
**errp)
           error_setg(errp, "%s: failed to initialize vcard", 
TYPE_EMULATED_CCID);
           goto out2;
       }
-    qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
-                       card, QEMU_THREAD_JOINABLE);
-    qemu_thread_create(&card->apdu_thread_id, "ccid/apdu", handle_apdu_thread,
-                       card, QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
+                            card, QEMU_THREAD_JOINABLE, errp)) {
+        error_report("failed to create event_thread");
+        goto out2;
+    }
+    if (!qemu_thread_create(&card->apdu_thread_id, "ccid/apdu",
+                            handle_apdu_thread, card,
+                            QEMU_THREAD_JOINABLE, errp)) {
+        error_report("failed to create handle_apdu_thread");
+        goto out2;
+    }
     out2:
       clean_event_notifier(card);
error_report() in a realize() method is almost certainly wrong.
Ok, I will remove these two.
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index 55d83a907c..12291f4ccd 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -152,9 +152,9 @@ void qemu_event_reset(QemuEvent *ev);
   void qemu_event_wait(QemuEvent *ev);
   void qemu_event_destroy(QemuEvent *ev);
   -void qemu_thread_create(QemuThread *thread, const char *name,
+bool qemu_thread_create(QemuThread *thread, const char *name,
                           void *(*start_routine)(void *),
-                        void *arg, int mode);
+                        void *arg, int mode, Error **errp);
   void *qemu_thread_join(QemuThread *thread);
   void qemu_thread_get_self(QemuThread *thread);
   bool qemu_thread_is_self(QemuThread *thread);
diff --git a/io/task.c b/io/task.c
index 2886a2c1bc..6d3a18ab80 100644
--- a/io/task.c
+++ b/io/task.c
@@ -149,7 +149,8 @@ void qio_task_run_in_thread(QIOTask *task,
                          "io-task-worker",
                          qio_task_thread_worker,
                          data,
-                       QEMU_THREAD_DETACHED);
+                       QEMU_THREAD_DETACHED,
+                       &error_abort);
   }
This is a caller that can't return failure.  You pass &error_abort.  No
behavioral change.  Unlike above, you don't mark this spot FIXME.  Any
particular reason for marking one, but not the other?
Emm, it is a little difficult to add a Error parameter for its callers and
the callers seem does not need the Error. Thus I think passing
&error_abort in this function instead of its further callers is more
direct. :)
The same reasons for the several below.

But just as you mentioned, maybe we should add a "TODO: xxxx" for the direct
&error_abort case in case the callers need the Error parameter in future.
Your use of &error_abort in this patch is fine simply because it's no
worse than before.  I'm merely probing your use of FIXME / TODO.

Adding a FIXME is appropriate when you're convinced the code is actually
broken.

Adding a TODO is appropriate when you believe the code should be
improved.

Both are almost always worth mentioning in the commit message.

If you don't really know, and you're not really changing how the code
behaves, then it's better not to add either kind of comment.
Ok, for such cases, I will not add either comment.
Thanks for the detail explanation!

I'll stop commenting on this pattern now.

diff --git a/iothread.c b/iothread.c
index 2fb1cdf55d..7335dacf0b 100644
--- a/iothread.c
+++ b/iothread.c
@@ -164,9 +164,7 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp)
                                   &local_error);
       if (local_error) {
           error_propagate(errp, local_error);
-        aio_context_unref(iothread->ctx);
-        iothread->ctx = NULL;
-        return;
+        goto fail;
       }
         qemu_mutex_init(&iothread->init_done_lock);
@@ -178,8 +176,12 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp)
        */
       name = object_get_canonical_path_component(OBJECT(obj));
       thread_name = g_strdup_printf("IO %s", name);
-    qemu_thread_create(&iothread->thread, thread_name, iothread_run,
-                       iothread, QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(&iothread->thread, thread_name, iothread_run,
+                            iothread, QEMU_THREAD_JOINABLE, errp)) {
+        g_free(thread_name);
+        g_free(name);
+        goto fail;
+    }
       g_free(thread_name);
       g_free(name);
   @@ -190,6 +192,10 @@ static void iothread_complete(UserCreatable
*obj, Error **errp)
                          &iothread->init_done_lock);
       }
       qemu_mutex_unlock(&iothread->init_done_lock);
+    return;
+fail:
+    aio_context_unref(iothread->ctx);
+    iothread->ctx = NULL;
   }
     typedef struct {
diff --git a/migration/migration.c b/migration/migration.c
index 0537fc0c26..af6c72ac5d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -438,19 +438,22 @@ static void process_incoming_migration_co(void *opaque)
           /* Make sure all file formats flush their mutable metadata */
           bdrv_invalidate_cache_all(&local_err);
           if (local_err) {
-            migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
-                    MIGRATION_STATUS_FAILED);
               error_report_err(local_err);
-            exit(EXIT_FAILURE);
+            goto fail;
           }
             if (colo_init_ram_cache() < 0) {
               error_report("Init ram cache failed");
-            exit(EXIT_FAILURE);
+            goto fail;
           }
   -        qemu_thread_create(&mis->colo_incoming_thread, "COLO
incoming",
-             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
+        if (!qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
+                                colo_process_incoming_thread, mis,
+                                QEMU_THREAD_JOINABLE, &local_err)) {
+            error_reportf_err(local_err, "failed to create "
+                              "colo_process_incoming_thread: ");
+            goto fail;
+        }
           mis->have_colo_incoming_thread = true;
           qemu_coroutine_yield();
   @@ -461,20 +464,22 @@ static void
process_incoming_migration_co(void *opaque)
       }
         if (ret < 0) {
-        Error *local_err = NULL;
-
-        migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
-                          MIGRATION_STATUS_FAILED);
           error_report("load of migration failed: %s", strerror(-ret));
-        qemu_fclose(mis->from_src_file);
-        if (multifd_load_cleanup(&local_err) != 0) {
-            error_report_err(local_err);
-        }
-        exit(EXIT_FAILURE);
+        goto fail;
       }
       mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
       qemu_bh_schedule(mis->bh);
       mis->migration_incoming_co = NULL;
+    return;
+fail:
+    local_err = NULL;
+    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
+                      MIGRATION_STATUS_FAILED);
+    qemu_fclose(mis->from_src_file);
+    if (multifd_load_cleanup(&local_err) != 0) {
+        error_report_err(local_err);
+    }
+    exit(EXIT_FAILURE);
   }
You change handling of errors other than qemu_thread_create().  Separate
patch, please.  I'd put it before this one.
Ok, thanks for the reminder. Will update in the next version.
     static void migration_incoming_setup(QEMUFile *f)
@@ -2345,6 +2350,7 @@ out:
   static int open_return_path_on_source(MigrationState *ms,
                                         bool create_thread)
   {
+    Error *local_err = NULL;
         ms->rp_state.from_dst_file =
qemu_file_get_return_path(ms->to_dst_file);
       if (!ms->rp_state.from_dst_file) {
@@ -2358,8 +2364,13 @@ static int open_return_path_on_source(MigrationState *ms,
           return 0;
       }
   -    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
-                       source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(&ms->rp_state.rp_thread, "return path",
+                            source_return_path_thread, ms,
+                            QEMU_THREAD_JOINABLE, &local_err)) {
+        error_reportf_err(local_err,
+                          "failed to create source_return_path_thread: ");
+        return -1;
+    }
         trace_open_return_path_on_source_continue();
This is a caller that returns an error code on failure.  You change it
to report the error, then return failure.  This is okay, because its
sole caller also reports errors that way.
Thanks.
@@ -3189,8 +3200,13 @@ void migrate_fd_connect(MigrationState *s, Error 
*error_in)
           migrate_fd_cleanup(s);
           return;
       }
-    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
-                       QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(&s->thread, "live_migration", migration_thread,
+                            s, QEMU_THREAD_JOINABLE, &error_in)) {
+        error_reportf_err(error_in, "failed to create migration_thread: ");
+        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+        migrate_fd_cleanup(s);
+        return;
+    }
       s->migration_thread_running = true;
   }
This is a caller that reports errors.  You make it handle
qemu_thread_create() the same way.  Good.
Thanks!
   diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index fa09dba534..80bfa9c4a2 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1083,6 +1083,8 @@ retry:
     int postcopy_ram_enable_notify(MigrationIncomingState *mis)
   {
+    Error *local_err = NULL;
+
       /* Open the fd for the kernel to give us userfaults */
       mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
       if (mis->userfault_fd == -1) {
@@ -1109,8 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState 
*mis)
       }
         qemu_sem_init(&mis->fault_thread_sem, 0);
-    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
-                       postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(&mis->fault_thread, "postcopy/fault",
+                            postcopy_ram_fault_thread, mis,
+                            QEMU_THREAD_JOINABLE, &local_err)) {
+        error_reportf_err(local_err,
+                          "failed to create postcopy_ram_fault_thread: ");
+        close(mis->userfault_event_fd);
+        close(mis->userfault_fd);
+        qemu_sem_destroy(&mis->fault_thread_sem);
+        return -1;
+    }
       qemu_sem_wait(&mis->fault_thread_sem);
       qemu_sem_destroy(&mis->fault_thread_sem);
       mis->have_fault_thread = true;
This is a caller that reports errors, then returns failure.  You make it
handle qemu_thread_create() the same way.  Good.

Not related to this patch, just spotted while reviewing it:

         /* Mark so that we get notified of accesses to unwritten areas */
         if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
             error_report("ram_block_enable_notify failed");
             return -1;
         }

Do we leak mis->userfault_fd, mis->userfault_event_fd,
mis->fault_thread_sem here?
Actually the patch 5/7 fixes this: we leave the cleanup() handling to
postcopy_ram_incoming_cleanup() when failing to notify here.
Looking back to the history, I falsely did close(these_fds) just here but
David corrected me, and the following is quoted from his earlier comment:
"
I don't think these close() calls are safe.  This code is just after
starting the fault thread, and the fault thread has a poll() call on
these fd's, so we can't close them until we've instructed that thread
to exit.

We should fall out through postcopy_ram_incoming_cleanup, and because
the thread exists it should do a notify to the thread, a join and then
only later do the close calls.
"
diff --git a/migration/ram.c b/migration/ram.c
index 658dfa88a3..6e0cccf066 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
   static int compress_threads_save_setup(void)
   {
       int i, thread_count;
+    Error *local_err = NULL;
         if (!migrate_use_compression()) {
           return 0;
@@ -502,9 +503,12 @@ static int compress_threads_save_setup(void)
           comp_param[i].quit = false;
           qemu_mutex_init(&comp_param[i].mutex);
           qemu_cond_init(&comp_param[i].cond);
-        qemu_thread_create(compress_threads + i, "compress",
-                           do_data_compress, comp_param + i,
-                           QEMU_THREAD_JOINABLE);
+        if (!qemu_thread_create(compress_threads + i, "compress",
+                                do_data_compress, comp_param + i,
+                                QEMU_THREAD_JOINABLE, &local_err)) {
+            error_reportf_err(local_err, "failed to create do_data_compress: 
");
+            goto exit;
+        }
       }
       return 0;
Reviewing the migration changes is getting tiresome...
Yes, indeed, the migration involves a lot! Thanks so much for helping
to review!
   Is reporting the
error appropriate here, and why?
I think the qemu monitor should display the obvious and exact failing
reason for administrators, esp considering that qemu_thread_create()
itself does not print any message thus we have no idea which direct
function fails if gdb is not enabled.
IOW, I think David's answer to that ppc's error_reportf_err() also
apply here:

"The error returns are for the guest, the reported errors are for the
guest administrator or management layers."
There could well be an issue with the "management layers" part.  Should
this error be sent to the management layer via QMP somehow?  Migration
maintainers should be able to assist with this question.

@@ -1075,8 +1079,14 @@ static void multifd_new_send_channel_async(QIOTask 
*task, gpointer opaque)
           p->c = QIO_CHANNEL(sioc);
           qio_channel_set_delay(p->c, false);
           p->running = true;
-        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
-                           QEMU_THREAD_JOINABLE);
+        if (!qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
+                                QEMU_THREAD_JOINABLE, &local_err)) {
+            migrate_set_error(migrate_get_current(), local_err);
+            error_reportf_err(local_err,
+                              "failed to create multifd_send_thread: ");
+            multifd_save_cleanup();
+            return;
+        }
             atomic_inc(&multifd_send_state->count);
       }
Same question.

@@ -1350,8 +1360,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error 
**errp)
       p->num_packets = 1;
         p->running = true;
-    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
-                       QEMU_THREAD_JOINABLE);
+    if (!qemu_thread_create(&p->thread, p->name, multifd_recv_thread,
+                            p, QEMU_THREAD_JOINABLE, &local_err)) {
+        error_propagate_prepend(errp, local_err,
+                                "failed to create multifd_recv_thread: ");
+        multifd_recv_terminate_threads(local_err);
+        return false;
+    }
       atomic_inc(&multifd_recv_state->count);
       return atomic_read(&multifd_recv_state->count) ==
              migrate_multifd_channels();
@@ -3617,6 +3632,7 @@ static void compress_threads_load_cleanup(void)
   static int compress_threads_load_setup(QEMUFile *f)
   {
       int i, thread_count;
+    Error *local_err = NULL;
         if (!migrate_use_compression()) {
           return 0;
@@ -3638,9 +3654,13 @@ static int compress_threads_load_setup(QEMUFile *f)
           qemu_cond_init(&decomp_param[i].cond);
           decomp_param[i].done = true;
           decomp_param[i].quit = false;
-        qemu_thread_create(decompress_threads + i, "decompress",
-                           do_data_decompress, decomp_param + i,
-                           QEMU_THREAD_JOINABLE);
+        if (!qemu_thread_create(decompress_threads + i, "decompress",
+                                do_data_decompress, decomp_param + i,
+                                QEMU_THREAD_JOINABLE, &local_err)) {
+            error_reportf_err(local_err,
+                              "failed to create do_data_decompress: ");
+            goto exit;
+        }
       }
       return 0;
   exit:
Same question.

diff --git a/migration/savevm.c b/migration/savevm.c
index d784e8aa40..b8bdcde5d8 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1747,9 +1747,14 @@ static int 
loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
       mis->have_listen_thread = true;
       /* Start up the listening thread and wait for it to signal ready */
       qemu_sem_init(&mis->listen_thread_sem, 0);
-    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
-                       postcopy_ram_listen_thread, NULL,
-                       QEMU_THREAD_DETACHED);
+    if (!qemu_thread_create(&mis->listen_thread, "postcopy/listen",
+                            postcopy_ram_listen_thread, NULL,
+                            QEMU_THREAD_DETACHED, &local_err)) {
+        error_reportf_err(local_err,
+                          "failed to create postcopy_ram_listen_thread: ");
+        qemu_sem_destroy(&mis->listen_thread_sem);
+        return -1;
+    }
       qemu_sem_wait(&mis->listen_thread_sem);
       qemu_sem_destroy(&mis->listen_thread_sem);
This is a caller that reports errors, then returns failure.  You make it
handle qemu_thread_create() the same way.  Good.

I'll stop commenting on this pattern now.
Thanks.
diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
index 2f6c72f63a..338b9563e3 100644
--- a/tests/atomic_add-bench.c
+++ b/tests/atomic_add-bench.c
@@ -2,6 +2,7 @@
   #include "qemu/thread.h"
   #include "qemu/host-utils.h"
   #include "qemu/processor.h"
+#include "qapi/error.h"
     struct thread_info {
       uint64_t r;
@@ -110,7 +111,7 @@ static void create_threads(void)
             info->r = (i + 1) ^ time(NULL);
           qemu_thread_create(&threads[i], NULL, thread_func, info,
-                           QEMU_THREAD_JOINABLE);
+                           QEMU_THREAD_JOINABLE, &error_abort);
       }
   }
... snip for all tests/xxx.c as all the passed parameter is &error_abort ...
   diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
index 929391f85d..35a652d1fd 100644
--- a/ui/vnc-jobs.c
+++ b/ui/vnc-jobs.c
@@ -31,6 +31,7 @@
   #include "vnc-jobs.h"
   #include "qemu/sockets.h"
   #include "qemu/main-loop.h"
+#include "qapi/error.h"
   #include "block/aio.h"
     /*
@@ -331,15 +332,21 @@ static bool vnc_worker_thread_running(void)
       return queue; /* Check global queue */
   }
   -void vnc_start_worker_thread(void)
+bool vnc_start_worker_thread(Error **errp)
   {
       VncJobQueue *q;
   -    if (vnc_worker_thread_running())
-        return ;
+    if (vnc_worker_thread_running()) {
+        goto out;
+    }
         q = vnc_queue_init();
-    qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread, q,
-                       QEMU_THREAD_DETACHED);
+    if (!qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread,
+                            q, QEMU_THREAD_DETACHED, errp)) {
+        vnc_queue_clear(q);
+        return false;
+    }
       queue = q; /* Set global queue */
+out:
+    return true;
   }
I recommend to pass &error_abort to qemu_thread_create() in this patch,
then convert vnc_start_worker_thread() to Error in a subsequent patch.
Ok, thanks! This makes this patch shorter. :)
BTW, would it be better by adding a "TODO: xxx" comment before the
&error_abort in this patch, and remove it in the subsequent patch?
If it is ok, I will do the same adding for the latter touch_all_pages().
See my remark on use of FIXME and TODO above.

Adding a TODO only to remove it later in the same series is fine.  More
so when it helps avoid review questions like "I think you need to do X
here", followed by "Oh, I see you're doing X here" when the reviewer
gets to the later patch.
Ok, will do so then, thanks for the advice.

diff --git a/ui/vnc-jobs.h b/ui/vnc-jobs.h
index 59f66bcc35..14640593db 100644
--- a/ui/vnc-jobs.h
+++ b/ui/vnc-jobs.h
@@ -37,7 +37,7 @@ void vnc_job_push(VncJob *job);
   void vnc_jobs_join(VncState *vs);
     void vnc_jobs_consume_buffer(VncState *vs);
-void vnc_start_worker_thread(void);
+bool vnc_start_worker_thread(Error **errp);
     /* Locks */
   static inline int vnc_trylock_display(VncDisplay *vd)
diff --git a/ui/vnc.c b/ui/vnc.c
index 0c1b477425..0ffe9e6a5d 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3236,7 +3236,9 @@ void vnc_display_init(const char *id, Error **errp)
       vd->connections_limit = 32;
         qemu_mutex_init(&vd->mutex);
-    vnc_start_worker_thread();
+    if (!vnc_start_worker_thread(errp)) {
+        return;
+    }
         vd->dcl.ops = &dcl_ops;
       register_displaychangelistener(&vd->dcl);
These two hunks then also go into the subsequent patch.
Ok.
diff --git a/util/compatfd.c b/util/compatfd.c
index 980bd33e52..886aa249f9 100644
--- a/util/compatfd.c
+++ b/util/compatfd.c
@@ -16,6 +16,7 @@
   #include "qemu/osdep.h"
   #include "qemu-common.h"
   #include "qemu/thread.h"
+#include "qapi/error.h"
     #include <sys/syscall.h>
   @@ -70,6 +71,7 @@ static int qemu_signalfd_compat(const sigset_t
*mask)
       struct sigfd_compat_info *info;
       QemuThread thread;
       int fds[2];
+    Error *local_err = NULL;
         info = malloc(sizeof(*info));
       if (info == NULL) {
@@ -88,8 +90,14 @@ static int qemu_signalfd_compat(const sigset_t *mask)
       memcpy(&info->mask, mask, sizeof(*mask));
       info->fd = fds[1];
   -    qemu_thread_create(&thread, "signalfd_compat",
sigwait_compat, info,
-                       QEMU_THREAD_DETACHED);
+    if (!qemu_thread_create(&thread, "signalfd_compat", sigwait_compat,
+                            info, QEMU_THREAD_DETACHED, &local_err)) {
+        error_reportf_err(local_err, "failed to create sigwait_compat: ");
+        close(fds[0]);
+        close(fds[1]);
+        free(info);
+        return -1;
+    }
         return fds[0];
   }
This function is implements signalfd() when the kernel doesn't provide
it.

signalfd() sets errno on failure.  The replacement's existing failure
modes set errno.  You add a failure mode that doesn't set errno.  That's
a bug.  To fix it, you can either make qemu_thread_create() set errno,
or you can make it return a value you can use to set errno.  The common
way to do the latter is returning a *negated* errno value.
Oops, I forgot setting the errno for Linux implementation! My fault..
I will set errno inside qemu_thread_create() as follows:
      err = pthread_attr_init(&attr);
      if (err) {
-        error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
-                         strerror(err));
+        errno = err;
+        error_setg_errno(errp, errno, "pthread_attr_init failed");
          return false;
      }
Make sure to set errno on all failures, not just this one.
Actually, this code update is changed for qemu_thread_create() itself,
I think if the errno is set in this function, no callers' errno need to be set.
Please correct me if I understand wrong. :)
Also add a function comment.  I suspect returning negated errno would
lead to a shorter function comment.
Actually only one caller needs the errno, that is the above qemu_signalfd_compat(). For the returning value, I remember there's once a email thread talking about it: returning a bool (and let the passed errp hold the error message) is to keep the consistency with glib. IMO, returning a bool or returning the -errno is equal to me if we do not use the return value again in the callers, it just involves the
judgement. But if we want to reuse the return value, like:
  ret = qemu_thread_create(xx, xx, &local_err);
I do not think it is much needed. What do you think?
  Yet another reason to write
function comments!  Making myself document the mess I made has made me
clean it up before I submit it many times :)
Ok, thanks for the experience. Will add the comment. :)

signalfd() doesn't print anything on failure.  The replacement's
existing failure modes don't print anything.  You add a failure mode
that does print.  I think it shouldn't.
Ok, I will remove it. Thanks!
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index c1bee2a581..2c779fd634 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -437,9 +437,12 @@ static bool touch_all_pages(char *area, size_t hpagesize, 
size_t numpages,
       size_t size_per_thread;
       char *addr = area;
       int i = 0;
+    int started_thread = 0;
+    Error *local_err = NULL;
         memset_thread_failed = false;
       memset_num_threads = get_memset_num_threads(smp_cpus);
+    started_thread = memset_num_threads;
       memset_thread = g_new0(MemsetThread, memset_num_threads);
       numpages_per_thread = (numpages / memset_num_threads);
       size_per_thread = (hpagesize * numpages_per_thread);
@@ -448,13 +451,19 @@ static bool touch_all_pages(char *area, size_t hpagesize, 
size_t numpages,
           memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
                                       numpages : numpages_per_thread;
           memset_thread[i].hpagesize = hpagesize;
-        qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
-                           do_touch_pages, &memset_thread[i],
-                           QEMU_THREAD_JOINABLE);
+        if (!qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
+                                do_touch_pages, &memset_thread[i],
+                                QEMU_THREAD_JOINABLE, &local_err)) {
+            error_reportf_err(local_err, "failed to create do_touch_pages: ");
+            memset_thread_failed = true;
+            started_thread = i;
+            goto out;
+        }
           addr += size_per_thread;
           numpages -= numpages_per_thread;
       }
-    for (i = 0; i < memset_num_threads; i++) {
+out:
+    for (i = 0; i < started_thread; i++) {
           qemu_thread_join(&memset_thread[i].pgthread);
       }
       g_free(memset_thread);
You need to convert this function to Error instead, because its caller
os_mem_prealloc() sets an error on failure.  I recommend to pass
&error_abort in this patch, and convert to Error in a subsequent patch.
Ok, thanks for the advice.
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 865e476df5..81b40a1ece 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -15,6 +15,7 @@
   #include "qemu/atomic.h"
   #include "qemu/notify.h"
   #include "qemu-thread-common.h"
+#include "qapi/error.h"
     static bool name_threads;
   @@ -500,9 +501,9 @@ static void *qemu_thread_start(void *args)
       return r;
   }
   -void qemu_thread_create(QemuThread *thread, const char *name,
-                       void *(*start_routine)(void*),
-                       void *arg, int mode)
+bool qemu_thread_create(QemuThread *thread, const char *name,
+                        void *(*start_routine)(void *),
+                        void *arg, int mode, Error **errp)
   {
       sigset_t set, oldset;
       int err;
@@ -511,7 +512,9 @@ void qemu_thread_create(QemuThread *thread, const char 
*name,
         err = pthread_attr_init(&attr);
       if (err) {
-        error_exit(err, __func__);
+        error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
+                         strerror(err));
-err is actually wrong: pthread_attr_init() returns a *positive* errno
code on failure.
Yes, a definite wrong code.. :( Actually, pthread_attr_init() returns a nonzero error
number, thus I do the below update by assigning the return err to errno.

     err = pthread_attr_init(&attr);
     if (err) {
-        error_exit(err, __func__);
+        errno = err;
+        error_setg_errno(errp, errno, "pthread_attr_init failed");
+        return false;
     }

Have a nice day, thanks so much for the review! ;)
Fei

+        return false;
       }
         if (mode == QEMU_THREAD_DETACHED) {
@@ -526,16 +529,21 @@ void qemu_thread_create(QemuThread *thread, const char 
*name,
       qemu_thread_args->name = g_strdup(name);
       qemu_thread_args->start_routine = start_routine;
       qemu_thread_args->arg = arg;
-
Let's keep the blank line.
ok.

Thanks so much for the review! Have a nice day. :)
Fei
You're welcome :)

       err = pthread_create(&thread->thread, &attr,
                            qemu_thread_start, qemu_thread_args);
-
-    if (err)
-        error_exit(err, __func__);
+    if (err) {
+        error_setg_errno(errp, -err, "pthread_create failed: %s",
+                         strerror(err));
+        pthread_attr_destroy(&attr);
+        g_free(qemu_thread_args->name);
+        g_free(qemu_thread_args);
+        return false;
+    }
         pthread_sigmask(SIG_SETMASK, &oldset, NULL);
         pthread_attr_destroy(&attr);
+    return true;
   }
     void qemu_thread_get_self(QemuThread *thread)
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 4a363ca675..57b1143e97 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -20,6 +20,7 @@
   #include "qemu/thread.h"
   #include "qemu/notify.h"
   #include "qemu-thread-common.h"
+#include "qapi/error.h"
   #include <process.h>
     static bool name_threads;
@@ -388,9 +389,9 @@ void *qemu_thread_join(QemuThread *thread)
       return ret;
   }
   -void qemu_thread_create(QemuThread *thread, const char *name,
-                       void *(*start_routine)(void *),
-                       void *arg, int mode)
+bool qemu_thread_create(QemuThread *thread, const char *name,
+                        void *(*start_routine)(void *),
+                        void *arg, int mode, Error **errp)
   {
       HANDLE hThread;
       struct QemuThreadData *data;
@@ -409,10 +410,17 @@ void qemu_thread_create(QemuThread *thread, const char 
*name,
       hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine,
                                         data, 0, &thread->tid);
       if (!hThread) {
-        error_exit(GetLastError(), __func__);
+        if (data->mode != QEMU_THREAD_DETACHED) {
+            DeleteCriticalSection(&data->cs);
+        }
+        error_setg_errno(errp, errno,
+                         "failed to create win32_start_routine");
+        g_free(data);
+        return false;
       }
       CloseHandle(hThread);
       thread->data = data;
+    return true;
   }
     void qemu_thread_get_self(QemuThread *thread)
diff --git a/util/rcu.c b/util/rcu.c
index 5676c22bd1..145dcdb0c6 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -32,6 +32,7 @@
   #include "qemu/atomic.h"
   #include "qemu/thread.h"
   #include "qemu/main-loop.h"
+#include "qapi/error.h"
   #if defined(CONFIG_MALLOC_TRIM)
   #include <malloc.h>
   #endif
@@ -325,7 +326,7 @@ static void rcu_init_complete(void)
        * must have been quiescent even after forking, just recreate it.
        */
       qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
-                       NULL, QEMU_THREAD_DETACHED);
+                       NULL, QEMU_THREAD_DETACHED, &error_abort);
         rcu_register_thread();
   }
diff --git a/util/thread-pool.c b/util/thread-pool.c
index 610646d131..ad0f980783 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -22,6 +22,7 @@
   #include "trace.h"
   #include "block/thread-pool.h"
   #include "qemu/main-loop.h"
+#include "qapi/error.h"
     static void do_spawn_thread(ThreadPool *pool);
   @@ -132,7 +133,8 @@ static void do_spawn_thread(ThreadPool *pool)
       pool->new_threads--;
       pool->pending_threads++;
   -    qemu_thread_create(&t, "worker", worker_thread, pool,
QEMU_THREAD_DETACHED);
+    qemu_thread_create(&t, "worker", worker_thread, pool,
+                       QEMU_THREAD_DETACHED, &error_abort);
   }
     static void spawn_thread_bh_fn(void *opaque)





reply via email to

[Prev in Thread] Current Thread [Next in Thread]