bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Fix race condition in ext2fs when remounting


From: James Clarke
Subject: [PATCH] Fix race condition in ext2fs when remounting
Date: Wed, 22 Jul 2015 23:42:55 +0100

On some systems, ext2fs.static would regularly hang at startup, as a
race condition meant it would process paging requests while reounting.
To fix this, libpager has been altered to allow inhibiting and resuming
its worker threads.

* console/pager.c (pager_requests): New variable.
(user_pager_init): Updated call to pager_start_workers to use new
pager_requests variable.
* daemons/runsystem.sh: Removed artificial delay working around the race
condition.
* ext2fs/ext2fs.c (diskfs_reload_global_state): Call new
inhibit_ext2_pager and resume_ext2_pager functions, and leave sblock as
non-NULL so it will be munmapped.
* ext2fs/ext2fs.h (inhibit_ext2_pager,resume_ext2_pager): New functions.
* ext2fs/pager.c (file_pager_requests): New variable.
(create_disk_pager): Updated call to pager_start_workers to use new
file_pager_requests variable.
(inhibit_ext2_pager,resume_ext2_pager): New functions.
* fatfs/fatfs.h (inhibit_fat_pager,resume_fat_pager): New functions.
* fatfs/pager.c (file_pager_requests): New variable.
(create_fat_pager): Updated call to pager_start_workers to use new
file_pager_requests variable.
(inhibit_fat_pager,resume_fat_pager): New functions.
* libdiskfs/disk-pager.c (diskfs_disk_pager_requests): New variable.
(diskfs_start_disk_pager): Updated call to pager_start_workers to use
new diskfs_disk_pager_requests variable.
* libdiskfs/diskfs-pager.h (diskfs_disk_pager_requests): New variable.
* libpager/demuxer.c (struct pager_requests): Renamed struct requests to
struct pager_requests. Replaced queue with queue_in and queue_out
pointers. Added inhibit_wakeup field.
(pager_demuxer): Updated to use new queue_in/queue_out pointers. Only
wake up workers if not inhibited.
(worker_func): Updated to use new queue_in/queue_out pointers. Final
worker thread to sleep notifies the inhibit_wakeup condition variable.
(pager_start_workers): Added out parameter for the requests instance.
Allocate heap space shared by both queues. Initialise new inhibit_wakeup
condition.
(pager_inhibit_workers,pager_resume_workers): New functions.
* libpager/pager.h (struct pager_requests): Public forward definition.
(pager_start_workers): Added out parameter for the requests instance.
(pager_inhibit_workers,pager_resume_workers): New functions.
* libpager/queue.h (queue_empty): New function.
* storeio/pager.c (pager_requests): New variable.
(init_dev_paging): Updated call to pager_start_workers to use new
pager_requests variable.
---
 console/pager.c          |   3 +-
 daemons/runsystem.sh     |   3 --
 ext2fs/ext2fs.c          |  12 ++++-
 ext2fs/ext2fs.h          |   6 +++
 ext2fs/pager.c           |  29 +++++++++++-
 fatfs/fatfs.h            |   2 +
 fatfs/pager.c            |  29 +++++++++++-
 libdiskfs/disk-pager.c   |   3 +-
 libdiskfs/diskfs-pager.h |   1 +
 libpager/demuxer.c       | 119 ++++++++++++++++++++++++++++++++++++++++-------
 libpager/pager.h         |  28 ++++++++++-
 libpager/queue.h         |   8 ++++
 storeio/pager.c          |   3 +-
 13 files changed, 219 insertions(+), 27 deletions(-)

diff --git a/console/pager.c b/console/pager.c
index 5e13ba4..818e49d 100644
--- a/console/pager.c
+++ b/console/pager.c
@@ -42,6 +42,7 @@ struct user_pager_info
 
 /* We need a separate bucket for the pager ports.  */
 static struct port_bucket *pager_bucket;
+static struct pager_requests *pager_requests;
 
 
 /* Implement the pager_clear_user_data callback from the pager library. */
@@ -133,7 +134,7 @@ user_pager_init (void)
     error (5, errno, "Cannot create pager bucket");
 
   /* Start libpagers worker threads.  */
-  err = pager_start_workers (pager_bucket);
+  err = pager_start_workers (pager_bucket, &pager_requests);
   if (err)
     error (5, err, "Cannot start pager worker threads");
 }
diff --git a/daemons/runsystem.sh b/daemons/runsystem.sh
index ae25a7d..5d0ad01 100644
--- a/daemons/runsystem.sh
+++ b/daemons/runsystem.sh
@@ -118,9 +118,6 @@ esac
 /hurd/mach-defpager
 
 # This is necessary to make stat / return the correct device ids.
-# Work around a race condition (probably in the root translator).
-for i in `seq 1 100000` ; do : ; done # XXX
-
 fsysopts / --update --readonly
 
 # Finally, start the actual init.
diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c
index d0fdfe7..03c9eed 100644
--- a/ext2fs/ext2fs.c
+++ b/ext2fs/ext2fs.c
@@ -207,10 +207,20 @@ main (int argc, char **argv)
 error_t
 diskfs_reload_global_state ()
 {
+  error_t err;
+
   pokel_flush (&global_pokel);
   pager_flush (diskfs_disk_pager, 1);
-  sblock = NULL;
+
+  /* libdiskfs is not responsible for inhibiting paging.  */
+  err = inhibit_ext2_pager ();
+  if (err)
+    return err;
+
   get_hypermetadata ();
   map_hypermetadata ();
+
+  resume_ext2_pager ();
+
   return 0;
 }
diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h
index 96d8e9d..a744685 100644
--- a/ext2fs/ext2fs.h
+++ b/ext2fs/ext2fs.h
@@ -201,6 +201,12 @@ struct user_pager_info
 /* Set up the disk pager.  */
 void create_disk_pager (void);
 
+/* Inhibit the disk pager.  */
+error_t inhibit_ext2_pager (void);
+
+/* Resume the disk pager.  */
+void resume_ext2_pager (void);
+
 /* Call this when we should turn off caching so that unused memory object
    ports get freed.  */
 void drop_pager_softrefs (struct node *node);
diff --git a/ext2fs/pager.c b/ext2fs/pager.c
index b56c923..c200f30 100644
--- a/ext2fs/pager.c
+++ b/ext2fs/pager.c
@@ -34,6 +34,10 @@ struct port_bucket *disk_pager_bucket;
 /* A ports bucket to hold file pager ports.  */
 struct port_bucket *file_pager_bucket;
 
+/* Stores a reference to the requests instance used by the file pager so its
+   worker threads can be inhibited and resumed.  */
+struct pager_requests *file_pager_requests;
+
 pthread_spinlock_t node_to_page_lock = PTHREAD_SPINLOCK_INITIALIZER;
 
 
@@ -1217,11 +1221,34 @@ create_disk_pager (void)
   file_pager_bucket = ports_create_bucket ();
 
   /* Start libpagers worker threads.  */
-  err = pager_start_workers (file_pager_bucket);
+  err = pager_start_workers (file_pager_bucket, &file_pager_requests);
   if (err)
     ext2_panic ("can't create libpager worker threads: %s", strerror (err));
 }
 
+error_t
+inhibit_ext2_pager (void)
+{
+  error_t err;
+
+  /* The file pager can rely on the disk pager, so inhibit the file
+     pager first.  */
+
+  err = pager_inhibit_workers (file_pager_requests);
+  if (err)
+    return err;
+
+  err = pager_inhibit_workers (diskfs_disk_pager_requests);
+  return err;
+}
+
+void
+resume_ext2_pager (void)
+{
+  pager_resume_workers (diskfs_disk_pager_requests);
+  pager_resume_workers (file_pager_requests);
+}
+
 /* Call this to create a FILE_DATA pager and return a send right.
    NODE must be locked.  */
 mach_port_t
diff --git a/fatfs/fatfs.h b/fatfs/fatfs.h
index 3c3d836..54c3426 100644
--- a/fatfs/fatfs.h
+++ b/fatfs/fatfs.h
@@ -121,6 +121,8 @@ extern struct dirrect dr_root_node;
 void drop_pager_softrefs (struct node *);
 void allow_pager_softrefs (struct node *);
 void create_fat_pager (void);
+error_t inhibit_fat_pager (void);
+void resume_fat_pager (void);
 
 void flush_node_pager (struct node *node);
 
diff --git a/fatfs/pager.c b/fatfs/pager.c
index 10d1fc9..a4f70d8 100644
--- a/fatfs/pager.c
+++ b/fatfs/pager.c
@@ -29,6 +29,10 @@ struct port_bucket *disk_pager_bucket;
 /* A ports bucket to hold file pager ports.  */
 struct port_bucket *file_pager_bucket;
 
+/* Stores a reference to the requests instance used by the file pager so its
+   worker threads can be inhibited and resumed.  */
+struct pager_requests *file_pager_requests;
+
 /* Mapped image of the FAT.  */
 void *fat_image;
 
@@ -776,11 +780,34 @@ create_fat_pager (void)
   file_pager_bucket = ports_create_bucket ();
 
   /* Start libpagers worker threads.  */
-  err = pager_start_workers (file_pager_bucket);
+  err = pager_start_workers (file_pager_bucket, &file_pager_requests);
   if (err)
     error (2, err, "can't create libpager worker threads");
 }
 
+error_t
+inhibit_fat_pager (void)
+{
+  error_t err;
+
+  /* The file pager can rely on the disk pager, so inhibit the file
+     pager first.  */
+
+  err = pager_inhibit_workers (file_pager_requests);
+  if (err)
+    return err;
+
+  err = pager_inhibit_workers (diskfs_disk_pager_requests);
+  return err;
+}
+
+void
+resume_fat_pager (void)
+{
+  pager_resume_workers (diskfs_disk_pager_requests);
+  pager_resume_workers (file_pager_requests);
+}
+
 /* Call this to create a FILE_DATA pager and return a send right.
    NODE must be locked.  */
 mach_port_t
diff --git a/libdiskfs/disk-pager.c b/libdiskfs/disk-pager.c
index 008aa2d..33b109c 100644
--- a/libdiskfs/disk-pager.c
+++ b/libdiskfs/disk-pager.c
@@ -24,6 +24,7 @@
 __thread struct disk_image_user *diskfs_exception_diu;
 
 struct pager *diskfs_disk_pager;
+struct requests *diskfs_disk_pager_requests;
 
 static void fault_handler (int sig, long int sigcode, struct sigcontext *scp);
 static struct hurd_signal_preemptor preemptor =
@@ -43,7 +44,7 @@ diskfs_start_disk_pager (struct user_pager_info *upi,
   mach_port_t disk_pager_port;
 
   /* Start libpagers worker threads.  */
-  err = pager_start_workers (pager_bucket);
+  err = pager_start_workers (pager_bucket, &diskfs_disk_pager_requests);
   if (err)
     error (2, err, "creating pager worker threads failed");
 
diff --git a/libdiskfs/diskfs-pager.h b/libdiskfs/diskfs-pager.h
index a253069..db99f9ff 100644
--- a/libdiskfs/diskfs-pager.h
+++ b/libdiskfs/diskfs-pager.h
@@ -40,6 +40,7 @@ extern void diskfs_start_disk_pager (struct user_pager_info 
*info,
                                     size_t size, void **image);
 
 extern struct pager *diskfs_disk_pager;
+extern struct requests *diskfs_disk_pager_requests;
 
 struct disk_image_user
   {
diff --git a/libpager/demuxer.c b/libpager/demuxer.c
index 4dd3cd8..59dd1c5 100644
--- a/libpager/demuxer.c
+++ b/libpager/demuxer.c
@@ -60,7 +60,7 @@ request_inp (const struct request *r)
 /* A worker.  */
 struct worker
 {
-  struct requests *requests;   /* our pagers request queue */
+  struct pager_requests *requests;     /* our pagers request queue */
   struct queue queue;  /* other workers may delegate requests to us */
   unsigned long tag;   /* tag of the object we are working on */
 };
@@ -68,12 +68,18 @@ struct worker
 /* This is the queue for incoming requests.  A single thread receives
    messages from the port set, looks the service routine up, and
    enqueues the request here.  */
-struct requests
+struct pager_requests
 {
   struct port_bucket *bucket;
-  struct queue queue;
+  /* Normally, both queues are the same.  However, when the workers are
+     inhibited, a new queue_in is created, but queue_out is left as the
+     old value, so the workers drain queue_out but do not receive new
+     requests.  */
+  struct queue *queue_in;      /* the queue to add to */
+  struct queue *queue_out;     /* the queue to take from */
   int asleep;
   pthread_cond_t wakeup;
+  pthread_cond_t inhibit_wakeup;
   pthread_mutex_t lock;
   struct worker workers[WORKER_COUNT];
 };
@@ -81,7 +87,7 @@ struct requests
 /* Demultiplex a single message directed at a pager port; INP is the
    message received; fill OUTP with the reply.  */
 static int
-pager_demuxer (struct requests *requests,
+pager_demuxer (struct pager_requests *requests,
               mach_msg_header_t *inp,
               mach_msg_header_t *outp)
 {
@@ -108,10 +114,10 @@ pager_demuxer (struct requests *requests,
 
   pthread_mutex_lock (&requests->lock);
 
-  queue_enqueue (&requests->queue, &r->item);
+  queue_enqueue (requests->queue_in, &r->item);
 
-  /* Awake worker.  */
-  if (requests->asleep > 0)
+  /* Awake worker, but only if not inhibited.  */
+  if (requests->asleep > 0 && requests->queue_in == requests->queue_out)
       pthread_cond_signal (&requests->wakeup);
 
   pthread_mutex_unlock (&requests->lock);
@@ -160,7 +166,7 @@ static void *
 worker_func (void *arg)
 {
   struct worker *self = (struct worker *) arg;
-  struct requests *requests = self->requests;
+  struct pager_requests *requests = self->requests;
   struct request *r = NULL;
   mig_reply_header_t reply_msg;
 
@@ -186,9 +192,11 @@ worker_func (void *arg)
 
     get_request_locked:
       /* ... get a request from the global queue instead.  */
-      while ((r = queue_dequeue (&requests->queue)) == NULL)
+      while ((r = queue_dequeue (requests->queue_out)) == NULL)
        {
          requests->asleep += 1;
+         if (requests->asleep == WORKER_COUNT)
+           pthread_cond_broadcast (&requests->inhibit_wakeup);
          pthread_cond_wait (&requests->wakeup, &requests->lock);
          requests->asleep -= 1;
        }
@@ -281,7 +289,7 @@ worker_func (void *arg)
 static void *
 service_paging_requests (void *arg)
 {
-  struct requests *requests = arg;
+  struct pager_requests *requests = arg;
 
   int demuxer (mach_msg_header_t *inp,
               mach_msg_header_t *outp)
@@ -298,27 +306,44 @@ service_paging_requests (void *arg)
 
 /* Start the worker threads libpager uses to service requests.  */
 error_t
-pager_start_workers (struct port_bucket *pager_bucket)
+pager_start_workers (struct port_bucket *pager_bucket,
+                    struct pager_requests **out_requests)
 {
   error_t err;
   int i;
   pthread_t t;
-  struct requests *requests;
+  struct pager_requests *requests;
+
+  assert (out_requests != NULL);
 
   requests = malloc (sizeof *requests);
   if (requests == NULL)
-    return ENOMEM;
+    {
+      err = ENOMEM;
+      goto done;
+    }
 
   requests->bucket = pager_bucket;
   requests->asleep = 0;
-  queue_init (&requests->queue);
+
+  requests->queue_in = malloc (sizeof *requests->queue_in);
+  if (requests->queue_in == NULL)
+    {
+      err = ENOMEM;
+      goto done;
+    }
+  queue_init (requests->queue_in);
+  /* Until the workers are inhibited, both queues are the same.  */
+  requests->queue_out = requests->queue_in;
+
   pthread_cond_init (&requests->wakeup, NULL);
+  pthread_cond_init (&requests->inhibit_wakeup, NULL);
   pthread_mutex_init (&requests->lock, NULL);
 
   /* Make a thread to service paging requests.  */
   err = pthread_create (&t, NULL, service_paging_requests, requests);
   if (err)
-    return err;
+    goto done;
   pthread_detach (t);
 
   for (i = 0; i < WORKER_COUNT; i++)
@@ -329,9 +354,71 @@ pager_start_workers (struct port_bucket *pager_bucket)
 
       err = pthread_create (&t, NULL, &worker_func, &requests->workers[i]);
       if (err)
-       return err;
+       goto done;
       pthread_detach (t);
     }
 
+done:
+  if (err)
+    *out_requests = NULL;
+  else
+    *out_requests = requests;
+
   return err;
 }
+
+error_t
+pager_inhibit_workers (struct pager_requests *requests)
+{
+  error_t err = 0;
+
+  pthread_mutex_lock (&requests->lock);
+
+  /* Check the workers are not already inhibited.  */
+  assert (requests->queue_out == requests->queue_in);
+
+  /* Any new paging requests will go into a new queue.  */
+  struct queue *new_queue = malloc (sizeof *new_queue);
+  if (new_queue == NULL)
+    {
+      err = ENOMEM;
+      goto done_locked;
+    }
+  queue_init (new_queue);
+  requests->queue_in = new_queue;
+
+  /* Wait until all the workers are asleep and the queue has been
+     drained. All individual worker queues must have been drained, as
+     they are populated while the relevant worker is still running, and
+     it will always drain its personal queue before sleeping.
+     Check that the queue is empty, since it's possible that a request
+     came in, was queued and a worker was signalled but the lock was
+     acquired here before the worker woke up.  */
+  while (requests->asleep < WORKER_COUNT || !queue_empty(requests->queue_out))
+    pthread_cond_wait (&requests->inhibit_wakeup, &requests->lock);
+
+done_locked:
+  pthread_mutex_unlock (&requests->lock);
+  return err;
+}
+
+void
+pager_resume_workers (struct pager_requests *requests)
+{
+  pthread_mutex_lock (&requests->lock);
+
+  /* Check the workers are inhibited.  */
+  assert (requests->queue_out != requests->queue_in);
+  assert (requests->asleep == WORKER_COUNT);
+  assert (queue_empty(requests->queue_out));
+
+  /* The queue has been drained and will no longer be used.  */
+  free (requests->queue_out);
+  requests->queue_out = requests->queue_in;
+
+  /* We need to wake up all workers, as there could be multiple requests
+     in the new queue.  */
+  pthread_cond_broadcast (&requests->wakeup);
+
+  pthread_mutex_unlock (&requests->lock);
+}
diff --git a/libpager/pager.h b/libpager/pager.h
index fe34238..df4db68 100644
--- a/libpager/pager.h
+++ b/libpager/pager.h
@@ -25,8 +25,32 @@
    scope.  */
 struct user_pager_info;
 
-/* Start the worker threads libpager uses to service requests.  */
-error_t pager_start_workers (struct port_bucket *pager_bucket);
+struct pager_requests;
+
+/* Start the worker threads libpager uses to service requests. If no
+   error is returned, *requests will be a valid pointer, else it will be
+   set to NULL.  */
+error_t
+pager_start_workers (struct port_bucket *pager_bucket,
+                    struct pager_requests **requests);
+
+/* Inhibit the worker threads libpager uses to service requests,
+   blocking until all requests sent before this function is called have
+   finished.
+   Note that RPCs will not be inhibited, so new requests will
+   queue up, but will not be handled until the workers are resumed. If
+   RPCs should be inhibited as well, call ports_inhibit_bucket_rpcs with
+   the bucket used to create the workers before calling this. However,
+   inhibiting RPCs and not calling this is generally insufficient, as
+   libports is unaware of our internal worker pool, and will return once
+   all the RPCs have been queued, before they have been handled by a
+   worker thread.  */
+error_t
+pager_inhibit_workers (struct pager_requests *requests);
+
+/* Resume the worker threads libpager uses to service requests.  */
+void
+pager_resume_workers (struct pager_requests *requests);
 
 /* Create a new pager.  The pager will have a port created for it
    (using libports, in BUCKET) and will be immediately ready
diff --git a/libpager/queue.h b/libpager/queue.h
index d3cf738..abcd3b9 100644
--- a/libpager/queue.h
+++ b/libpager/queue.h
@@ -19,6 +19,8 @@
    You should have received a copy of the GNU General Public License
    along with the GNU Hurd.  If not, see <http://www.gnu.org/licenses/>.  */
 
+#include <stdbool.h>
+
 /* A FIFO queue with constant-time enqueue and dequeue operations.  */
 struct item {
   struct item *next;
@@ -59,3 +61,9 @@ queue_dequeue (struct queue *q)
   r->next = NULL;
   return r;
 }
+
+static inline bool
+queue_empty (struct queue *q)
+{
+  return q->head == NULL;
+}
diff --git a/storeio/pager.c b/storeio/pager.c
index c260d73..f8f59cd 100644
--- a/storeio/pager.c
+++ b/storeio/pager.c
@@ -142,6 +142,7 @@ pager_clear_user_data (struct user_pager_info *upi)
 }
 
 static struct port_bucket *pager_port_bucket = 0;
+static struct pager_requests *pager_requests;
 
 /* Initialize paging for this device.  */
 static void
@@ -160,7 +161,7 @@ init_dev_paging ()
          pager_port_bucket = ports_create_bucket ();
 
          /* Start libpagers worker threads.  */
-         err = pager_start_workers (pager_port_bucket);
+         err = pager_start_workers (pager_port_bucket, &pager_requests);
          if (err)
            {
              errno = err;
-- 
2.4.6




reply via email to

[Prev in Thread] Current Thread [Next in Thread]