Re: [PATCH] WIP: Fix ext2fs remount race condition

2015-07-22 Thread Justus Winter
Hi James :)

a few nitpicks upfront:

Quoting James Clarke (2015-07-22 02:46:48)
 --- a/ext2fs/ext2fs.c
 +++ b/ext2fs/ext2fs.c
 @@ -207,10 +207,28 @@ main (int argc, char **argv)
  error_t
  diskfs_reload_global_state ()
  {
 +  error_t err;
 +
pokel_flush (global_pokel);
pager_flush (diskfs_disk_pager, 1);
 +
 +  /* Paging must specifically be inhibited; if not, a paging request
 + could be handled while sblock is still NULL.
 + While some RPCs are inhibited when this function is called by
 + libdiskfs, paging RPCs are still enabled. Even if we were to
 + inhibit paging RPCs, libpager has its own pool of workers to handle
 + requests asynchronously, which libports is unaware of, so requests
 + can be handled even after the relevant RPCs are disabled.  This is
 + all dealt with by {inhibit,resume}_disk_pager.  */
 +  err = inhibit_disk_pager ();

I don't like that name, it implies that merely the disk pager is
inhibited.

 +  if (err)
 +return err;
 +
sblock = NULL;

Please remove that as discussed.  Nulling sblock merely prevents it
from being munmapped.

 --- a/fatfs/fatfs.h
 +++ b/fatfs/fatfs.h

Thanks for looking after fatfs.  I usually mimick all changes that I'm
doing to ext2fs.  Note however, that fatfs is overall in a bad shape,
to the point that I don't expect it to work, even though it kindof
does for some reason.

  /* Start the worker threads libpager uses to service requests.  */
  error_t
 -pager_start_workers (struct port_bucket *pager_bucket)
 +pager_start_workers (struct requests **out_requests,
 +struct port_bucket *pager_bucket)

It might be a matter of taste, but I would have added the out
parameter at the end of the parameter list.  Not sure how this is
usually done in the Hurd codebase tbh.

  {
error_t err;
int i;
pthread_t t;
struct requests *requests;
  
 +  if (out_requests == NULL)
 +/* Return rather than using goto done, since that would dereference
 +   out_requests.  */
 +return EINVAL;
 +

If at all, then `assert (out_requests);'.

 +error_t
 +pager_inhibit_workers (struct requests *requests)
 +{
 +  error_t err;
 +
 +  pthread_mutex_lock (requests-lock);
 +
 +  /* Check the workers are not already inhibited nor in the process of
 + being inhibited, and only create a new queue if necessary;
 + otherwise the queued requests would be discarded, and queue_out
 + would be leaked.  */
 +  if (requests-queue_out == requests-queue_in)

Likewise, `assert (requests-queue_out == requests-queue_in);'.
Calling inhibit twice is a breach of protocol.

You might need to serialize the diskfs_reload_global_state function,
but that should reduce the complexity

 +{
 +  /* Any new paging requests will go into a new queue.  */
 +  struct queue *new_queue = malloc (sizeof *new_queue);
 +  if (new_queue == NULL)
 +   {
 + err = ENOMEM;
 + goto done_locked;
 +   }
 +  queue_init (new_queue);
 +  requests-queue_in = new_queue;
 +}
 +
 +  /* Wait until all the workers are asleep, as then the old queue and
 + all individual worker queues have been drained.  */
 +  while (requests-asleep  WORKER_COUNT)
 +pthread_cond_wait (requests-inhibit_wakeup, requests-lock);
 +
 +  pthread_cond_broadcast (requests-resume_wakeup);

... here, trying to deal with pager_resume_workers being called before
pager_inhibit_workers is finished.  That's not a valid use case imho.

 +
 +done_locked:
 +  pthread_mutex_unlock (requests-lock);
 +  return err;
 +}
 +
 +void
 +pager_resume_workers (struct requests *requests)
 +{
 +  pthread_mutex_lock (requests-lock);
 +
 +  /* Check the workers are inhibited/being inhibited.  */
 +  if (requests-queue_out != requests-queue_in)

Assert that.

 +{
 +  /* If the inhibiting has not yet finished (the old queue has not
 +drained), wait for it to do so.  */
 +  while (requests-asleep  WORKER_COUNT)

Assert that.

 +   pthread_cond_wait (requests-resume_wakeup, requests-lock);
 +
 +  /* Another resume may have run in the meantime, in which case the
 +old queue has already been freed, so queue_out should not be
 +freed and updated to be queue_in.  */
 +  if (requests-queue_out != requests-queue_in)
 +   {
 + /* The queue has been drained and will no longer be used.  */
 + free (requests-queue_out);
 + requests-queue_out = requests-queue_in;
 + /* We need to wake up all workers, as there could be multiple
 +requests in the new queue.  */
 + pthread_cond_broadcast (requests-wakeup);
 +   }
 +}
 +
 +  pthread_mutex_unlock (requests-lock);
 +}

Many thanks :)
Justus



[PATCH] WIP: Fix ext2fs remount race condition

2015-07-21 Thread James Clarke
---
 ext2fs/ext2fs.c  |  18 +++
 ext2fs/ext2fs.h  |   6 +++
 ext2fs/pager.c   |  35 +-
 fatfs/fatfs.h|   2 +
 fatfs/pager.c|  35 +-
 libdiskfs/disk-pager.c   |   3 +-
 libdiskfs/diskfs-pager.h |   1 +
 libpager/demuxer.c   | 122 +++
 libpager/pager.h |  23 -
 9 files changed, 230 insertions(+), 15 deletions(-)

diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c
index d0fdfe7..200c210 100644
--- a/ext2fs/ext2fs.c
+++ b/ext2fs/ext2fs.c
@@ -207,10 +207,28 @@ main (int argc, char **argv)
 error_t
 diskfs_reload_global_state ()
 {
+  error_t err;
+
   pokel_flush (global_pokel);
   pager_flush (diskfs_disk_pager, 1);
+
+  /* Paging must specifically be inhibited; if not, a paging request
+ could be handled while sblock is still NULL.
+ While some RPCs are inhibited when this function is called by
+ libdiskfs, paging RPCs are still enabled. Even if we were to
+ inhibit paging RPCs, libpager has its own pool of workers to handle
+ requests asynchronously, which libports is unaware of, so requests
+ can be handled even after the relevant RPCs are disabled.  This is
+ all dealt with by {inhibit,resume}_disk_pager.  */
+  err = inhibit_disk_pager ();
+  if (err)
+return err;
+
   sblock = NULL;
   get_hypermetadata ();
   map_hypermetadata ();
+
+  resume_disk_pager ();
+
   return 0;
 }
diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h
index 96d8e9d..d5f400e 100644
--- a/ext2fs/ext2fs.h
+++ b/ext2fs/ext2fs.h
@@ -201,6 +201,12 @@ struct user_pager_info
 /* Set up the disk pager.  */
 void create_disk_pager (void);
 
+/* Inhibit the disk pager.  */
+error_t inhibit_disk_pager (void);
+
+/* Resume the disk pager.  */
+void resume_disk_pager (void);
+
 /* Call this when we should turn off caching so that unused memory object
ports get freed.  */
 void drop_pager_softrefs (struct node *node);
diff --git a/ext2fs/pager.c b/ext2fs/pager.c
index b56c923..f41107f 100644
--- a/ext2fs/pager.c
+++ b/ext2fs/pager.c
@@ -34,6 +34,10 @@ struct port_bucket *disk_pager_bucket;
 /* A ports bucket to hold file pager ports.  */
 struct port_bucket *file_pager_bucket;
 
+/* Stores a reference to the requests instance used by the file pager so its
+   worker threads can be inhibited and resumed.  */
+struct requests *file_pager_requests;
+
 pthread_spinlock_t node_to_page_lock = PTHREAD_SPINLOCK_INITIALIZER;
 
 
@@ -1217,11 +1221,40 @@ create_disk_pager (void)
   file_pager_bucket = ports_create_bucket ();
 
   /* Start libpagers worker threads.  */
-  err = pager_start_workers (file_pager_bucket);
+  err = pager_start_workers (file_pager_requests, file_pager_bucket);
   if (err)
 ext2_panic (can't create libpager worker threads: %s, strerror (err));
 }
 
+error_t
+inhibit_disk_pager (void)
+{
+  error_t err;
+
+  /* Inhibiting RPCs is not sufficient, nor is it in fact necessary.
+ Since libpager has its own pool of workers, requests can still be
+ handled after RPCs have been inhibited, so pager_inhibit_workers
+ must be used. In fact, RPCs will not be inhibited; the requests
+ will just queue up inside libpager, and will be handled once the
+ workers are resumed.
+ The file pager can rely on the disk pager, so inhibit the file
+ pager first.  */
+
+  err = pager_inhibit_workers (file_pager_requests);
+  if (err)
+return err;
+
+  err = pager_inhibit_workers (diskfs_disk_pager_requests);
+  return err;
+}
+
+void
+resume_disk_pager (void)
+{
+  pager_resume_workers (diskfs_disk_pager_requests);
+  pager_resume_workers (file_pager_requests);
+}
+
 /* Call this to create a FILE_DATA pager and return a send right.
NODE must be locked.  */
 mach_port_t
diff --git a/fatfs/fatfs.h b/fatfs/fatfs.h
index 3c3d836..54c3426 100644
--- a/fatfs/fatfs.h
+++ b/fatfs/fatfs.h
@@ -121,6 +121,8 @@ extern struct dirrect dr_root_node;
 void drop_pager_softrefs (struct node *);
 void allow_pager_softrefs (struct node *);
 void create_fat_pager (void);
+error_t inhibit_fat_pager (void);
+void resume_fat_pager (void);
 
 void flush_node_pager (struct node *node);
 
diff --git a/fatfs/pager.c b/fatfs/pager.c
index 10d1fc9..3d860d1 100644
--- a/fatfs/pager.c
+++ b/fatfs/pager.c
@@ -29,6 +29,10 @@ struct port_bucket *disk_pager_bucket;
 /* A ports bucket to hold file pager ports.  */
 struct port_bucket *file_pager_bucket;
 
+/* Stores a reference to the requests instance used by the file pager so its
+   worker threads can be inhibited and resumed.  */
+struct requests *file_pager_requests;
+
 /* Mapped image of the FAT.  */
 void *fat_image;
 
@@ -776,11 +780,40 @@ create_fat_pager (void)
   file_pager_bucket = ports_create_bucket ();
 
   /* Start libpagers worker threads.  */
-  err = pager_start_workers (file_pager_bucket);
+  err = pager_start_workers (file_pager_requests, file_pager_bucket);
   if (err)
 error (2, err, can't create libpager