qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 085/104] virtiofsd: Support remote posix locks


From: Masayoshi Mizuma
Subject: Re: [PATCH 085/104] virtiofsd: Support remote posix locks
Date: Wed, 15 Jan 2020 18:38:31 -0500

On Thu, Dec 12, 2019 at 04:38:45PM +0000, Dr. David Alan Gilbert (git) wrote:
> From: Vivek Goyal <address@hidden>
> 
> Doing posix locks within the guest kernel is not sufficient if a file/dir
> is being shared by multiple guests. So we need the notion of the daemon doing
> the locks, which are visible to the rest of the guests.
> 
> Given posix locks are per process, one cannot call the posix lock API on the
> host, otherwise a bunch of basic posix lock properties are broken. For example,
> if two processes (A and B) in the guest open the file and take locks on
> different sections of the file, and one of the processes closes the fd, it will
> close the fd on virtiofsd and all posix locks on the file will go away. This
> means if process A closes the fd, then the locks of process B will go away too.
> 
> Similar other problems exist too.
> 
> This patch set tries to emulate posix locks while using open file
> description locks provided on Linux.
> 
> Daemon provides two options (-o posix_lock, -o no_posix_lock) to enable
> or disable posix locking in daemon. By default it is enabled.
> 
> There are a few issues though.
> 
> - GETLK() returns the pid of the process holding the lock. As we are
>   emulating locks using OFD locks, which are not per process and don't
>   return the pid of a process, GETLK() in the guest does not return the
>   process pid.
> 
> - As of now only F_SETLK is supported and not F_SETLKW. We can't block
>   the thread in virtiofsd for an arbitrarily long duration as there is only
>   one thread serving the queue. That means unlock request will not make
>   it to daemon and F_SETLKW will block infinitely and bring virtio-fs
>   to a halt. This is a solvable problem though and will require significant
>   changes in virtiofsd and kernel. Left as a TODO item for now.
> 
> Signed-off-by: Vivek Goyal <address@hidden>
> ---
>  tools/virtiofsd/passthrough_ll.c | 190 +++++++++++++++++++++++++++++++
>  1 file changed, 190 insertions(+)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c 
> b/tools/virtiofsd/passthrough_ll.c
> index fbcc222860..fc79d5ac43 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -68,6 +68,13 @@
>  #include "seccomp.h"
>  
>  #define HAVE_POSIX_FALLOCATE 1
> +
> +/* Keep track of inode posix locks for each owner. */
> +struct lo_inode_plock {
> +    uint64_t lock_owner;
> +    int fd; /* fd for OFD locks */
> +};
> +
>  struct lo_map_elem {
>      union {
>          struct lo_inode *inode;
> @@ -96,6 +103,8 @@ struct lo_inode {
>      struct lo_key key;
>      uint64_t refcount; /* protected by lo->mutex */
>      fuse_ino_t fuse_ino;
> +    pthread_mutex_t plock_mutex;
> +    GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
>  };
>  
>  struct lo_cred {
> @@ -115,6 +124,7 @@ struct lo_data {
>      int norace;
>      int writeback;
>      int flock;
> +    int posix_lock;
>      int xattr;
>      const char *source;
>      double timeout;
> @@ -138,6 +148,8 @@ static const struct fuse_opt lo_opts[] = {
>      { "source=%s", offsetof(struct lo_data, source), 0 },
>      { "flock", offsetof(struct lo_data, flock), 1 },
>      { "no_flock", offsetof(struct lo_data, flock), 0 },
> +    { "posix_lock", offsetof(struct lo_data, posix_lock), 1 },
> +    { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 },
>      { "xattr", offsetof(struct lo_data, xattr), 1 },
>      { "no_xattr", offsetof(struct lo_data, xattr), 0 },
>      { "timeout=%lf", offsetof(struct lo_data, timeout), 0 },
> @@ -486,6 +498,17 @@ static void lo_init(void *userdata, struct 
> fuse_conn_info *conn)
>          fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
>          conn->want |= FUSE_CAP_FLOCK_LOCKS;
>      }
> +
> +    if (conn->capable & FUSE_CAP_POSIX_LOCKS) {
> +        if (lo->posix_lock) {
> +            fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n");
> +            conn->want |= FUSE_CAP_POSIX_LOCKS;
> +        } else {
> +            fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n");
> +            conn->want &= ~FUSE_CAP_POSIX_LOCKS;
> +        }
> +    }
> +
>      if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) ||
>          lo->readdirplus_clear) {
>          fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
> @@ -773,6 +796,19 @@ static struct lo_inode *lo_find(struct lo_data *lo, 
> struct stat *st)
>      return p;
>  }
>  
> +/* value_destroy_func for posix_locks GHashTable */
> +static void posix_locks_value_destroy(gpointer data)
> +{
> +    struct lo_inode_plock *plock = data;
> +
> +    /*
> +     * We had used open() for locks and had only one fd. So
> +     * closing this fd should release all OFD locks.
> +     */
> +    close(plock->fd);
> +    free(plock);
> +}
> +
>  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
>                          struct fuse_entry_param *e)
>  {
> @@ -826,6 +862,9 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t 
> parent, const char *name,
>          newfd = -1;
>          inode->key.ino = e->attr.st_ino;
>          inode->key.dev = e->attr.st_dev;
> +        pthread_mutex_init(&inode->plock_mutex, NULL);
> +        inode->posix_locks = g_hash_table_new_full(
> +            g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
>  
>          pthread_mutex_lock(&lo->mutex);
>          inode->fuse_ino = lo_add_inode_mapping(req, inode);
> @@ -1192,6 +1231,11 @@ static void unref_inode_lolocked(struct lo_data *lo, 
> struct lo_inode *inode,
>      if (!inode->refcount) {
>          lo_map_remove(&lo->ino_map, inode->fuse_ino);
>          g_hash_table_remove(lo->inodes, &inode->key);
> +        if (g_hash_table_size(inode->posix_locks)) {
> +            fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n");
> +        }
> +        g_hash_table_destroy(inode->posix_locks);
> +        pthread_mutex_destroy(&inode->plock_mutex);
>          pthread_mutex_unlock(&lo->mutex);
>          close(inode->fd);
>          free(inode);
> @@ -1548,6 +1592,136 @@ out:
>      }
>  }
>  
> +/* Should be called with inode->plock_mutex held */
> +static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
> +                                                      struct lo_inode *inode,
> +                                                      uint64_t lock_owner,
> +                                                      pid_t pid, int *err)
> +{
> +    struct lo_inode_plock *plock;
> +    char procname[64];
> +    int fd;
> +
> +    plock =
> +        g_hash_table_lookup(inode->posix_locks, 
> GUINT_TO_POINTER(lock_owner));
> +
> +    if (plock) {
> +        return plock;
> +    }
> +
> +    plock = malloc(sizeof(struct lo_inode_plock));
> +    if (!plock) {
> +        *err = ENOMEM;
> +        return NULL;
> +    }
> +
> +    /* Open another instance of file which can be used for ofd locks. */
> +    sprintf(procname, "%i", inode->fd);
> +
> +    /* TODO: What if file is not writable? */
> +    fd = openat(lo->proc_self_fd, procname, O_RDWR);
> +    if (fd == -1) {

> +        *err = -errno;

I think errno is a positive value, so the minus sign isn't needed?

           *err = errno;

Otherwise looks good to me.

Reviewed-by: Masayoshi Mizuma <address@hidden>

Thanks,
Masa

> +        free(plock);
> +        return NULL;
> +    }
> +
> +    plock->lock_owner = lock_owner;
> +    plock->fd = fd;
> +    g_hash_table_insert(inode->posix_locks, 
> GUINT_TO_POINTER(plock->lock_owner),
> +                        plock);
> +    return plock;
> +}
> +
> +static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info 
> *fi,
> +                     struct flock *lock)
> +{
> +    struct lo_data *lo = lo_data(req);
> +    struct lo_inode *inode;
> +    struct lo_inode_plock *plock;
> +    int ret, saverr = 0;
> +
> +    fuse_log(FUSE_LOG_DEBUG,
> +             "lo_getlk(ino=%" PRIu64 ", flags=%d)"
> +             " owner=0x%lx, l_type=%d l_start=0x%lx"
> +             " l_len=0x%lx\n",
> +             ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start,
> +             lock->l_len);
> +
> +    inode = lo_inode(req, ino);
> +    if (!inode) {
> +        fuse_reply_err(req, EBADF);
> +        return;
> +    }
> +
> +    pthread_mutex_lock(&inode->plock_mutex);
> +    plock =
> +        lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, 
> &ret);
> +    if (!plock) {
> +        pthread_mutex_unlock(&inode->plock_mutex);
> +        fuse_reply_err(req, ret);
> +        return;
> +    }
> +
> +    ret = fcntl(plock->fd, F_OFD_GETLK, lock);
> +    if (ret == -1) {
> +        saverr = errno;
> +    }
> +    pthread_mutex_unlock(&inode->plock_mutex);
> +
> +    if (saverr) {
> +        fuse_reply_err(req, saverr);
> +    } else {
> +        fuse_reply_lock(req, lock);
> +    }
> +}
> +
> +static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info 
> *fi,
> +                     struct flock *lock, int sleep)
> +{
> +    struct lo_data *lo = lo_data(req);
> +    struct lo_inode *inode;
> +    struct lo_inode_plock *plock;
> +    int ret, saverr = 0;
> +
> +    fuse_log(FUSE_LOG_DEBUG,
> +             "lo_setlk(ino=%" PRIu64 ", flags=%d)"
> +             " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d"
> +             " l_start=0x%lx l_len=0x%lx\n",
> +             ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, 
> sleep,
> +             lock->l_whence, lock->l_start, lock->l_len);
> +
> +    if (sleep) {
> +        fuse_reply_err(req, EOPNOTSUPP);
> +        return;
> +    }
> +
> +    inode = lo_inode(req, ino);
> +    if (!inode) {
> +        fuse_reply_err(req, EBADF);
> +        return;
> +    }
> +
> +    pthread_mutex_lock(&inode->plock_mutex);
> +    plock =
> +        lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, 
> &ret);
> +
> +    if (!plock) {
> +        pthread_mutex_unlock(&inode->plock_mutex);
> +        fuse_reply_err(req, ret);
> +        return;
> +    }
> +
> +    /* TODO: Is it alright to modify flock? */
> +    lock->l_pid = 0;
> +    ret = fcntl(plock->fd, F_OFD_SETLK, lock);
> +    if (ret == -1) {
> +        saverr = errno;
> +    }
> +    pthread_mutex_unlock(&inode->plock_mutex);
> +    fuse_reply_err(req, saverr);
> +}
> +
>  static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
>                          struct fuse_file_info *fi)
>  {
> @@ -1649,6 +1823,19 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, 
> struct fuse_file_info *fi)
>  {
>      int res;
>      (void)ino;
> +    struct lo_inode *inode;
> +
> +    inode = lo_inode(req, ino);
> +    if (!inode) {
> +        fuse_reply_err(req, EBADF);
> +        return;
> +    }
> +
> +    /* An fd is going away. Cleanup associated posix locks */
> +    pthread_mutex_lock(&inode->plock_mutex);
> +    g_hash_table_remove(inode->posix_locks, 
> GUINT_TO_POINTER(fi->lock_owner));
> +    pthread_mutex_unlock(&inode->plock_mutex);
> +
>      res = close(dup(lo_fi_fd(req, fi)));
>      fuse_reply_err(req, res == -1 ? errno : 0);
>  }
> @@ -2111,6 +2298,8 @@ static struct fuse_lowlevel_ops lo_oper = {
>      .releasedir = lo_releasedir,
>      .fsyncdir = lo_fsyncdir,
>      .create = lo_create,
> +    .getlk = lo_getlk,
> +    .setlk = lo_setlk,
>      .open = lo_open,
>      .release = lo_release,
>      .flush = lo_flush,
> @@ -2466,6 +2655,7 @@ int main(int argc, char *argv[])
>      struct lo_data lo = {
>          .debug = 0,
>          .writeback = 0,
> +        .posix_lock = 1,
>          .proc_self_fd = -1,
>      };
>      struct lo_map_elem *root_elem;
> -- 
> 2.23.0
> 
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]