[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 026/104] virtiofsd: Fast path for virtio read
From: Dr. David Alan Gilbert
Subject: Re: [PATCH 026/104] virtiofsd: Fast path for virtio read
Date: Mon, 20 Jan 2020 12:32:04 +0000
User-agent: Mutt/1.13.0 (2019-11-30)
* Masayoshi Mizuma (address@hidden) wrote:
> On Thu, Dec 12, 2019 at 04:37:46PM +0000, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <address@hidden>
> >
> > Readv the data straight into the guests buffer.
> >
> > Signed-off-by: Dr. David Alan Gilbert <address@hidden>
> > With fix by:
> > Signed-off-by: Eryu Guan <address@hidden>
> > ---
> > tools/virtiofsd/fuse_lowlevel.c | 5 +
> > tools/virtiofsd/fuse_virtio.c | 159 ++++++++++++++++++++++++++++++++
> > tools/virtiofsd/fuse_virtio.h | 4 +
> > 3 files changed, 168 insertions(+)
> >
> > diff --git a/tools/virtiofsd/fuse_lowlevel.c
> > b/tools/virtiofsd/fuse_lowlevel.c
> > index c2b114cf5b..5f80625652 100644
> > --- a/tools/virtiofsd/fuse_lowlevel.c
> > +++ b/tools/virtiofsd/fuse_lowlevel.c
> > @@ -475,6 +475,11 @@ static int fuse_send_data_iov_fallback(struct
> > fuse_session *se,
> > return fuse_send_msg(se, ch, iov, iov_count);
> > }
> >
> > + if (fuse_lowlevel_is_virtio(se) && buf->count == 1 &&
> > + buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) {
> > + return virtio_send_data_iov(se, ch, iov, iov_count, buf, len);
> > + }
> > +
> > abort(); /* Will have taken vhost path */
> > return 0;
> > }
> > diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
> > index c33e0f7e8c..146cd3f702 100644
> > --- a/tools/virtiofsd/fuse_virtio.c
> > +++ b/tools/virtiofsd/fuse_virtio.c
> > @@ -230,6 +230,165 @@ err:
> > return ret;
> > }
> >
> > +/*
> > + * Callback from fuse_send_data_iov_* when it's virtio and the buffer
> > + * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK
> > + * We need send the iov and then the buffer.
> > + * Return 0 on success
> > + */
> > +int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
> > + struct iovec *iov, int count, struct fuse_bufvec
> > *buf,
> > + size_t len)
> > +{
> > + int ret = 0;
> > + VuVirtqElement *elem;
> > + VuVirtq *q;
> > +
> > + assert(count >= 1);
> > + assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
> > +
> > + struct fuse_out_header *out = iov[0].iov_base;
> > + /* TODO: Endianness! */
> > +
> > + size_t iov_len = iov_size(iov, count);
> > + size_t tosend_len = iov_len + len;
> > +
> > + out->len = tosend_len;
> > +
> > + fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n",
> > __func__,
> > + count, len, iov_len);
> > +
> > + /* unique == 0 is notification which we don't support */
> > + assert(out->unique);
> > +
> > + /* For virtio we always have ch */
> > + assert(ch);
> > + assert(!ch->qi->reply_sent);
> > + elem = ch->qi->qe;
> > + q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx];
> > +
> > + /* The 'in' part of the elem is to qemu */
> > + unsigned int in_num = elem->in_num;
> > + struct iovec *in_sg = elem->in_sg;
> > + size_t in_len = iov_size(in_sg, in_num);
> > + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length
> > %zd\n",
> > + __func__, elem->index, in_num, in_len);
> > +
> > + /*
> > + * The elem should have room for a 'fuse_out_header' (out from fuse)
> > + * plus the data based on the len in the header.
> > + */
> > + if (in_len < sizeof(struct fuse_out_header)) {
> > + fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
> > + __func__, elem->index);
>
> > + ret = -E2BIG;
>
> The ret should be positive value, right?
>
> ret = E2BIG;
Yes, I think so.
> > + goto err;
> > + }
> > + if (in_len < tosend_len) {
> > + fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
> > + __func__, elem->index, tosend_len);
>
> > + ret = -E2BIG;
>
> ret = E2BIG;
>
> > + goto err;
> > + }
> > +
> > + /* TODO: Limit to 'len' */
> > +
> > + /* First copy the header data from iov->in_sg */
> > + copy_iov(iov, count, in_sg, in_num, iov_len);
> > +
> > + /*
> > + * Build a copy of the the in_sg iov so we can skip bits in it,
> > + * including changing the offsets
> > + */
>
> > + struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num);
>
> assert(in_sg_cpy) should be here? in case calloc() fails...
Thanks, added.
> > + memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
> > + /* These get updated as we skip */
> > + struct iovec *in_sg_ptr = in_sg_cpy;
> > + int in_sg_cpy_count = in_num;
> > +
> > + /* skip over parts of in_sg that contained the header iov */
> > + size_t skip_size = iov_len;
> > +
> > + size_t in_sg_left = 0;
> > + do {
> > + while (skip_size != 0 && in_sg_cpy_count) {
> > + if (skip_size >= in_sg_ptr[0].iov_len) {
> > + skip_size -= in_sg_ptr[0].iov_len;
> > + in_sg_ptr++;
> > + in_sg_cpy_count--;
> > + } else {
> > + in_sg_ptr[0].iov_len -= skip_size;
> > + in_sg_ptr[0].iov_base += skip_size;
> > + break;
> > + }
> > + }
> > +
> > + int i;
> > + for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) {
> > + in_sg_left += in_sg_ptr[i].iov_len;
> > + }
> > + fuse_log(FUSE_LOG_DEBUG,
> > + "%s: after skip skip_size=%zd in_sg_cpy_count=%d "
> > + "in_sg_left=%zd\n",
> > + __func__, skip_size, in_sg_cpy_count, in_sg_left);
> > + ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count,
> > + buf->buf[0].pos);
> > +
>
> > + fuse_log(FUSE_LOG_DEBUG, "%s: preadv_res=%d(%m) len=%zd\n",
> > + __func__, ret, len);
>
> "%m" should be removed? because it may show the previous errno even if
> preadv()
> succeeds. For example:
>
> [ID: 00000079] virtio_send_data_iov: after skip skip_size=0 in_sg_cpy_count=1
> in_sg_left=65536
> [ID: 00000079] virtio_send_data_iov: preadv_res=16000(No such file or
> directory) len=65536
I think there's another problem: fuse_log might corrupt errno, so
we would return a bad errno below it.
So I'll split it into two separate fuse_log calls - one inside the
(ret == -1) block with the %m, and one after it without the %m.
> Otherwise, looks good to me:
>
> Reviewed-by: Masayoshi Mizuma <address@hidden>
Thanks
>
> Thanks,
> Masa
>
> > + if (ret == -1) {
> > + ret = errno;
> > + free(in_sg_cpy);
> > + goto err;
> > + }
> > + if (ret < len && ret) {
> > + fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__);
> > + /* Skip over this much next time around */
> > + skip_size = ret;
> > + buf->buf[0].pos += ret;
> > + len -= ret;
> > +
> > + /* Lets do another read */
> > + continue;
> > + }
> > + if (!ret) {
> > + /* EOF case? */
> > + fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__,
> > + in_sg_left);
> > + break;
> > + }
> > + if (ret != len) {
> > + fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__);
> > + ret = EIO;
> > + free(in_sg_cpy);
> > + goto err;
> > + }
> > + in_sg_left -= ret;
> > + len -= ret;
> > + } while (in_sg_left);
> > + free(in_sg_cpy);
> > +
> > + /* Need to fix out->len on EOF */
> > + if (len) {
> > + struct fuse_out_header *out_sg = in_sg[0].iov_base;
> > +
> > + tosend_len -= len;
> > + out_sg->len = tosend_len;
> > + }
> > +
> > + ret = 0;
> > +
> > + vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len);
> > + vu_queue_notify(&se->virtio_dev->dev, q);
> > +
> > +err:
> > + if (ret == 0) {
> > + ch->qi->reply_sent = true;
> > + }
> > +
> > + return ret;
> > +}
> > +
> > /* Thread function for individual queues, created when a queue is
> > 'started' */
> > static void *fv_queue_thread(void *opaque)
> > {
> > diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h
> > index 135a14875a..cc676b9193 100644
> > --- a/tools/virtiofsd/fuse_virtio.h
> > +++ b/tools/virtiofsd/fuse_virtio.h
> > @@ -26,4 +26,8 @@ int virtio_loop(struct fuse_session *se);
> > int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
> > struct iovec *iov, int count);
> >
> > +int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
> > + struct iovec *iov, int count,
> > + struct fuse_bufvec *buf, size_t len);
> > +
> > #endif
> > --
> > 2.23.0
> >
> >
>
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK