[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functio
From: |
Elena Ufimtseva |
Subject: |
[PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functions |
Date: |
Sun, 18 Jul 2021 23:27:42 -0700 |
From: John G Johnson <john.g.johnson@oracle.com>
Add user.c and user.h files for vfio-user with the basic
send and receive functions.
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
hw/vfio/user.h | 120 ++++++++++++++
include/hw/vfio/vfio-common.h | 2 +
hw/vfio/user.c | 286 ++++++++++++++++++++++++++++++++++
MAINTAINERS | 4 +
hw/vfio/meson.build | 1 +
5 files changed, 413 insertions(+)
create mode 100644 hw/vfio/user.h
create mode 100644 hw/vfio/user.c
diff --git a/hw/vfio/user.h b/hw/vfio/user.h
new file mode 100644
index 0000000000..cdbc074579
--- /dev/null
+++ b/hw/vfio/user.h
@@ -0,0 +1,120 @@
+#ifndef VFIO_USER_H
+#define VFIO_USER_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Each message has a standard header that describes the command
+ * being sent, which is almost always a VFIO ioctl().
+ *
+ * The header may be followed by command-specific data, such as the
+ * region and offset info for read and write commands.
+ */
+
+/* commands, carried in the 'command' field of the message header */
+enum vfio_user_command {
+ VFIO_USER_VERSION = 1,
+ VFIO_USER_DMA_MAP = 2,
+ VFIO_USER_DMA_UNMAP = 3,
+ VFIO_USER_DEVICE_GET_INFO = 4,
+ VFIO_USER_DEVICE_GET_REGION_INFO = 5,
+ VFIO_USER_DEVICE_GET_REGION_IO_FDS = 6,
+ VFIO_USER_DEVICE_GET_IRQ_INFO = 7,
+ VFIO_USER_DEVICE_SET_IRQS = 8,
+ VFIO_USER_REGION_READ = 9,
+ VFIO_USER_REGION_WRITE = 10,
+ VFIO_USER_DMA_READ = 11,
+ VFIO_USER_DMA_WRITE = 12,
+ VFIO_USER_DEVICE_RESET = 13,
+ VFIO_USER_DIRTY_PAGES = 14,
+ VFIO_USER_MAX, /* one more than the highest command value above */
+};
+
+/* flags: bit values for the 'flags' field of the message header */
+#define VFIO_USER_REQUEST 0x0
+#define VFIO_USER_REPLY 0x1
+#define VFIO_USER_TYPE 0xF /* mask selecting the REQUEST/REPLY message type */
+
+#define VFIO_USER_NO_REPLY 0x10 /* on a request: no error reply is sent back */
+#define VFIO_USER_ERROR 0x20 /* set together with an error code in error_reply */
+
+typedef struct vfio_user_hdr { /* fixed header at the start of every message */
+ uint16_t id; /* used to match a reply to its pending request */
+ uint16_t command;
+ uint32_t size; /* total message size in bytes, this header included */
+ uint32_t flags; /* VFIO_USER_* flag bits */
+ uint32_t error_reply; /* errno-style code, valid with VFIO_USER_ERROR */
+} vfio_user_hdr_t;
+
+/*
+ * VFIO_USER_VERSION message: version numbers and message layout
+ */
+#define VFIO_USER_MAJOR_VER 0
+#define VFIO_USER_MINOR_VER 0
+
+struct vfio_user_version {
+ vfio_user_hdr_t hdr;
+ uint16_t major;
+ uint16_t minor;
+ char capabilities[]; /* variable-length tail; its format is not defined in this header */
+};
+
+#define VFIO_USER_DEF_MAX_FDS 8 /* NOTE(review): FD limits are not referenced in this patch */
+#define VFIO_USER_MAX_MAX_FDS 16
+
+#define VFIO_USER_DEF_MAX_XFER (1024 * 1024) /* 1 MiB; initial value of max_xfer_size in user.c */
+#define VFIO_USER_MAX_MAX_XFER (64 * 1024 * 1024) /* 64 MiB; NOTE(review): not referenced in this patch */
+
+typedef struct VFIOUserFDs { /* file descriptors passed along with a message */
+ int send_fds; /* number of entries in fds to send; 0 sends none */
+ int recv_fds; /* reply path: capacity of fds on entry, count received after */
+ int *fds; /* descriptor array */
+} VFIOUserFDs;
+
+typedef struct VFIOUserReply { /* bookkeeping for one request awaiting its reply */
+ QTAILQ_ENTRY(VFIOUserReply) next; /* linkage for the proxy's free/pending lists */
+ vfio_user_hdr_t *msg; /* buffer the reply is received into */
+ VFIOUserFDs *fds; /* destination for FDs in the reply; NULL if none expected */
+ int rsize; /* size of the msg buffer; larger replies are rejected */
+ uint32_t id; /* compared against the id of incoming replies */
+ QemuCond cv; /* signalled once complete is set */
+ uint8_t complete; /* set to 1 when the reply arrived or failed */
+} VFIOUserReply;
+
+enum proxy_state { /* values of VFIOProxy.state */
+ CONNECTED = 1, /* the only state in which messages are sent */
+ RECV_ERROR = 2, /* set by vfio_user_recv after a fatal receive error */
+ CLOSING = 3, /* vfio_user_recv returns without reading in this state */
+ CLOSED = 4, /* NOTE(review): not used in this patch */
+};
+
+typedef struct VFIOProxy { /* per-connection state for the vfio-user socket */
+ QLIST_ENTRY(VFIOProxy) next;
+ char *sockname;
+ struct QIOChannel *ioc; /* channel messages are read from and written to */
+ int (*request)(void *opaque, char *buf, VFIOUserFDs *fds); /* called for each incoming request */
+ void *reqarg; /* passed as 'opaque' to request() */
+ int flags; /* VFIO_PROXY_* bits */
+ QemuCond close_cv;
+
+ /*
+ * above only changed when iolock is held
+ * below are protected by per-proxy lock
+ */
+ QemuMutex lock;
+ QTAILQ_HEAD(, VFIOUserReply) free;
+ QTAILQ_HEAD(, VFIOUserReply) pending; /* requests whose reply has not arrived yet */
+ enum proxy_state state;
+ int close_wait;
+} VFIOProxy;
+
+#define VFIO_PROXY_CLIENT 0x1 /* VFIOProxy.flags: this end is the client */
+
+void vfio_user_recv(void *opaque); /* opaque is the VFIODevice that owns the proxy */
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret); /* ret > 0: reply size, ret < 0: -errno */
+#endif /* VFIO_USER_H */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 8af11b0a76..f43dc6e5d0 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
} VFIOAddressSpace;
struct VFIOGroup;
+typedef struct VFIOProxy VFIOProxy;
typedef struct VFIOContainer {
VFIOAddressSpace *space;
@@ -143,6 +144,7 @@ typedef struct VFIODevice {
VFIOMigration *migration;
Error *migration_blocker;
OnOffAuto pre_copy_dirty_page_tracking;
+ VFIOProxy *proxy;
} VFIODevice;
struct VFIODeviceOps {
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
new file mode 100644
index 0000000000..021d5540e0
--- /dev/null
+++ b/hw/vfio/user.c
@@ -0,0 +1,286 @@
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/main-loop.h"
+#include "hw/hw.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/vfio/vfio.h"
+#include "qemu/sockets.h"
+#include "io/channel.h"
+#include "io/channel-util.h"
+#include "sysemu/iothread.h"
+#include "user.h"
+
+static uint64_t max_xfer_size = VFIO_USER_DEF_MAX_XFER; /* largest request vfio_user_recv accepts */
+static IOThread *vfio_user_iothread; /* supplies the AioContext for the channel's fd handler; NOTE(review): never assigned in this patch */
+static void vfio_user_send_locked(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+ VFIOUserFDs *fds);
+static void vfio_user_send(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+ VFIOUserFDs *fds);
+static void vfio_user_shutdown(VFIOProxy *proxy);
+
+/*
+ * Stop receiving on the proxy's channel: shut down its read side, then
+ * install NULL handlers for it on the vfio-user iothread's AioContext.
+ */
+static void vfio_user_shutdown(VFIOProxy *proxy)
+{
+    AioContext *ctx;
+
+    qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);
+
+    ctx = iothread_get_aio_context(vfio_user_iothread);
+    qio_channel_set_aio_fd_handler(proxy->ioc, ctx, NULL, NULL, NULL);
+}
+
+/*
+ * Turn the request held in buf into its reply, in place, and send it.
+ *
+ * ret > 0: reply size in bytes (stored in the header's size field)
+ * ret < 0: negated error code; the reply is a bare header with
+ *          VFIO_USER_ERROR set and error_reply = -ret
+ * ret == 0: only the flags are rewritten, size is left as received
+ */
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret)
+{
+    vfio_user_hdr_t *reply = (vfio_user_hdr_t *)buf;
+    uint32_t flags = VFIO_USER_REPLY;
+
+    if (ret < 0) {
+        flags |= VFIO_USER_ERROR;
+        reply->error_reply = -ret;
+        reply->size = sizeof(*reply);
+    } else if (ret > 0) {
+        reply->size = ret;
+    }
+    reply->flags = flags;
+
+    vfio_user_send(proxy, reply, NULL);
+}
+
+/*
+ * Channel read handler: receive one message from the proxy's socket.
+ *
+ * Reads the fixed header (and any passed FDs), then the rest of the
+ * message.  A reply is matched by id against proxy->pending, received
+ * into the waiter's buffer, and the waiter is signalled.  A request is
+ * handed to proxy->request() with the iothread lock held; a negative
+ * return from the callback is sent back as an error reply unless the
+ * request carried VFIO_USER_NO_REPLY.
+ *
+ * opaque is the VFIODevice whose ->proxy owns the channel.
+ *
+ * "fatal" errors shut the channel down and move the proxy to RECV_ERROR;
+ * "err" errors only drop the current message.  In both cases received
+ * FDs are closed and a matched waiter is woken with VFIO_USER_ERROR and
+ * EINVAL set in its message.
+ */
+void vfio_user_recv(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOProxy *proxy = vbasedev->proxy;
+    VFIOUserReply *reply = NULL;
+    g_autofree int *fdp = NULL;
+    VFIOUserFDs reqfds = { 0, 0, NULL };
+    vfio_user_hdr_t msg;
+    struct iovec iov = {
+        .iov_base = &msg,
+        .iov_len = sizeof(msg),
+    };
+    int isreply, ret;
+    size_t i, msgleft, numfds = 0;
+    char *data = NULL;
+    g_autofree char *buf = NULL;
+    Error *local_err = NULL;
+
+    qemu_mutex_lock(&proxy->lock);
+    if (proxy->state == CLOSING) {
+        qemu_mutex_unlock(&proxy->lock);
+        return;
+    }
+
+    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds,
+                                 &local_err);
+    if (ret <= 0) {
+        /*
+         * read error or other side closed connection
+         *
+         * A failed read normally fills in local_err already, and setting
+         * an Error that is already set trips an assertion.  Only create
+         * one when nothing was reported (e.g. end of file, where errno
+         * carries no information about this read).
+         */
+        if (local_err == NULL) {
+            error_setg(&local_err, "vfio_user_recv read error");
+        }
+        goto fatal;
+    }
+
+    /*
+     * fdp is only valid after the read; point reqfds at it now so the
+     * request callback can see the descriptors counted in recv_fds.
+     */
+    reqfds.fds = fdp;
+
+    if ((size_t)ret < sizeof(msg)) {
+        error_setg(&local_err, "vfio_user_recv short read of header");
+        goto err;
+    }
+
+    /*
+     * For replies, find the matching pending request
+     */
+    switch (msg.flags & VFIO_USER_TYPE) {
+    case VFIO_USER_REQUEST:
+        isreply = 0;
+        break;
+    case VFIO_USER_REPLY:
+        isreply = 1;
+        break;
+    default:
+        error_setg(&local_err, "vfio_user_recv unknown message type");
+        goto err;
+    }
+
+    if (isreply) {
+        QTAILQ_FOREACH(reply, &proxy->pending, next) {
+            if (msg.id == reply->id) {
+                break;
+            }
+        }
+        if (reply == NULL) {
+            error_setg(&local_err, "vfio_user_recv unexpected reply");
+            goto err;
+        }
+        QTAILQ_REMOVE(&proxy->pending, reply, next);
+
+        /*
+         * Process any received FDs
+         */
+        if (numfds != 0) {
+            if (reply->fds == NULL || reply->fds->recv_fds < numfds) {
+                error_setg(&local_err, "vfio_user_recv unexpected FDs");
+                goto err;
+            }
+            reply->fds->recv_fds = numfds;
+            memcpy(reply->fds->fds, fdp, numfds * sizeof(int));
+        }
+
+    } else {
+        /*
+         * The client doesn't expect any FDs in requests, but
+         * they will be expected on the server
+         */
+        if (numfds != 0 && (proxy->flags & VFIO_PROXY_CLIENT)) {
+            error_setg(&local_err, "vfio_user_recv fd in client reply");
+            goto err;
+        }
+        reqfds.recv_fds = numfds;
+    }
+
+    /*
+     * put the whole message into a single buffer
+     *
+     * msg.size is supplied by the peer and counts the header too; a value
+     * smaller than the header would make msgleft wrap around and the
+     * body read below run past the end of the buffer.
+     */
+    if (msg.size < sizeof(msg)) {
+        error_setg(&local_err, "vfio_user_recv bad message size");
+        goto fatal;
+    }
+    msgleft = msg.size - sizeof(msg);
+    if (isreply) {
+        if (msg.size > reply->rsize) {
+            error_setg(&local_err,
+                       "vfio_user_recv reply larger than recv buffer");
+            goto fatal;
+        }
+        *reply->msg = msg;
+        data = (char *)reply->msg + sizeof(msg);
+    } else {
+        if (msg.size > max_xfer_size) {
+            error_setg(&local_err, "vfio_user_recv request larger than max");
+            goto fatal;
+        }
+        buf = g_malloc0(msg.size);
+        memcpy(buf, &msg, sizeof(msg));
+        data = buf + sizeof(msg);
+    }
+
+    if (msgleft != 0) {
+        ret = qio_channel_read(proxy->ioc, data, msgleft, &local_err);
+        if (ret < 0) {
+            /* a negative return may come without an Error attached */
+            if (local_err == NULL) {
+                error_setg(&local_err, "vfio_user_recv read error");
+            }
+            goto fatal;
+        }
+        if ((size_t)ret != msgleft) {
+            error_setg(&local_err, "vfio_user_recv short read of msg body");
+            goto err;
+        }
+    }
+
+    /*
+     * Replies signal a waiter, requests get processed by vfio code
+     * that may assume the iothread lock is held.
+     */
+    if (isreply) {
+        /*
+         * Mark completion and signal before dropping proxy->lock, so a
+         * waiter that tests 'complete' under that lock cannot miss the
+         * wakeup.  NOTE(review): the waiting side is not part of this
+         * file; confirm it waits on reply->cv with proxy->lock.
+         */
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+        qemu_mutex_unlock(&proxy->lock);
+        return;
+    }
+
+    qemu_mutex_unlock(&proxy->lock);
+    qemu_mutex_lock_iothread();
+    /*
+     * make sure proxy wasn't closed while we waited
+     * checking without holding the proxy lock is safe
+     * since state is only set to CLOSING when iolock is held
+     */
+    if (proxy->state != CLOSING) {
+        ret = proxy->request(proxy->reqarg, buf, &reqfds);
+        if (ret < 0 && !(msg.flags & VFIO_USER_NO_REPLY)) {
+            vfio_user_send_reply(proxy, buf, ret);
+        }
+    }
+    qemu_mutex_unlock_iothread();
+    return;
+
+fatal:
+    vfio_user_shutdown(proxy);
+    proxy->state = RECV_ERROR;
+
+err:
+    if (reply != NULL) {
+        /* force an error to keep sending thread from hanging */
+        reply->msg->flags |= VFIO_USER_ERROR;
+        reply->msg->error_reply = EINVAL;
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+    }
+    qemu_mutex_unlock(&proxy->lock);
+    for (i = 0; i < numfds; i++) {
+        close(fdp[i]);
+    }
+    error_report_err(local_err);
+}
+
+/*
+ * Write one message, plus the FDs counted in fds->send_fds (if any), to
+ * the proxy's channel.  Called with proxy->lock held (see vfio_user_send).
+ *
+ * msg->size is the number of bytes to send, header included.
+ *
+ * Nothing is returned; a failure is reported through the message itself
+ * by setting VFIO_USER_ERROR in msg->flags and an error code in
+ * msg->error_reply: ECONNRESET when the proxy is not CONNECTED, EIO when
+ * a channel write fails.
+ */
+static void vfio_user_send_locked(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                                  VFIOUserFDs *fds)
+{
+    struct iovec iov = {
+        .iov_base = msg,
+        .iov_len = msg->size,
+    };
+    size_t numfds = 0;
+    size_t msgleft;
+    ssize_t ret;
+    int *fdp = NULL;
+    char *buf;
+    Error *local_err = NULL;
+
+    if (proxy->state != CONNECTED) {
+        msg->flags |= VFIO_USER_ERROR;
+        msg->error_reply = ECONNRESET;
+        return;
+    }
+
+    if (fds != NULL && fds->send_fds != 0) {
+        numfds = fds->send_fds;
+        fdp = fds->fds;
+    }
+    ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds,
+                                  &local_err);
+    if (ret < 0) {
+        goto err;
+    }
+
+    /* finish the message if the first write was partial */
+    buf = (char *)msg + ret;
+    msgleft = iov.iov_len - ret;
+    while (msgleft != 0) {
+        ret = qio_channel_write(proxy->ioc, buf, msgleft, &local_err);
+        if (ret < 0) {
+            goto err;
+        }
+        buf += ret;
+        msgleft -= ret;
+    }
+    return;
+
+err:
+    /* flag the message so the caller does not treat it as sent */
+    msg->flags |= VFIO_USER_ERROR;
+    msg->error_reply = EIO;
+    /* a negative return may come without an Error attached */
+    if (local_err != NULL) {
+        error_report_err(local_err);
+    } else {
+        error_report("vfio_user_send write error");
+    }
+}
+
+/*
+ * Send a message while holding only the per-proxy lock.
+ *
+ * If the calling thread holds the iothread lock, it is released before
+ * proxy->lock is taken and re-acquired once proxy->lock has been dropped.
+ */
+static void vfio_user_send(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                           VFIOUserFDs *fds)
+{
+    const bool had_iothread_lock = qemu_mutex_iothread_locked();
+
+    if (had_iothread_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+
+    qemu_mutex_lock(&proxy->lock);
+    vfio_user_send_locked(proxy, msg, fds);
+    qemu_mutex_unlock(&proxy->lock);
+
+    if (had_iothread_lock) {
+        qemu_mutex_lock_iothread();
+    }
+}
diff --git a/MAINTAINERS b/MAINTAINERS
index 12d69f3a45..aa4df6c418 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1883,8 +1883,12 @@ L: qemu-s390x@nongnu.org
vfio-user
M: John G Johnson <john.g.johnson@oracle.com>
M: Thanos Makatos <thanos.makatos@nutanix.com>
+M: Elena Ufimtseva <elena.ufimtseva@oracle.com>
+M: Jagannathan Raman <jag.raman@oracle.com>
S: Supported
F: docs/devel/vfio-user.rst
+F: hw/vfio/user.c
+F: hw/vfio/user.h
vhost
M: Michael S. Tsirkin <mst@redhat.com>
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index da9af297a0..739b30be73 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -8,6 +8,7 @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
'display.c',
'pci-quirks.c',
'pci.c',
+ 'user.c',
))
vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c'))
vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c'))
--
2.25.1
- [PATCH RFC 00/19] vfio-user implementation, Elena Ufimtseva, 2021/07/19
- [PATCH RFC 07/19] vfio-user: define vfio-user pci ops, Elena Ufimtseva, 2021/07/19
- [PATCH RFC 04/19] vfio-user: Define type vfio_user_pci_dev_info, Elena Ufimtseva, 2021/07/19
- [PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functions,
Elena Ufimtseva <=
- [PATCH RFC 08/19] vfio-user: VFIO container setup & teardown, Elena Ufimtseva, 2021/07/19
- [PATCH RFC 02/19] vfio-user: add VFIO base abstract class, Elena Ufimtseva, 2021/07/19
- [PATCH RFC 10/19] vfio-user: device region read/write, Elena Ufimtseva, 2021/07/19
- [PATCH RFC 01/19] vfio-user: introduce vfio-user protocol specification, Elena Ufimtseva, 2021/07/19
- [PATCH RFC 11/19] vfio-user: get region and DMA map/unmap operations, Elena Ufimtseva, 2021/07/19
- [PATCH RFC 14/19] vfio_user: setup MSI/X interrupts and PCI config operations, Elena Ufimtseva, 2021/07/19
- [PATCH RFC 06/19] vfio-user: negotiate protocol with remote server, Elena Ufimtseva, 2021/07/19