[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [RFC v3 4/5] virtio: add in_xlat_addr & out_xlat_addr VirtQueueElement members
From: |
Eugenio Perez Martin |
Subject: |
Re: [RFC v3 4/5] virtio: add in_xlat_addr & out_xlat_addr VirtQueueElement members |
Date: |
Thu, 16 Jan 2025 20:02:21 +0100 |
On Fri, Jan 10, 2025 at 6:09 PM Jonah Palmer <jonah.palmer@oracle.com> wrote:
>
> Adds the in_xlat_addr & out_xlat_addr hwaddr arrays to the
> VirtQueueElement struct and introduces an optional GPA output parameter
> to dma_memory_map().
>
> These arrays will store a VirtQueueElement's input/output descriptors'
> GPA of the mapped memory region, if it's backed by guest memory, via
> dma_memory_map().
>
> The GPA will always correspond 1:1 to the iovec entry when translating
> addresses between Qemu VAs and SVQ IOVAs in vhost_svq_translate_addr().
> This helps to avoid extra complicated code in SVQ's
> vhost_svq_vring_write_descs() function (e.g. splitting up iovec into
> multiple buffers, not breaking devices using aliased mapping, etc.).
>
> Since the translation is only done once inside the DMA API alongside
> virtqueue_pop(), the cost should be minimal.
>
I think this is a very invasive change, as it touches the DMA subsystem.
Let me try to avoid it on 5/5 :).
> Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
> ---
> hw/display/virtio-gpu.c | 5 ++--
> hw/hyperv/vmbus.c | 8 +++---
> hw/ide/ahci.c | 7 +++---
> hw/usb/libhw.c | 2 +-
> hw/virtio/virtio.c | 50 ++++++++++++++++++++++++++-----------
> include/hw/pci/pci_device.h | 2 +-
> include/hw/virtio/virtio.h | 2 ++
> include/system/dma.h | 25 ++++++++++++++++++-
> system/dma-helpers.c | 2 +-
> 9 files changed, 77 insertions(+), 26 deletions(-)
>
> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> index 11a7a85750..afb9a8b69f 100644
> --- a/hw/display/virtio-gpu.c
> +++ b/hw/display/virtio-gpu.c
> @@ -839,7 +839,7 @@ int virtio_gpu_create_mapping_iov(VirtIOGPU *g,
> len = l;
> map = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, a, &len,
> DMA_DIRECTION_TO_DEVICE,
> - MEMTXATTRS_UNSPECIFIED);
> + MEMTXATTRS_UNSPECIFIED, NULL);
> if (!map) {
> qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO
> memory for"
> " element %d\n", __func__, e);
> @@ -1258,7 +1258,8 @@ static bool virtio_gpu_load_restore_mapping(VirtIOGPU
> *g,
> hwaddr len = res->iov[i].iov_len;
> res->iov[i].iov_base =
> dma_memory_map(VIRTIO_DEVICE(g)->dma_as, res->addrs[i], &len,
> - DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
> + DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED,
> + NULL);
>
> if (!res->iov[i].iov_base || len != res->iov[i].iov_len) {
> /* Clean up the half-a-mapping we just created... */
> diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c
> index 12a7dc4312..c3308a1bfd 100644
> --- a/hw/hyperv/vmbus.c
> +++ b/hw/hyperv/vmbus.c
> @@ -374,7 +374,7 @@ static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf,
> uint32_t len)
> maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) |
> off_in_page;
>
> iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
> - MEMTXATTRS_UNSPECIFIED);
> + MEMTXATTRS_UNSPECIFIED, NULL);
> if (mlen != pgleft) {
> dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
> iter->map = NULL;
> @@ -492,7 +492,8 @@ int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir,
> struct iovec *iov,
> }
>
> iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir,
> - MEMTXATTRS_UNSPECIFIED);
> + MEMTXATTRS_UNSPECIFIED,
> + NULL);
> if (!l) {
> ret = -EFAULT;
> goto err;
> @@ -568,7 +569,8 @@ static vmbus_ring_buffer
> *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
> dma_addr_t mlen = sizeof(*rb);
>
> rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
> - DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
> + DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED,
> + NULL);
> if (mlen != sizeof(*rb)) {
> dma_memory_unmap(ringbuf->as, rb, mlen,
> DMA_DIRECTION_FROM_DEVICE, 0);
> diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
> index 1303c21cb7..aeea2dc61d 100644
> --- a/hw/ide/ahci.c
> +++ b/hw/ide/ahci.c
> @@ -221,7 +221,7 @@ static void map_page(AddressSpace *as, uint8_t **ptr,
> uint64_t addr,
> }
>
> *ptr = dma_memory_map(as, addr, &len, DMA_DIRECTION_FROM_DEVICE,
> - MEMTXATTRS_UNSPECIFIED);
> + MEMTXATTRS_UNSPECIFIED, NULL);
> if (len < wanted && *ptr) {
> dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
> *ptr = NULL;
> @@ -928,7 +928,7 @@ static int ahci_populate_sglist(AHCIDevice *ad,
> QEMUSGList *sglist,
> /* map PRDT */
> if (!(prdt = dma_memory_map(ad->hba->as, prdt_addr, &prdt_len,
> DMA_DIRECTION_TO_DEVICE,
> - MEMTXATTRS_UNSPECIFIED))){
> + MEMTXATTRS_UNSPECIFIED, NULL))) {
> trace_ahci_populate_sglist_no_map(ad->hba, ad->port_no);
> return -1;
> }
> @@ -1338,7 +1338,8 @@ static void handle_cmd(AHCIState *s, int port, uint8_t
> slot)
> tbl_addr = le64_to_cpu(cmd->tbl_addr);
> cmd_len = 0x80;
> cmd_fis = dma_memory_map(s->as, tbl_addr, &cmd_len,
> - DMA_DIRECTION_TO_DEVICE,
> MEMTXATTRS_UNSPECIFIED);
> + DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED,
> + NULL);
> if (!cmd_fis) {
> trace_handle_cmd_badfis(s, port);
> return;
> diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c
> index 4f03ef4ba9..762d70b419 100644
> --- a/hw/usb/libhw.c
> +++ b/hw/usb/libhw.c
> @@ -37,7 +37,7 @@ int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
> while (len) {
> dma_addr_t xlen = len;
> mem = dma_memory_map(sgl->as, base, &xlen, dir,
> - MEMTXATTRS_UNSPECIFIED);
> + MEMTXATTRS_UNSPECIFIED, NULL);
> if (!mem) {
> goto err;
> }
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 85110bce37..be756f3ac8 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -1553,9 +1553,9 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int
> in_bytes,
> }
>
> static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
> - hwaddr *addr, struct iovec *iov,
> - unsigned int max_num_sg, bool is_write,
> - hwaddr pa, size_t sz)
> + hwaddr *addr, hwaddr *xlat_addr,
> + struct iovec *iov, unsigned int max_num_sg,
> + bool is_write, hwaddr pa, size_t sz)
> {
> bool ok = false;
> unsigned num_sg = *p_num_sg;
> @@ -1579,7 +1579,8 @@ static bool virtqueue_map_desc(VirtIODevice *vdev,
> unsigned int *p_num_sg,
> is_write ?
> DMA_DIRECTION_FROM_DEVICE :
> DMA_DIRECTION_TO_DEVICE,
> - MEMTXATTRS_UNSPECIFIED);
> + MEMTXATTRS_UNSPECIFIED,
> + &xlat_addr[num_sg]);
> if (!iov[num_sg].iov_base) {
> virtio_error(vdev, "virtio: bogus descriptor or out of
> resources");
> goto out;
> @@ -1618,7 +1619,7 @@ static void virtqueue_undo_map_desc(unsigned int
> out_num, unsigned int in_num,
>
> static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
> hwaddr *addr, unsigned int num_sg,
> - bool is_write)
> + hwaddr *xlat_addr, bool is_write)
> {
> unsigned int i;
> hwaddr len;
> @@ -1629,7 +1630,8 @@ static void virtqueue_map_iovec(VirtIODevice *vdev,
> struct iovec *sg,
> addr[i], &len, is_write ?
> DMA_DIRECTION_FROM_DEVICE :
> DMA_DIRECTION_TO_DEVICE,
> - MEMTXATTRS_UNSPECIFIED);
> + MEMTXATTRS_UNSPECIFIED,
> + &xlat_addr[i]);
> if (!sg[i].iov_base) {
> error_report("virtio: error trying to map MMIO memory");
> exit(1);
> @@ -1643,9 +1645,10 @@ static void virtqueue_map_iovec(VirtIODevice *vdev,
> struct iovec *sg,
>
> void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
> {
> - virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num,
> true);
> + virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num,
> + elem->in_xlat_addr, true);
> virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
> -
> false);
> + elem->out_xlat_addr, false);
> }
>
> static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned
> in_num)
> @@ -1654,7 +1657,14 @@ static void *virtqueue_alloc_element(size_t sz,
> unsigned out_num, unsigned in_nu
> size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
> size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
> size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
> - size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end,
> __alignof__(elem->in_sg[0]));
> + size_t in_xlat_addr_ofs =
> + QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_xlat_addr[0]));
> + size_t out_xlat_addr_ofs = in_xlat_addr_ofs + in_num *
> + sizeof(elem->in_xlat_addr[0]);
> + size_t out_xlat_addr_end = out_xlat_addr_ofs + out_num *
> + sizeof(elem->out_xlat_addr[0]);
> + size_t in_sg_ofs =
> + QEMU_ALIGN_UP(out_xlat_addr_end, __alignof__(elem->in_sg[0]));
> size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
> size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
>
> @@ -1665,6 +1675,8 @@ static void *virtqueue_alloc_element(size_t sz,
> unsigned out_num, unsigned in_nu
> elem->in_num = in_num;
> elem->in_addr = (void *)elem + in_addr_ofs;
> elem->out_addr = (void *)elem + out_addr_ofs;
> + elem->in_xlat_addr = (void *)elem + in_xlat_addr_ofs;
> + elem->out_xlat_addr = (void *)elem + out_xlat_addr_ofs;
> elem->in_sg = (void *)elem + in_sg_ofs;
> elem->out_sg = (void *)elem + out_sg_ofs;
> return elem;
> @@ -1681,6 +1693,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t
> sz)
> VirtQueueElement *elem = NULL;
> unsigned out_num, in_num, elem_entries;
> hwaddr addr[VIRTQUEUE_MAX_SIZE];
> + hwaddr xlat_addr[VIRTQUEUE_MAX_SIZE];
> struct iovec iov[VIRTQUEUE_MAX_SIZE];
> VRingDesc desc;
> int rc;
> @@ -1754,7 +1767,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t
> sz)
>
> if (desc.flags & VRING_DESC_F_WRITE) {
> map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
> - iov + out_num,
> + xlat_addr + out_num, iov + out_num,
> VIRTQUEUE_MAX_SIZE - out_num, true,
> desc.addr, desc.len);
> } else {
> @@ -1762,8 +1775,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t
> sz)
> virtio_error(vdev, "Incorrect order for descriptors");
> goto err_undo_map;
> }
> - map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
> - VIRTQUEUE_MAX_SIZE, false,
> + map_ok = virtqueue_map_desc(vdev, &out_num, addr, xlat_addr,
> + iov, VIRTQUEUE_MAX_SIZE, false,
> desc.addr, desc.len);
> }
> if (!map_ok) {
> @@ -1790,10 +1803,12 @@ static void *virtqueue_split_pop(VirtQueue *vq,
> size_t sz)
> for (i = 0; i < out_num; i++) {
> elem->out_addr[i] = addr[i];
> elem->out_sg[i] = iov[i];
> + elem->out_xlat_addr[i] = xlat_addr[i];
> }
> for (i = 0; i < in_num; i++) {
> elem->in_addr[i] = addr[out_num + i];
> elem->in_sg[i] = iov[out_num + i];
> + elem->in_xlat_addr[i] = xlat_addr[out_num + i];
> }
>
> if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) {
> @@ -1827,6 +1842,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t
> sz)
> VirtQueueElement *elem = NULL;
> unsigned out_num, in_num, elem_entries;
> hwaddr addr[VIRTQUEUE_MAX_SIZE];
> + hwaddr xlat_addr[VIRTQUEUE_MAX_SIZE];
> struct iovec iov[VIRTQUEUE_MAX_SIZE];
> VRingPackedDesc desc;
> uint16_t id;
> @@ -1891,7 +1907,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t
> sz)
>
> if (desc.flags & VRING_DESC_F_WRITE) {
> map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
> - iov + out_num,
> + xlat_addr + out_num, iov + out_num,
> VIRTQUEUE_MAX_SIZE - out_num, true,
> desc.addr, desc.len);
> } else {
> @@ -1899,7 +1915,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t
> sz)
> virtio_error(vdev, "Incorrect order for descriptors");
> goto err_undo_map;
> }
> - map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
> + map_ok = virtqueue_map_desc(vdev, &out_num, addr, xlat_addr, iov,
> VIRTQUEUE_MAX_SIZE, false,
> desc.addr, desc.len);
> }
> @@ -1928,10 +1944,12 @@ static void *virtqueue_packed_pop(VirtQueue *vq,
> size_t sz)
> for (i = 0; i < out_num; i++) {
> elem->out_addr[i] = addr[i];
> elem->out_sg[i] = iov[i];
> + elem->out_xlat_addr[i] = xlat_addr[i];
> }
> for (i = 0; i < in_num; i++) {
> elem->in_addr[i] = addr[out_num + i];
> elem->in_sg[i] = iov[out_num + i];
> + elem->in_xlat_addr[i] = xlat_addr[out_num + i];
> }
>
> elem->index = id;
> @@ -2117,10 +2135,14 @@ void *qemu_get_virtqueue_element(VirtIODevice *vdev,
> QEMUFile *f, size_t sz)
> elem->index = data.index;
>
> for (i = 0; i < elem->in_num; i++) {
> + /* xlat_addr is overwritten by virtqueue_map */
> + elem->in_xlat_addr[i] = 0;
> elem->in_addr[i] = data.in_addr[i];
> }
>
> for (i = 0; i < elem->out_num; i++) {
> + /* xlat_addr is overwritten by virtqueue_map */
> + elem->out_xlat_addr[i] = 0;
> elem->out_addr[i] = data.out_addr[i];
> }
>
> diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
> index 8eaf0d58bb..e2bb453dcc 100644
> --- a/include/hw/pci/pci_device.h
> +++ b/include/hw/pci/pci_device.h
> @@ -328,7 +328,7 @@ static inline void *pci_dma_map(PCIDevice *dev,
> dma_addr_t addr,
> dma_addr_t *plen, DMADirection dir)
> {
> return dma_memory_map(pci_get_address_space(dev), addr, plen, dir,
> - MEMTXATTRS_UNSPECIFIED);
> + MEMTXATTRS_UNSPECIFIED, NULL);
> }
>
> static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t
> len,
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 6386910280..e822aafd91 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -75,6 +75,8 @@ typedef struct VirtQueueElement
> hwaddr *out_addr;
> struct iovec *in_sg;
> struct iovec *out_sg;
> + hwaddr *in_xlat_addr;
> + hwaddr *out_xlat_addr;
> } VirtQueueElement;
>
> #define VIRTIO_QUEUE_MAX 1024
> diff --git a/include/system/dma.h b/include/system/dma.h
> index 5a49a30628..b5d4c07452 100644
> --- a/include/system/dma.h
> +++ b/include/system/dma.h
> @@ -12,6 +12,7 @@
>
> #include "exec/memory.h"
> #include "exec/address-spaces.h"
> +#include "exec/ramblock.h"
> #include "block/block.h"
> #include "block/accounting.h"
>
> @@ -201,10 +202,12 @@ MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t
> addr,
> * @len: pointer to length of buffer; updated on return
> * @dir: indicates the transfer direction
> * @attrs: memory attributes
> + * @guest_addr: optional output for GPA
> */
> static inline void *dma_memory_map(AddressSpace *as,
> dma_addr_t addr, dma_addr_t *len,
> - DMADirection dir, MemTxAttrs attrs)
> + DMADirection dir, MemTxAttrs attrs,
> + hwaddr *guest_addr)
> {
> hwaddr xlen = *len;
> void *p;
> @@ -212,6 +215,26 @@ static inline void *dma_memory_map(AddressSpace *as,
> p = address_space_map(as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE,
> attrs);
> *len = xlen;
> +
> + /* Attempt to find a backing GPA for this HVA */
> + if (guest_addr) {
> + if (p) {
> + RAMBlock *rb;
> + ram_addr_t offset;
> +
> + rb = qemu_ram_block_from_host(p, false, &offset);
> + if (rb) {
> + /* HVA corresponds to guest memory */
> + *guest_addr = rb->offset + offset;
> + } else {
> + /* HVA doesn't correspond to guest memory */
> + *guest_addr = 0;
> + }
> + } else {
> + /* Mapping failed */
> + *guest_addr = 0;
> + }
> + }
> return p;
> }
>
> diff --git a/system/dma-helpers.c b/system/dma-helpers.c
> index f6403242f5..a6d2352c0f 100644
> --- a/system/dma-helpers.c
> +++ b/system/dma-helpers.c
> @@ -135,7 +135,7 @@ static void dma_blk_cb(void *opaque, int ret)
> cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
> cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
> mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
> - MEMTXATTRS_UNSPECIFIED);
> + MEMTXATTRS_UNSPECIFIED, NULL);
> /*
> * Make reads deterministic in icount mode. Windows sometimes issues
> * disk read requests with overlapping SGs. It leads
> --
> 2.43.5
>
- [RFC v3 0/5] Handling aliased guest memory maps in vhost-vDPA SVQs, Jonah Palmer, 2025/01/10
- [RFC v3 2/5] vhost-iova-tree: Remove range check for IOVA allocator, Jonah Palmer, 2025/01/10
- [RFC v3 3/5] vhost-vdpa: Implement the GPA->IOVA tree, Jonah Palmer, 2025/01/10
- [RFC v3 4/5] virtio: add in_xlat_addr & out_xlat_addr VirtQueueElement members, Jonah Palmer, 2025/01/10
- Re: [RFC v3 4/5] virtio: add in_xlat_addr & out_xlat_addr VirtQueueElement members,
Eugenio Perez Martin <=
- [RFC v3 5/5] svq: Support translations via GPAs in vhost_svq_translate_addr, Jonah Palmer, 2025/01/10
- [RFC v3 1/5] vhost-vdpa: Decouple the IOVA allocator, Jonah Palmer, 2025/01/10