qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC v3 4/5] virtio: add in_xlat_addr & out_xlat_addr VirtQueueElement members


From: Si-Wei Liu
Subject: Re: [RFC v3 4/5] virtio: add in_xlat_addr & out_xlat_addr VirtQueueElement members
Date: Wed, 22 Jan 2025 16:57:48 -0800
User-agent: Mozilla Thunderbird




On 1/16/2025 11:02 AM, Eugenio Perez Martin wrote:
On Fri, Jan 10, 2025 at 6:09 PM Jonah Palmer <jonah.palmer@oracle.com> wrote:
Adds the in_xlat_addr & out_xlat_addr hwaddr arrays to the
VirtQueueElement struct and introduces an optional GPA output parameter
to dma_memory_map().

These arrays will store a VirtQueueElement's input/output descriptors'
GPA of the mapped memory region, if it's backed by guest memory, via
dma_memory_map().

The GPA will always correspond 1:1 to the iovec entry when translating
addresses between Qemu VAs and SVQ IOVAs in vhost_svq_translate_addr().
This helps to avoid extra complicated code in SVQ's
vhost_svq_vring_write_descs() function (e.g. splitting up iovec into
multiple buffers, not breaking devices using aliased mapping, etc.).

Since the translation is only done once inside the DMA API alongside
virtqueue_pop(), the cost should be minimal.

I think this is a very strong change as it touches the dma subsystem.
Let me try to avoid it on 5/5 :).
This change was what I suggested to Jonah actually; the intent for the DMA API change was to lessen the friction of vIOMMU support for Shadow VQ (in which case elem->in_addr and elem->out_addr are GIOVA rather than GPA). This will be the most efficient and performant way to get the needed translation done only once, without having to maintain a separate GPA mapping & translation outside of the memory subsystem. I wonder whether it would work to create a variant of the dma_memory_map() API while leaving the current one as is: the new API variant would return a cookie that is used to represent the address translation, such that the SVQ subsystem can use it later on whenever needed in order to fetch the GIOVA-to-GPA translation in place (the translated GPA address may be cached in the opaque cookie) from the memory system?

Regards,
-Siwei


Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
---
  hw/display/virtio-gpu.c     |  5 ++--
  hw/hyperv/vmbus.c           |  8 +++---
  hw/ide/ahci.c               |  7 +++---
  hw/usb/libhw.c              |  2 +-
  hw/virtio/virtio.c          | 50 ++++++++++++++++++++++++++-----------
  include/hw/pci/pci_device.h |  2 +-
  include/hw/virtio/virtio.h  |  2 ++
  include/system/dma.h        | 25 ++++++++++++++++++-
  system/dma-helpers.c        |  2 +-
  9 files changed, 77 insertions(+), 26 deletions(-)

diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 11a7a85750..afb9a8b69f 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -839,7 +839,7 @@ int virtio_gpu_create_mapping_iov(VirtIOGPU *g,
              len = l;
              map = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, a, &len,
                                   DMA_DIRECTION_TO_DEVICE,
-                                 MEMTXATTRS_UNSPECIFIED);
+                                 MEMTXATTRS_UNSPECIFIED, NULL);
              if (!map) {
              qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for"
                              " element %d\n", __func__, e);
@@ -1258,7 +1258,8 @@ static bool virtio_gpu_load_restore_mapping(VirtIOGPU *g,
          hwaddr len = res->iov[i].iov_len;
          res->iov[i].iov_base =
              dma_memory_map(VIRTIO_DEVICE(g)->dma_as, res->addrs[i], &len,
-                           DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
+                           DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED,
+                           NULL);

          if (!res->iov[i].iov_base || len != res->iov[i].iov_len) {
              /* Clean up the half-a-mapping we just created... */
diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c
index 12a7dc4312..c3308a1bfd 100644
--- a/hw/hyperv/vmbus.c
+++ b/hw/hyperv/vmbus.c
@@ -374,7 +374,7 @@ static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, 
uint32_t len)
              maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;

              iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
-                                       MEMTXATTRS_UNSPECIFIED);
+                                       MEMTXATTRS_UNSPECIFIED, NULL);
              if (mlen != pgleft) {
                  dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
                  iter->map = NULL;
@@ -492,7 +492,8 @@ int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, 
struct iovec *iov,
              }

              iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir,
-                                                   MEMTXATTRS_UNSPECIFIED);
+                                                   MEMTXATTRS_UNSPECIFIED,
+                                                   NULL);
              if (!l) {
                  ret = -EFAULT;
                  goto err;
@@ -568,7 +569,8 @@ static vmbus_ring_buffer 
*ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
      dma_addr_t mlen = sizeof(*rb);

      rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
-                        DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
+                        DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED,
+                        NULL);
      if (mlen != sizeof(*rb)) {
          dma_memory_unmap(ringbuf->as, rb, mlen,
                           DMA_DIRECTION_FROM_DEVICE, 0);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 1303c21cb7..aeea2dc61d 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -221,7 +221,7 @@ static void map_page(AddressSpace *as, uint8_t **ptr, 
uint64_t addr,
      }

      *ptr = dma_memory_map(as, addr, &len, DMA_DIRECTION_FROM_DEVICE,
-                          MEMTXATTRS_UNSPECIFIED);
+                          MEMTXATTRS_UNSPECIFIED, NULL);
      if (len < wanted && *ptr) {
          dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
          *ptr = NULL;
@@ -928,7 +928,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList 
*sglist,
      /* map PRDT */
      if (!(prdt = dma_memory_map(ad->hba->as, prdt_addr, &prdt_len,
                                  DMA_DIRECTION_TO_DEVICE,
-                                MEMTXATTRS_UNSPECIFIED))){
+                                MEMTXATTRS_UNSPECIFIED, NULL))) {
          trace_ahci_populate_sglist_no_map(ad->hba, ad->port_no);
          return -1;
      }
@@ -1338,7 +1338,8 @@ static void handle_cmd(AHCIState *s, int port, uint8_t 
slot)
      tbl_addr = le64_to_cpu(cmd->tbl_addr);
      cmd_len = 0x80;
      cmd_fis = dma_memory_map(s->as, tbl_addr, &cmd_len,
-                             DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
+                             DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED,
+                             NULL);
      if (!cmd_fis) {
          trace_handle_cmd_badfis(s, port);
          return;
diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c
index 4f03ef4ba9..762d70b419 100644
--- a/hw/usb/libhw.c
+++ b/hw/usb/libhw.c
@@ -37,7 +37,7 @@ int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
          while (len) {
              dma_addr_t xlen = len;
              mem = dma_memory_map(sgl->as, base, &xlen, dir,
-                                 MEMTXATTRS_UNSPECIFIED);
+                                 MEMTXATTRS_UNSPECIFIED, NULL);
              if (!mem) {
                  goto err;
              }
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 85110bce37..be756f3ac8 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1553,9 +1553,9 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int 
in_bytes,
  }

  static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
-                               hwaddr *addr, struct iovec *iov,
-                               unsigned int max_num_sg, bool is_write,
-                               hwaddr pa, size_t sz)
+                               hwaddr *addr, hwaddr *xlat_addr,
+                               struct iovec *iov, unsigned int max_num_sg,
+                               bool is_write, hwaddr pa, size_t sz)
  {
      bool ok = false;
      unsigned num_sg = *p_num_sg;
@@ -1579,7 +1579,8 @@ static bool virtqueue_map_desc(VirtIODevice *vdev, 
unsigned int *p_num_sg,
                                                is_write ?
                                                DMA_DIRECTION_FROM_DEVICE :
                                                DMA_DIRECTION_TO_DEVICE,
-                                              MEMTXATTRS_UNSPECIFIED);
+                                              MEMTXATTRS_UNSPECIFIED,
+                                              &xlat_addr[num_sg]);
          if (!iov[num_sg].iov_base) {
              virtio_error(vdev, "virtio: bogus descriptor or out of resources");
              goto out;
@@ -1618,7 +1619,7 @@ static void virtqueue_undo_map_desc(unsigned int out_num, 
unsigned int in_num,

  static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
                                  hwaddr *addr, unsigned int num_sg,
-                                bool is_write)
+                                hwaddr *xlat_addr, bool is_write)
  {
      unsigned int i;
      hwaddr len;
@@ -1629,7 +1630,8 @@ static void virtqueue_map_iovec(VirtIODevice *vdev, 
struct iovec *sg,
                                          addr[i], &len, is_write ?
                                          DMA_DIRECTION_FROM_DEVICE :
                                          DMA_DIRECTION_TO_DEVICE,
-                                        MEMTXATTRS_UNSPECIFIED);
+                                        MEMTXATTRS_UNSPECIFIED,
+                                        &xlat_addr[i]);
          if (!sg[i].iov_base) {
              error_report("virtio: error trying to map MMIO memory");
              exit(1);
@@ -1643,9 +1645,10 @@ static void virtqueue_map_iovec(VirtIODevice *vdev, 
struct iovec *sg,

  void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
  {
-    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
+    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num,
+                        elem->in_xlat_addr, true);
      virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
-                                                                        false);
+                        elem->out_xlat_addr, false);
  }

  static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned 
in_num)
@@ -1654,7 +1657,14 @@ static void *virtqueue_alloc_element(size_t sz, unsigned 
out_num, unsigned in_nu
      size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
      size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
      size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
-    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, 
__alignof__(elem->in_sg[0]));
+    size_t in_xlat_addr_ofs =
+        QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_xlat_addr[0]));
+    size_t out_xlat_addr_ofs = in_xlat_addr_ofs + in_num *
+                               sizeof(elem->in_xlat_addr[0]);
+    size_t out_xlat_addr_end = out_xlat_addr_ofs + out_num *
+                               sizeof(elem->out_xlat_addr[0]);
+    size_t in_sg_ofs =
+        QEMU_ALIGN_UP(out_xlat_addr_end, __alignof__(elem->in_sg[0]));
      size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
      size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

@@ -1665,6 +1675,8 @@ static void *virtqueue_alloc_element(size_t sz, unsigned 
out_num, unsigned in_nu
      elem->in_num = in_num;
      elem->in_addr = (void *)elem + in_addr_ofs;
      elem->out_addr = (void *)elem + out_addr_ofs;
+    elem->in_xlat_addr = (void *)elem + in_xlat_addr_ofs;
+    elem->out_xlat_addr = (void *)elem + out_xlat_addr_ofs;
      elem->in_sg = (void *)elem + in_sg_ofs;
      elem->out_sg = (void *)elem + out_sg_ofs;
      return elem;
@@ -1681,6 +1693,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
      VirtQueueElement *elem = NULL;
      unsigned out_num, in_num, elem_entries;
      hwaddr addr[VIRTQUEUE_MAX_SIZE];
+    hwaddr xlat_addr[VIRTQUEUE_MAX_SIZE];
      struct iovec iov[VIRTQUEUE_MAX_SIZE];
      VRingDesc desc;
      int rc;
@@ -1754,7 +1767,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)

          if (desc.flags & VRING_DESC_F_WRITE) {
              map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
-                                        iov + out_num,
+                                        xlat_addr + out_num, iov + out_num,
                                          VIRTQUEUE_MAX_SIZE - out_num, true,
                                          desc.addr, desc.len);
          } else {
@@ -1762,8 +1775,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
                  virtio_error(vdev, "Incorrect order for descriptors");
                  goto err_undo_map;
              }
-            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
-                                        VIRTQUEUE_MAX_SIZE, false,
+            map_ok = virtqueue_map_desc(vdev, &out_num, addr, xlat_addr,
+                                        iov, VIRTQUEUE_MAX_SIZE, false,
                                          desc.addr, desc.len);
          }
          if (!map_ok) {
@@ -1790,10 +1803,12 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t 
sz)
      for (i = 0; i < out_num; i++) {
          elem->out_addr[i] = addr[i];
          elem->out_sg[i] = iov[i];
+        elem->out_xlat_addr[i] = xlat_addr[i];
      }
      for (i = 0; i < in_num; i++) {
          elem->in_addr[i] = addr[out_num + i];
          elem->in_sg[i] = iov[out_num + i];
+        elem->in_xlat_addr[i] = xlat_addr[out_num + i];
      }

      if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) {
@@ -1827,6 +1842,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t 
sz)
      VirtQueueElement *elem = NULL;
      unsigned out_num, in_num, elem_entries;
      hwaddr addr[VIRTQUEUE_MAX_SIZE];
+    hwaddr xlat_addr[VIRTQUEUE_MAX_SIZE];
      struct iovec iov[VIRTQUEUE_MAX_SIZE];
      VRingPackedDesc desc;
      uint16_t id;
@@ -1891,7 +1907,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t 
sz)

          if (desc.flags & VRING_DESC_F_WRITE) {
              map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
-                                        iov + out_num,
+                                        xlat_addr + out_num, iov + out_num,
                                          VIRTQUEUE_MAX_SIZE - out_num, true,
                                          desc.addr, desc.len);
          } else {
@@ -1899,7 +1915,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t 
sz)
                  virtio_error(vdev, "Incorrect order for descriptors");
                  goto err_undo_map;
              }
-            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
+            map_ok = virtqueue_map_desc(vdev, &out_num, addr, xlat_addr, iov,
                                          VIRTQUEUE_MAX_SIZE, false,
                                          desc.addr, desc.len);
          }
@@ -1928,10 +1944,12 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t 
sz)
      for (i = 0; i < out_num; i++) {
          elem->out_addr[i] = addr[i];
          elem->out_sg[i] = iov[i];
+        elem->out_xlat_addr[i] = xlat_addr[i];
      }
      for (i = 0; i < in_num; i++) {
          elem->in_addr[i] = addr[out_num + i];
          elem->in_sg[i] = iov[out_num + i];
+        elem->in_xlat_addr[i] = xlat_addr[out_num + i];
      }

      elem->index = id;
@@ -2117,10 +2135,14 @@ void *qemu_get_virtqueue_element(VirtIODevice *vdev, 
QEMUFile *f, size_t sz)
      elem->index = data.index;

      for (i = 0; i < elem->in_num; i++) {
+        /* xlat_addr is overwritten by virtqueue_map */
+        elem->in_xlat_addr[i] = 0;
          elem->in_addr[i] = data.in_addr[i];
      }

      for (i = 0; i < elem->out_num; i++) {
+        /* xlat_addr is overwritten by virtqueue_map */
+        elem->out_xlat_addr[i] = 0;
          elem->out_addr[i] = data.out_addr[i];
      }

diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index 8eaf0d58bb..e2bb453dcc 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -328,7 +328,7 @@ static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t 
addr,
                                  dma_addr_t *plen, DMADirection dir)
  {
      return dma_memory_map(pci_get_address_space(dev), addr, plen, dir,
-                          MEMTXATTRS_UNSPECIFIED);
+                          MEMTXATTRS_UNSPECIFIED, NULL);
  }

  static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 6386910280..e822aafd91 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -75,6 +75,8 @@ typedef struct VirtQueueElement
      hwaddr *out_addr;
      struct iovec *in_sg;
      struct iovec *out_sg;
+    hwaddr *in_xlat_addr;
+    hwaddr *out_xlat_addr;
  } VirtQueueElement;

  #define VIRTIO_QUEUE_MAX 1024
diff --git a/include/system/dma.h b/include/system/dma.h
index 5a49a30628..b5d4c07452 100644
--- a/include/system/dma.h
+++ b/include/system/dma.h
@@ -12,6 +12,7 @@

  #include "exec/memory.h"
  #include "exec/address-spaces.h"
+#include "exec/ramblock.h"
  #include "block/block.h"
  #include "block/accounting.h"

@@ -201,10 +202,12 @@ MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t 
addr,
   * @len: pointer to length of buffer; updated on return
   * @dir: indicates the transfer direction
   * @attrs: memory attributes
+ * @guest_addr: optional output for GPA
   */
  static inline void *dma_memory_map(AddressSpace *as,
                                     dma_addr_t addr, dma_addr_t *len,
-                                   DMADirection dir, MemTxAttrs attrs)
+                                   DMADirection dir, MemTxAttrs attrs,
+                                   hwaddr *guest_addr)
  {
      hwaddr xlen = *len;
      void *p;
@@ -212,6 +215,26 @@ static inline void *dma_memory_map(AddressSpace *as,
      p = address_space_map(as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE,
                            attrs);
      *len = xlen;
+
+    /* Attempt to find a backing GPA for this HVA */
+    if (guest_addr) {
+        if (p) {
+            RAMBlock *rb;
+            ram_addr_t offset;
+
+            rb = qemu_ram_block_from_host(p, false, &offset);
+            if (rb) {
+                /* HVA corresponds to guest memory */
+                *guest_addr = rb->offset + offset;
+            } else {
+                /* HVA doesn't correspond to guest memory */
+                *guest_addr = 0;
+            }
+        } else {
+            /* Mapping failed */
+            *guest_addr = 0;
+        }
+    }
      return p;
  }

diff --git a/system/dma-helpers.c b/system/dma-helpers.c
index f6403242f5..a6d2352c0f 100644
--- a/system/dma-helpers.c
+++ b/system/dma-helpers.c
@@ -135,7 +135,7 @@ static void dma_blk_cb(void *opaque, int ret)
          cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
          cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
          mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
-                             MEMTXATTRS_UNSPECIFIED);
+                             MEMTXATTRS_UNSPECIFIED, NULL);
          /*
           * Make reads deterministic in icount mode. Windows sometimes issues
           * disk read requests with overlapping SGs. It leads
--
2.43.5





reply via email to

[Prev in Thread] Current Thread [Next in Thread]