[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[RFC v3 23/29] vhost: Use a tree to store memory mappings
From: |
Eugenio Pérez |
Subject: |
[RFC v3 23/29] vhost: Use a tree to store memory mappings |
Date: |
Wed, 19 May 2021 18:28:57 +0200 |
At the moment, the tree is only used to store 1:1 maps of the qemu
virtual addresses of shadow virtqueue vring and the guest's addresses.
In other words, the tree only serves to check if the address the guest
exposed is valid at the moment qemu receives the miss.
It does not work if the device has restrictions in its IOVA
range at the moment.
Updates to the tree are protected by the BQL; each one always runs from the main
event loop context. vhost_device_iotlb_miss runs in the same context when
reading it.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
include/hw/virtio/vhost.h | 3 +
hw/virtio/vhost.c | 121 ++++++++++++++++++++++++++++++--------
2 files changed, 99 insertions(+), 25 deletions(-)
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index c97a4c0017..773f882145 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -2,6 +2,7 @@
#define VHOST_H
#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/vhost-iova-tree.h"
#include "hw/virtio/virtio.h"
#include "exec/memory.h"
@@ -88,6 +89,8 @@ struct vhost_dev {
bool log_enabled;
bool shadow_vqs_enabled;
uint64_t log_size;
+ /* IOVA mapping used by Shadow Virtqueue */
+ VhostIOVATree iova_map;
struct {
hwaddr first;
hwaddr last;
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index c8fa9df9b3..925d2146a4 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1013,31 +1013,45 @@ static int vhost_memory_region_lookup(struct vhost_dev
*hdev,
int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
{
- IOMMUTLBEntry iotlb;
+ IOMMUAccessFlags perm;
uint64_t uaddr, len;
int ret = -EFAULT;
- RCU_READ_LOCK_GUARD();
-
trace_vhost_iotlb_miss(dev, 1);
if (dev->shadow_vqs_enabled) {
- uaddr = iova;
- len = 4096;
- ret = vhost_backend_update_device_iotlb(dev, iova, uaddr, len,
- IOMMU_RW);
- if (ret) {
- trace_vhost_iotlb_miss(dev, 2);
- error_report("Fail to update device iotlb");
+ /* Shadow virtqueue translations in its Virtual Address Space */
+ const VhostDMAMap *result;
+ const VhostDMAMap needle = {
+ .iova = iova,
+ };
+
+ result = vhost_iova_tree_find_taddr(&dev->iova_map, &needle);
+
+ if (unlikely(!result)) {
+ goto out;
}
- return ret;
- }
+ iova = result->iova;
+ uaddr = (uint64_t)result->translated_addr;
+ /*
+ * In IOVATree, result.iova + result.size is the last element of iova.
+ * For vhost, it is one past that last element.
+ */
+ len = result->size + 1;
+ perm = result->perm;
+ } else {
+ IOMMUTLBEntry iotlb;
+
+ RCU_READ_LOCK_GUARD();
+ iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
+ iova, write,
+ MEMTXATTRS_UNSPECIFIED);
+
+ if (iotlb.target_as == NULL) {
+ goto out;
+ }
- iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
- iova, write,
- MEMTXATTRS_UNSPECIFIED);
- if (iotlb.target_as != NULL) {
ret = vhost_memory_region_lookup(dev, iotlb.translated_addr,
&uaddr, &len);
if (ret) {
@@ -1049,14 +1063,14 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev,
uint64_t iova, int write)
len = MIN(iotlb.addr_mask + 1, len);
iova = iova & ~iotlb.addr_mask;
+ perm = iotlb.perm;
+ }
- ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
- len, iotlb.perm);
- if (ret) {
- trace_vhost_iotlb_miss(dev, 4);
- error_report("Fail to update device iotlb");
- goto out;
- }
+ ret = vhost_backend_update_device_iotlb(dev, iova, uaddr, len, perm);
+ if (ret) {
+ trace_vhost_iotlb_miss(dev, 4);
+ error_report("Fail to update device iotlb");
+ goto out;
}
trace_vhost_iotlb_miss(dev, 2);
@@ -1249,7 +1263,7 @@ static int vhost_sw_live_migration_stop(struct vhost_dev
*dev)
if (r) {
error_report("Fail to invalidate device iotlb");
}
-
+ vhost_iova_tree_destroy(&dev->iova_map);
for (idx = 0; idx < dev->nvqs; ++idx) {
struct vhost_virtqueue *vq = dev->vqs + idx;
if (vhost_dev_has_iommu(dev) &&
@@ -1279,6 +1293,26 @@ static int vhost_sw_live_migration_stop(struct vhost_dev
*dev)
return 0;
}
+static bool vhost_shadow_vq_start_store_sections(struct vhost_dev *dev)
+{
+ int idx;
+
+ for (idx = 0; idx < dev->n_mem_sections; ++idx) {
+ size_t region_size = dev->mem->regions[idx].memory_size;
+ VhostDMAMap region = {
+ .iova = dev->mem->regions[idx].userspace_addr,
+ .translated_addr = (void *)dev->mem->regions[idx].userspace_addr,
+ .size = region_size - 1,
+ .perm = VHOST_ACCESS_RW,
+ };
+
+ VhostDMAMapNewRC r = vhost_iova_tree_insert(&dev->iova_map, &region);
+ assert(r == VHOST_DMA_MAP_OK);
+ }
+
+ return true;
+}
+
/*
* Start shadow virtqueue in a given queue.
* In failure case, this function leaves queue working as regular vhost mode.
@@ -1292,9 +1326,37 @@ static bool vhost_sw_live_migration_start_vq(struct
vhost_dev *dev,
struct vhost_vring_state s = {
.index = idx,
};
+ VhostDMAMap driver_region, device_region;
+
int r;
bool ok;
+ assert(dev->shadow_vqs[idx] != NULL);
+ vhost_shadow_vq_get_vring_addr(dev->shadow_vqs[idx], &addr);
+ driver_region = (VhostDMAMap) {
+ .iova = addr.desc_user_addr,
+ .translated_addr = (void *)addr.desc_user_addr,
+
+ /*
+ * DMAMap.size includes the last byte included in the range, while
+ * sizeof marks one past it. Subtract one byte to make them match.
+ */
+ .size = vhost_shadow_vq_driver_area_size(dev->shadow_vqs[idx]) - 1,
+ .perm = VHOST_ACCESS_RO,
+ };
+ device_region = (VhostDMAMap) {
+ .iova = addr.used_user_addr,
+ .translated_addr = (void *)addr.used_user_addr,
+ .size = vhost_shadow_vq_device_area_size(dev->shadow_vqs[idx]) - 1,
+ .perm = VHOST_ACCESS_RW,
+ };
+
+ r = vhost_iova_tree_insert(&dev->iova_map, &driver_region);
+ assert(r == VHOST_DMA_MAP_OK);
+
+ r = vhost_iova_tree_insert(&dev->iova_map, &device_region);
+ assert(r == VHOST_DMA_MAP_OK);
+
vhost_virtqueue_stop(dev, dev->vdev, &dev->vqs[idx], dev->vq_index + idx);
ok = vhost_shadow_vq_start(dev, idx, dev->shadow_vqs[idx]);
if (unlikely(!ok)) {
@@ -1302,7 +1364,6 @@ static bool vhost_sw_live_migration_start_vq(struct
vhost_dev *dev,
}
/* From this point, vhost_virtqueue_start can reset these changes */
- vhost_shadow_vq_get_vring_addr(dev->shadow_vqs[idx], &addr);
r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
if (unlikely(r != 0)) {
VHOST_OPS_DEBUG("vhost_set_vring_addr for shadow vq failed");
@@ -1315,6 +1376,7 @@ static bool vhost_sw_live_migration_start_vq(struct
vhost_dev *dev,
goto err;
}
+
if (vhost_dev_has_iommu(dev) && dev->vhost_ops->vhost_set_iotlb_callback) {
/*
* Update used ring information for IOTLB to work correctly,
@@ -1357,6 +1419,15 @@ static int vhost_sw_live_migration_start(struct
vhost_dev *dev)
error_report("Fail to invalidate device iotlb");
}
+ /*
+ * Create new iova mappings. SVQ always exposes qemu's VA.
+ * TODO: Fine tune the exported mapping. Default vhost does not expose
+ * everything.
+ */
+
+ vhost_iova_tree_new(&dev->iova_map);
+ vhost_shadow_vq_start_store_sections(dev);
+
/* Can be read by vhost_virtqueue_mask, from vm exit */
dev->shadow_vqs_enabled = true;
for (idx = 0; idx < dev->nvqs; ++idx) {
--
2.27.0
- [RFC v3 13/29] vhost: Add vhost_get_iova_range operation, (continued)
- [RFC v3 13/29] vhost: Add vhost_get_iova_range operation, Eugenio Pérez, 2021/05/19
- [RFC v3 14/29] vhost: add vhost_has_limited_iova_range, Eugenio Pérez, 2021/05/19
- [RFC v3 15/29] vhost: Add enable_custom_iommu to VhostOps, Eugenio Pérez, 2021/05/19
- [RFC v3 16/29] vhost-vdpa: Add vhost_vdpa_enable_custom_iommu, Eugenio Pérez, 2021/05/19
- [RFC v3 17/29] vhost: Shadow virtqueue buffers forwarding, Eugenio Pérez, 2021/05/19
- [RFC v3 18/29] vhost: Use vhost_enable_custom_iommu to unmap everything if available, Eugenio Pérez, 2021/05/19
- [RFC v3 19/29] vhost: Check for device VRING_USED_F_NO_NOTIFY at shadow virtqueue kick, Eugenio Pérez, 2021/05/19
- [RFC v3 20/29] vhost: Use VRING_AVAIL_F_NO_INTERRUPT at device call on shadow virtqueue, Eugenio Pérez, 2021/05/19
- [RFC v3 21/29] vhost: Add VhostIOVATree, Eugenio Pérez, 2021/05/19
- [RFC v3 22/29] vhost: Add iova_rev_maps_find_iova to IOVAReverseMaps, Eugenio Pérez, 2021/05/19
- [RFC v3 23/29] vhost: Use a tree to store memory mappings,
Eugenio Pérez <=
- [RFC v3 24/29] vhost: Add iova_rev_maps_alloc, Eugenio Pérez, 2021/05/19
- [RFC v3 25/29] vhost: Add custom IOTLB translations to SVQ, Eugenio Pérez, 2021/05/19
- [RFC v3 26/29] vhost: Map in vdpa-dev, Eugenio Pérez, 2021/05/19
- [RFC v3 27/29] vhost-vdpa: Implement vhost_vdpa_vring_pause operation, Eugenio Pérez, 2021/05/19
- [RFC v3 28/29] vhost-vdpa: never map with vDPA listener, Eugenio Pérez, 2021/05/19
- [RFC v3 29/29] vhost: Start vhost-vdpa SVQ directly, Eugenio Pérez, 2021/05/19
- Re: [RFC v3 00/29] vDPA software assisted live migration, Michael S. Tsirkin, 2021/05/24