[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spe
From: |
Michael S. Tsirkin |
Subject: |
Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec |
Date: |
Tue, 21 Jul 2020 09:40:10 -0400 |
On Fri, Jul 17, 2020 at 11:29:28AM +0200, Marc Hartmayer wrote:
> Since virtio existed even before it got standardized, the virtio
> standard defines the following types of virtio devices:
>
> + legacy device (pre-virtio 1.0)
> + non-legacy or VIRTIO 1.0 device
> + transitional device (which can act both as legacy and non-legacy)
>
> Virtio 1.0 defines the fields of the virtqueues as little endian,
> while legacy uses guest's native endian [1]. Currently libvhost-user
> does not handle virtio endianness at all, i.e. it works only if the
> native endianness matches with whatever is actually needed. That means
> things break spectacularly on big-endian targets. Let us handle virtio
> endianness for non-legacy as required by the virtio specification
> [1]. We will fence non-legacy virtio devices with the upcoming patch.
>
> [1]
> https://docs.oasis-open.org/virtio/virtio/v1.1/cs01/virtio-v1.1-cs01.html#x1-210003
>
> Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
>
> ---
> Note: As we don't support legacy virtio devices
Who's "we" in this sentence? vhost user supports legacy generally ...
> there is dead code in
> libvhost-access.h that could be removed. But for the sake of
> completeness, I left it in the code.
> ---
> contrib/libvhost-user/libvhost-access.h | 61 ++++++++++++
> contrib/libvhost-user/libvhost-user.c | 119 ++++++++++++------------
> 2 files changed, 121 insertions(+), 59 deletions(-)
> create mode 100644 contrib/libvhost-user/libvhost-access.h
>
> diff --git a/contrib/libvhost-user/libvhost-access.h
> b/contrib/libvhost-user/libvhost-access.h
> new file mode 100644
> index 000000000000..868ba3e7bfb8
> --- /dev/null
> +++ b/contrib/libvhost-user/libvhost-access.h
> @@ -0,0 +1,61 @@
> +#ifndef LIBVHOST_ACCESS_H
> +
> +#include "qemu/bswap.h"
> +
> +#include "libvhost-user.h"
> +
> +static inline bool vu_access_is_big_endian(VuDev *dev)
> +{
> + /* Devices conforming to VIRTIO 1.0 or later are always LE. */
> + return false;
> +}
> +
> +static inline void vu_stw_p(VuDev *vdev, void *ptr, uint16_t v)
> +{
> + if (vu_access_is_big_endian(vdev)) {
> + stw_be_p(ptr, v);
> + } else {
> + stw_le_p(ptr, v);
> + }
> +}
> +
> +static inline void vu_stl_p(VuDev *vdev, void *ptr, uint32_t v)
> +{
> + if (vu_access_is_big_endian(vdev)) {
> + stl_be_p(ptr, v);
> + } else {
> + stl_le_p(ptr, v);
> + }
> +}
> +
> +static inline void vu_stq_p(VuDev *vdev, void *ptr, uint64_t v)
> +{
> + if (vu_access_is_big_endian(vdev)) {
> + stq_be_p(ptr, v);
> + } else {
> + stq_le_p(ptr, v);
> + }
> +}
> +
> +static inline int vu_lduw_p(VuDev *vdev, const void *ptr)
> +{
> + if (vu_access_is_big_endian(vdev))
> + return lduw_be_p(ptr);
> + return lduw_le_p(ptr);
> +}
> +
> +static inline int vu_ldl_p(VuDev *vdev, const void *ptr)
> +{
> + if (vu_access_is_big_endian(vdev))
> + return ldl_be_p(ptr);
> + return ldl_le_p(ptr);
> +}
> +
> +static inline uint64_t vu_ldq_p(VuDev *vdev, const void *ptr)
> +{
> + if (vu_access_is_big_endian(vdev))
> + return ldq_be_p(ptr);
> + return ldq_le_p(ptr);
> +}
> +
> +#endif /* LIBVHOST_ACCESS_H */
> diff --git a/contrib/libvhost-user/libvhost-user.c
> b/contrib/libvhost-user/libvhost-user.c
> index d315db139606..0214b04c5291 100644
> --- a/contrib/libvhost-user/libvhost-user.c
> +++ b/contrib/libvhost-user/libvhost-user.c
> @@ -45,6 +45,7 @@
> #include "qemu/memfd.h"
>
> #include "libvhost-user.h"
> +#include "libvhost-access.h"
>
> /* usually provided by GLib */
> #ifndef MIN
> @@ -1074,7 +1075,7 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
> return false;
> }
>
> - vq->used_idx = vq->vring.used->idx;
> + vq->used_idx = vu_lduw_p(dev, &vq->vring.used->idx);
>
> if (vq->last_avail_idx != vq->used_idx) {
> bool resume = dev->iface->queue_is_processed_in_order &&
> @@ -1191,7 +1192,7 @@ vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
> return 0;
> }
>
> - vq->used_idx = vq->vring.used->idx;
> + vq->used_idx = vu_lduw_p(dev, &vq->vring.used->idx);
> vq->resubmit_num = 0;
> vq->resubmit_list = NULL;
> vq->counter = 0;
> @@ -2019,35 +2020,35 @@ vu_queue_started(const VuDev *dev, const VuVirtq *vq)
> }
>
> static inline uint16_t
> -vring_avail_flags(VuVirtq *vq)
> +vring_avail_flags(VuDev *dev, VuVirtq *vq)
> {
> - return vq->vring.avail->flags;
> + return vu_lduw_p(dev, &vq->vring.avail->flags);
> }
>
> static inline uint16_t
> -vring_avail_idx(VuVirtq *vq)
> +vring_avail_idx(VuDev *dev, VuVirtq *vq)
> {
> - vq->shadow_avail_idx = vq->vring.avail->idx;
> + vq->shadow_avail_idx = vu_lduw_p(dev, &vq->vring.avail->idx);
>
> return vq->shadow_avail_idx;
> }
>
> static inline uint16_t
> -vring_avail_ring(VuVirtq *vq, int i)
> +vring_avail_ring(VuDev *dev, VuVirtq *vq, int i)
> {
> - return vq->vring.avail->ring[i];
> + return vu_lduw_p(dev, &vq->vring.avail->ring[i]);
> }
>
> static inline uint16_t
> -vring_get_used_event(VuVirtq *vq)
> +vring_get_used_event(VuDev *dev, VuVirtq *vq)
> {
> - return vring_avail_ring(vq, vq->vring.num);
> + return vring_avail_ring(dev, vq, vq->vring.num);
> }
>
> static int
> virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
> {
> - uint16_t num_heads = vring_avail_idx(vq) - idx;
> + uint16_t num_heads = vring_avail_idx(dev, vq) - idx;
>
> /* Check it isn't doing very strange things with descriptor numbers. */
> if (num_heads > vq->vring.num) {
> @@ -2070,7 +2071,7 @@ virtqueue_get_head(VuDev *dev, VuVirtq *vq,
> {
> /* Grab the next descriptor number they're advertising, and increment
> * the index we've seen. */
> - *head = vring_avail_ring(vq, idx % vq->vring.num);
> + *head = vring_avail_ring(dev, vq, idx % vq->vring.num);
>
> /* If their number is silly, that's a fatal mistake. */
> if (*head >= vq->vring.num) {
> @@ -2123,12 +2124,12 @@ virtqueue_read_next_desc(VuDev *dev, struct
> vring_desc *desc,
> int i, unsigned int max, unsigned int *next)
> {
> /* If this descriptor says it doesn't chain, we're done. */
> - if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
> + if (!(vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_NEXT)) {
> return VIRTQUEUE_READ_DESC_DONE;
> }
>
> /* Check they're not leading us off end of descriptors. */
> - *next = desc[i].next;
> + *next = vu_lduw_p(dev, &desc[i].next);
> /* Make sure compiler knows to grab that: we don't want it changing! */
> smp_wmb();
>
> @@ -2171,8 +2172,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq,
> unsigned int *in_bytes,
> }
> desc = vq->vring.desc;
>
> - if (desc[i].flags & VRING_DESC_F_INDIRECT) {
> - if (desc[i].len % sizeof(struct vring_desc)) {
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_INDIRECT) {
> + if (vu_ldl_p(dev, &desc[i].len) % sizeof(struct vring_desc)) {
> vu_panic(dev, "Invalid size for indirect buffer table");
> goto err;
> }
> @@ -2185,8 +2186,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq,
> unsigned int *in_bytes,
>
> /* loop over the indirect descriptor table */
> indirect = 1;
> - desc_addr = desc[i].addr;
> - desc_len = desc[i].len;
> + desc_addr = vu_ldq_p(dev, &desc[i].addr);
> + desc_len = vu_ldl_p(dev, &desc[i].len);
> max = desc_len / sizeof(struct vring_desc);
> read_len = desc_len;
> desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2213,10 +2214,10 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq,
> unsigned int *in_bytes,
> goto err;
> }
>
> - if (desc[i].flags & VRING_DESC_F_WRITE) {
> - in_total += desc[i].len;
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_WRITE) {
> + in_total += vu_ldl_p(dev, &desc[i].len);
> } else {
> - out_total += desc[i].len;
> + out_total += vu_ldl_p(dev, &desc[i].len);
> }
> if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
> goto done;
> @@ -2277,7 +2278,7 @@ vu_queue_empty(VuDev *dev, VuVirtq *vq)
> return false;
> }
>
> - return vring_avail_idx(vq) == vq->last_avail_idx;
> + return vring_avail_idx(dev, vq) == vq->last_avail_idx;
> }
>
> static bool
> @@ -2296,14 +2297,14 @@ vring_notify(VuDev *dev, VuVirtq *vq)
> }
>
> if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
> - return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
> + return !(vring_avail_flags(dev, vq) & VRING_AVAIL_F_NO_INTERRUPT);
> }
>
> v = vq->signalled_used_valid;
> vq->signalled_used_valid = true;
> old = vq->signalled_used;
> new = vq->signalled_used = vq->used_idx;
> - return !v || vring_need_event(vring_get_used_event(vq), new, old);
> + return !v || vring_need_event(vring_get_used_event(dev, vq), new, old);
> }
>
> static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
> @@ -2361,33 +2362,33 @@ void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq)
> }
>
> static inline void
> -vring_used_flags_set_bit(VuVirtq *vq, int mask)
> +vring_used_flags_set_bit(VuDev *dev, VuVirtq *vq, int mask)
> +{
> + uint16_t *flags;
> +
> + flags = (uint16_t *)(char*)vq->vring.used +
> + offsetof(struct vring_used, flags);
> + vu_stw_p(dev, flags, vu_lduw_p(dev, flags) | mask);
> +}
> +
> +static inline void
> +vring_used_flags_unset_bit(VuDev *dev, VuVirtq *vq, int mask)
> {
> uint16_t *flags;
>
> flags = (uint16_t *)((char*)vq->vring.used +
> offsetof(struct vring_used, flags));
> - *flags |= mask;
> + vu_stw_p(dev, flags, vu_lduw_p(dev, flags) & ~mask);
> }
>
> static inline void
> -vring_used_flags_unset_bit(VuVirtq *vq, int mask)
> -{
> - uint16_t *flags;
> -
> - flags = (uint16_t *)((char*)vq->vring.used +
> - offsetof(struct vring_used, flags));
> - *flags &= ~mask;
> -}
> -
> -static inline void
> -vring_set_avail_event(VuVirtq *vq, uint16_t val)
> +vring_set_avail_event(VuDev *dev, VuVirtq *vq, uint16_t val)
> {
> if (!vq->notification) {
> return;
> }
>
> - *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val;
> + vu_stw_p(dev, &vq->vring.used->ring[vq->vring.num], val);
> }
>
> void
> @@ -2395,11 +2396,11 @@ vu_queue_set_notification(VuDev *dev, VuVirtq *vq,
> int enable)
> {
> vq->notification = enable;
> if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
> - vring_set_avail_event(vq, vring_avail_idx(vq));
> + vring_set_avail_event(dev, vq, vring_avail_idx(dev, vq));
> } else if (enable) {
> - vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
> + vring_used_flags_unset_bit(dev, vq, VRING_USED_F_NO_NOTIFY);
> } else {
> - vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
> + vring_used_flags_set_bit(dev, vq, VRING_USED_F_NO_NOTIFY);
> }
> if (enable) {
> /* Expose avail event/used flags before caller checks the avail idx.
> */
> @@ -2476,14 +2477,14 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned
> int idx, size_t sz)
> struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
> int rc;
>
> - if (desc[i].flags & VRING_DESC_F_INDIRECT) {
> - if (desc[i].len % sizeof(struct vring_desc)) {
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_INDIRECT) {
> + if (vu_ldl_p(dev, &desc[i].len) % sizeof(struct vring_desc)) {
> vu_panic(dev, "Invalid size for indirect buffer table");
> }
>
> /* loop over the indirect descriptor table */
> - desc_addr = desc[i].addr;
> - desc_len = desc[i].len;
> + desc_addr = vu_ldq_p(dev, &desc[i].addr);
> + desc_len = vu_ldl_p(dev, &desc[i].len);
> max = desc_len / sizeof(struct vring_desc);
> read_len = desc_len;
> desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2505,10 +2506,10 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned
> int idx, size_t sz)
>
> /* Collect all the descriptors */
> do {
> - if (desc[i].flags & VRING_DESC_F_WRITE) {
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_WRITE) {
> virtqueue_map_desc(dev, &in_num, iov + out_num,
> VIRTQUEUE_MAX_SIZE - out_num, true,
> - desc[i].addr, desc[i].len);
> + vu_ldq_p(dev, &desc[i].addr), vu_ldl_p(dev,
> &desc[i].len));
> } else {
> if (in_num) {
> vu_panic(dev, "Incorrect order for descriptors");
> @@ -2516,7 +2517,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int
> idx, size_t sz)
> }
> virtqueue_map_desc(dev, &out_num, iov,
> VIRTQUEUE_MAX_SIZE, false,
> - desc[i].addr, desc[i].len);
> + vu_ldq_p(dev, &desc[i].addr), vu_ldl_p(dev,
> &desc[i].len));
> }
>
> /* If we've got too many, that implies a descriptor loop. */
> @@ -2642,7 +2643,7 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
> }
>
> if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
> - vring_set_avail_event(vq, vq->last_avail_idx);
> + vring_set_avail_event(dev, vq, vq->last_avail_idx);
> }
>
> elem = vu_queue_map_desc(dev, vq, head, sz);
> @@ -2712,14 +2713,14 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
> max = vq->vring.num;
> i = elem->index;
>
> - if (desc[i].flags & VRING_DESC_F_INDIRECT) {
> - if (desc[i].len % sizeof(struct vring_desc)) {
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_INDIRECT) {
> + if (vu_ldl_p(dev, &desc[i].len) % sizeof(struct vring_desc)) {
> vu_panic(dev, "Invalid size for indirect buffer table");
> }
>
> /* loop over the indirect descriptor table */
> - desc_addr = desc[i].addr;
> - desc_len = desc[i].len;
> + desc_addr = vu_ldq_p(dev, &desc[i].addr);
> + desc_len = vu_ldl_p(dev, &desc[i].len);
> max = desc_len / sizeof(struct vring_desc);
> read_len = desc_len;
> desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2745,9 +2746,9 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
> return;
> }
>
> - if (desc[i].flags & VRING_DESC_F_WRITE) {
> - min = MIN(desc[i].len, len);
> - vu_log_write(dev, desc[i].addr, min);
> + if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_WRITE) {
> + min = MIN(vu_ldl_p(dev, &desc[i].len), len);
> + vu_log_write(dev, vu_ldq_p(dev, &desc[i].addr), min);
> len -= min;
> }
>
> @@ -2772,15 +2773,15 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq,
>
> idx = (idx + vq->used_idx) % vq->vring.num;
>
> - uelem.id = elem->index;
> - uelem.len = len;
> + vu_stl_p(dev, &uelem.id, elem->index);
> + vu_stl_p(dev, &uelem.len, len);
> vring_used_write(dev, vq, &uelem, idx);
> }
>
> static inline
> void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
> {
> - vq->vring.used->idx = val;
> + vu_stw_p(dev, &vq->vring.used->idx, val);
> vu_log_write(dev,
> vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
> sizeof(vq->vring.used->idx));
> --
> 2.25.4
- [RFC v2 0/3] Enable virtio-fs on s390x, Marc Hartmayer, 2020/07/17
- [RFC v2 1/3] virtio: add vhost-user-fs-ccw device, Marc Hartmayer, 2020/07/17
- [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec, Marc Hartmayer, 2020/07/17
- Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec, Stefan Hajnoczi, 2020/07/21
- Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec,
Michael S. Tsirkin <=
- Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec, Halil Pasic, 2020/07/21
- Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec, Cornelia Huck, 2020/07/28
- Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec, Halil Pasic, 2020/07/28
- Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec, Marc Hartmayer, 2020/07/28
- Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec, Michael S. Tsirkin, 2020/07/29
- Re: [RFC v2 2/3] libvhost-user: handle endianness as mandated by the spec, Marc Hartmayer, 2020/07/29
- [RFC v2 3/3] libvhost-user: fence legacy virtio devices, Marc Hartmayer, 2020/07/17
- Re: [RFC v2 0/3] Enable virtio-fs on s390x, no-reply, 2020/07/17
- Re: [RFC v2 0/3] Enable virtio-fs on s390x, no-reply, 2020/07/17