
Re: [PATCH-for-9.1] rdma: Remove RDMA subsystem and pvrdma device


From: Paolo Bonzini
Subject: Re: [PATCH-for-9.1] rdma: Remove RDMA subsystem and pvrdma device
Date: Thu, 28 Mar 2024 08:51:34 +0100



On Wed, 27 Mar 2024 at 11:56, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
The whole RDMA subsystem was deprecated in commit e9a54265f5
("hw/rdma: Deprecate the pvrdma device and the rdma subsystem")
released in v8.2. Time to remove it.

Keep the RAM_SAVE_FLAG_HOOK definition since it might appear
in old migration streams.
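
A minimal sketch of what keeping the flag buys (illustrative only, not the
patch itself; the flag value matches migration/ram.c, the helper function is
hypothetical): an incoming stream from an older QEMU may still carry the bit,
and keeping the definition lets the loader recognize and reject it cleanly.

    #include <stdint.h>
    #include <errno.h>

    #define RAM_SAVE_FLAG_HOOK 0x80    /* kept: may appear in old streams */

    /* Hypothetical helper: fail cleanly instead of misparsing the stream. */
    static int check_page_flags(uint64_t flags)
    {
        if (flags & RAM_SAVE_FLAG_HOOK) {
            return -EINVAL;    /* RDMA hook is no longer supported */
        }
        return 0;
    }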

Remove the dependencies on libibumad and libibverbs.

Remove the generated vmw_pvrdma/ directory from linux-headers.

Remove RDMA handling from migration.

Remove RDMA handling in GlusterFS block driver.

I don't think these two were deprecated? They are unrelated to pvrdma.

Paolo


Remove rdmacm-mux tool from contrib/.

Remove PVRDMA device.

Cc: Peter Xu <peterx@redhat.com>
Cc: Li Zhijian <lizhijian@fujitsu.com>
Cc: Yuval Shaia <yuval.shaia.ml@gmail.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
 MAINTAINERS                                   |   17 -
 docs/about/deprecated.rst                     |    9 -
 docs/about/removed-features.rst               |    4 +
 docs/devel/migration/main.rst                 |    6 -
 docs/pvrdma.txt                               |  345 --
 docs/rdma.txt                                 |  420 --
 docs/system/device-url-syntax.rst.inc         |    4 +-
 docs/system/loongarch/virt.rst                |    2 +-
 docs/system/qemu-block-drivers.rst.inc        |    1 -
 meson.build                                   |   59 -
 qapi/machine.json                             |   17 -
 qapi/migration.json                           |   31 +-
 qapi/qapi-schema.json                         |    1 -
 qapi/rdma.json                                |   38 -
 contrib/rdmacm-mux/rdmacm-mux.h               |   61 -
 hw/rdma/rdma_backend.h                        |  129 -
 hw/rdma/rdma_backend_defs.h                   |   76 -
 hw/rdma/rdma_rm.h                             |   97 -
 hw/rdma/rdma_rm_defs.h                        |  146 -
 hw/rdma/rdma_utils.h                          |   63 -
 hw/rdma/trace.h                               |    1 -
 hw/rdma/vmw/pvrdma.h                          |  144 -
 hw/rdma/vmw/pvrdma_dev_ring.h                 |   46 -
 hw/rdma/vmw/pvrdma_qp_ops.h                   |   28 -
 hw/rdma/vmw/trace.h                           |    1 -
 include/hw/rdma/rdma.h                        |   37 -
 include/monitor/hmp.h                         |    1 -
 .../infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h |  685 ---
 .../infiniband/hw/vmw_pvrdma/pvrdma_verbs.h   |  348 --
 .../standard-headers/rdma/vmw_pvrdma-abi.h    |  310 --
 migration/migration-stats.h                   |    6 +-
 migration/migration.h                         |    9 -
 migration/options.h                           |    2 -
 migration/rdma.h                              |   69 -
 block/gluster.c                               |   39 -
 contrib/rdmacm-mux/main.c                     |  831 ----
 hw/core/machine-qmp-cmds.c                    |   32 -
 hw/rdma/rdma.c                                |   30 -
 hw/rdma/rdma_backend.c                        | 1401 ------
 hw/rdma/rdma_rm.c                             |  812 ----
 hw/rdma/rdma_utils.c                          |  126 -
 hw/rdma/vmw/pvrdma_cmd.c                      |  815 ----
 hw/rdma/vmw/pvrdma_dev_ring.c                 |  141 -
 hw/rdma/vmw/pvrdma_main.c                     |  735 ---
 hw/rdma/vmw/pvrdma_qp_ops.c                   |  298 --
 migration/migration-stats.c                   |    5 +-
 migration/migration.c                         |   31 -
 migration/options.c                           |   16 -
 migration/qemu-file.c                         |    1 -
 migration/ram.c                               |   86 +-
 migration/rdma.c                              | 4184 -----------------
 migration/savevm.c                            |    2 +-
 monitor/qmp-cmds.c                            |    1 -
 Kconfig.host                                  |    3 -
 contrib/rdmacm-mux/meson.build                |    7 -
 hmp-commands-info.hx                          |   13 -
 hw/Kconfig                                    |    1 -
 hw/meson.build                                |    1 -
 hw/rdma/Kconfig                               |    3 -
 hw/rdma/meson.build                           |   12 -
 hw/rdma/trace-events                          |   31 -
 hw/rdma/vmw/trace-events                      |   17 -
 meson_options.txt                             |    4 -
 migration/meson.build                         |    1 -
 migration/trace-events                        |   68 +-
 qapi/meson.build                              |    1 -
 qemu-options.hx                               |    6 -
 .../ci/org.centos/stream/8/x86_64/configure   |    1 -
 scripts/ci/setup/build-environment.yml        |    2 -
 scripts/coverity-scan/run-coverity-scan       |    2 +-
 scripts/meson-buildoptions.sh                 |    6 -
 scripts/update-linux-headers.sh               |   27 -
 tests/lcitool/projects/qemu.yml               |    2 -
 tests/migration/guestperf/engine.py           |    4 +-
 74 files changed, 20 insertions(+), 12991 deletions(-)
 delete mode 100644 docs/pvrdma.txt
 delete mode 100644 docs/rdma.txt
 delete mode 100644 qapi/rdma.json
 delete mode 100644 contrib/rdmacm-mux/rdmacm-mux.h
 delete mode 100644 hw/rdma/rdma_backend.h
 delete mode 100644 hw/rdma/rdma_backend_defs.h
 delete mode 100644 hw/rdma/rdma_rm.h
 delete mode 100644 hw/rdma/rdma_rm_defs.h
 delete mode 100644 hw/rdma/rdma_utils.h
 delete mode 100644 hw/rdma/trace.h
 delete mode 100644 hw/rdma/vmw/pvrdma.h
 delete mode 100644 hw/rdma/vmw/pvrdma_dev_ring.h
 delete mode 100644 hw/rdma/vmw/pvrdma_qp_ops.h
 delete mode 100644 hw/rdma/vmw/trace.h
 delete mode 100644 include/hw/rdma/rdma.h
 delete mode 100644 include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
 delete mode 100644 include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
 delete mode 100644 include/standard-headers/rdma/vmw_pvrdma-abi.h
 delete mode 100644 migration/rdma.h
 delete mode 100644 contrib/rdmacm-mux/main.c
 delete mode 100644 hw/rdma/rdma.c
 delete mode 100644 hw/rdma/rdma_backend.c
 delete mode 100644 hw/rdma/rdma_rm.c
 delete mode 100644 hw/rdma/rdma_utils.c
 delete mode 100644 hw/rdma/vmw/pvrdma_cmd.c
 delete mode 100644 hw/rdma/vmw/pvrdma_dev_ring.c
 delete mode 100644 hw/rdma/vmw/pvrdma_main.c
 delete mode 100644 hw/rdma/vmw/pvrdma_qp_ops.c
 delete mode 100644 migration/rdma.c
 delete mode 100644 contrib/rdmacm-mux/meson.build
 delete mode 100644 hw/rdma/Kconfig
 delete mode 100644 hw/rdma/meson.build
 delete mode 100644 hw/rdma/trace-events
 delete mode 100644 hw/rdma/vmw/trace-events

diff --git a/MAINTAINERS b/MAINTAINERS
index a07af6b9d4..05226cea0a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3426,13 +3426,6 @@ F: docs/devel/migration.rst
 F: qapi/migration.json
 F: tests/migration/
 F: util/userfaultfd.c
-X: migration/rdma*
-
-RDMA Migration
-R: Li Zhijian <lizhijian@fujitsu.com>
-R: Peter Xu <peterx@redhat.com>
-S: Odd Fixes
-F: migration/rdma*

 Migration dirty limit and dirty page rate
 M: Hyman Huang <yong.huang@smartx.com>
@@ -4060,16 +4053,6 @@ F: block/replication.c
 F: tests/unit/test-replication.c
 F: docs/block-replication.txt

-PVRDMA
-M: Yuval Shaia <yuval.shaia.ml@gmail.com>
-M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
-S: Odd Fixes
-F: hw/rdma/*
-F: hw/rdma/vmw/*
-F: docs/pvrdma.txt
-F: contrib/rdmacm-mux/*
-F: qapi/rdma.json
-
 Semihosting
 M: Alex Bennée <alex.bennee@linaro.org>
 S: Maintained
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 7b548519b5..29eae69e50 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -376,15 +376,6 @@ recommending to switch to their stable counterparts:
 - "Zve64f" should be replaced with "zve64f"
 - "Zve64d" should be replaced with "zve64d"

-``-device pvrdma`` and the rdma subsystem (since 8.2)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The pvrdma device and the whole rdma subsystem are in a bad shape and
-without active maintenance. The QEMU project intends to remove this
-device and subsystem from the code base in a future release without
-replacement unless somebody steps up and improves the situation.
-
-
 Block device options
 ''''''''''''''''''''

diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst
index f9cf874f7b..4d5bdc43b4 100644
--- a/docs/about/removed-features.rst
+++ b/docs/about/removed-features.rst
@@ -909,6 +909,10 @@ contains native support for this feature and thus use of the option
 ROM approach was obsolete. The native SeaBIOS support can be activated
 by using ``-machine graphics=off``.

+``pvrdma`` and the RDMA subsystem (removed in 9.1)
+''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The 'pvrdma' device and the whole RDMA subsystem have been removed.

 Related binaries
 ----------------
diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst
index 54385a23e5..70278ce1e3 100644
--- a/docs/devel/migration/main.rst
+++ b/docs/devel/migration/main.rst
@@ -47,12 +47,6 @@ over any transport.
   QEMU interference. Note that QEMU does not flush cached file
   data/metadata at the end of migration.

-In addition, support is included for migration using RDMA, which
-transports the page data using ``RDMA``, where the hardware takes care of
-transporting the pages, and the load on the CPU is much lower.  While the
-internals of RDMA migration are a bit different, this isn't really visible
-outside the RAM migration code.
-
 All these migration protocols use the same infrastructure to
 save/restore state devices.  This infrastructure is shared with the
 savevm/loadvm functionality.
diff --git a/docs/pvrdma.txt b/docs/pvrdma.txt
deleted file mode 100644
index 5c122fe818..0000000000
--- a/docs/pvrdma.txt
+++ /dev/null
@@ -1,345 +0,0 @@
-Paravirtualized RDMA Device (PVRDMA)
-====================================
-
-
-1. Description
-===============
-PVRDMA is the QEMU implementation of VMware's paravirtualized RDMA device.
-It works with its Linux Kernel driver AS IS, no need for any special guest
-modifications.
-
-While it conforms to the VMware device interface, it can also communicate
-with bare-metal RDMA-enabled machines as peers.
-
-It does not require an RDMA HCA in the host, it can work with Soft-RoCE (rxe).
-
-It does not require the whole guest RAM to be pinned, allowing memory
-over-commit; and, although not implemented yet, migration support will be
-possible with some HW assistance.
-
-A project presentation accompanies this document:
-- https://blog.linuxplumbersconf.org/2017/ocw/system/presentations/4730/original/lpc-2017-pvrdma-marcel-apfelbaum-yuval-shaia.pdf
-
-
-
-2. Setup
-========
-
-
-2.1 Guest setup
-===============
-Fedora 27+ kernels work out of the box, older distributions
-require updating the kernel to 4.14 to include the pvrdma driver.
-
-However, the libpvrdma library needed by user-level software is still
-not available as part of the distributions, so the rdma-core library
-needs to be compiled and optionally installed.
-
-Please follow the instructions at:
-https://github.com/linux-rdma/rdma-core.git
-
-
-2.2 Host Setup
-==============
-The pvrdma backend is an ibdevice interface that can be exposed
-either by a Soft-RoCE(rxe) device on machines with no RDMA device,
-or an HCA SRIOV function(VF/PF).
-Note that ibdevice interfaces can't be shared between pvrdma devices,
-each one requiring a separate instance (rxe or SRIOV VF).
-
-
-2.2.1 Soft-RoCE backend(rxe)
-===========================
-A stable version of rxe is required, Fedora 27+ or a Linux
-Kernel 4.14+ is preferred.
-
-The rdma_rxe module is part of the Linux Kernel but not loaded by default.
-Install the User Level library (librxe) following the instructions from:
-https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
-
-Associate an ETH interface with rxe by running:
-   rxe_cfg add eth0
-An rxe0 ibdevice interface will be created and can be used as pvrdma backend.
-
-
-2.2.2 RDMA device Virtual Function backend
-==========================================
-Nothing special is required; the pvrdma device can work not only with
-Ethernet links, but also with InfiniBand links.
-All that is needed is an ibdevice with an active port; for Mellanox cards
-this will be something like mlx5_6, which can serve as the backend.
-
-
-2.2.3 QEMU setup
-================
-Configure QEMU with the --enable-rdma flag, after installing
-the required RDMA libraries.
-
-
-
-3. Usage
-========
-
-
-3.1 VM Memory settings
-======================
-Currently the device works only with memory-backed RAM
-and it must be marked as "shared":
-   -m 1G \
-   -object memory-backend-ram,id=mb1,size=1G,share \
-   -numa node,memdev=mb1 \
-
-
-3.2 MAD Multiplexer
-===================
-MAD Multiplexer is a service that exposes a MAD-like interface for VMs in
-order to overcome the limitation where only a single entity can register
-with the MAD layer to send and receive RDMA-CM MAD packets.
-
-To build rdmacm-mux run
-# make rdmacm-mux
-
-Before running the rdmacm-mux make sure that both ib_cm and rdma_cm kernel
-modules aren't loaded, otherwise the rdmacm-mux service will fail to start.
-
-The application accepts 3 command line arguments and exposes a UNIX socket
-to pass control and data to it.
--d rdma-device-name  Name of RDMA device to register with
--s unix-socket-path  Path to unix socket to listen (default /var/run/rdmacm-mux)
--p rdma-device-port  Port number of RDMA device to register with (default 1)
-The final UNIX socket file name is a concatenation of the 3 arguments, so,
-for example, for device mlx5_0 on port 2 the socket
-/var/run/rdmacm-mux-mlx5_0-2 will be created.
-
-pvrdma requires this service.
-
-Please refer to contrib/rdmacm-mux for more details.
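
As a rough illustration of the naming convention above (a sketch, not code
from rdmacm-mux; the helper name is invented), the socket path can be built
from the three arguments like this:

    #include <stdio.h>

    /* Builds e.g. "/var/run/rdmacm-mux-mlx5_0-2" from the three arguments. */
    static void mux_socket_path(char *buf, size_t len, const char *base,
                                const char *ibdev, int port)
    {
        snprintf(buf, len, "%s-%s-%d", base, ibdev, port);
    }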
-
-
-3.3 Service exposed by libvirt daemon
-=====================================
-The control over the RDMA device's GID table is done by updating the
-device's Ethernet function addresses.
-Usually the first GID entry is determined by the MAC address, the second by
-the first IPv6 address and the third by the IPv4 address. Other entries can
-be added by adding more IP addresses. The reverse also applies:
-whenever an address is removed, the corresponding GID entry is removed.
-The process is done by the network and RDMA stacks. Whenever an address is
-added, the ib_core driver is notified and calls the device driver's add_gid
-function, which in turn updates the device.
-To support this, the pvrdma device hooks into the create_bind and
-destroy_bind HW commands triggered by the pvrdma driver in the guest.
-
-Whenever a change is made to the pvrdma port's GID table, a special QMP
-message is sent to be processed by libvirt to update the address of the
-backend Ethernet device.
-
-pvrdma requires the libvirt service to be up.
-
-
-3.4 PCI devices settings
-========================
-A RoCE device exposes two functions - an Ethernet function and an RDMA function.
-To support it, pvrdma device is composed of two PCI functions, an Ethernet
-device of type vmxnet3 on PCI slot 0 and a PVRDMA device on PCI slot 1. The
-Ethernet function can be used for other Ethernet purposes such as IP.
-
-
-3.5 Device parameters
-=====================
-- netdev: Specifies the Ethernet device function name on the host for
-  example enp175s0f0. For Soft-RoCE device (rxe) this would be the Ethernet
-  device used to create it.
-- ibdev: The IB device name on host for example rxe0, mlx5_0 etc.
-- mad-chardev: The name of the MAD multiplexer char device.
-- ibport: In case of a multi-port device (such as Mellanox's HCA) this
-  specifies the port to use. If not set, port 1 will be used.
-- dev-caps-max-mr-size: The maximum size of MR.
-- dev-caps-max-qp:      Maximum number of QPs.
-- dev-caps-max-cq:      Maximum number of CQs.
-- dev-caps-max-mr:      Maximum number of MRs.
-- dev-caps-max-pd:      Maximum number of PDs.
-- dev-caps-max-ah:      Maximum number of AHs.
-
-Notes:
-- The first 3 parameters are mandatory settings; the rest have their
-  defaults.
-- The parameters prefixed by dev-caps define the upper limits, but the
-  final values are adjusted by the backend device limitations.
-- netdev can be extracted from ibdev's sysfs
-  (/sys/class/infiniband/<ibdev>/device/net/)
-
-
-3.6 Example
-===========
-Define bridge device with vmxnet3 network backend:
-<interface type='bridge'>
-  <mac address='56:b4:44:e9:62:dc'/>
-  <source bridge='bridge1'/>
-  <model type='vmxnet3'/>
-  <address type='pci' domain='0x0000' bus='0x00' slot='0x10' function='0x0' multifunction='on'/>
-</interface>
-
-Define pvrdma device:
-<qemu:commandline>
-  <qemu:arg value='-object'/>
-  <qemu:arg value='memory-backend-ram,id=mb1,size=1G,share'/>
-  <qemu:arg value='-numa'/>
-  <qemu:arg value='node,memdev=mb1'/>
-  <qemu:arg value='-chardev'/>
-  <qemu:arg value='socket,path=/var/run/rdmacm-mux-rxe0-1,id=mads'/>
-  <qemu:arg value='-device'/>
-  <qemu:arg value='pvrdma,addr=10.1,ibdev=rxe0,netdev=bridge0,mad-chardev=mads'/>
-</qemu:commandline>
-
-
-
-4. Implementation details
-=========================
-
-
-4.1 Overview
-============
-The device acts like a proxy between the Guest Driver and the host
-ibdevice interface.
-On configuration path:
- - For every hardware resource request (PD/QP/CQ/...) the pvrdma will request
-   a resource from the backend interface, maintaining a 1-1 mapping
-   between the guest and host.
-On data path:
- - Every post_send/receive received from the guest will be converted into
-   a post_send/receive for the backend. The buffer data will not be touched
-   or copied resulting in near bare-metal performance for large enough buffers.
- - Completions from the backend interface will result in completions for
-   the pvrdma device.
-
-
-4.2 PCI BARs
-============
-PCI Bars:
-       BAR 0 - MSI-X
-        MSI-X vectors:
-               (0) Command - used when execution of a command is completed.
-               (1) Async - not in use.
-               (2) Completion - used when a completion event is placed in
-                 device's CQ ring.
-       BAR 1 - Registers
-        --------------------------------------------------------
-        | VERSION |  DSR | CTL | REQ | ERR |  ICR | IMR  | MAC |
-        --------------------------------------------------------
-               DSR - Address of driver/device shared memory used
-              for the command channel, used for passing:
-                           - General info such as driver version
-                           - Address of 'command' and 'response'
-                           - Address of async ring
-                           - Address of device's CQ ring
-                           - Device capabilities
-               CTL - Device control operations (activate, reset etc)
-               IMR - Set interrupt mask
-               REQ - Command execution register
-               ERR - Operation status
-
-       BAR 2 - UAR
-        ---------------------------------------------------------
-        | QP_NUM  | SEND/RECV Flag ||  CQ_NUM |   ARM/POLL Flag |
-        ---------------------------------------------------------
-               - Offset 0 used for QP operations (send and recv)
-               - Offset 4 used for CQ operations (arm and poll)
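
A rough sketch of how a guest driver would ring these doorbells, assuming the
layout above (the flag bit position follows section 4.3.3; the names are
illustrative, not taken from the pvrdma sources):

    #include <stdint.h>

    #define UAR_QP_OFFSET  0           /* send/recv operations */
    #define UAR_CQ_OFFSET  4           /* arm/poll operations */
    #define QP_RECV_BIT    (1u << 31)  /* per 4.3.3: qpn | qp_recv_bit(31) */

    static void ring_recv_doorbell(volatile uint8_t *uar, uint32_t qpn)
    {
        *(volatile uint32_t *)(uar + UAR_QP_OFFSET) = qpn | QP_RECV_BIT;
    }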
-
-
-4.3 Major flows
-===============
-
-4.3.1 Create CQ
-===============
-    - Guest driver
-        - Allocates pages for CQ ring
-        - Creates page directory (pdir) to hold CQ ring's pages
-        - Initializes CQ ring
-        - Initializes 'Create CQ' command object (cqe, pdir etc)
-        - Copies the command to 'command' address
-        - Writes 0 into REQ register
-    - Device
-        - Reads the request object from the 'command' address
-        - Allocates CQ object and initialize CQ ring based on pdir
-        - Creates the backend CQ
-        - Writes operation status to ERR register
-        - Posts command-interrupt to guest
-    - Guest driver
-        - Reads the HW response code from ERR register
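
The same request/response handshake recurs in all of these flows; condensed
into C it could look like the sketch below (names and struct layout are
invented stand-ins, the register roles are those from section 4.2):

    #include <stdint.h>
    #include <string.h>

    struct pvrdma_regs {             /* invented stand-in for BAR 1 */
        volatile uint32_t req;       /* command execution register */
        volatile uint32_t err;       /* operation status */
    };

    /* Hypothetical driver-side helper: copy the request into the shared
     * 'command' buffer, kick the device via REQ, then read back ERR. */
    static int exec_cmd(struct pvrdma_regs *regs, void *cmd_buf,
                        const void *req, size_t len)
    {
        memcpy(cmd_buf, req, len);   /* request into shared memory */
        regs->req = 0;               /* any write triggers execution */
        /* ... wait for the command interrupt ... */
        return regs->err;            /* operation status */
    }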
-
-4.3.2 Create QP
-===============
-    - Guest driver
-        - Allocates pages for send and receive rings
-        - Creates page directory(pdir) to hold the ring's pages
-        - Initializes 'Create QP' command object (max_send_wr,
-          send_cq_handle, recv_cq_handle, pdir etc)
-        - Copies the object to 'command' address
-        - Write 0 into REQ register
-    - Device
-        - Reads the request object from 'command' address
-        - Allocates the QP object and initialize
-            - Send and recv rings based on pdir
-            - Send and recv ring state
-        - Creates the backend QP
-        - Writes the operation status to ERR register
-        - Posts command-interrupt to guest
-    - Guest driver
-        - Reads the HW response code from ERR register
-
-4.3.3 Post receive
-==================
-    - Guest driver
-        - Initializes a wqe and places it on the recv ring
-        - Writes qpn|qp_recv_bit (bit 31) to the QP offset in the UAR
-    - Device
-        - Extracts qpn from UAR
-        - Walks through the ring and does the following for each wqe
-            - Prepares the backend CQE context to be used when
-              receiving completion from backend (wr_id, op_code, emu_cq_num)
-            - For each sge prepares backend sge
-            - Calls backend's post_recv
-
-4.3.4 Process backend events
-============================
-    - Done by a dedicated thread used to process backend events;
-      at initialization it is attached to the device and creates
-      the communication channel.
-    - Thread main loop:
-        - Polls for completions
-        - Extracts QEMU _cq_num, wr_id and op_code from context
-        - Writes CQE to CQ ring
-        - Writes CQ number to device CQ
-        - Sends completion-interrupt to guest
-        - Deallocates context
-        - Acks the event to backend
-
-
-
-5. Limitations
-==============
-- The device is obviously limited by the guest Linux driver's implementation
-  of the VMware device API features.
-- Memory registration mechanism requires mremap for every page in the buffer in order
-  to map it to a contiguous virtual address range. Since this is not the data path
-  it should not matter much. If the default max mr size is increased, be aware that
-  memory registration can take up to 0.5 seconds for 1GB of memory.
-- The device requires target page size to be the same as the host page size,
-  otherwise it will fail to init.
-- QEMU cannot map guest RAM from a file descriptor if a pvrdma device is attached,
-  so it can't work with huge pages. The limitation will be addressed in the future;
-  however, QEMU allocates guest RAM with MADV_HUGEPAGE, so if there are enough huge
-  pages available, QEMU will use them. QEMU will fail to init if the requirements
-  are not met.
-
-
-
-6. Performance
-==============
-By design the pvrdma device exits on each post-send/receive, so for small buffers
-the performance is affected; however for medium buffers it becomes close to
-bare metal, and from 1MB buffers and up it reaches bare-metal performance.
-(tested with 2 VMs, the pvrdma devices connected to 2 VFs of the same device)
-
-All the above assumes no memory registration is done on data path.
diff --git a/docs/rdma.txt b/docs/rdma.txt
deleted file mode 100644
index bd8dd799a9..0000000000
--- a/docs/rdma.txt
+++ /dev/null
@@ -1,420 +0,0 @@
-(RDMA: Remote Direct Memory Access)
-RDMA Live Migration Specification, Version # 1
-==============================================
-Wiki: https://wiki.qemu.org/Features/RDMALiveMigration
-Github: git@github.com:hinesmr/qemu.git, 'rdma' branch
-
-Copyright (C) 2013 Michael R. Hines <mrhines@us.ibm.com>
-
-An *exhaustive* paper (2010) with additional performance details is
-linked on the QEMU wiki above.
-
-Contents:
-=========
-* Introduction
-* Before running
-* Running
-* Performance
-* RDMA Migration Protocol Description
-* Versioning and Capabilities
-* QEMUFileRDMA Interface
-* Migration of VM's ram
-* Error handling
-* TODO
-
-Introduction:
-=============
-
-RDMA helps make your migration more deterministic under heavy load because
-of its significantly lower latency and higher throughput than TCP/IP. This is
-because the RDMA I/O architecture reduces the number of interrupts and
-data copies by bypassing the host networking stack. In particular, a TCP-based
-migration, under certain types of memory-bound workloads, may take a more
-unpredictable amount of time to complete the migration if the amount of
-memory tracked during each live migration iteration round cannot keep pace
-with the rate of dirty memory produced by the workload.
-
-RDMA currently comes in two flavors: Ethernet-based (RoCE, or RDMA
-over Converged Ethernet) and InfiniBand-based. This implementation of
-migration using RDMA is capable of using both technologies because of
-the use of the OpenFabrics OFED software stack that abstracts out the
-programming model irrespective of the underlying hardware.
-
-Refer to openfabrics.org or your respective RDMA hardware vendor for
-an understanding of how to verify that you have the OFED software stack
-installed in your environment. You should be able to successfully link
-against the "librdmacm" and "libibverbs" libraries and development headers
-for a working build of QEMU to run successfully using RDMA Migration.
-
-BEFORE RUNNING:
-===============
-
-Use of RDMA during migration requires pinning and registering memory
-with the hardware. This means that memory must be physically resident
-before the hardware can transmit that memory to another machine.
-If this is not acceptable for your application or product, then the use
-of RDMA migration may in fact be harmful to co-located VMs or other
-software on the machine if there is not sufficient memory available to
-relocate the entire footprint of the virtual machine. If so, then the
-use of RDMA is discouraged and it is recommended to use standard TCP migration.
-
-Experimental: Next, decide whether you want to pin all memory up front
-(the rdma-pin-all capability) instead of the default dynamic page
-registration. For example, if you have an 8GB RAM virtual machine,
-but only 1GB is in active use, then enabling this capability will cause
-all 8GB to be pinned and resident in memory. This mostly affects the
-bulk-phase round of the migration and can be enabled for extremely
-high-performance RDMA hardware using the following command:
-
-QEMU Monitor Command:
-$ migrate_set_capability rdma-pin-all on # disabled by default
-
-Performing this action will cause all 8GB to be pinned, so if that's
-not what you want, then please ignore this step altogether.
-
-On the other hand, this will also significantly speed up the bulk round
-of the migration, which can greatly reduce the "total" time of your migration.
-Example performance of this using an idle VM in the previous example
-can be found in the "Performance" section.
-
-Note: for very large virtual machines (hundreds of GBs), pinning
-*all* of the memory of your virtual machine in the kernel is very expensive
-and may extend the initial bulk iteration time by many seconds,
-thus extending the total migration time. However, this will not
-affect the determinism or predictability of your migration; you will
-still gain the benefits of advance pinning with RDMA.
-
-RUNNING:
-========
-
-First, set the migration speed to match your hardware's capabilities:
-
-QEMU Monitor Command:
-$ migrate_set_parameter max-bandwidth 40g # or whatever is the MAX of your RDMA device
-
-Next, on the destination machine, add the following to the QEMU command line:
-
-qemu ..... -incoming rdma:host:port
-
-Finally, perform the actual migration on the source machine:
-
-QEMU Monitor Command:
-$ migrate -d rdma:host:port
-
-PERFORMANCE
-===========
-
-Here is a brief summary of total migration time and downtime using RDMA:
-Using a 40gbps infiniband link performing a worst-case stress test,
-using an 8GB RAM virtual machine:
-
-Using the following commands:
-$ apt-get install stress
-$ stress --vm-bytes 7500M --vm 1 --vm-keep
-
-1. Migration throughput: 26 gigabits/second.
-2. Downtime (stop time) varies between 15 and 100 milliseconds.
-
-EFFECTS of memory registration on bulk phase round:
-
-For example, in the same 8GB RAM example, with all 8GB of memory in
-active use while the VM itself is completely idle, using the same 40 gbps
-infiniband link:
-
-1. rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
-2. rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
-
-These numbers would of course scale up to whatever size virtual machine
-you have to migrate using RDMA.
-
-Enabling this feature does *not* have any measurable effect on
-migration *downtime*. This is because, without this feature, all of the
-memory will already have been registered in advance during
-the bulk round and does not need to be re-registered during the successive
-iteration rounds.
-
-RDMA Protocol Description:
-==========================
-
-Migration with RDMA is separated into two parts:
-
-1. The transmission of the pages using RDMA
-2. Everything else (a control channel is introduced)
-
-"Everything else" is transmitted using a formal
-protocol now, consisting of infiniband SEND messages.
-
-An infiniband SEND message is the standard ibverbs
-message used by applications of infiniband hardware.
-The only difference between a SEND message and an RDMA
-message is that SEND messages cause notifications
-to be posted to the completion queue (CQ) on the
-infiniband receiver side, whereas RDMA messages (used
-for VM's ram) do not (to behave like an actual DMA).
-
-Messages in infiniband require two things:
-
-1. registration of the memory that will be transmitted
-2. (SEND only) work requests to be posted on both
-   sides of the network before the actual transmission
-   can occur.
-
-RDMA messages are much easier to deal with. Once the memory
-on the receiver side is registered and pinned, we're
-basically done. All that is required is for the sender
-side to start dumping bytes onto the link.
-
-(Memory is not released from pinning until the migration
-completes, given that RDMA migrations are very fast.)
-
-SEND messages require more coordination because the
-receiver must have reserved space (using a receive
-work request) on the receive queue (RQ) before QEMUFileRDMA
-can start using them to carry all the bytes as
-a control transport for migration of device state.
-
-To begin the migration, the initial connection setup is
-as follows (migration-rdma.c):
-
-1. Receiver and Sender are started (command line or libvirt):
-2. Both sides post two RQ work requests
-3. Receiver does listen()
-4. Sender does connect()
-5. Receiver accept()
-6. Check versioning and capabilities (described later)
-
-At this point, we define a control channel on top of SEND messages
-which is described by a formal protocol. Each SEND message has a
-header portion and a data portion (but together are transmitted
-as a single SEND message).
-
-Header:
-    * Length               (of the data portion, uint32, network byte order)
-    * Type                 (what command to perform, uint32, network byte order)
-    * Repeat               (Number of commands in data portion, same type only)
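
A plausible C rendering of this header (a sketch; the field names are guesses,
only the wire format is as stated above):

    #include <stdint.h>
    #include <arpa/inet.h>

    struct ctrl_hdr {
        uint32_t len;     /* length of the data portion */
        uint32_t type;    /* command to perform */
        uint32_t repeat;  /* number of same-type commands that follow */
    };

    static void ctrl_hdr_to_network(struct ctrl_hdr *h)
    {
        h->len    = htonl(h->len);
        h->type   = htonl(h->type);
        h->repeat = htonl(h->repeat);
    }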
-
-The 'Repeat' field is here to support future multiple page registrations
-in a single message without any need to change the protocol itself,
-so that the protocol remains compatible across multiple versions of QEMU.
-Version #1 requires that all server implementations of the protocol must
-check this field and register all requests found in the array of commands located
-in the data portion and return an equal number of results in the response.
-The maximum number of repeats is hard-coded to 4096. This is a conservative
-limit based on the maximum size of a SEND message along with empirical
-observations on the maximum future benefit of simultaneous page registrations.
-
-The 'type' field has 12 different command values:
-     1. Unused
-     2. Error                      (sent to the source during bad things)
-     3. Ready                      (control-channel is available)
-     4. QEMU File                  (for sending non-live device state)
-     5. RAM Blocks request         (used right after connection setup)
-     6. RAM Blocks result          (used right after connection setup)
-     7. Compress page              (zap zero page and skip registration)
-     8. Register request           (dynamic chunk registration)
-     9. Register result            ('rkey' to be used by sender)
-    10. Register finished          (registration for current iteration finished)
-    11. Unregister request         (unpin previously registered memory)
-    12. Unregister finished        (confirmation that unpin completed)
-
-A single control message, as hinted above, can contain within the data
-portion an array of many commands of the same type. If there is more than
-one command, then the 'repeat' field will be greater than 1.
-
-After connection setup, messages 5 & 6 are used to exchange ram block
-information and optionally pin all the memory if requested by the user.
-
-After ram block exchange is completed, we have two protocol-level
-functions, responsible for communicating control-channel commands
-using the above list of values:
-
-Logically:
-
-qemu_rdma_exchange_recv(header, expected command type)
-
-1. We transmit a READY command to let the sender know that
-   we are *ready* to receive some data bytes on the control channel.
-2. Before attempting to receive the expected command, we post another
-   RQ work request to replace the one we just used up.
-3. Block on a CQ event channel and wait for the SEND to arrive.
-4. When the send arrives, librdmacm will unblock us.
-5. Verify that the command-type and version received match the ones we expected.
-
-qemu_rdma_exchange_send(header, data, optional response header & data):
-
-1. Block on the CQ event channel waiting for a READY command
-   from the receiver to tell us that the receiver
-   is *ready* for us to transmit some new bytes.
-2. Optionally: if we are expecting a response from the command
-   (that we have not yet transmitted), let's post an RQ
-   work request to receive that data a few moments later.
-3. When the READY arrives, librdmacm will
-   unblock us and we immediately post an RQ work request
-   to replace the one we just used up.
-4. Now, we can actually post the work request to SEND
-   the requested command type of the header we were asked for.
-5. Optionally, if we are expecting a response (as before),
-   we block again and wait for that response using the additional
-   work request we previously posted. (This is used to carry
-   'Register result' commands (#9 above) back to the sender, which
-   hold the rkey needed to perform RDMA. Note that the virtual address
-   corresponding to this rkey was already exchanged at the beginning
-   of the connection, described below.)
-
-All of the remaining command types (not including 'ready')
-described above use the aforementioned two functions to do the hard work:
-
-1. After connection setup, RAMBlock information is exchanged using
-   this protocol before the actual migration begins. This information includes
-   a description of each RAMBlock on the server side as well as the virtual addresses
-   and lengths of each RAMBlock. This is used by the client to determine the
-   start and stop locations of chunks and how to register them dynamically
-   before performing the RDMA operations.
-2. During runtime, once a 'chunk' becomes full of pages ready to
-   be sent with RDMA, the registration commands are used to ask the
-   other side to register the memory for this chunk and respond
-   with the result (rkey) of the registration.
-3. The QEMUFile interfaces also call these functions (described below)
-   when transmitting non-live state, such as device state, or to send
-   their own protocol information during the migration process.
-4. Finally, zero pages are only checked if a page has not yet been registered
-   using chunk registration (or not checked at all and unconditionally
-   written if chunk registration is disabled). This is accomplished using
-   the "Compress" command listed above. If the page *has* been registered,
-   then we check the entire chunk for zero. Only if the entire chunk is
-   zero do we send a compress command to zap the page on the other side.
-
-Versioning and Capabilities
-===========================
-Current version of the protocol is version #1.
-
-The same version applies both to protocol traffic and to capabilities
-negotiation (i.e. there is only one version number that is referred to
-by all communication).
-
-librdmacm provides the user with a 'private data' area to be exchanged
-at connection-setup time before any infiniband traffic is generated.
-
-Header:
-    * Version (protocol version validated before send/recv occurs),
-                                               uint32, network byte order
-    * Flags   (bitwise OR of each capability),
-                                               uint32, network byte order
-
-There is no data portion of this header right now, so there is
-no length field. The maximum size of the 'private data' section
-is only 192 bytes per the Infiniband specification, so it's not
-very useful for data anyway. This structure needs to remain small.
-
-This private data area is a convenient place to check for protocol
-versioning because the user does not need to register memory to
-transmit a few bytes of version information.
-
-This is also a convenient place to negotiate capabilities
-(like dynamic page registration).
-
-If the version is invalid, we throw an error.
-
-If the version is new, we only negotiate the capabilities that the
-requested version is able to perform and ignore the rest.
-
-Currently there is only one capability in Version #1: dynamic page registration
-
-Finally: Negotiation happens with the Flags field: If the primary-VM
-sets a flag, but the destination does not support this capability, it
-will return a zero-bit for that flag and the primary-VM will understand
-that as not being an available capability and will thus disable that
-capability on the primary-VM side.
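
In code form, that negotiation reduces to a bitwise AND (a sketch; the flag
bit position and the names are assumptions, only the "answer with zero bits
for unsupported capabilities" behavior is from the text):

    #include <stdint.h>

    #define CAP_PIN_ALL (1u << 0)   /* assumed bit for the one v1 capability */

    /* The destination answers with the subset of requested bits it supports;
     * the source then disables whatever came back as zero. */
    static uint32_t negotiate(uint32_t requested, uint32_t supported)
    {
        return requested & supported;
    }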
-
-QEMUFileRDMA Interface:
-=======================
-
-QEMUFileRDMA introduces a couple of new functions:
-
-1. qemu_rdma_get_buffer()               (QEMUFileOps rdma_read_ops)
-2. qemu_rdma_put_buffer()               (QEMUFileOps rdma_write_ops)
-
-These two functions are very short and simply use the protocol
-described above to deliver bytes without changing the upper-level
-users of QEMUFile that depend on a bytestream abstraction.
-
-Finally, how do we handoff the actual bytes to get_buffer()?
-
-Again, because we're trying to "fake" a bytestream abstraction
-using an analogy not unlike individual UDP frames, we have
-to hold on to the bytes received from control-channel's SEND
-messages in memory.
-
-Each time we receive a complete "QEMU File" control-channel
-message, the bytes from SEND are copied into a small local holding area.
-
-Then, we return the number of bytes requested by get_buffer()
-and leave the remaining bytes in the holding area until get_buffer()
-comes around for another pass.
-
-If the buffer is empty, then we follow the same steps
-listed above and issue another "QEMU File" protocol command,
-asking for a new SEND message to re-fill the buffer.
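
A schematic version of that holding-area logic (a sketch of the stated
behavior; none of these names come from the sources):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct holding_area {
        uint8_t buf[4096];   /* bytes copied out of SEND messages */
        size_t  start, len;  /* unread region */
    };

    /* Return up to 'size' buffered bytes; caller re-fills when it gets 0. */
    static size_t drain(struct holding_area *h, uint8_t *out, size_t size)
    {
        size_t n = size < h->len ? size : h->len;
        memcpy(out, h->buf + h->start, n);
        h->start += n;
        h->len   -= n;
        return n;
    }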
-
-Migration of VM's ram:
-======================
-
-At the beginning of the migration (migration-rdma.c),
-the sender and the receiver populate the list of RAMBlocks
-to be registered with each other into a structure.
-Then, using the aforementioned protocol, they exchange a
-description of these blocks with each other, to be used later
-during the iteration of main memory. This description includes
-a list of all the RAMBlocks, their offsets and lengths, virtual
-addresses, and possibly pre-registered RDMA keys in case dynamic
-page registration was disabled on the server side.
-
-Main memory is not migrated with the aforementioned protocol,
-but is instead migrated with normal RDMA Write operations.
-
-Pages are migrated in "chunks" (hard-coded to 1 Megabyte right now).
-Chunk size is not dynamic, but it could be in a future implementation.
-There's nothing to indicate that this is useful right now.
-
-When a chunk is full (or a flush() occurs), the memory backed by
-the chunk is registered with librdmacm and pinned in memory on
-both sides using the aforementioned protocol.
-After pinning, an RDMA Write is generated and transmitted
-for the entire chunk.
-
-Chunks are also transmitted in batches: This means that we
-do not request that the hardware signal the completion queue
-for the completion of *every* chunk. The current batch size
-is about 64 chunks (corresponding to 64 MB of memory).
-Only the last chunk in a batch must be signaled.
-This helps keep everything as asynchronous as possible
-and helps keep the hardware busy performing RDMA operations.
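
The signaling rule could look like this in code (illustrative only; the batch
size of about 64 chunks is from the text, the helper is hypothetical):

    #include <stdbool.h>

    #define CHUNKS_PER_BATCH 64   /* corresponds to ~64 MB of memory */

    /* Ask the hardware for a completion only on the last chunk of a batch. */
    static bool chunk_needs_signal(unsigned long chunk_index)
    {
        return (chunk_index % CHUNKS_PER_BATCH) == (CHUNKS_PER_BATCH - 1);
    }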
-
-Error-handling:
-===============
-
-Infiniband has what is called a "Reliable, Connected"
-link (one of 4 choices). This is the mode we use
-for RDMA migration.
-
-If a *single* message fails,
-the decision is to abort the migration entirely and
-clean up all the RDMA descriptors and unregister all
-the memory.
-
-After cleanup, the Virtual Machine is returned to normal
-operation the same way it would if the TCP
-socket were broken during a non-RDMA based migration.
-
-TODO:
-=====
-1. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
-   are not compatible with infiniband memory pinning and will result in
-   an aborted migration (but with the source VM left unaffected).
-2. Use of the recent /proc/<pid>/pagemap would likely speed up
-   the use of KSM and ballooning while using RDMA.
-3. Some form of balloon-device usage tracking would also
-   help alleviate some issues.
-4. Use LRU to provide more fine-grained direction of UNREGISTER
-   requests for unpinning memory in an overcommitted environment.
-5. Expose UNREGISTER support to the user by way of workload-specific
-   hints about application behavior.
diff --git a/docs/system/device-url-syntax.rst.inc b/docs/system/device-url-syntax.rst.inc
index 7dbc525fa8..43b5c2596b 100644
--- a/docs/system/device-url-syntax.rst.inc
+++ b/docs/system/device-url-syntax.rst.inc
@@ -87,8 +87,8 @@ These are specified using a special URL syntax.

 ``GlusterFS``
    GlusterFS is a user space distributed file system. QEMU supports the
-   use of GlusterFS volumes for hosting VM disk images using TCP, Unix
-   Domain Sockets and RDMA transport protocols.
+   use of GlusterFS volumes for hosting VM disk images using TCP and Unix
+   Domain Sockets transport protocols.

    Syntax for specifying a VM disk image on GlusterFS volume is

diff --git a/docs/system/loongarch/virt.rst b/docs/system/loongarch/virt.rst
index c37268b404..0a8e0766e4 100644
--- a/docs/system/loongarch/virt.rst
+++ b/docs/system/loongarch/virt.rst
@@ -39,7 +39,7 @@ can be accessed by following steps.

 .. code-block:: bash

-  ./configure --disable-rdma --disable-pvrdma --prefix=/usr \
+  ./configure --prefix=/usr \
               --target-list="loongarch64-softmmu" \
               --disable-libiscsi --disable-libnfs --disable-libpmem \
               --disable-glusterfs --enable-libusb --enable-usb-redir \
diff --git a/docs/system/qemu-block-drivers.rst.inc b/docs/system/qemu-block-drivers.rst.inc
index 105cb9679c..384e95ba76 100644
--- a/docs/system/qemu-block-drivers.rst.inc
+++ b/docs/system/qemu-block-drivers.rst.inc
@@ -737,7 +737,6 @@ Examples
   |qemu_system| -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
   |qemu_system| -drive file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
   |qemu_system| -drive file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
-  |qemu_system| -drive file=gluster+rdma://1.2.3.4:24007/testvol/a.img
   |qemu_system| -drive file=gluster://1.2.3.4/testvol/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log
   |qemu_system| 'json:{"driver":"qcow2",
                            "file":{"driver":"gluster",
diff --git a/meson.build b/meson.build
index c9c3217ba4..bd65abad13 100644
--- a/meson.build
+++ b/meson.build
@@ -1854,21 +1854,6 @@ if numa.found() and not cc.links('''
   endif
 endif

-rdma = not_found
-if not get_option('rdma').auto() or have_system
-  libumad = cc.find_library('ibumad', required: get_option('rdma'))
-  rdma_libs = [cc.find_library('rdmacm', has_headers: ['rdma/rdma_cma.h'],
-                               required: get_option('rdma')),
-               cc.find_library('ibverbs', required: get_option('rdma')),
-               libumad]
-  rdma = declare_dependency(dependencies: rdma_libs)
-  foreach lib: rdma_libs
-    if not lib.found()
-      rdma = not_found
-    endif
-  endforeach
-endif
-
 cacard = not_found
 if not get_option('smartcard').auto() or have_system
   cacard = dependency('libcacard', required: get_option('smartcard'),
@@ -2246,7 +2231,6 @@ endif
 config_host_data.set('CONFIG_OPENGL', opengl.found())
 config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
 config_host_data.set('CONFIG_RBD', rbd.found())
-config_host_data.set('CONFIG_RDMA', rdma.found())
 config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
 config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
 config_host_data.set('CONFIG_SDL', sdl.found())
@@ -2399,12 +2383,6 @@ if rbd.found()
                                        dependencies: rbd,
                                        prefix: '#include <rbd/librbd.h>'))
 endif
-if rdma.found()
-  config_host_data.set('HAVE_IBV_ADVISE_MR',
-                       cc.has_function('ibv_advise_mr',
-                                       dependencies: rdma,
-                                       prefix: '#include <infiniband/verbs.h>'))
-endif

 have_asan_fiber = false
 if get_option('sanitizers') and \
@@ -2829,37 +2807,6 @@ config_host_data.set('CONFIG_ARM_AES_BUILTIN', cc.compiles('''
     void foo(uint8x16_t *p) { *p = vaesmcq_u8(*p); }
   '''))

-have_pvrdma = get_option('pvrdma') \
-  .require(rdma.found(), error_message: 'PVRDMA requires OpenFabrics libraries') \
-  .require(cc.compiles(gnu_source_prefix + '''
-    #include <sys/mman.h>
-    int main(void)
-    {
-      char buf = 0;
-      void *addr = &buf;
-      addr = mremap(addr, 0, 1, MREMAP_MAYMOVE | MREMAP_FIXED);
-
-      return 0;
-    }'''), error_message: 'PVRDMA requires mremap').allowed()
-
-if have_pvrdma
-  config_host_data.set('LEGACY_RDMA_REG_MR', not cc.links('''
-    #include <infiniband/verbs.h>
-    int main(void)
-    {
-      struct ibv_mr *mr;
-      struct ibv_pd *pd = NULL;
-      size_t length = 10;
-      uint64_t iova = 0;
-      int access = 0;
-      void *addr = NULL;
-
-      mr = ibv_reg_mr_iova(pd, addr, length, iova, access);
-      ibv_dereg_mr(mr);
-      return 0;
-    }'''))
-endif
-
 if get_option('membarrier').disabled()
   have_membarrier = false
 elif host_os == 'windows'
@@ -2993,7 +2940,6 @@ host_kconfig = \
   (have_vhost_kernel ? ['CONFIG_VHOST_KERNEL=y'] : []) + \
   (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \
   (host_os == 'linux' ? ['CONFIG_LINUX=y'] : []) + \
-  (have_pvrdma ? ['CONFIG_PVRDMA=y'] : []) + \
   (multiprocess_allowed ? ['CONFIG_MULTIPROCESS_ALLOWED=y'] : []) + \
   (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : []) + \
   (hv_balloon ? ['CONFIG_HV_BALLOON_POSSIBLE=y'] : [])
@@ -3357,8 +3303,6 @@ if have_system
     'hw/pci',
     'hw/pci-host',
     'hw/ppc',
-    'hw/rdma',
-    'hw/rdma/vmw',
     'hw/rtc',
     'hw/s390x',
     'hw/scsi',
@@ -4028,7 +3972,6 @@ if have_tools
     }]
   endforeach

-  subdir('contrib/rdmacm-mux')
   subdir('contrib/elf2dmp')

   executable('qemu-edid', files('qemu-edid.c', 'hw/display/edid-generate.c'),
@@ -4433,8 +4376,6 @@ summary_info += {'Multipath support': mpathpersist}
 summary_info += {'Linux AIO support': libaio}
 summary_info += {'Linux io_uring support': linux_io_uring}
 summary_info += {'ATTR/XATTR support': libattr}
-summary_info += {'RDMA support':      rdma}
-summary_info += {'PVRDMA support':    have_pvrdma}
 summary_info += {'fdt support':       fdt_opt == 'disabled' ? false : fdt_opt}
 summary_info += {'libcap-ng support': libcap_ng}
 summary_info += {'bpf support':       libbpf}
diff --git a/qapi/machine.json b/qapi/machine.json
index e8b60641f2..e9f0f0c49a 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1737,23 +1737,6 @@
   'returns': 'HumanReadableText',
   'features': [ 'unstable' ] }

-##
-# @x-query-rdma:
-#
-# Query RDMA state
-#
-# Features:
-#
-# @unstable: This command is meant for debugging.
-#
-# Returns: RDMA state
-#
-# Since: 6.2
-##
-{ 'command': 'x-query-rdma',
-  'returns': 'HumanReadableText',
-  'features': [ 'unstable' ] }
-
 ##
 # @x-query-roms:
 #
diff --git a/qapi/migration.json b/qapi/migration.json
index 8c65b90328..9a56d403be 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -221,8 +221,8 @@
 #
 # @setup-time: amount of setup time in milliseconds *before* the
 #     iterations begin but *after* the QMP command is issued.  This is
-#     designed to provide an accounting of any activities (such as
-#     RDMA pinning) which may be expensive, but do not actually occur
+#     designed to provide an accounting of any activities which may be
+#     expensive, but do not actually occur
 #     during the iterative migration rounds themselves.  (since 1.6)
 #
 # @cpu-throttle-percentage: percentage of time guest cpus are being
@@ -430,10 +430,6 @@
 #     for certain work loads, by sending compressed difference of the
 #     pages
 #
-# @rdma-pin-all: Controls whether or not the entire VM memory
-#     footprint is mlock()'d on demand or all at once.  Refer to
-#     docs/rdma.txt for usage.  Disabled by default.  (since 2.0)
-#
 # @zero-blocks: During storage migration encode blocks of zeroes
 #     efficiently.  This essentially saves 1MB of zeroes per block on
 #     the wire.  Enabling requires source and target VM to support
@@ -547,7 +543,7 @@
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
-  'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
+  'data': ['xbzrle', 'auto-converge', 'zero-blocks',
            { 'name': 'compress', 'features': [ 'deprecated' ] },
            'events', 'postcopy-ram',
            { 'name': 'x-colo', 'features': [ 'unstable' ] },
@@ -606,7 +602,6 @@
 #     -> { "execute": "query-migrate-capabilities" }
 #     <- { "return": [
 #           {"state": false, "capability": "xbzrle"},
-#           {"state": false, "capability": "rdma-pin-all"},
 #           {"state": false, "capability": "auto-converge"},
 #           {"state": false, "capability": "zero-blocks"},
 #           {"state": false, "capability": "compress"},
@@ -1654,14 +1649,12 @@
 #
 # @exec: Direct the migration stream to another process.
 #
-# @rdma: Migrate via RDMA.
-#
 # @file: Direct the migration stream to a file.
 #
 # Since: 8.2
 ##
 { 'enum': 'MigrationAddressType',
-  'data': [ 'socket', 'exec', 'rdma', 'file' ] }
+  'data': [ 'socket', 'exec', 'file' ] }

 ##
 # @FileMigrationArgs:
@@ -1701,7 +1694,6 @@
   'data': {
     'socket': 'SocketAddress',
     'exec': 'MigrationExecCommand',
-    'rdma': 'InetSocketAddress',
     'file': 'FileMigrationArgs' } }

 ##
@@ -1804,14 +1796,6 @@
 #     -> { "execute": "migrate",
 #          "arguments": {
 #              "channels": [ { "channel-type": "main",
-#                              "addr": { "transport": "rdma",
-#                                        "host": "10.12.34.9",
-#                                        "port": "1050" } } ] } }
-#     <- { "return": {} }
-#
-#     -> { "execute": "migrate",
-#          "arguments": {
-#              "channels": [ { "channel-type": "main",
 #                              "addr": { "transport": "file",
 #                                        "filename": "/tmp/migfile",
 #                                        "offset": "0x1000" } } ] } }
@@ -1879,13 +1863,6 @@
 #                                                  "/some/sock" ] } } ] } }
 #     <- { "return": {} }
 #
-#     -> { "execute": "migrate-incoming",
-#          "arguments": {
-#              "channels": [ { "channel-type": "main",
-#                              "addr": { "transport": "rdma",
-#                                        "host": "10.12.34.9",
-#                                        "port": "1050" } } ] } }
-#     <- { "return": {} }
 ##
 { 'command': 'migrate-incoming',
              'data': {'*uri': 'str',
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index 8304d45625..5e33da7228 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -54,7 +54,6 @@
 { 'include': 'dump.json' }
 { 'include': 'net.json' }
 { 'include': 'ebpf.json' }
-{ 'include': 'rdma.json' }
 { 'include': 'rocker.json' }
 { 'include': 'tpm.json' }
 { 'include': 'ui.json' }
diff --git a/qapi/rdma.json b/qapi/rdma.json
deleted file mode 100644
index 195c001850..0000000000
--- a/qapi/rdma.json
+++ /dev/null
@@ -1,38 +0,0 @@
-# -*- Mode: Python -*-
-# vim: filetype=python
-#
-
-##
-# = RDMA device
-##
-
-##
-# @RDMA_GID_STATUS_CHANGED:
-#
-# Emitted when guest driver adds/deletes GID to/from device
-#
-# @netdev: RoCE Network Device name
-#
-# @gid-status: Add or delete indication
-#
-# @subnet-prefix: Subnet Prefix
-#
-# @interface-id: Interface ID
-#
-# Since: 4.0
-#
-# Example:
-#
-#     <- {"timestamp": {"seconds": 1541579657, "microseconds": 986760},
-#         "event": "RDMA_GID_STATUS_CHANGED",
-#         "data":
-#             {"netdev": "bridge0",
-#             "interface-id": 15880512517475447892,
-#             "gid-status": true,
-#             "subnet-prefix": 33022}}
-##
-{ 'event': 'RDMA_GID_STATUS_CHANGED',
-  'data': { 'netdev'        : 'str',
-            'gid-status'    : 'bool',
-            'subnet-prefix' : 'uint64',
-            'interface-id'  : 'uint64' } }
diff --git a/contrib/rdmacm-mux/rdmacm-mux.h b/contrib/rdmacm-mux/rdmacm-mux.h
deleted file mode 100644
index 07a4722913..0000000000
--- a/contrib/rdmacm-mux/rdmacm-mux.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * QEMU paravirtual RDMA - rdmacm-mux declarations
- *
- * Copyright (C) 2018 Oracle
- * Copyright (C) 2018 Red Hat Inc
- *
- * Authors:
- *     Yuval Shaia <yuval.shaia@oracle.com>
- *     Marcel Apfelbaum <marcel@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef RDMACM_MUX_H
-#define RDMACM_MUX_H
-
-#include "linux/if.h"
-#include <infiniband/verbs.h>
-#include <infiniband/umad.h>
-#include <rdma/rdma_user_cm.h>
-
-typedef enum RdmaCmMuxMsgType {
-    RDMACM_MUX_MSG_TYPE_REQ   = 0,
-    RDMACM_MUX_MSG_TYPE_RESP  = 1,
-} RdmaCmMuxMsgType;
-
-typedef enum RdmaCmMuxOpCode {
-    RDMACM_MUX_OP_CODE_REG   = 0,
-    RDMACM_MUX_OP_CODE_UNREG = 1,
-    RDMACM_MUX_OP_CODE_MAD   = 2,
-} RdmaCmMuxOpCode;
-
-typedef enum RdmaCmMuxErrCode {
-    RDMACM_MUX_ERR_CODE_OK        = 0,
-    RDMACM_MUX_ERR_CODE_EINVAL    = 1,
-    RDMACM_MUX_ERR_CODE_EEXIST    = 2,
-    RDMACM_MUX_ERR_CODE_EACCES    = 3,
-    RDMACM_MUX_ERR_CODE_ENOTFOUND = 4,
-} RdmaCmMuxErrCode;
-
-typedef struct RdmaCmMuxHdr {
-    RdmaCmMuxMsgType msg_type;
-    RdmaCmMuxOpCode op_code;
-    union ibv_gid sgid;
-    RdmaCmMuxErrCode err_code;
-} RdmaCmUHdr;
-
-typedef struct RdmaCmUMad {
-    struct ib_user_mad hdr;
-    char mad[RDMA_MAX_PRIVATE_DATA];
-} RdmaCmUMad;
-
-typedef struct RdmaCmMuxMsg {
-    RdmaCmUHdr hdr;
-    int umad_len;
-    RdmaCmUMad umad;
-} RdmaCmMuxMsg;
-
-#endif
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
deleted file mode 100644
index 225af481e0..0000000000
--- a/hw/rdma/rdma_backend.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- *  RDMA device: Definitions of Backend Device functions
- *
- * Copyright (C) 2018 Oracle
- * Copyright (C) 2018 Red Hat Inc
- *
- * Authors:
- *     Yuval Shaia <yuval.shaia@oracle.com>
- *     Marcel Apfelbaum <marcel@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef RDMA_BACKEND_H
-#define RDMA_BACKEND_H
-
-#include "qapi/error.h"
-#include "chardev/char-fe.h"
-
-#include "rdma_rm_defs.h"
-#include "rdma_backend_defs.h"
-
-/* Vendor Errors */
-#define VENDOR_ERR_FAIL_BACKEND     0x201
-#define VENDOR_ERR_TOO_MANY_SGES    0x202
-#define VENDOR_ERR_NOMEM            0x203
-#define VENDOR_ERR_QP0              0x204
-#define VENDOR_ERR_INV_NUM_SGE      0x205
-#define VENDOR_ERR_MAD_SEND         0x206
-#define VENDOR_ERR_INVLKEY          0x207
-#define VENDOR_ERR_MR_SMALL         0x208
-#define VENDOR_ERR_INV_MAD_BUFF     0x209
-#define VENDOR_ERR_INV_GID_IDX      0x210
-
-/* Add definitions for QP0 and QP1 as there are no userspace enums for them */
-enum ibv_special_qp_type {
-    IBV_QPT_SMI = 0,
-    IBV_QPT_GSI = 1,
-};
-
-static inline uint32_t rdma_backend_qpn(const RdmaBackendQP *qp)
-{
-    return qp->ibqp ? qp->ibqp->qp_num : 1;
-}
-
-static inline uint32_t rdma_backend_mr_lkey(const RdmaBackendMR *mr)
-{
-    return mr->ibmr ? mr->ibmr->lkey : 0;
-}
-
-static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr)
-{
-    return mr->ibmr ? mr->ibmr->rkey : 0;
-}
-
-int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
-                      RdmaDeviceResources *rdma_dev_res,
-                      const char *backend_device_name, uint8_t port_num,
-                      struct ibv_device_attr *dev_attr,
-                      CharBackend *mad_chr_be);
-void rdma_backend_fini(RdmaBackendDev *backend_dev);
-int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname,
-                         union ibv_gid *gid);
-int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname,
-                         union ibv_gid *gid);
-int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev,
-                               union ibv_gid *gid);
-void rdma_backend_start(RdmaBackendDev *backend_dev);
-void rdma_backend_stop(RdmaBackendDev *backend_dev);
-void rdma_backend_register_comp_handler(void (*handler)(void *ctx,
-                                                        struct ibv_wc *wc));
-void rdma_backend_unregister_comp_handler(void);
-
-int rdma_backend_query_port(RdmaBackendDev *backend_dev,
-                            struct ibv_port_attr *port_attr);
-int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd);
-void rdma_backend_destroy_pd(RdmaBackendPD *pd);
-
-int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
-                           size_t length, uint64_t guest_start, int access);
-void rdma_backend_destroy_mr(RdmaBackendMR *mr);
-
-int rdma_backend_create_cq(RdmaBackendDev *backend_dev, RdmaBackendCQ *cq,
-                           int cqe);
-void rdma_backend_destroy_cq(RdmaBackendCQ *cq);
-void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);
-
-int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
-                           RdmaBackendPD *pd, RdmaBackendCQ *scq,
-                           RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
-                           uint32_t max_send_wr, uint32_t max_recv_wr,
-                           uint32_t max_send_sge, uint32_t max_recv_sge);
-int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
-                               uint8_t qp_type, uint32_t qkey);
-int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
-                              uint8_t qp_type, uint8_t sgid_idx,
-                              union ibv_gid *dgid, uint32_t dqpn,
-                              uint32_t rq_psn, uint32_t qkey, bool use_qkey);
-int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type,
-                              uint32_t sq_psn, uint32_t qkey, bool use_qkey);
-int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr,
-                          int attr_mask, struct ibv_qp_init_attr *init_attr);
-void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res);
-
-void rdma_backend_post_send(RdmaBackendDev *backend_dev,
-                            RdmaBackendQP *qp, uint8_t qp_type,
-                            struct ibv_sge *sge, uint32_t num_sge,
-                            uint8_t sgid_idx, union ibv_gid *sgid,
-                            union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey,
-                            void *ctx);
-void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
-                            RdmaBackendQP *qp, uint8_t qp_type,
-                            struct ibv_sge *sge, uint32_t num_sge, void *ctx);
-
-int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
-                            uint32_t max_wr, uint32_t max_sge,
-                            uint32_t srq_limit);
-int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
-int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
-                            int srq_attr_mask);
-void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
-                              RdmaDeviceResources *dev_res);
-void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
-                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
-                                uint32_t num_sge, void *ctx);
-
-#endif
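These declarations imply a fixed bring-up order; a sketch of that lifecycle, with error handling elided, the backend device name purely illustrative, and the handler a stub (the real caller lived in the pvrdma device code):

    /* Sketch of the backend lifecycle implied by the API above. */
    static void example_comp_handler(void *ctx, struct ibv_wc *wc)
    {
        /* ctx is the value passed to rdma_backend_post_send/recv */
    }

    static int backend_bring_up(RdmaBackendDev *bdev, PCIDevice *pdev,
                                RdmaDeviceResources *res,
                                struct ibv_device_attr *attr,
                                CharBackend *mad_chr)
    {
        if (rdma_backend_init(bdev, pdev, res, "mlx5_0" /* illustrative */,
                              1 /* the single supported port */, attr,
                              mad_chr)) {
            return -1;
        }
        rdma_backend_register_comp_handler(example_comp_handler);
        rdma_backend_start(bdev);
        return 0;
    }

    static void backend_tear_down(RdmaBackendDev *bdev)
    {
        rdma_backend_stop(bdev);
        rdma_backend_unregister_comp_handler();
        rdma_backend_fini(bdev);
    }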
diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
deleted file mode 100644
index 4e6c0ad695..0000000000
--- a/hw/rdma/rdma_backend_defs.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  RDMA device: Definitions of Backend Device structures
- *
- * Copyright (C) 2018 Oracle
- * Copyright (C) 2018 Red Hat Inc
- *
- * Authors:
- *     Yuval Shaia <yuval.shaia@oracle.com>
- *     Marcel Apfelbaum <marcel@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef RDMA_BACKEND_DEFS_H
-#define RDMA_BACKEND_DEFS_H
-
-#include "qemu/thread.h"
-#include "chardev/char-fe.h"
-#include <infiniband/verbs.h>
-#include "contrib/rdmacm-mux/rdmacm-mux.h"
-#include "rdma_utils.h"
-
-typedef struct RdmaDeviceResources RdmaDeviceResources;
-
-typedef struct RdmaBackendThread {
-    QemuThread thread;
-    bool run; /* Set by thread manager to let thread know it should exit */
-    bool is_running; /* Set by the thread to report its status */
-} RdmaBackendThread;
-
-typedef struct RdmaCmMux {
-    CharBackend *chr_be;
-    int can_receive;
-} RdmaCmMux;
-
-typedef struct RdmaBackendDev {
-    RdmaBackendThread comp_thread;
-    PCIDevice *dev;
-    RdmaDeviceResources *rdma_dev_res;
-    struct ibv_device *ib_dev;
-    struct ibv_context *context;
-    struct ibv_comp_channel *channel;
-    uint8_t port_num;
-    RdmaProtectedGQueue recv_mads_list;
-    RdmaCmMux rdmacm_mux;
-} RdmaBackendDev;
-
-typedef struct RdmaBackendPD {
-    struct ibv_pd *ibpd;
-} RdmaBackendPD;
-
-typedef struct RdmaBackendMR {
-    struct ibv_pd *ibpd;
-    struct ibv_mr *ibmr;
-} RdmaBackendMR;
-
-typedef struct RdmaBackendCQ {
-    RdmaBackendDev *backend_dev;
-    struct ibv_cq *ibcq;
-} RdmaBackendCQ;
-
-typedef struct RdmaBackendQP {
-    struct ibv_pd *ibpd;
-    struct ibv_qp *ibqp;
-    uint8_t sgid_idx;
-    RdmaProtectedGSList cqe_ctx_list;
-} RdmaBackendQP;
-
-typedef struct RdmaBackendSRQ {
-    struct ibv_srq *ibsrq;
-    RdmaProtectedGSList cqe_ctx_list;
-} RdmaBackendSRQ;
-
-#endif
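The two flags in RdmaBackendThread spell out a small stop handshake; a sketch of the pattern the field comments describe (the busy-wait is illustrative — the real code presumably slept or joined the thread):

    /* Manager side of the stop handshake described by the comments. */
    static void thread_request_stop(RdmaBackendThread *t)
    {
        t->run = false;          /* ask the completion thread to exit */
        while (t->is_running) {  /* wait until it reports it has left */
            ;                    /* a real caller would sleep/yield here */
        }
    }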
diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
deleted file mode 100644
index d69a917795..0000000000
--- a/hw/rdma/rdma_rm.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * RDMA device: Definitions of Resource Manager functions
- *
- * Copyright (C) 2018 Oracle
- * Copyright (C) 2018 Red Hat Inc
- *
- * Authors:
- *     Yuval Shaia <yuval.shaia@oracle.com>
- *     Marcel Apfelbaum <marcel@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef RDMA_RM_H
-#define RDMA_RM_H
-
-#include "qapi/error.h"
-#include "rdma_backend_defs.h"
-#include "rdma_rm_defs.h"
-
-int rdma_rm_init(RdmaDeviceResources *dev_res,
-                 struct ibv_device_attr *dev_attr);
-void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
-                  const char *ifname);
-
-int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
-                     uint32_t *pd_handle, uint32_t ctx_handle);
-RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle);
-void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle);
-
-int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
-                     uint64_t guest_start, uint64_t guest_length,
-                     void *host_virt, int access_flags, uint32_t *mr_handle,
-                     uint32_t *lkey, uint32_t *rkey);
-RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle);
-void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle);
-
-int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
-                     uint32_t *uc_handle);
-RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle);
-void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle);
-
-int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
-                     uint32_t cqe, uint32_t *cq_handle, void *opaque);
-RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle);
-void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
-                           bool notify);
-void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle);
-
-int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
-                     uint8_t qp_type, uint32_t max_send_wr,
-                     uint32_t max_send_sge, uint32_t send_cq_handle,
-                     uint32_t max_recv_wr, uint32_t max_recv_sge,
-                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
-                     uint8_t is_srq, uint32_t srq_handle);
-RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
-int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
-                      uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
-                      union ibv_gid *dgid, uint32_t dqpn,
-                      enum ibv_qp_state qp_state, uint32_t qkey,
-                      uint32_t rq_psn, uint32_t sq_psn);
-int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
-                     uint32_t qp_handle, struct ibv_qp_attr *attr,
-                     int attr_mask, struct ibv_qp_init_attr *init_attr);
-void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle);
-
-RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
-int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
-                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
-                      uint32_t *srq_handle, void *opaque);
-int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
-                      struct ibv_srq_attr *srq_attr);
-int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
-                       struct ibv_srq_attr *srq_attr, int srq_attr_mask);
-void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
-
-int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
-                          void *ctx);
-void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
-void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
-
-int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
-                    const char *ifname, union ibv_gid *gid, int gid_idx);
-int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
-                    const char *ifname, int gid_idx);
-int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
-                                  RdmaBackendDev *backend_dev, int sgid_idx);
-static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res,
-                                             int sgid_idx)
-{
-    return &dev_res->port.gid_tbl[sgid_idx].gid;
-}
-void rdma_format_device_counters(RdmaDeviceResources *dev_res, GString *buf);
-
-#endif
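As a usage sketch, the allocation chain a guest create-QP command would walk through this API; the handles, queue depths and the RC queue-pair type are illustrative, not taken from the device code:

    /* Illustrative: PD -> two CQs -> QP, per the prototypes above. */
    static int example_create_qp(RdmaDeviceResources *res,
                                 RdmaBackendDev *bdev)
    {
        uint32_t pd, scq, rcq, qpn;

        if (rdma_rm_alloc_pd(res, bdev, &pd, 0 /* ctx handle */) ||
            rdma_rm_alloc_cq(res, bdev, 256, &scq, NULL) ||
            rdma_rm_alloc_cq(res, bdev, 256, &rcq, NULL)) {
            return -1;
        }
        return rdma_rm_alloc_qp(res, pd, IBV_QPT_RC,
                                128 /* send wr */, 4 /* send sge */, scq,
                                128 /* recv wr */, 4 /* recv sge */, rcq,
                                NULL /* opaque */, &qpn,
                                0 /* not SRQ-attached */, 0);
    }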
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
deleted file mode 100644
index 534f2f74d3..0000000000
--- a/hw/rdma/rdma_rm_defs.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * RDMA device: Definitions of Resource Manager structures
- *
- * Copyright (C) 2018 Oracle
- * Copyright (C) 2018 Red Hat Inc
- *
- * Authors:
- *     Yuval Shaia <yuval.shaia@oracle.com>
- *     Marcel Apfelbaum <marcel@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef RDMA_RM_DEFS_H
-#define RDMA_RM_DEFS_H
-
-#include "rdma_backend_defs.h"
-
-#define MAX_PORTS             1 /* Do not change - we support only one port */
-#define MAX_PORT_GIDS         255
-#define MAX_GIDS              MAX_PORT_GIDS
-#define MAX_PORT_PKEYS        1
-#define MAX_PKEYS             MAX_PORT_PKEYS
-#define MAX_UCS               512
-#define MAX_MR_SIZE           (1UL << 27)
-#define MAX_QP                1024
-#define MAX_SGE               4
-#define MAX_CQ                2048
-#define MAX_MR                1024
-#define MAX_PD                1024
-#define MAX_QP_RD_ATOM        16
-#define MAX_QP_INIT_RD_ATOM   16
-#define MAX_AH                64
-#define MAX_SRQ               512
-
-#define MAX_RM_TBL_NAME             16
-#define MAX_CONSEQ_EMPTY_POLL_CQ    4096 /* considered an error above this */
-
-typedef struct RdmaRmResTbl {
-    char name[MAX_RM_TBL_NAME];
-    QemuMutex lock;
-    unsigned long *bitmap;
-    size_t tbl_sz;
-    size_t res_sz;
-    void *tbl;
-    uint32_t used; /* number of used entries in the table */
-} RdmaRmResTbl;
-
-typedef struct RdmaRmPD {
-    RdmaBackendPD backend_pd;
-    uint32_t ctx_handle;
-} RdmaRmPD;
-
-typedef enum CQNotificationType {
-    CNT_CLEAR,
-    CNT_ARM,
-    CNT_SET,
-} CQNotificationType;
-
-typedef struct RdmaRmCQ {
-    RdmaBackendCQ backend_cq;
-    void *opaque;
-    CQNotificationType notify;
-} RdmaRmCQ;
-
-/* MR (DMA region) */
-typedef struct RdmaRmMR {
-    RdmaBackendMR backend_mr;
-    void *virt;
-    uint64_t start;
-    size_t length;
-    uint32_t pd_handle;
-    uint32_t lkey;
-    uint32_t rkey;
-} RdmaRmMR;
-
-typedef struct RdmaRmUC {
-    uint64_t uc_handle;
-} RdmaRmUC;
-
-typedef struct RdmaRmQP {
-    RdmaBackendQP backend_qp;
-    void *opaque;
-    uint32_t qp_type;
-    uint32_t qpn;
-    uint32_t send_cq_handle;
-    uint32_t recv_cq_handle;
-    enum ibv_qp_state qp_state;
-    uint8_t is_srq;
-} RdmaRmQP;
-
-typedef struct RdmaRmSRQ {
-    RdmaBackendSRQ backend_srq;
-    uint32_t recv_cq_handle;
-    void *opaque;
-} RdmaRmSRQ;
-
-typedef struct RdmaRmGid {
-    union ibv_gid gid;
-    int backend_gid_index;
-} RdmaRmGid;
-
-typedef struct RdmaRmPort {
-    RdmaRmGid gid_tbl[MAX_PORT_GIDS];
-    enum ibv_port_state state;
-} RdmaRmPort;
-
-typedef struct RdmaRmStats {
-    uint64_t tx;
-    uint64_t tx_len;
-    uint64_t tx_err;
-    uint64_t rx_bufs;
-    uint64_t rx_bufs_len;
-    uint64_t rx_bufs_err;
-    uint64_t rx_srq;
-    uint64_t completions;
-    uint64_t mad_tx;
-    uint64_t mad_tx_err;
-    uint64_t mad_rx;
-    uint64_t mad_rx_err;
-    uint64_t mad_rx_bufs;
-    uint64_t mad_rx_bufs_err;
-    uint64_t poll_cq_from_bk;
-    uint64_t poll_cq_from_guest;
-    uint64_t poll_cq_from_guest_empty;
-    uint64_t poll_cq_ppoll_to;
-    uint32_t missing_cqe;
-} RdmaRmStats;
-
-struct RdmaDeviceResources {
-    RdmaRmPort port;
-    RdmaRmResTbl pd_tbl;
-    RdmaRmResTbl mr_tbl;
-    RdmaRmResTbl uc_tbl;
-    RdmaRmResTbl qp_tbl;
-    RdmaRmResTbl cq_tbl;
-    RdmaRmResTbl cqe_ctx_tbl;
-    RdmaRmResTbl srq_tbl;
-    GHashTable *qp_hash; /* Keeps mapping between real and emulated */
-    QemuMutex lock;
-    RdmaRmStats stats;
-};
-
-#endif
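RdmaRmResTbl is a classic bitmap-indexed handle table; a hedged sketch of how allocation from such a table typically works — the real rdma_rm.c used QEMU's bitmap helpers, so treat this bit-by-bit scan as a stand-in:

    /* Hedged sketch: claim a free slot and return its backing entry. */
    static void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
    {
        unsigned long bits = 8 * sizeof(unsigned long);

        qemu_mutex_lock(&tbl->lock);
        for (uint32_t i = 0; i < tbl->tbl_sz; i++) {
            if (!(tbl->bitmap[i / bits] & (1UL << (i % bits)))) {
                tbl->bitmap[i / bits] |= 1UL << (i % bits);
                tbl->used++;
                *handle = i;
                qemu_mutex_unlock(&tbl->lock);
                return (char *)tbl->tbl + i * tbl->res_sz;
            }
        }
        qemu_mutex_unlock(&tbl->lock);
        return NULL;    /* table exhausted */
    }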
diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h
deleted file mode 100644
index 54e4f56edd..0000000000
--- a/hw/rdma/rdma_utils.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * RDMA device: Debug utilities
- *
- * Copyright (C) 2018 Oracle
- * Copyright (C) 2018 Red Hat Inc
- *
- *
- * Authors:
- *     Yuval Shaia <yuval.shaia@oracle.com>
- *     Marcel Apfelbaum <marcel@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef RDMA_UTILS_H
-#define RDMA_UTILS_H
-
-#include "qemu/error-report.h"
-#include "sysemu/dma.h"
-
-#define rdma_error_report(fmt, ...) \
-    error_report("%s: " fmt, "rdma", ## __VA_ARGS__)
-#define rdma_warn_report(fmt, ...) \
-    warn_report("%s: " fmt, "rdma", ## __VA_ARGS__)
-#define rdma_info_report(fmt, ...) \
-    info_report("%s: " fmt, "rdma", ## __VA_ARGS__)
-
-typedef struct RdmaProtectedGQueue {
-    QemuMutex lock;
-    GQueue *list;
-} RdmaProtectedGQueue;
-
-typedef struct RdmaProtectedGSList {
-    QemuMutex lock;
-    GSList *list;
-} RdmaProtectedGSList;
-
-void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t len);
-void rdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len);
-void rdma_protected_gqueue_init(RdmaProtectedGQueue *list);
-void rdma_protected_gqueue_destroy(RdmaProtectedGQueue *list);
-void rdma_protected_gqueue_append_int64(RdmaProtectedGQueue *list,
-                                        int64_t value);
-int64_t rdma_protected_gqueue_pop_int64(RdmaProtectedGQueue *list);
-void rdma_protected_gslist_init(RdmaProtectedGSList *list);
-void rdma_protected_gslist_destroy(RdmaProtectedGSList *list);
-void rdma_protected_gslist_append_int32(RdmaProtectedGSList *list,
-                                        int32_t value);
-void rdma_protected_gslist_remove_int32(RdmaProtectedGSList *list,
-                                        int32_t value);
-
-static inline void addrconf_addr_eui48(uint8_t *eui, const char *addr)
-{
-    memcpy(eui, addr, 3);
-    eui[3] = 0xFF;
-    eui[4] = 0xFE;
-    memcpy(eui + 5, addr + 3, 3);
-    eui[0] ^= 2;
-}
-
-#endif
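The addrconf_addr_eui48() inline is the standard MAC-to-modified-EUI-64 mapping from RFC 4291 (insert ff:fe in the middle, flip the universal/local bit); a self-contained check using QEMU's default MAC:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* The helper above, reproduced so this snippet stands alone. */
    static void addrconf_addr_eui48(uint8_t *eui, const char *addr)
    {
        memcpy(eui, addr, 3);
        eui[3] = 0xFF;
        eui[4] = 0xFE;
        memcpy(eui + 5, addr + 3, 3);
        eui[0] ^= 2;
    }

    int main(void)
    {
        const uint8_t mac[6] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 };
        uint8_t eui[8];

        addrconf_addr_eui48(eui, (const char *)mac);
        /* Prints 50:54:00:ff:fe:12:34:56 (U/L bit of 0x52 flipped). */
        for (int i = 0; i < 8; i++) {
            printf("%02x%c", eui[i], i == 7 ? '\n' : ':');
        }
        return 0;
    }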
diff --git a/hw/rdma/trace.h b/hw/rdma/trace.h
deleted file mode 100644
index b3fa8ebc51..0000000000
--- a/hw/rdma/trace.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "trace/trace-hw_rdma.h"
diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
deleted file mode 100644
index 4cbc10c980..0000000000
--- a/hw/rdma/vmw/pvrdma.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * QEMU VMWARE paravirtual RDMA device definitions
- *
- * Copyright (C) 2018 Oracle
- * Copyright (C) 2018 Red Hat Inc
- *
- * Authors:
- *     Yuval Shaia <yuval.shaia@oracle.com>
- *     Marcel Apfelbaum <marcel@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef PVRDMA_PVRDMA_H
-#define PVRDMA_PVRDMA_H
-
-#include "qemu/units.h"
-#include "qemu/notify.h"
-#include "hw/pci/msix.h"
-#include "hw/pci/pci_device.h"
-#include "chardev/char-fe.h"
-#include "hw/net/vmxnet3_defs.h"
-
-#include "../rdma_backend_defs.h"
-#include "../rdma_rm_defs.h"
-
-#include "standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h"
-#include "pvrdma_dev_ring.h"
-#include "qom/object.h"
-
-/* BARs */
-#define RDMA_MSIX_BAR_IDX    0
-#define RDMA_REG_BAR_IDX     1
-#define RDMA_UAR_BAR_IDX     2
-#define RDMA_BAR0_MSIX_SIZE  (16 * KiB)
-#define RDMA_BAR1_REGS_SIZE  64
-#define RDMA_BAR2_UAR_SIZE   (0x1000 * MAX_UCS) /* each UC gets one page */
-
-/* MSIX */
-#define RDMA_MAX_INTRS       3
-#define RDMA_MSIX_TABLE      0x0000
-#define RDMA_MSIX_PBA        0x2000
-
-/* Interrupts Vectors */
-#define INTR_VEC_CMD_RING            0
-#define INTR_VEC_CMD_ASYNC_EVENTS    1
-#define INTR_VEC_CMD_COMPLETION_Q    2
-
-/* HW attributes */
-#define PVRDMA_HW_NAME       "pvrdma"
-#define PVRDMA_HW_VERSION    17
-#define PVRDMA_FW_VERSION    14
-
-/* Some defaults */
-#define PVRDMA_PKEY          0xFFFF
-
-typedef struct DSRInfo {
-    dma_addr_t dma;
-    struct pvrdma_device_shared_region *dsr;
-
-    union pvrdma_cmd_req *req;
-    union pvrdma_cmd_resp *rsp;
-
-    PvrdmaRingState *async_ring_state;
-    PvrdmaRing async;
-
-    PvrdmaRingState *cq_ring_state;
-    PvrdmaRing cq;
-} DSRInfo;
-
-typedef struct PVRDMADevStats {
-    uint64_t commands;
-    uint64_t regs_reads;
-    uint64_t regs_writes;
-    uint64_t uar_writes;
-    uint64_t interrupts;
-} PVRDMADevStats;
-
-struct PVRDMADev {
-    PCIDevice parent_obj;
-    MemoryRegion msix;
-    MemoryRegion regs;
-    uint32_t regs_data[RDMA_BAR1_REGS_SIZE];
-    MemoryRegion uar;
-    uint32_t uar_data[RDMA_BAR2_UAR_SIZE];
-    DSRInfo dsr_info;
-    int interrupt_mask;
-    struct ibv_device_attr dev_attr;
-    uint64_t node_guid;
-    char *backend_eth_device_name;
-    char *backend_device_name;
-    uint8_t backend_port_num;
-    RdmaBackendDev backend_dev;
-    RdmaDeviceResources rdma_dev_res;
-    CharBackend mad_chr;
-    VMXNET3State *func0;
-    Notifier shutdown_notifier;
-    PVRDMADevStats stats;
-};
-typedef struct PVRDMADev PVRDMADev;
-DECLARE_INSTANCE_CHECKER(PVRDMADev, PVRDMA_DEV,
-                         PVRDMA_HW_NAME)
-
-static inline int get_reg_val(PVRDMADev *dev, hwaddr addr, uint32_t *val)
-{
-    int idx = addr >> 2;
-
-    if (idx >= RDMA_BAR1_REGS_SIZE) {
-        return -EINVAL;
-    }
-
-    *val = dev->regs_data[idx];
-
-    return 0;
-}
-
-static inline int set_reg_val(PVRDMADev *dev, hwaddr addr, uint32_t val)
-{
-    int idx = addr >> 2;
-
-    if (idx >= RDMA_BAR1_REGS_SIZE) {
-        return -EINVAL;
-    }
-
-    dev->regs_data[idx] = val;
-
-    return 0;
-}
-
-static inline void post_interrupt(PVRDMADev *dev, unsigned vector)
-{
-    PCIDevice *pci_dev = PCI_DEVICE(dev);
-
-    if (likely(!dev->interrupt_mask)) {
-        dev->stats.interrupts++;
-        msix_notify(pci_dev, vector);
-    }
-}
-
-int pvrdma_exec_cmd(PVRDMADev *dev);
-
-#endif
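To show how the inlines above fit together, a hedged sketch of a BAR1 write handler: the doorbell offset is hypothetical, but the bounds-checked store plus interrupt pattern follows directly from set_reg_val() and post_interrupt():

    /* Hedged sketch of a register-write path; offsets are illustrative. */
    static void example_regs_write(PVRDMADev *dev, hwaddr addr, uint32_t val)
    {
        if (set_reg_val(dev, addr, val)) {
            rdma_error_report("bad register write, addr=0x%" PRIx64,
                              (uint64_t)addr);
            return;
        }
        dev->stats.regs_writes++;
        if (addr == 0x10 /* hypothetical command doorbell */) {
            post_interrupt(dev, INTR_VEC_CMD_RING);
        }
    }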
diff --git a/hw/rdma/vmw/pvrdma_dev_ring.h b/hw/rdma/vmw/pvrdma_dev_ring.h
deleted file mode 100644
index d231588ce0..0000000000
--- a/hw/rdma/vmw/pvrdma_dev_ring.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * QEMU VMWARE paravirtual RDMA ring utilities
- *
- * Copyright (C) 2018 Oracle
- * Copyright (C) 2018 Red Hat Inc
- *
- * Authors:
- *     Yuval Shaia <yuval.shaia@oracle.com>
- *     Marcel Apfelbaum <marcel@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef PVRDMA_DEV_RING_H
-#define PVRDMA_DEV_RING_H
-
-
-#define MAX_RING_NAME_SZ 32
-
-typedef struct PvrdmaRingState {
-    int prod_tail; /* producer tail */
-    int cons_head; /* consumer head */
-} PvrdmaRingState;
-
-typedef struct PvrdmaRing {
-    char name[MAX_RING_NAME_SZ];
-    PCIDevice *dev;
-    uint32_t max_elems;
-    size_t elem_sz;
-    PvrdmaRingState *ring_state; /* used only for unmap */
-    int npages;
-    void **pages;
-} PvrdmaRing;
-
-int pvrdma_ring_init(PvrdmaRing *ring, const char *name, PCIDevice *dev,
-                     PvrdmaRingState *ring_state, uint32_t max_elems,
-                     size_t elem_sz, dma_addr_t *tbl, uint32_t npages);
-void *pvrdma_ring_next_elem_read(PvrdmaRing *ring);
-void pvrdma_ring_read_inc(PvrdmaRing *ring);
-void *pvrdma_ring_next_elem_write(PvrdmaRing *ring);
-void pvrdma_ring_write_inc(PvrdmaRing *ring);
-void pvrdma_ring_free(PvrdmaRing *ring);
-
-#endif
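The split read/inc API suggests the consumer loop below; the assumption that next_elem_read returns NULL on an empty ring, and the element handler, are illustrative:

    static void example_handle(void *elem)
    {
        (void)elem;    /* a real consumer would decode a request here */
    }

    /* Sketch: drain a ring, releasing each slot only after use. */
    static void example_drain(PvrdmaRing *ring)
    {
        void *elem;

        while ((elem = pvrdma_ring_next_elem_read(ring)) != NULL) {
            example_handle(elem);
            pvrdma_ring_read_inc(ring);
        }
    }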
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h
deleted file mode 100644
index bf2b15c5ce..0000000000
--- a/hw/rdma/vmw/pvrdma_qp_ops.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * QEMU VMWARE paravirtual RDMA QP Operations
- *
- * Copyright (C) 2018 Oracle
- * Copyright (C) 2018 Red Hat Inc
- *
- * Authors:
- *     Yuval Shaia <yuval.shaia@oracle.com>
- *     Marcel Apfelbaum <marcel@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef PVRDMA_QP_OPS_H
-#define PVRDMA_QP_OPS_H
-
-#include "pvrdma.h"
-
-int pvrdma_qp_ops_init(void);
-void pvrdma_qp_ops_fini(void);
-void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle);
-void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle);
-void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle);
-void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle);
-
-#endif
diff --git a/hw/rdma/vmw/trace.h b/hw/rdma/vmw/trace.h
deleted file mode 100644
index 3ebc9fb7ad..0000000000
--- a/hw/rdma/vmw/trace.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "trace/trace-hw_rdma_vmw.h"
diff --git a/include/hw/rdma/rdma.h b/include/hw/rdma/rdma.h
deleted file mode 100644
index 80b2e531c4..0000000000
--- a/include/hw/rdma/rdma.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * RDMA device interface
- *
- * Copyright (C) 2019 Oracle
- * Copyright (C) 2019 Red Hat Inc
- *
- * Authors:
- *     Yuval Shaia <yuval.shaia@oracle.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef RDMA_H
-#define RDMA_H
-
-#include "qom/object.h"
-
-#define INTERFACE_RDMA_PROVIDER "rdma"
-
-typedef struct RdmaProviderClass RdmaProviderClass;
-DECLARE_CLASS_CHECKERS(RdmaProviderClass, RDMA_PROVIDER,
-                       INTERFACE_RDMA_PROVIDER)
-#define RDMA_PROVIDER(obj) \
-    INTERFACE_CHECK(RdmaProvider, (obj), \
-                    INTERFACE_RDMA_PROVIDER)
-
-typedef struct RdmaProvider RdmaProvider;
-
-struct RdmaProviderClass {
-    InterfaceClass parent;
-
-    void (*format_statistics)(RdmaProvider *obj, GString *buf);
-};
-
-#endif
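For reference, a sketch of how a provider hooked into this interface so statistics queries could reach it; the stats value is a stub, and the QOM TypeInfo boilerplate (including listing INTERFACE_RDMA_PROVIDER in .interfaces) is abbreviated:

    /* Sketch: wire up the single interface hook in a class_init. */
    static void example_format_statistics(RdmaProvider *obj, GString *buf)
    {
        g_string_append_printf(buf, "tx=%" PRIu64 "\n", (uint64_t)0);
    }

    static void example_class_init(ObjectClass *klass, void *data)
    {
        RdmaProviderClass *rpc = RDMA_PROVIDER_CLASS(klass);

        rpc->format_statistics = example_format_statistics;
    }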
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 13f9a2dedb..f4cf8f6717 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -37,7 +37,6 @@ void hmp_info_spice(Monitor *mon, const QDict *qdict);
 void hmp_info_balloon(Monitor *mon, const QDict *qdict);
 void hmp_info_irq(Monitor *mon, const QDict *qdict);
 void hmp_info_pic(Monitor *mon, const QDict *qdict);
-void hmp_info_rdma(Monitor *mon, const QDict *qdict);
 void hmp_info_pci(Monitor *mon, const QDict *qdict);
 void hmp_info_tpm(Monitor *mon, const QDict *qdict);
 void hmp_info_iothreads(Monitor *mon, const QDict *qdict);
diff --git a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
deleted file mode 100644
index a5a1c8234e..0000000000
--- a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
+++ /dev/null
@@ -1,685 +0,0 @@
-/*
- * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of EITHER the GNU General Public License
- * version 2 as published by the Free Software Foundation or the BSD
- * 2-Clause License. This program is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
- * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License version 2 for more details at
- *
