qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v1 2/2] Sample mtty: Add migration capability to mtty module


From: Yan Zhao
Subject: Re: [PATCH v1 2/2] Sample mtty: Add migration capability to mtty module
Date: Wed, 6 May 2020 21:01:26 -0400
User-agent: Mutt/1.9.4 (2018-02-28)

On Tue, May 05, 2020 at 01:54:20AM +0800, Kirti Wankhede wrote:
> This patch makes mtty device migration capable. Purpose od this code is
> to test migration interface. Only stop-and-copy phase is implemented.
> Postcopy migration is not supported.
> 
> Actual data for mtty device migration is very less. Appended dummy data to
> migration data stream, default 100 Mbytes. Added sysfs file
> 'dummy_data_size_MB' to get dummy data size from user which can be used
> to check performance of based of data size. During resuming dummy data is
> read and discarded.
> 
> Signed-off-by: Kirti Wankhede <address@hidden>
> ---
>  samples/vfio-mdev/mtty.c | 602 
> ++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 574 insertions(+), 28 deletions(-)
> 
> diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
> index bf666cce5bb7..f9194234fc6a 100644
> --- a/samples/vfio-mdev/mtty.c
> +++ b/samples/vfio-mdev/mtty.c
> @@ -44,9 +44,23 @@
>  
>  #define MTTY_STRING_LEN              16
>  
> -#define MTTY_CONFIG_SPACE_SIZE  0xff
> -#define MTTY_IO_BAR_SIZE        0x8
> -#define MTTY_MMIO_BAR_SIZE      0x100000
> +#define MTTY_CONFIG_SPACE_SIZE               0xff
> +#define MTTY_IO_BAR_SIZE             0x8
> +#define MTTY_MMIO_BAR_SIZE           0x100000
> +#define MTTY_MIGRATION_REGION_SIZE   0x1000000       // 16M
> +
> +#define MTTY_MIGRATION_REGION_INDEX  VFIO_PCI_NUM_REGIONS
> +#define MTTY_REGIONS_MAX             (MTTY_MIGRATION_REGION_INDEX + 1)
> +
> +/* Data section start from page aligned offset */
> +#define MTTY_MIGRATION_REGION_DATA_OFFSET    (0x1000)
> +
> +/* First page is used for struct vfio_device_migration_info */
> +#define MTTY_MIGRATION_REGION_SIZE_MMAP     \
> +     (MTTY_MIGRATION_REGION_SIZE - MTTY_MIGRATION_REGION_DATA_OFFSET)
> +
> +#define MIGRATION_INFO_OFFSET(MEMBER)        \
> +             offsetof(struct vfio_device_migration_info, MEMBER)
>  
>  #define STORE_LE16(addr, val)   (*(u16 *)addr = val)
>  #define STORE_LE32(addr, val)   (*(u32 *)addr = val)
> @@ -129,6 +143,28 @@ struct serial_port {
>       u8 intr_trigger_level;  /* interrupt trigger level */
>  };
>  
> +/* Migration packet */
> +#define PACKET_ID            (u16)(0xfeedbaba)
> +
> +#define PACKET_FLAGS_ACTUAL_DATA     (1 << 0)
> +#define PACKET_FLAGS_DUMMY_DATA              (1 << 1)
> +
> +#define PACKET_DATA_SIZE_MAX         (8 * 1024 * 1024)
> +
> +struct packet {
> +     u16 id;
> +     u16 flags;
> +     u32 data_size;
> +     u8 data[];
> +};
> +
> +enum {
> +     PACKET_STATE_NONE = 0,
> +     PACKET_STATE_PREPARED,
> +     PACKET_STATE_COPIED,
> +     PACKET_STATE_LAST,
> +};
> +
>  /* State of each mdev device */
>  struct mdev_state {
>       int irq_fd;
> @@ -138,22 +174,37 @@ struct mdev_state {
>       u8 *vconfig;
>       struct mutex ops_lock;
>       struct mdev_device *mdev;
> -     struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];
> -     u32 bar_mask[VFIO_PCI_NUM_REGIONS];
> +     struct mdev_region_info region_info[MTTY_REGIONS_MAX];
> +     u32 bar_mask[MTTY_REGIONS_MAX];
>       struct list_head next;
>       struct serial_port s[2];
>       struct mutex rxtx_lock;
>       struct vfio_device_info dev_info;
> -     int nr_ports;
> +     u32 nr_ports;
>  
>       /* List of pinned gpfns, gpfn as index and content is translated hpfn */
>       unsigned long *gpfn_to_hpfn;
>       struct notifier_block nb;
> +
> +     u32 device_state;
> +     u64 saved_size;
> +     void *mig_region_base;
> +     bool is_actual_data_sent;
> +     struct packet *pkt;
> +     u32 packet_state;
> +     u64 dummy_data_size;
>  };
>  
>  static struct mutex mdev_list_lock;
>  static struct list_head mdev_devices_list;
>  
> +/*
> + * Default dummy data size set to 100 MB. To change value of dummy data size 
> at
> + * runtime but before migration write size in MB to sysfs file
> + * dummy_data_size_MB
> + */
> +static unsigned long user_dummy_data_size = (100 * 1024 * 1024);
> +
>  static const struct file_operations vd_fops = {
>       .owner          = THIS_MODULE,
>  };
> @@ -639,6 +690,288 @@ static void mdev_read_base(struct mdev_state 
> *mdev_state)
>       }
>  }
>  
> +static int save_setup(struct mdev_state *mdev_state)
> +{
> +     mdev_state->is_actual_data_sent = false;
> +
> +     memset(mdev_state->pkt, 0, sizeof(struct packet) +
> +                                PACKET_DATA_SIZE_MAX);
> +
> +     return 0;
> +}
> +
> +static int set_device_state(struct mdev_state *mdev_state, u32 device_state)
> +{
> +     int ret = 0;
> +
> +     if (mdev_state->device_state == device_state)
> +             return 0;
> +
> +     if (device_state & VFIO_DEVICE_STATE_RUNNING) {
> +#if defined(DEBUG)
> +             if (device_state & VFIO_DEVICE_STATE_SAVING) {
> +                     pr_info("%s: %s Pre-copy\n", __func__,
> +                             dev_name(mdev_dev(mdev_state->mdev)));
> +             } else
> +                     pr_info("%s: %s Running\n", __func__,
> +                             dev_name(mdev_dev(mdev_state->mdev)));
> +#endif
> +     } else {
> +             if (device_state & VFIO_DEVICE_STATE_SAVING) {
> +#if defined(DEBUG)
> +                     pr_info("%s: %s Stop-n-copy\n", __func__,
> +                             dev_name(mdev_dev(mdev_state->mdev)));
> +#endif
> +                     ret = save_setup(mdev_state);
> +
> +             } else if (device_state & VFIO_DEVICE_STATE_RESUMING) {
> +#if defined(DEBUG)
> +                     pr_info("%s: %s Resuming\n", __func__,
> +                             dev_name(mdev_dev(mdev_state->mdev)));
> +             } else {
> +                     pr_info("%s: %s Stopped\n", __func__,
> +                             dev_name(mdev_dev(mdev_state->mdev)));
> +#endif
> +             }
> +     }
> +
> +     mdev_state->device_state = device_state;
> +
> +     return ret;
> +}
> +
> +static u32 get_device_state(struct mdev_state *mdev_state)
> +{
> +     return mdev_state->device_state;
> +}
> +
> +static void write_to_packet(struct packet *pkt, u8 *data, size_t size)
> +{
> +     if ((pkt->data_size + size) > PACKET_DATA_SIZE_MAX) {
> +             pr_err("%s: packet data overflow\n", __func__);
> +             return;
> +     }
> +     memcpy((void *)&pkt->data[pkt->data_size], (void *)data, size);
> +     pkt->data_size += size;
> +}
> +
> +static void read_from_packet(struct packet *pkt, u8 *data,
> +                          int index, size_t size)
> +{
> +     if ((index + size) > PACKET_DATA_SIZE_MAX) {
> +             pr_err("%s: packet data overflow\n", __func__);
> +             return;
> +     }
> +
> +     memcpy((void *)data, (void *)&pkt->data[index], size);
> +}
> +
> +static int save_device_data(struct mdev_state *mdev_state, u64 *pending)
> +{
> +     /* Save device data only during stop-and-copy phase */
> +     if (mdev_state->device_state != VFIO_DEVICE_STATE_SAVING) {
> +             *pending = 0;
> +             return 0;
> +     }
> +
> +     if (mdev_state->packet_state == PACKET_STATE_PREPARED) {
> +             *pending = sizeof(struct packet) + mdev_state->pkt->data_size;
> +             return 0;
> +     }
> +
> +     if (!mdev_state->is_actual_data_sent) {
> +
> +             /* create actual data packet */
> +             write_to_packet(mdev_state->pkt, (u8 *)&mdev_state->nr_ports,
> +                             sizeof(mdev_state->nr_ports));
> +             write_to_packet(mdev_state->pkt, (u8 *)&mdev_state->s,
> +                             sizeof(struct serial_port) * 2);
> +
> +             write_to_packet(mdev_state->pkt, mdev_state->vconfig,
> +                             MTTY_CONFIG_SPACE_SIZE);
> +
> +             write_to_packet(mdev_state->pkt, (u8 *)mdev_state->gpfn_to_hpfn,
> +                             sizeof(unsigned long) * MAX_GPFN_COUNT);
> +
> +             mdev_state->pkt->id = PACKET_ID;
> +             mdev_state->pkt->flags = PACKET_FLAGS_ACTUAL_DATA;
> +
> +             mdev_state->is_actual_data_sent = true;
> +     } else {
> +             /* create dummy data packet */
> +             if (mdev_state->dummy_data_size > user_dummy_data_size) {
> +                     *pending = 0;
> +                     mdev_state->packet_state = PACKET_STATE_NONE;
> +                     return 0;
> +             }
> +
> +             memset(mdev_state->pkt->data, 0xa5, PACKET_DATA_SIZE_MAX);
> +
> +             mdev_state->pkt->id = PACKET_ID;
> +             mdev_state->pkt->flags = PACKET_FLAGS_DUMMY_DATA;
> +             mdev_state->pkt->data_size = PACKET_DATA_SIZE_MAX;
> +             mdev_state->dummy_data_size += PACKET_DATA_SIZE_MAX;
> +     }
> +
> +     *pending = sizeof(struct packet) + mdev_state->pkt->data_size;
> +     mdev_state->packet_state = PACKET_STATE_PREPARED;
> +     mdev_state->saved_size = 0;
> +
> +     return 0;
> +}
> +
> +static int copy_device_data(struct mdev_state *mdev_state)
> +{
> +     u64 size;
> +
> +     if (!mdev_state->pkt || !mdev_state->mig_region_base)
> +             return -EINVAL;
> +
> +     if (mdev_state->packet_state == PACKET_STATE_COPIED)
> +             return 0;
> +
> +     if (!mdev_state->pkt->data_size)
> +             return 0;
> +
> +     size = sizeof(struct packet) + mdev_state->pkt->data_size;
> +
> +     memcpy(mdev_state->mig_region_base, mdev_state->pkt, size);
> +
if data area is mmaped, who is going to copy data from mdev_state->pkt
to mdev_state->mig_region_base ?
actually, I do see this area is mmaped in this sample.

> +     mdev_state->saved_size = size;
> +     mdev_state->packet_state = PACKET_STATE_COPIED;
> +     memset(mdev_state->pkt, 0, sizeof(struct packet));
> +     return 0;
> +}
> +
> +static int resume_device_data(struct mdev_state *mdev_state, u64 data_size)
> +{
> +     unsigned long i;
> +
> +     if (mdev_state->device_state != VFIO_DEVICE_STATE_RESUMING)
> +             return -EINVAL;
> +
> +     if (!mdev_state->pkt || !mdev_state->mig_region_base)
> +             return -EINVAL;
> +
> +     memcpy(mdev_state->pkt, mdev_state->mig_region_base, data_size);
> +
> +     if (mdev_state->pkt->flags & PACKET_FLAGS_ACTUAL_DATA) {
> +             int index = 0;
> +             /* restore device data */
> +             read_from_packet(mdev_state->pkt, (u8 *)&mdev_state->nr_ports,
> +                              index, sizeof(mdev_state->nr_ports));
> +             index += sizeof(mdev_state->nr_ports);
> +
> +             read_from_packet(mdev_state->pkt, (u8 *)&mdev_state->s,
> +                             index, sizeof(struct serial_port) * 2);
> +             index += sizeof(struct serial_port) * 2;
> +
> +             read_from_packet(mdev_state->pkt, mdev_state->vconfig,
> +                              index, MTTY_CONFIG_SPACE_SIZE);
> +             index += MTTY_CONFIG_SPACE_SIZE;
> +
> +             read_from_packet(mdev_state->pkt,
> +                             (u8 *)mdev_state->gpfn_to_hpfn,
> +                             index, sizeof(unsigned long) * MAX_GPFN_COUNT);
> +             index += sizeof(unsigned long) * MAX_GPFN_COUNT;
> +
> +             for (i = 0; i < MAX_GPFN_COUNT; i++) {
> +                     if (mdev_state->gpfn_to_hpfn[i] != PFN_NULL) {
> +                             int ret;
> +                             unsigned long hpfn;
> +
> +                             ret = vfio_pin_pages(mdev_dev(mdev_state->mdev),
> +                                    &i, 1, IOMMU_READ | IOMMU_WRITE, &hpfn);
> +                             if (ret <= 0) {
> +                                     pr_err("%s: 0x%lx unpin error %d\n",
> +                                                     __func__, i, ret);
> +                                     continue;
> +                             }
> +                             mdev_state->gpfn_to_hpfn[i] = hpfn;
> +                     }
> +             }
> +     } else {
> +#if defined(DEBUG)
> +             pr_info("%s: %s discard data 0x%llx\n",
> +                      __func__, dev_name(mdev_dev(mdev_state->mdev)),
> +                     data_size);
> +#endif
> +     }
> +
> +     return 0;
> +}
> +
> +static int handle_mig_read(unsigned int index, struct mdev_state *mdev_state,
> +                        loff_t offset, u8 *buf, u32 count)
> +{
> +     int ret = 0;
> +     u64 pending = 0;
> +
> +     switch (offset) {
> +     case MIGRATION_INFO_OFFSET(device_state):       // 0x00
> +             *(u32 *)buf = get_device_state(mdev_state);
> +             break;
> +
> +     case MIGRATION_INFO_OFFSET(pending_bytes):      // 0x08
> +             ret = save_device_data(mdev_state, &pending);
> +             if (ret)
> +                     break;
> +             *(u64 *)buf = pending;
> +             break;
> +
> +     case MIGRATION_INFO_OFFSET(data_offset):        // 0x10
> +             if (mdev_state->device_state & VFIO_DEVICE_STATE_SAVING) {
> +                     ret = copy_device_data(mdev_state);
> +                     if (ret)
> +                             break;
> +             }
> +             *(u64 *)buf = MTTY_MIGRATION_REGION_DATA_OFFSET;
what is this?
> +             break;
> +
> +     case MIGRATION_INFO_OFFSET(data_size):          // 0x18
> +             *(u64 *)buf = mdev_state->saved_size;
> +             break;
> +
> +     default:
> +             ret = -EINVAL;
> +     }
> +
> +#if defined(DEBUG)
> +     pr_info("%s: %s MIG  RD @0x%llx bytes: %d data: 0x%x\n",
> +                     __func__, dev_name(mdev_dev(mdev_state->mdev)),
> +                     offset, count, *(u32 *)buf);
> +#endif
> +     return ret;
> +}
> +
> +static int handle_mig_write(unsigned int index, struct mdev_state 
> *mdev_state,
> +                             loff_t offset, u8 *buf, u32 count)
> +{
> +     int ret = 0;
> +
> +#if defined(DEBUG)
> +     pr_info("%s: %s MIG  WR @0x%llx bytes: %d data: 0x%x\n",
> +                     __func__, dev_name(mdev_dev(mdev_state->mdev)),
> +                     offset, count, *(u32 *)buf);
> +#endif
> +     switch (offset) {
> +     case MIGRATION_INFO_OFFSET(device_state):       // 0x00
> +             ret = set_device_state(mdev_state, *(u32 *)buf);
> +             break;
> +
> +     case MIGRATION_INFO_OFFSET(data_size):          // 0x18
> +             ret = resume_device_data(mdev_state, *(u64 *)buf);
> +             break;
> +
> +     case MIGRATION_INFO_OFFSET(pending_bytes):      // 0x08
> +     case MIGRATION_INFO_OFFSET(data_offset):        // 0x10
> +     default:
> +             ret = -EINVAL;
> +     }
> +
> +     return ret;
> +}
> +
>  static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count,
>                          loff_t pos, bool is_write)
>  {
> @@ -702,6 +1035,18 @@ static ssize_t mdev_access(struct mdev_device *mdev, u8 
> *buf, size_t count,
>               }
>               break;
>  
> +     case MTTY_MIGRATION_REGION_INDEX:
> +             if (is_write) {
> +                     ret = handle_mig_write(index, mdev_state, offset, buf,
> +                                           count);
> +             } else {
> +                     ret = handle_mig_read(index, mdev_state, offset, buf,
> +                                           count);
> +             }
> +             if (ret)
> +                     goto accessfailed;
> +             break;
> +
>       default:
>               ret = -1;
>               goto accessfailed;
> @@ -709,7 +1054,6 @@ static ssize_t mdev_access(struct mdev_device *mdev, u8 
> *buf, size_t count,
>  
>       ret = count;
>  
> -
>  accessfailed:
>       mutex_unlock(&mdev_state->ops_lock);
>  
> @@ -819,13 +1163,29 @@ static int mtty_reset(struct mdev_device *mdev)
>  static ssize_t mtty_read(struct mdev_device *mdev, char __user *buf,
>                        size_t count, loff_t *ppos)
>  {
> -     unsigned int done = 0;
> +     unsigned int done = 0, index;
>       int ret;
>  
> +     index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(*ppos);
> +
>       while (count) {
>               size_t filled;
>  
> -             if (count >= 4 && !(*ppos % 4)) {
> +             if ((index == MTTY_MIGRATION_REGION_INDEX) &&
> +                 (count >= 8 && !(*ppos % 8))) {
> +                     u64 val;
> +
> +                     ret =  mdev_access(mdev, (u8 *)&val, sizeof(val),
> +                                        *ppos, false);
> +                     if (ret <= 0)
> +                             goto read_err;
> +
> +                     if (copy_to_user(buf, &val, sizeof(val)))
> +                             goto read_err;
> +
> +                     filled = 8;
> +
> +             } else if (count >= 4 && !(*ppos % 4)) {
>                       u32 val;
>  
>                       ret =  mdev_access(mdev, (u8 *)&val, sizeof(val),
> @@ -878,13 +1238,27 @@ static ssize_t mtty_read(struct mdev_device *mdev, 
> char __user *buf,
>  static ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf,
>                  size_t count, loff_t *ppos)
>  {
> -     unsigned int done = 0;
> +     unsigned int done = 0, index;
>       int ret;
>  
> +     index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(*ppos);
>       while (count) {
>               size_t filled;
>  
> -             if (count >= 4 && !(*ppos % 4)) {
> +             if ((index == MTTY_MIGRATION_REGION_INDEX) &&
> +                 (count >= 8 && !(*ppos % 8))) {
> +                     u64 val;
> +
> +                     if (copy_from_user(&val, buf, sizeof(val)))
> +                             goto write_err;
> +
> +                     ret = mdev_access(mdev, (u8 *)&val, sizeof(val),
> +                                       *ppos, true);
> +                     if (ret <= 0)
> +                             goto write_err;
> +
> +                     filled = 8;
> +             } else if (count >= 4 && !(*ppos % 4)) {
>                       u32 val;
>  
>                       if (copy_from_user(&val, buf, sizeof(val)))
> @@ -1061,12 +1435,13 @@ static int mtty_trigger_interrupt(struct mdev_state 
> *mdev_state)
>  }
>  
>  static int mtty_get_region_info(struct mdev_device *mdev,
> -                      struct vfio_region_info *region_info,
> -                      u16 *cap_type_id, void **cap_type)
> +                             struct vfio_region_info *region_info,
> +                             struct vfio_info_cap *caps)
>  {
>       unsigned int size = 0;
>       struct mdev_state *mdev_state;
> -     u32 bar_index;
> +     u32 index;
> +     int ret = 0;
>  
>       if (!mdev)
>               return -EINVAL;
> @@ -1075,13 +1450,13 @@ static int mtty_get_region_info(struct mdev_device 
> *mdev,
>       if (!mdev_state)
>               return -EINVAL;
>  
> -     bar_index = region_info->index;
> -     if (bar_index >= VFIO_PCI_NUM_REGIONS)
> +     index = region_info->index;
> +     if (index >= MTTY_REGIONS_MAX)
>               return -EINVAL;
>  
>       mutex_lock(&mdev_state->ops_lock);
>  
> -     switch (bar_index) {
> +     switch (index) {
>       case VFIO_PCI_CONFIG_REGION_INDEX:
>               size = MTTY_CONFIG_SPACE_SIZE;
>               break;
> @@ -1092,21 +1467,63 @@ static int mtty_get_region_info(struct mdev_device 
> *mdev,
>               if (mdev_state->nr_ports == 2)
>                       size = MTTY_IO_BAR_SIZE;
>               break;
> +     case MTTY_MIGRATION_REGION_INDEX:
> +             size = MTTY_MIGRATION_REGION_SIZE;
> +             break;
>       default:
>               size = 0;
>               break;
>       }
>  
> -     mdev_state->region_info[bar_index].size = size;
> -     mdev_state->region_info[bar_index].vfio_offset =
> -             MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
> +     mdev_state->region_info[index].size = size;
> +     mdev_state->region_info[index].vfio_offset =
> +                                     MTTY_VFIO_PCI_INDEX_TO_OFFSET(index);
>  
>       region_info->size = size;
> -     region_info->offset = MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
> +     region_info->offset = MTTY_VFIO_PCI_INDEX_TO_OFFSET(index);
>       region_info->flags = VFIO_REGION_INFO_FLAG_READ |
> -             VFIO_REGION_INFO_FLAG_WRITE;
> +                          VFIO_REGION_INFO_FLAG_WRITE;
> +
> +     if (index == MTTY_MIGRATION_REGION_INDEX) {
> +             struct vfio_region_info_cap_sparse {
> +                     struct vfio_region_info_cap_sparse_mmap sparse;
> +                     struct vfio_region_sparse_mmap_area area;
> +             };
> +
> +             struct vfio_region_info_cap_sparse mig_region;
> +
> +             struct vfio_region_info_cap_type cap_type = {
> +                     .header.id = VFIO_REGION_INFO_CAP_TYPE,
> +                     .header.version = 1,
> +                     .type = VFIO_REGION_TYPE_MIGRATION,
> +                     .subtype = VFIO_REGION_SUBTYPE_MIGRATION
> +             };
> +
> +             /* Add REGION CAP type */
> +             ret = vfio_info_add_capability(caps, &cap_type.header,
> +                                             sizeof(cap_type));
> +             if (ret)
> +                     goto exit;
> +
> +             /* Add sparse mmap cap type */
> +             mig_region.sparse.nr_areas = 1;
> +             mig_region.sparse.header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
> +             mig_region.sparse.header.version = 1;
> +
> +             mig_region.area.offset = MTTY_MIGRATION_REGION_DATA_OFFSET;
> +             mig_region.area.size = MTTY_MIGRATION_REGION_SIZE_MMAP;
> +
> +             region_info->flags |= VFIO_REGION_INFO_FLAG_CAPS;
> +
> +             if (region_info->argsz > sizeof(*region_info))
> +                     region_info->flags |= VFIO_REGION_INFO_FLAG_MMAP;
> +
> +             ret = vfio_info_add_capability(caps, &mig_region.sparse.header,
> +                                             sizeof(mig_region));
> +     }
> +exit:
>       mutex_unlock(&mdev_state->ops_lock);
> -     return 0;
> +     return ret;
>  }
>  
>  static int mtty_get_irq_info(struct mdev_device *mdev,
> @@ -1138,7 +1555,7 @@ static int mtty_get_device_info(struct mdev_device 
> *mdev,
>                        struct vfio_device_info *dev_info)
>  {
>       dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
> -     dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
> +     dev_info->num_regions = MTTY_REGIONS_MAX;
>       dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
>  
>       return 0;
> @@ -1150,6 +1567,7 @@ static long mtty_ioctl(struct mdev_device *mdev, 
> unsigned int cmd,
>       int ret = 0;
>       unsigned long minsz;
>       struct mdev_state *mdev_state;
> +     struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
>  
>       if (!mdev)
>               return -EINVAL;
> @@ -1185,8 +1603,6 @@ static long mtty_ioctl(struct mdev_device *mdev, 
> unsigned int cmd,
>       case VFIO_DEVICE_GET_REGION_INFO:
>       {
>               struct vfio_region_info info;
> -             u16 cap_type_id = 0;
> -             void *cap_type = NULL;
>  
>               minsz = offsetofend(struct vfio_region_info, offset);
>  
> @@ -1196,11 +1612,29 @@ static long mtty_ioctl(struct mdev_device *mdev, 
> unsigned int cmd,
>               if (info.argsz < minsz)
>                       return -EINVAL;
>  
> -             ret = mtty_get_region_info(mdev, &info, &cap_type_id,
> -                                        &cap_type);
> +             ret = mtty_get_region_info(mdev, &info, &caps);
>               if (ret)
>                       return ret;
>  
> +             if (caps.size) {
> +                     info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
> +                     if (info.argsz < sizeof(info) + caps.size) {
> +                             info.argsz = sizeof(info) + caps.size;
> +                             info.cap_offset = 0;
> +                     } else {
> +                             vfio_info_cap_shift(&caps, sizeof(info));
> +                             if (copy_to_user((void __user *)arg +
> +                                                     sizeof(info), caps.buf,
> +                                                     caps.size)) {
> +                                     kfree(caps.buf);
> +                                     ret = -EFAULT;
> +                                     break;
> +                             }
> +                             info.cap_offset = sizeof(info);
> +                     }
> +                     kfree(caps.buf);
> +             }
> +
>               if (copy_to_user((void __user *)arg, &info, minsz))
>                       return -EFAULT;
>  
> @@ -1266,6 +1700,89 @@ static long mtty_ioctl(struct mdev_device *mdev, 
> unsigned int cmd,
>       return -ENOTTY;
>  }
>  
> +void mmap_close(struct vm_area_struct *vma)
> +{
> +     struct mdev_device *mdev = vma->vm_private_data;
> +     struct mdev_state *mdev_state;
> +     uint32_t index = 0;
> +
> +     if (!mdev)
> +             return;
> +
> +     mdev_state = mdev_get_drvdata(mdev);
> +     if (!mdev_state)
> +             return;
> +
> +     mutex_lock(&mdev_state->ops_lock);
> +     index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(vma->vm_pgoff << PAGE_SHIFT);
> +     if (index == MTTY_MIGRATION_REGION_INDEX) {
> +             if (mdev_state->mig_region_base != NULL) {
> +                     vfree(mdev_state->mig_region_base);
> +                     mdev_state->mig_region_base = NULL;
> +             }
> +
> +             if (mdev_state->pkt != NULL) {
> +                     vfree(mdev_state->pkt);
> +                     mdev_state->pkt = NULL;
> +             }
> +     }
> +     mutex_unlock(&mdev_state->ops_lock);
> +}
> +
> +static const struct vm_operations_struct mdev_vm_ops = {
> +     .close = mmap_close,
> +};
> +
> +static int mtty_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
> +{
> +     struct mdev_state *mdev_state;
> +     unsigned int index;
> +     int ret = 0;
> +
> +     if (!mdev)
> +             return -EINVAL;
> +
> +     mdev_state = mdev_get_drvdata(mdev);
> +     if (!mdev_state)
> +             return -ENODEV;
> +
> +     mutex_lock(&mdev_state->ops_lock);
> +
> +     index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(vma->vm_pgoff << PAGE_SHIFT);
> +     if (index == MTTY_MIGRATION_REGION_INDEX) {
> +             mdev_state->mig_region_base =
> +                              vmalloc_user(MTTY_MIGRATION_REGION_SIZE_MMAP);
> +             if (mdev_state->mig_region_base == NULL) {
> +                     ret = -ENOMEM;
> +                     goto mmap_exit;
> +             }
> +
> +             mdev_state->pkt = vzalloc(sizeof(struct packet) +
> +                                       PACKET_DATA_SIZE_MAX);
> +             if (mdev_state->pkt == NULL) {
> +                     vfree(mdev_state->mig_region_base);
> +                     mdev_state->mig_region_base = NULL;
> +                     ret = -ENOMEM;
> +                     goto mmap_exit;
> +             }
> +
> +             vma->vm_ops = &mdev_vm_ops;
> +
> +             ret = remap_vmalloc_range(vma, mdev_state->mig_region_base, 0);
> +             if (ret != 0) {
> +                     pr_err("remap_vmalloc_range failed, ret= %d\n", ret);
> +                     vfree(mdev_state->mig_region_base);
> +                     mdev_state->mig_region_base = NULL;
> +                     vfree(mdev_state->pkt);
> +                     mdev_state->pkt = NULL;
> +                     goto mmap_exit;
> +             }
> +     }
> +mmap_exit:
> +     mutex_unlock(&mdev_state->ops_lock);
> +     return ret;
> +}
> +
>  static void unpin_pages_all(struct mdev_state *mdev_state)
>  {
>       struct mdev_device *mdev = mdev_state->mdev;
> @@ -1339,6 +1856,8 @@ static int mtty_open(struct mdev_device *mdev)
>  
>       ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
>                                    &mdev_state->nb);
> +     mdev_state->dummy_data_size = 0;
> +     mdev_state->mig_region_base = NULL;
>       return ret;
>  }
>  
> @@ -1355,6 +1874,15 @@ static void mtty_close(struct mdev_device *mdev)
>       unpin_pages_all(mdev_state);
>       vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
>                                &mdev_state->nb);
> +     if (mdev_state->pkt != NULL) {
> +             vfree(mdev_state->pkt);
> +             mdev_state->pkt = NULL;
> +     }
> +
> +     if (mdev_state->mig_region_base != NULL) {
> +             vfree(mdev_state->mig_region_base);
> +             mdev_state->mig_region_base = NULL;
> +     }
>  }
>  
>  static ssize_t
> @@ -1466,9 +1994,26 @@ pin_pages_store(struct device *dev, struct 
> device_attribute *attr,
>  
>  static DEVICE_ATTR_RW(pin_pages);
>  
> +static ssize_t
> +dummy_data_size_MB_store(struct device *dev, struct device_attribute *attr,
> +                      const char *buf, size_t count)
> +{
> +     int ret;
> +
> +     ret = kstrtoul(buf, 0, &user_dummy_data_size);
> +     if (ret)
> +             return ret;
> +
> +     user_dummy_data_size = user_dummy_data_size << 20;
> +     return count;
> +}
> +
> +static DEVICE_ATTR_WO(dummy_data_size_MB);
> +
>  static struct attribute *mdev_dev_attrs[] = {
>       &dev_attr_sample_mdev_dev.attr,
>       &dev_attr_pin_pages.attr,
> +     &dev_attr_dummy_data_size_MB.attr,
>       NULL,
>  };
>  
> @@ -1573,6 +2118,7 @@ static const struct mdev_parent_ops mdev_fops = {
>       .read                   = mtty_read,
>       .write                  = mtty_write,
>       .ioctl                  = mtty_ioctl,
> +     .mmap                   = mtty_mmap,
>  };
>  
>  static void mtty_device_release(struct device *dev)
> -- 
> 2.7.0
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]