qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v1 2/2] Sample mtty: Add migration capability to mtty module


From: Yan Zhao
Subject: Re: [PATCH v1 2/2] Sample mtty: Add migration capability to mtty module
Date: Thu, 7 May 2020 21:24:19 -0400
User-agent: Mutt/1.9.4 (2018-02-28)

On Thu, May 07, 2020 at 11:19:40AM +0530, Kirti Wankhede wrote:
> 
> 
> On 5/7/2020 6:31 AM, Yan Zhao wrote:
> > On Tue, May 05, 2020 at 01:54:20AM +0800, Kirti Wankhede wrote:
> > > This patch makes mtty device migration capable. Purpose od this code is
> > > to test migration interface. Only stop-and-copy phase is implemented.
> > > Postcopy migration is not supported.
> > > 
> > > Actual data for mtty device migration is very less. Appended dummy data to
> > > migration data stream, default 100 Mbytes. Added sysfs file
> > > 'dummy_data_size_MB' to get dummy data size from user which can be used
> > > to check performance of based of data size. During resuming dummy data is
> > > read and discarded.
> > > 
> > > Signed-off-by: Kirti Wankhede <address@hidden>
> > > ---
> > >   samples/vfio-mdev/mtty.c | 602 
> > > ++++++++++++++++++++++++++++++++++++++++++++---
> > >   1 file changed, 574 insertions(+), 28 deletions(-)
> > > 
> > > diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
> > > index bf666cce5bb7..f9194234fc6a 100644
> > > --- a/samples/vfio-mdev/mtty.c
> > > +++ b/samples/vfio-mdev/mtty.c
> > > @@ -44,9 +44,23 @@
> > >   #define MTTY_STRING_LEN         16
> > > -#define MTTY_CONFIG_SPACE_SIZE  0xff
> > > -#define MTTY_IO_BAR_SIZE        0x8
> > > -#define MTTY_MMIO_BAR_SIZE      0x100000
> > > +#define MTTY_CONFIG_SPACE_SIZE           0xff
> > > +#define MTTY_IO_BAR_SIZE         0x8
> > > +#define MTTY_MMIO_BAR_SIZE               0x100000
> > > +#define MTTY_MIGRATION_REGION_SIZE       0x1000000       // 16M
> > > +
> > > +#define MTTY_MIGRATION_REGION_INDEX      VFIO_PCI_NUM_REGIONS
> > > +#define MTTY_REGIONS_MAX         (MTTY_MIGRATION_REGION_INDEX + 1)
> > > +
> > > +/* Data section start from page aligned offset */
> > > +#define MTTY_MIGRATION_REGION_DATA_OFFSET        (0x1000)
> > > +
> > > +/* First page is used for struct vfio_device_migration_info */
> > > +#define MTTY_MIGRATION_REGION_SIZE_MMAP     \
> > > + (MTTY_MIGRATION_REGION_SIZE - MTTY_MIGRATION_REGION_DATA_OFFSET)
> > > +
> > > +#define MIGRATION_INFO_OFFSET(MEMBER)    \
> > > +         offsetof(struct vfio_device_migration_info, MEMBER)
> > >   #define STORE_LE16(addr, val)   (*(u16 *)addr = val)
> > >   #define STORE_LE32(addr, val)   (*(u32 *)addr = val)
> > > @@ -129,6 +143,28 @@ struct serial_port {
> > >           u8 intr_trigger_level;  /* interrupt trigger level */
> > >   };
> > > +/* Migration packet */
> > > +#define PACKET_ID                (u16)(0xfeedbaba)
> > > +
> > > +#define PACKET_FLAGS_ACTUAL_DATA (1 << 0)
> > > +#define PACKET_FLAGS_DUMMY_DATA          (1 << 1)
> > > +
> > > +#define PACKET_DATA_SIZE_MAX             (8 * 1024 * 1024)
> > > +
> > > +struct packet {
> > > + u16 id;
> > > + u16 flags;
> > > + u32 data_size;
> > > + u8 data[];
> > > +};
> > > +
> > > +enum {
> > > + PACKET_STATE_NONE = 0,
> > > + PACKET_STATE_PREPARED,
> > > + PACKET_STATE_COPIED,
> > > + PACKET_STATE_LAST,
> > > +};
> > > +
> > >   /* State of each mdev device */
> > >   struct mdev_state {
> > >           int irq_fd;
> > > @@ -138,22 +174,37 @@ struct mdev_state {
> > >           u8 *vconfig;
> > >           struct mutex ops_lock;
> > >           struct mdev_device *mdev;
> > > - struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];
> > > - u32 bar_mask[VFIO_PCI_NUM_REGIONS];
> > > + struct mdev_region_info region_info[MTTY_REGIONS_MAX];
> > > + u32 bar_mask[MTTY_REGIONS_MAX];
> > >           struct list_head next;
> > >           struct serial_port s[2];
> > >           struct mutex rxtx_lock;
> > >           struct vfio_device_info dev_info;
> > > - int nr_ports;
> > > + u32 nr_ports;
> > >           /* List of pinned gpfns, gpfn as index and content is 
> > > translated hpfn */
> > >           unsigned long *gpfn_to_hpfn;
> > >           struct notifier_block nb;
> > > +
> > > + u32 device_state;
> > > + u64 saved_size;
> > > + void *mig_region_base;
> > > + bool is_actual_data_sent;
> > > + struct packet *pkt;
> > > + u32 packet_state;
> > > + u64 dummy_data_size;
> > >   };
> > >   static struct mutex mdev_list_lock;
> > >   static struct list_head mdev_devices_list;
> > > +/*
> > > + * Default dummy data size set to 100 MB. To change value of dummy data 
> > > size at
> > > + * runtime but before migration write size in MB to sysfs file
> > > + * dummy_data_size_MB
> > > + */
> > > +static unsigned long user_dummy_data_size = (100 * 1024 * 1024);
> > > +
> > >   static const struct file_operations vd_fops = {
> > >           .owner          = THIS_MODULE,
> > >   };
> > > @@ -639,6 +690,288 @@ static void mdev_read_base(struct mdev_state 
> > > *mdev_state)
> > >           }
> > >   }
> > > +static int save_setup(struct mdev_state *mdev_state)
> > > +{
> > > + mdev_state->is_actual_data_sent = false;
> > > +
> > > + memset(mdev_state->pkt, 0, sizeof(struct packet) +
> > > +                            PACKET_DATA_SIZE_MAX);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int set_device_state(struct mdev_state *mdev_state, u32 
> > > device_state)
> > > +{
> > > + int ret = 0;
> > > +
> > > + if (mdev_state->device_state == device_state)
> > > +         return 0;
> > > +
> > > + if (device_state & VFIO_DEVICE_STATE_RUNNING) {
> > > +#if defined(DEBUG)
> > > +         if (device_state & VFIO_DEVICE_STATE_SAVING) {
> > > +                 pr_info("%s: %s Pre-copy\n", __func__,
> > > +                         dev_name(mdev_dev(mdev_state->mdev)));
> > > +         } else
> > > +                 pr_info("%s: %s Running\n", __func__,
> > > +                         dev_name(mdev_dev(mdev_state->mdev)));
> > > +#endif
> > > + } else {
> > > +         if (device_state & VFIO_DEVICE_STATE_SAVING) {
> > > +#if defined(DEBUG)
> > > +                 pr_info("%s: %s Stop-n-copy\n", __func__,
> > > +                         dev_name(mdev_dev(mdev_state->mdev)));
> > > +#endif
> > > +                 ret = save_setup(mdev_state);
> > > +
> > > +         } else if (device_state & VFIO_DEVICE_STATE_RESUMING) {
> > > +#if defined(DEBUG)
> > > +                 pr_info("%s: %s Resuming\n", __func__,
> > > +                         dev_name(mdev_dev(mdev_state->mdev)));
> > > +         } else {
> > > +                 pr_info("%s: %s Stopped\n", __func__,
> > > +                         dev_name(mdev_dev(mdev_state->mdev)));
> > > +#endif
> > > +         }
> > > + }
> > > +
> > > + mdev_state->device_state = device_state;
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +static u32 get_device_state(struct mdev_state *mdev_state)
> > > +{
> > > + return mdev_state->device_state;
> > > +}
> > > +
> > > +static void write_to_packet(struct packet *pkt, u8 *data, size_t size)
> > > +{
> > > + if ((pkt->data_size + size) > PACKET_DATA_SIZE_MAX) {
> > > +         pr_err("%s: packet data overflow\n", __func__);
> > > +         return;
> > > + }
> > > + memcpy((void *)&pkt->data[pkt->data_size], (void *)data, size);
> > > + pkt->data_size += size;
> > > +}
> > > +
> > > +static void read_from_packet(struct packet *pkt, u8 *data,
> > > +                      int index, size_t size)
> > > +{
> > > + if ((index + size) > PACKET_DATA_SIZE_MAX) {
> > > +         pr_err("%s: packet data overflow\n", __func__);
> > > +         return;
> > > + }
> > > +
> > > + memcpy((void *)data, (void *)&pkt->data[index], size);
> > > +}
> > > +
> > > +static int save_device_data(struct mdev_state *mdev_state, u64 *pending)
> > > +{
> > > + /* Save device data only during stop-and-copy phase */
> > > + if (mdev_state->device_state != VFIO_DEVICE_STATE_SAVING) {
> > > +         *pending = 0;
> > > +         return 0;
> > > + }
> > > +
> > > + if (mdev_state->packet_state == PACKET_STATE_PREPARED) {
> > > +         *pending = sizeof(struct packet) + mdev_state->pkt->data_size;
> > > +         return 0;
> > > + }
> > > +
> > > + if (!mdev_state->is_actual_data_sent) {
> > > +
> > > +         /* create actual data packet */
> > > +         write_to_packet(mdev_state->pkt, (u8 *)&mdev_state->nr_ports,
> > > +                         sizeof(mdev_state->nr_ports));
> > > +         write_to_packet(mdev_state->pkt, (u8 *)&mdev_state->s,
> > > +                         sizeof(struct serial_port) * 2);
> > > +
> > > +         write_to_packet(mdev_state->pkt, mdev_state->vconfig,
> > > +                         MTTY_CONFIG_SPACE_SIZE);
> > > +
> > > +         write_to_packet(mdev_state->pkt, (u8 *)mdev_state->gpfn_to_hpfn,
> > > +                         sizeof(unsigned long) * MAX_GPFN_COUNT);
> > > +
> > > +         mdev_state->pkt->id = PACKET_ID;
> > > +         mdev_state->pkt->flags = PACKET_FLAGS_ACTUAL_DATA;
> > > +
> > > +         mdev_state->is_actual_data_sent = true;
> > > + } else {
> > > +         /* create dummy data packet */
> > > +         if (mdev_state->dummy_data_size > user_dummy_data_size) {
> > > +                 *pending = 0;
> > > +                 mdev_state->packet_state = PACKET_STATE_NONE;
> > > +                 return 0;
> > > +         }
> > > +
> > > +         memset(mdev_state->pkt->data, 0xa5, PACKET_DATA_SIZE_MAX);
> > > +
> > > +         mdev_state->pkt->id = PACKET_ID;
> > > +         mdev_state->pkt->flags = PACKET_FLAGS_DUMMY_DATA;
> > > +         mdev_state->pkt->data_size = PACKET_DATA_SIZE_MAX;
> > > +         mdev_state->dummy_data_size += PACKET_DATA_SIZE_MAX;
> > > + }
> > > +
> > > + *pending = sizeof(struct packet) + mdev_state->pkt->data_size;
> > > + mdev_state->packet_state = PACKET_STATE_PREPARED;
> > > + mdev_state->saved_size = 0;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int copy_device_data(struct mdev_state *mdev_state)
> > > +{
> > > + u64 size;
> > > +
> > > + if (!mdev_state->pkt || !mdev_state->mig_region_base)
> > > +         return -EINVAL;
> > > +
> > > + if (mdev_state->packet_state == PACKET_STATE_COPIED)
> > > +         return 0;
> > > +
> > > + if (!mdev_state->pkt->data_size)
> > > +         return 0;
> > > +
> > > + size = sizeof(struct packet) + mdev_state->pkt->data_size;
> > > +
> > > + memcpy(mdev_state->mig_region_base, mdev_state->pkt, size);
> > > +
> > if data area is mmaped, who is going to copy data from mdev_state->pkt
> > to mdev_state->mig_region_base ?
> > actually, I do see this area is mmaped in this sample.
> > 
> 
> This area ia mmap and is backed by memory, see mtty_mmap(), on read access
> to data_offset, packet data is copied to mmaped memory.
> 
> > > + mdev_state->saved_size = size;
> > > + mdev_state->packet_state = PACKET_STATE_COPIED;
> > > + memset(mdev_state->pkt, 0, sizeof(struct packet));
> > > + return 0;
> > > +}
> > > +
> > > +static int resume_device_data(struct mdev_state *mdev_state, u64 
> > > data_size)
> > > +{
> > > + unsigned long i;
> > > +
> > > + if (mdev_state->device_state != VFIO_DEVICE_STATE_RESUMING)
> > > +         return -EINVAL;
> > > +
> > > + if (!mdev_state->pkt || !mdev_state->mig_region_base)
> > > +         return -EINVAL;
> > > +
> > > + memcpy(mdev_state->pkt, mdev_state->mig_region_base, data_size);
> > > +
> > > + if (mdev_state->pkt->flags & PACKET_FLAGS_ACTUAL_DATA) {
> > > +         int index = 0;
> > > +         /* restore device data */
> > > +         read_from_packet(mdev_state->pkt, (u8 *)&mdev_state->nr_ports,
> > > +                          index, sizeof(mdev_state->nr_ports));
> > > +         index += sizeof(mdev_state->nr_ports);
> > > +
> > > +         read_from_packet(mdev_state->pkt, (u8 *)&mdev_state->s,
> > > +                         index, sizeof(struct serial_port) * 2);
> > > +         index += sizeof(struct serial_port) * 2;
> > > +
> > > +         read_from_packet(mdev_state->pkt, mdev_state->vconfig,
> > > +                          index, MTTY_CONFIG_SPACE_SIZE);
> > > +         index += MTTY_CONFIG_SPACE_SIZE;
> > > +
> > > +         read_from_packet(mdev_state->pkt,
> > > +                         (u8 *)mdev_state->gpfn_to_hpfn,
> > > +                         index, sizeof(unsigned long) * MAX_GPFN_COUNT);
> > > +         index += sizeof(unsigned long) * MAX_GPFN_COUNT;
> > > +
> > > +         for (i = 0; i < MAX_GPFN_COUNT; i++) {
> > > +                 if (mdev_state->gpfn_to_hpfn[i] != PFN_NULL) {
> > > +                         int ret;
> > > +                         unsigned long hpfn;
> > > +
> > > +                         ret = vfio_pin_pages(mdev_dev(mdev_state->mdev),
> > > +                                &i, 1, IOMMU_READ | IOMMU_WRITE, &hpfn);
> > > +                         if (ret <= 0) {
> > > +                                 pr_err("%s: 0x%lx unpin error %d\n",
> > > +                                                 __func__, i, ret);
> > > +                                 continue;
> > > +                         }
> > > +                         mdev_state->gpfn_to_hpfn[i] = hpfn;
> > > +                 }
> > > +         }
> > > + } else {
> > > +#if defined(DEBUG)
> > > +         pr_info("%s: %s discard data 0x%llx\n",
> > > +                  __func__, dev_name(mdev_dev(mdev_state->mdev)),
> > > +                 data_size);
> > > +#endif
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int handle_mig_read(unsigned int index, struct mdev_state 
> > > *mdev_state,
> > > +                    loff_t offset, u8 *buf, u32 count)
> > > +{
> > > + int ret = 0;
> > > + u64 pending = 0;
> > > +
> > > + switch (offset) {
> > > + case MIGRATION_INFO_OFFSET(device_state):       // 0x00
> > > +         *(u32 *)buf = get_device_state(mdev_state);
> > > +         break;
> > > +
> > > + case MIGRATION_INFO_OFFSET(pending_bytes):      // 0x08
> > > +         ret = save_device_data(mdev_state, &pending);
> > > +         if (ret)
> > > +                 break;
> > > +         *(u64 *)buf = pending;
> > > +         break;
> > > +
> > > + case MIGRATION_INFO_OFFSET(data_offset):        // 0x10
> > > +         if (mdev_state->device_state & VFIO_DEVICE_STATE_SAVING) {
> > > +                 ret = copy_device_data(mdev_state);
> > > +                 if (ret)
> > > +                         break;
> > > +         }
> > > +         *(u64 *)buf = MTTY_MIGRATION_REGION_DATA_OFFSET;
> > what is this?
> 
> I think macro is self explanatory, its data offset within migration region
> where vendor driver has copied data and user application should data from
> this offset of migration region.
>
ok. just mixed the data_offset with real data offset of the data area.
BTW, it's really confusing to take reading of data_offset as the indicator
of data preparing and writing of data_size as the indicator of data loading.

Thanks
Yan



reply via email to

[Prev in Thread] Current Thread [Next in Thread]