[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH] migration: Add more error handling to analyze-migration.py
From: |
Peter Xu |
Subject: |
Re: [PATCH] migration: Add more error handling to analyze-migration.py |
Date: |
Thu, 2 Jan 2025 17:03:17 -0500 |
On Thu, Jan 02, 2025 at 03:58:31PM -0300, Fabiano Rosas wrote:
> The analyze-migration script was seen failing in s390x in misterious
> ways. It seems we're reaching the subsection constructor without any
It might be a good idea to add some debug lines indeed. Though are you sure
it's from parsing a subsection?
https://lore.kernel.org/all/20241219123213.GA692742@fedora/
====8<====
stderr:
Traceback (most recent call last):
File
"/home/gitlab-runner/builds/4S3awx_3/0/qemu-project/qemu/build/scripts/analyze-migration.py",
line 688, in <module>
dump.read(dump_memory = args.memory)
File
"/home/gitlab-runner/builds/4S3awx_3/0/qemu-project/qemu/build/scripts/analyze-migration.py",
line 625, in read
section.read()
File
"/home/gitlab-runner/builds/4S3awx_3/0/qemu-project/qemu/build/scripts/analyze-migration.py",
line 461, in read
field['data'] = reader(field, self.file)
File
"/home/gitlab-runner/builds/4S3awx_3/0/qemu-project/qemu/build/scripts/analyze-migration.py",
line 434, in __init__
for field in self.desc['struct']['fields']:
KeyError: 'fields'
====8<====
It reads to me that it's not in "if 'subsections' in self.desc['struct']"
loop yet, instead it looks like a real STRUCT field in one of the device
sections? If that's true, then...
> fields, which would indicate an empty .subsection entry in the vmstate
> definition. We don't have any of those, at least not without the
> unmigratable flag set, so this should never happen.
>
> Add some debug statements so that we can see what's going on the next
> time the issue happens.
>
> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> ---
> scripts/analyze-migration.py | 33 +++++++++++++++++++++++++++------
> 1 file changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
> index 8a254a5b6a..1dd98f1d5a 100755
> --- a/scripts/analyze-migration.py
> +++ b/scripts/analyze-migration.py
> @@ -429,6 +429,10 @@ def __init__(self, desc, file):
> super(VMSDFieldStruct, self).__init__(desc, file)
> self.data = collections.OrderedDict()
>
> + if 'fields' not in self.desc['struct']:
> + raise Exception("No fields in subsection key=%s name=%s" %
> + (self.section_key, self.vmsd_name))
... this self.section_key / self.vmsd_name may not exist..
In all cases, IMHO this would be better the debug lines work with both pure
structs and sections.
> +
> # When we see compressed array elements, unfold them here
> new_fields = []
> for field in self.desc['struct']['fields']:
> @@ -477,6 +481,11 @@ def read(self):
> raise Exception("Subsection %s not found at offset %x" %
> ( subsection['vmsd_name'], self.file.tell()))
> name = self.file.readstr()
> version_id = self.file.read32()
> +
> + if not subsection:
> + raise Exception("Empty description for subsection %s" %
> + name)
This is checking subsection==None?? I doubt whether this will hit..
> +
> self.data[name] = VMSDSection(self.file, version_id,
> subsection, (name, 0))
> self.data[name].read()
>
> @@ -575,9 +584,8 @@ def __init__(self, filename):
> self.filename = filename
> self.vmsd_desc = None
>
> - def read(self, desc_only = False, dump_memory = False, write_memory =
> False):
> - # Read in the whole file
> - file = MigrationFile(self.filename)
> + def _read(self, file, vmsd_json, desc_only = False, dump_memory = False,
> + write_memory = False):
>
> # File magic
> data = file.read32()
> @@ -589,7 +597,7 @@ def read(self, desc_only = False, dump_memory = False,
> write_memory = False):
> if data != self.QEMU_VM_FILE_VERSION:
> raise Exception("Invalid version number %d" % data)
>
> - self.load_vmsd_json(file)
> + self.load_vmsd_json(file, vmsd_json)
>
> # Read sections
> self.sections = collections.OrderedDict()
> @@ -632,12 +640,25 @@ def read(self, desc_only = False, dump_memory = False,
> write_memory = False):
> raise Exception("Mismatched section footer: %x vs %x" %
> (read_section_id, section_id))
> else:
> raise Exception("Unknown section type: %d" % section_type)
> - file.close()
>
> - def load_vmsd_json(self, file):
> + def read(self, desc_only = False, dump_memory = False,
> + write_memory = False):
> + file = MigrationFile(self.filename)
> vmsd_json = file.read_migration_debug_json()
> +
> + try:
> + self._read(file, vmsd_json, desc_only, dump_memory, write_memory)
> + except:
> + raise Exception("Full JSON dump:\n%s", vmsd_json)
Better dump the Exception line itself too? IIUC that needs things like:
except Exception as e:
raise Exception(XXX, e, vmsd_json)
More below..
> + finally:
> + file.close()
> +
> + def load_vmsd_json(self, file, vmsd_json):
> self.vmsd_desc = json.loads(vmsd_json,
> object_pairs_hook=collections.OrderedDict)
> for device in self.vmsd_desc['devices']:
> + if 'fields' not in device:
> + raise Exception("vmstate for device %s has no fields" %
> + device['name'])
This looks a valid check, though I still doubt whether this would hit at
all for this specific error (which looks like a top level STRUCT of a
section, that is missing "fields").
> key = (device['name'], device['instance_id'])
> value = ( VMSDSection, device )
> self.section_classes[key] = value
> --
> 2.35.3
>
For the capture part, would it be easier if we trap at the very top level
once and for all, then try to dump the vmsd_desc as long as existed? Like
this:
====8<====
@@ -685,9 +686,15 @@ def default(self, o):
f.close()
elif args.dump == "state":
dump = MigrationDump(args.file)
- dump.read(dump_memory = args.memory)
- dict = dump.getDict()
- print(jsonenc.encode(dict))
+ try:
+ dump.read(dump_memory = args.memory)
+ dict = dump.getDict()
+ print(jsonenc.encode(dict))
+ except Exception as e:
+ # If loading vmstate stream failed, try the best to dump vmsd desc
+ raise Exception(
+ f"Caught exception when reading dump: {e}\n"
+ f"Trying to dump vmsd_desc: {jsonenc.encode(dump.vmsd_desc)}")
elif args.dump == "desc":
dump = MigrationDump(args.file)
dump.read(desc_only = True)
====8<====
So no matter what caused error, fallback to try dump vmsd_desc as long as
available.
Thanks,
--
Peter Xu