grub-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Patch] Robustly search for ZFS labels & uberblocks


From: Vladimir 'φ-coder/phcoder' Serbinenko
Subject: Re: [Patch] Robustly search for ZFS labels & uberblocks
Date: Sun, 22 Jan 2012 15:18:37 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:8.0) Gecko/20120104 Icedove/8.0

I've cleaned this patch up (attached). Can you test it on these 4K FSes or tell me how to create one w/o really having such a disk?
On 19.01.2012 12:36, Richard Laager wrote:
  static grub_err_t
-uberblock_verify (uberblock_phys_t * ub, int offset)
+uberblock_verify (uberblock_t * uber, int offset, struct grub_zfs_data *data)
  {
-  uberblock_t *uber =&ub->ubp_uberblock;
    grub_err_t err;
    grub_zfs_endian_t endian = UNKNOWN_ENDIAN;
    zio_cksum_t zc;
+  void *osp = NULL;
+  grub_size_t ospsize;
+
+  if (uber->ub_txg<  data->label_txg)
+    return grub_error (GRUB_ERR_BAD_FS,
+       "ignoring partially written label: uber_txg<  label_txg");
I've left this part out since outside loop already looks for the newest txg.
    if (grub_zfs_to_cpu64 (uber->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
        &&  grub_zfs_to_cpu64 (uber->ub_version, LITTLE_ENDIAN)>  0
@@ -361,7 +390,19 @@

    zc.zc_word[0] = grub_cpu_to_zfs64 (offset, endian);
    err = zio_checksum_verify (zc, ZIO_CHECKSUM_LABEL, endian,
-                            (char *) ub, UBERBLOCK_SIZE);
+                            (char *) uber, UBERBLOCK_SIZE(data->vdev_ashift));
+
+  if (!err)
+    {
+      /* Check that the data pointed by the rootbp is usable. */
+      void *osp = NULL;
+      grub_size_t ospsize;
+      err = zio_read (&uber->ub_rootbp, endian,&osp,&ospsize, data);
+      grub_free (osp);
+
+      if (!err&&  ospsize<  OBJSET_PHYS_SIZE_V14)
+       return grub_error (GRUB_ERR_BAD_FS, "uberblock rootbp points to invalid 
data");
+    }
Left this one out: why would it happen that uberblock checksum is valid but the other data in it isn't?
    return err;
  }
@@ -372,32 +413,40 @@
   *    Success - Pointer to the best uberblock.
   *    Failure - NULL
   */
-static uberblock_phys_t *
-find_bestub (uberblock_phys_t * ub_array, grub_disk_addr_t sector)
+static uberblock_t *
+find_bestub (char *ub_array, struct grub_zfs_data *data)
  {
-  uberblock_phys_t *ubbest = NULL;
-  int i;
-  grub_disk_addr_t offset;
+  const grub_disk_addr_t sector = data->vdev_phys_sector;
+  uberblock_t *ubbest = NULL;
+  uberblock_t *ubnext;
+  unsigned int i, offset, pickedub = 0;
    grub_err_t err = GRUB_ERR_NONE;

-  for (i = 0; i<  (VDEV_UBERBLOCK_RING>>  VDEV_UBERBLOCK_SHIFT); i++)
+  const unsigned int UBCOUNT = UBERBLOCK_COUNT (data->vdev_ashift);
+  const grub_uint64_t UBBYTES = UBERBLOCK_SIZE (data->vdev_ashift);
+
+  for (i = 0; i<  UBCOUNT; i++)
      {
-      offset = (sector<<  SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE
-       + (i<<  VDEV_UBERBLOCK_SHIFT);
+      ubnext = (uberblock_t *) (i * UBBYTES + ub_array);
+      offset = (sector<<  SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE + (i * UBBYTES);
This is wrong with respect to alignment invariants.
-      err = uberblock_verify (&ub_array[i], offset);
+      err = uberblock_verify (ubnext, offset, data);
        if (err)
        {
          grub_errno = GRUB_ERR_NONE;
          continue;
        }
-      if (ubbest == NULL
-         || vdev_uberblock_compare (&(ub_array[i].ubp_uberblock),
-                               &(ubbest->ubp_uberblock))>  0)
-       ubbest =&ub_array[i];
+      if (ubbest == NULL || vdev_uberblock_compare (ubnext, ubbest)>  0)
+       {
+         ubbest = ubnext;
+         pickedub = i;
+       }
Skipped, it sets the variable but never uses it.
      }
    if (!ubbest)
      grub_errno = err;
+  else
+    grub_dprintf ("zfs", "Found best uberblock at idx %d, txg %llu\n",
+                 pickedub, (unsigned long long) ubbest->ub_txg);
Skipped, not needed.
    return (ubbest);
  }
@@ -515,8 +564,18 @@
          sector = DVA_OFFSET_TO_PHYS_SECTOR (offset);
          err = grub_disk_read (data->disk, sector, 0, psize, buf);
        }
+
        if (!err)
-       return GRUB_ERR_NONE;
+       {
+         /* Check the underlying checksum before we rule this DVA as "good" */
+         grub_uint32_t checkalgo = (grub_zfs_to_cpu64 ((bp)->blk_prop, endian)>>  
40)&  0xff;
+         err = zio_checksum_verify (bp->blk_cksum, checkalgo, endian, buf, 
psize);
+         if (!err)
+           return GRUB_ERR_NONE;
+       }
+
+      /* If read failed or checksum bad, reset the error.  Hopefully we've got
+       * some more DVA's to try. */
Skipped, already commented on this one.
        grub_errno = GRUB_ERR_NONE;
      }

@@ -539,12 +598,9 @@
    unsigned int comp;
    char *compbuf = NULL;
    grub_err_t err;
-  zio_cksum_t zc = bp->blk_cksum;
-  grub_uint32_t checksum;

    *buf = NULL;

-  checksum = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>  40)&  0xff;
    comp = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>32)&  0xff;
    lsize = (BP_IS_HOLE(bp) ? 0 :
           (((grub_zfs_to_cpu64 ((bp)->blk_prop, endian)&  0xffff) + 1)
@@ -580,15 +636,6 @@
        return err;
      }

-  err = zio_checksum_verify (zc, checksum, endian, compbuf, psize);
-  if (err)
-    {
-      grub_dprintf ("zfs", "incorrect checksum\n");
-      grub_free (compbuf);
-      *buf = NULL;
-      return err;
-    }
-
    if (comp != ZIO_COMPRESS_OFF)
      {
        *buf = grub_malloc (lsize);
@@ -1864,11 +1911,9 @@
  static grub_err_t
  check_pool_label (struct grub_zfs_data *data)
  {
-  grub_uint64_t pool_state, txg = 0;
-  char *nvlist;
-#if 0
-  char *nv;
-#endif
+  grub_uint64_t pool_state;
+  char *nvlist;                        /* for the pool */
+  char *vdevnvlist;            /* for the vdev */
    grub_uint64_t diskguid;
    grub_uint64_t version;
    int found;
@@ -1898,7 +1943,9 @@
      }
    grub_dprintf ("zfs", "check 4 passed\n");

-  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_TXG,&txg);
+  data->label_txg = 0;
+  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_TXG,
+                                       &data->label_txg);
Not needed. Skipped.
    if (!found)
      {
        grub_free (nvlist);
@@ -1909,7 +1956,7 @@
    grub_dprintf ("zfs", "check 6 passed\n");

    /* not an active device */
-  if (txg == 0)
+  if (data->label_txg == 0)
      {
        grub_free (nvlist);
        return grub_error (GRUB_ERR_BAD_FS, "zpool isn't active");
@@ -1931,20 +1978,32 @@
      {
        grub_free (nvlist);
        return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET,
-                        "too new version %llu>  %llu",
+                        "SPA version too new %llu>  %llu",
                         (unsigned long long) version,
                         (unsigned long long) SPA_VERSION);
      }
    grub_dprintf ("zfs", "check 9 passed\n");
-#if 0
-  if (nvlist_lookup_value (nvlist, ZPOOL_CONFIG_VDEV_TREE,&nv,
-                          DATA_TYPE_NVLIST, NULL))
+
+  vdevnvlist = grub_zfs_nvlist_lookup_nvlist (nvlist, ZPOOL_CONFIG_VDEV_TREE);
+  if (!vdevnvlist)
      {
-      grub_free (vdev);
-      return (GRUB_ERR_BAD_FS);
+      grub_free (nvlist);
+      if (!grub_errno)
+       grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_VDEV_TREE " not found");
+      return grub_errno;
      }
    grub_dprintf ("zfs", "check 10 passed\n");
-#endif
+
+  found = grub_zfs_nvlist_lookup_uint64 (vdevnvlist, ZPOOL_CONFIG_ASHIFT,
+                                       &data->vdev_ashift);
+  grub_free (vdevnvlist);
+  if (!found)
+    {
+      grub_free (nvlist);
+      if (!grub_errno)
+       grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_ASHIFT " not found");
+      return grub_errno;
+    }

    found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_GUID,&diskguid);
    if (! found)
@@ -1956,11 +2015,42 @@
      }
    grub_dprintf ("zfs", "check 11 passed\n");

+  found = grub_zfs_nvlist_lookup_uint64 (nvlist, 
ZPOOL_CONFIG_POOL_GUID,&data->pool_guid);
+  if (! found)
+    {
+      grub_free (nvlist);
+      if (! grub_errno)
+        grub_error (GRUB_ERR_BAD_FS, ZPOOL_CONFIG_POOL_GUID " not found");
+      return grub_errno;
+    }
+   grub_dprintf ("zfs", "check 12 passed\n");
+
+
This check is already in. Skipped.
    grub_free (nvlist);

+  grub_dprintf ("zfs", "ZFS Pool GUID: %llu (%016llx) Label: GUID: %llu (%016llx), 
txg: %llu, SPA v%llu, ashift: %llu\n",
+    (unsigned long long) data->pool_guid,
+    (unsigned long long) data->pool_guid,
+    (unsigned long long) diskguid,
+    (unsigned long long) diskguid,
+    (unsigned long long) data->label_txg,
+    (unsigned long long) version,
+    (unsigned long long) data->vdev_ashift);
+
Useless. Skipped.
    return GRUB_ERR_NONE;
  }

+/*
+ * vdev_label_start returns the physical disk offset (in bytes) of
+ * label "l".
+ */
+static grub_uint64_t vdev_label_start (grub_uint64_t psize, int l)
+{
+  return (l * sizeof (vdev_label_t) + (l<  VDEV_LABELS / 2 ?
+                                      0 : psize -
+                                      VDEV_LABELS * sizeof (vdev_label_t)));
+}
+
  static void
  zfs_unmount (struct grub_zfs_data *data)
  {
@@ -1979,13 +2069,13 @@
  zfs_mount (grub_device_t dev)
  {
    struct grub_zfs_data *data = 0;
-  int label = 0;
-  uberblock_phys_t *ub_array, *ubbest = NULL;
-  vdev_boot_header_t *bh;
+  int label = 0, bestlabel = -1;
+  char *ub_array;
+  uberblock_t *ubbest;
+  uberblock_t *ubcur = NULL;
    void *osp = 0;
    grub_size_t ospsize;
    grub_err_t err;
-  int vdevnum;

    if (! dev->disk)
      {
@@ -1997,11 +2087,6 @@
    if (!data)
      return 0;
    grub_memset (data, 0, sizeof (*data));
-#if 0
-  /* if it's our first time here, zero the best uberblock out */
-  if (data->best_drive == 0&&  data->best_part == 0&&  find_best_root)
-    grub_memset (&current_uberblock, 0, sizeof (uberblock_t));
-#endif

    data->disk = dev->disk;

@@ -2012,100 +2097,129 @@
        return 0;
      }

-  bh = grub_malloc (VDEV_BOOT_HEADER_SIZE);
-  if (!bh)
+  ubbest = grub_malloc (sizeof (*ubbest));
+  if (!ubbest)
      {
        zfs_unmount (data);
-      grub_free (ub_array);
        return 0;
      }
-
-  vdevnum = VDEV_LABELS;
-
-  /* Don't check back labels on CDROM.  */
-  if (grub_disk_get_size (dev->disk) == GRUB_DISK_SIZE_UNKNOWN)
-    vdevnum = VDEV_LABELS / 2;
-
-  for (label = 0; ubbest == NULL&&  label<  vdevnum; label++)
+  grub_memset (ubbest, 0, sizeof (*ubbest));
+
+  /* Establish some constants for where things are on the device: */
+
+  /*
+   *  Some eltorito stacks don't give us a size and we end up setting the
+   * size to MAXUINT, further some of these devices stop working once a single
+   * read past the end has been issued. Checking for a maximum part_length and
+   * skipping the backup labels at the end of the slice/partition/device
+   * avoids breaking down on such devices.
+   */
+  const int vdevnum =
+    grub_disk_get_size (dev->disk) == GRUB_DISK_SIZE_UNKNOWN ?
+    VDEV_LABELS / 2 : VDEV_LABELS;
+
+  /* Size in bytes of the device (disk or partition) aligned to label size */
+  grub_uint64_t device_size =
+       grub_disk_get_size (dev->disk)<<  GRUB_DISK_SECTOR_BITS;
+  const grub_uint64_t alignedbytes =
+    P2ALIGN (device_size, (grub_uint64_t) sizeof (vdev_label_t));
+
+  for (label = 0; label<  vdevnum; label++)
      {
-      grub_zfs_endian_t ub_endian = UNKNOWN_ENDIAN;
-      grub_dprintf ("zfs", "label %d\n", label);
-
-      data->vdev_phys_sector
-       = label * (sizeof (vdev_label_t)>>  SPA_MINBLOCKSHIFT)
-       + ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE)>>  SPA_MINBLOCKSHIFT)
-       + (label<  VDEV_LABELS / 2 ? 0 : grub_disk_get_size (dev->disk)
-          - VDEV_LABELS * (sizeof (vdev_label_t)>>  SPA_MINBLOCKSHIFT));
+      grub_uint64_t labelstartbytes = vdev_label_start (alignedbytes, label);
+      grub_uint64_t labelstart = labelstartbytes>>  GRUB_DISK_SECTOR_BITS;
+
+      grub_dprintf ("zfs", "reading label %d at sector %llu (byte %llu)\n",
+                   label, (unsigned long long) labelstart,
+                   (unsigned long long) labelstartbytes);
+
+      data->vdev_phys_sector = labelstart +
+               ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE)>>  
GRUB_DISK_SECTOR_BITS);
+
+      err = check_pool_label (data);
+      if (err)
+       {
+         grub_dprintf ("zfs", "error checking label %d\n", label);
+         grub_errno = GRUB_ERR_NONE;
+         continue;
+       }
This seems like a lot of moving around but I don't see any real change other than aligning the size down.
        /* Read in the uberblock ring (128K). */
-      err = grub_disk_read (data->disk, data->vdev_phys_sector
-                           + (VDEV_PHYS_SIZE>>  SPA_MINBLOCKSHIFT),
-                           0, VDEV_UBERBLOCK_RING, (char *) ub_array);
-      if (err)
-       {
-         grub_errno = GRUB_ERR_NONE;
-         continue;
-       }
-      grub_dprintf ("zfs", "label ok %d\n", label);
-
-      ubbest = find_bestub (ub_array, data->vdev_phys_sector);
-      if (!ubbest)
-       {
-         grub_dprintf ("zfs", "No uberblock found\n");
-         grub_errno = GRUB_ERR_NONE;
-         continue;
-       }
-      ub_endian = (grub_zfs_to_cpu64 (ubbest->ubp_uberblock.ub_magic,
-                                    LITTLE_ENDIAN) == UBERBLOCK_MAGIC
-                  ? LITTLE_ENDIAN : BIG_ENDIAN);
-      err = zio_read (&ubbest->ubp_uberblock.ub_rootbp,
-                     ub_endian,
-               &osp,&ospsize, data);
-      if (err)
-       {
-         grub_dprintf ("zfs", "couldn't zio_read\n");
-         grub_errno = GRUB_ERR_NONE;
-         continue;
-       }
-
-      if (ospsize<  OBJSET_PHYS_SIZE_V14)
-       {
-         grub_dprintf ("zfs", "osp too small\n");
-         grub_free (osp);
-         continue;
-       }
-      grub_dprintf ("zfs", "ubbest %p\n", ubbest);
-
-      err = check_pool_label (data);
-      if (err)
-       {
-         grub_errno = GRUB_ERR_NONE;
-         continue;
-       }
-#if 0
-      if (find_best_root&&
-         vdev_uberblock_compare (&ubbest->ubp_uberblock,
-                               &(current_uberblock))<= 0)
-       continue;
-#endif
-      /* Got the MOS. Save it at the memory addr MOS. */
-      grub_memmove (&(data->mos.dn),&((objset_phys_t *) osp)->os_meta_dnode,
-                   DNODE_SIZE);
-      data->mos.endian = (grub_zfs_to_cpu64 (ubbest->ubp_uberblock.ub_rootbp.blk_prop, 
ub_endian)>>  63)&  1;
-      grub_memmove (&(data->current_uberblock),
-               &ubbest->ubp_uberblock, sizeof (uberblock_t));
-      grub_free (ub_array);
-      grub_free (bh);
-      grub_free (osp);
-      return data;
+      err = grub_disk_read (data->disk,
+                           data->vdev_phys_sector +
+                           (VDEV_PHYS_SIZE>>  GRUB_DISK_SECTOR_BITS),
+                           0, VDEV_UBERBLOCK_RING, ub_array);
+      if (err)
+       {
+         grub_dprintf ("zfs", "error reading uberblock ring for label %d\n", 
label);
+         grub_errno = GRUB_ERR_NONE;
+         continue;
+       }
+
+      ubcur = find_bestub (ub_array, data);
+      if (!ubcur)
+       {
+         grub_dprintf ("zfs", "No good uberblocks found in label %d\n", label);
+         grub_errno = GRUB_ERR_NONE;
+         continue;
+       }
+
+      if (vdev_uberblock_compare (ubcur, ubbest)>  0)
+       {
+         /* Looks like the block is good, so use it. */
+         grub_memcpy (ubbest, ubcur, sizeof (*ubbest));
+         bestlabel = label;
+         grub_dprintf ("zfs", "Current best uberblock found in label %d\n", 
label);
+       }
      }
-  grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid label");
-  zfs_unmount (data);
    grub_free (ub_array);
-  grub_free (bh);
+
+  /*
+   * We zero'd the structure to begin with.  If we never assigned to it,
+   * magic will still be zero.
+   */
+  if (!ubbest->ub_magic)
+    {
+      grub_error (GRUB_ERR_BAD_FS, "couldn't find a valid ZFS label");
+      zfs_unmount (data);
+      grub_free (ubbest);
+      return 0;
+    }
+
+  grub_dprintf ("zfs", "ubbest %p in label %d\n", ubbest, bestlabel);
+
+  grub_zfs_endian_t ub_endian =
+    grub_zfs_to_cpu64 (ubbest->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
+    ? LITTLE_ENDIAN : BIG_ENDIAN;
+  err = zio_read (&ubbest->ub_rootbp, ub_endian,&osp,&ospsize, data);
+
+  if (err)
+    {
+      grub_error (GRUB_ERR_BAD_FS, "couldn't zio_read object directory");
+      zfs_unmount (data);
+      grub_free (ubbest);
+      return 0;
+    }
+
+  if (ospsize<  OBJSET_PHYS_SIZE_V14)
+    {
+      grub_error (GRUB_ERR_BAD_FS, "osp too small");
+      zfs_unmount (data);
+      grub_free (osp);
+      grub_free (ubbest);
+      return 0;
+    }
+
+  /* Got the MOS. Save it at the memory addr MOS. */
+  grub_memmove (&(data->mos.dn),&((objset_phys_t *) osp)->os_meta_dnode, 
DNODE_SIZE);
+  data->mos.endian =
+               (grub_zfs_to_cpu64 (ubbest->ub_rootbp.blk_prop, ub_endian)>>  
63)&  1;
+  grub_memmove (&(data->current_uberblock), ubbest, sizeof (uberblock_t));
+
    grub_free (osp);
+  grub_free (ubbest);

-  return 0;
+  return (data);
  }

  grub_err_t
@@ -2149,33 +2263,18 @@
  static grub_err_t
  zfs_uuid (grub_device_t device, char **uuid)
  {
-  char *nvlist;
-  int found;
    struct grub_zfs_data *data;
-  grub_uint64_t guid;
-  grub_err_t err;
-
-  *uuid = 0;

    data = zfs_mount (device);
    if (! data)
      return grub_errno;

-  err = zfs_fetch_nvlist (data,&nvlist);
-  if (err)
-    {
-      zfs_unmount (data);
-      return err;
-    }
-
-  found = grub_zfs_nvlist_lookup_uint64 (nvlist, ZPOOL_CONFIG_POOL_GUID,&guid);
-  if (! found)
-    return grub_errno;
-  grub_free (nvlist);
-  *uuid = grub_xasprintf ("%016llx", (long long unsigned) guid);
+  *uuid = grub_xasprintf ("%016llx", (long long unsigned) data->pool_guid);
    zfs_unmount (data);
+
    if (! *uuid)
      return grub_errno;
+
    return GRUB_ERR_NONE;
  }


=== modified file 'include/grub/zfs/uberblock_impl.h'
--- include/grub/zfs/uberblock_impl.h   2010-12-01 21:55:26 +0000
+++ include/grub/zfs/uberblock_impl.h   2012-01-19 11:28:09 +0000
@@ -23,6 +23,8 @@
  #ifndef _SYS_UBERBLOCK_IMPL_H
  #define       _SYS_UBERBLOCK_IMPL_H

+#define UBMAX(a,b) ((a)>  (b) ? (a) : (b))
+
  /*
   * The uberblock version is incremented whenever an incompatible on-disk
   * format change is made to the SPA, DMU, or ZAP.
@@ -45,16 +47,10 @@
        blkptr_t        ub_rootbp;      /* MOS objset_phys_t            */
  } uberblock_t;

-#define        UBERBLOCK_SIZE          (1ULL<<  UBERBLOCK_SHIFT)
-#define        VDEV_UBERBLOCK_SHIFT    UBERBLOCK_SHIFT
-
-/* XXX Uberblock_phys_t is no longer in the kernel zfs */
-typedef struct uberblock_phys {
-       uberblock_t     ubp_uberblock;
-       char            ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) -
-                               sizeof (zio_eck_t)];
-       zio_eck_t       ubp_zec;
-} uberblock_phys_t;
-
+#define        VDEV_UBERBLOCK_SHIFT(as)        UBMAX(as, UBERBLOCK_SHIFT)
+#define        UBERBLOCK_SIZE(as)              (1ULL<<  
VDEV_UBERBLOCK_SHIFT(as))
+
+/* Number of uberblocks that can fit in the ring at a given ashift */
+#define UBERBLOCK_COUNT(as) (VDEV_UBERBLOCK_RING>>  VDEV_UBERBLOCK_SHIFT(as))

  #endif        /* _SYS_UBERBLOCK_IMPL_H */



--
Regards
Vladimir 'φ-coder/phcoder' Serbinenko

Attachment: zfs.diff
Description: Text Data


reply via email to

[Prev in Thread] Current Thread [Next in Thread]