diff --git a/conf/common.rmk b/conf/common.rmk index 0e63da6..b33c528 100644 --- a/conf/common.rmk +++ b/conf/common.rmk @@ -38,7 +38,7 @@ grub_fstest_SOURCES = util/grub-fstest.c util/hostfs.c util/misc.c \ fs/affs.c fs/cpio.c fs/fat.c fs/ext2.c fs/hfs.c \ fs/hfsplus.c fs/iso9660.c fs/udf.c fs/jfs.c fs/minix.c \ fs/ntfs.c fs/ntfscomp.c fs/reiserfs.c fs/sfs.c \ - fs/ufs.c fs/xfs.c fs/afs.c fs/tar.c \ + fs/ufs.c fs/xfs.c fs/afs.c fs/tar.c \ \ kern/partition.c partmap/pc.c partmap/apple.c partmap/sun.c \ partmap/gpt.c \ @@ -167,7 +167,7 @@ CLEANFILES += grub-dumpbios pkglib_MODULES += fshelp.mod fat.mod ufs.mod ext2.mod ntfs.mod \ ntfscomp.mod minix.mod hfs.mod jfs.mod iso9660.mod xfs.mod \ affs.mod sfs.mod hfsplus.mod reiserfs.mod cpio.mod tar.mod \ - udf.mod afs.mod + udf.mod afs.mod zfs.mod # For fshelp.mod. fshelp_mod_SOURCES = fs/fshelp.c @@ -179,6 +179,11 @@ fat_mod_SOURCES = fs/fat.c fat_mod_CFLAGS = $(COMMON_CFLAGS) fat_mod_LDFLAGS = $(COMMON_LDFLAGS) +# For zfs.mod. +zfs_mod_SOURCES = fs/zfs.c fs/zfs_lzjb.c fs/zfs_sha256.c fs/zfs_fletcher.c +zfs_mod_CFLAGS = $(COMMON_CFLAGS) +zfs_mod_LDFLAGS = $(COMMON_LDFLAGS) + # For ufs.mod. ufs_mod_SOURCES = fs/ufs.c ufs_mod_CFLAGS = $(COMMON_CFLAGS) diff --git a/conf/i386-pc.rmk b/conf/i386-pc.rmk index d74a5fa..d82d7b0 100644 --- a/conf/i386-pc.rmk +++ b/conf/i386-pc.rmk @@ -147,7 +147,8 @@ grub_emu_SOURCES = commands/minicmd.c commands/cat.c commands/cmp.c \ fs/affs.c fs/cpio.c fs/fat.c fs/ext2.c fs/hfs.c \ fs/hfsplus.c fs/iso9660.c fs/udf.c fs/jfs.c fs/minix.c \ fs/ntfs.c fs/ntfscomp.c fs/reiserfs.c fs/sfs.c \ - fs/ufs.c fs/xfs.c fs/afs.c fs/tar.c \ + fs/ufs.c fs/xfs.c fs/zfs.c fs/zfs_lzjb.c fs/zfs_sha256.c \ + fs/zfs_fletcher.c fs/afs.c fs/tar.c \ \ util/console.c util/hostfs.c util/grub-emu.c util/misc.c \ util/hostdisk.c util/getroot.c \ diff --git a/fs/zfs.c b/fs/zfs.c new file mode 100644 index 0000000..be801b6 --- /dev/null +++ b/fs/zfs.c @@ -0,0 +1,1787 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The zfs plug-in routines for GRUB are: + * + * zfs_mount() - locates a valid uberblock of the root pool and reads + * in its MOS at the memory address MOS. + * + * zfs_open() - locates a plain file object by following the MOS + * and places its dnode at the memory address DNODE. + * + * zfs_read() - read in the data blocks pointed by the DNODE. + * + * ZFS_SCRATCH is used as a working area. + * + * (memory addr) MOS DNODE ZFS_SCRATCH + * | | | + * +-------V---------V----------V---------------+ + * memory | | dnode | dnode | scratch | + * | | 512B | 512B | area | + * +--------------------------------------------+ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAXPATHLEN 1024 +#define MAXNAMELEN 1024 + +#define ZPOOL_PROP_BOOTFS "bootfs" + +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) + +/* + * For nvlist manipulation. (from nvpair.h) + */ +#define NV_ENCODE_NATIVE 0 +#define NV_ENCODE_XDR 1 +#define HOST_ENDIAN 1 /* for x86 machine */ +#define DATA_TYPE_UINT64 8 +#define DATA_TYPE_STRING 9 +#define DATA_TYPE_NVLIST 19 +#define DATA_TYPE_NVLIST_ARRAY 20 + +#ifndef GRUB_UTIL +static grub_dl_t my_mod; +#endif + +/* + * Verify dnode type. + * Can only be used in functions returning non-0 for failure. + */ +#define VERIFY_DN_TYPE(dnp, type) \ + if (type && (dnp)->dn_type != type) { \ + return (GRUB_ERR_BAD_FS); \ + } + +#define P2PHASE(x, align) ((x) & ((align) - 1)) +#define DVA_OFFSET_TO_PHYS_SECTOR(offset) \ + ((offset + VDEV_LABEL_START_SIZE) >> SPA_MINBLOCKSHIFT) + +/* + * FAT ZAP data structures + */ +#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ +#define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n)))) +#define CHAIN_END 0xffff /* end of the chunk chain */ + +/* + * The amount of space within the chunk available for the array is: + * chunk size - space for type (1) - space for next pointer (2) + */ +#define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3) + +#define ZAP_LEAF_HASH_SHIFT(bs) (bs - 5) +#define ZAP_LEAF_HASH_NUMENTRIES(bs) (1 << ZAP_LEAF_HASH_SHIFT(bs)) +#define LEAF_HASH(bs, h) \ + ((ZAP_LEAF_HASH_NUMENTRIES(bs)-1) & \ + ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(bs)-l->l_hdr.lh_prefix_len))) + +/* + * The amount of space available for chunks is: + * block size shift - hash entry size (2) * number of hash + * entries - header space (2*chunksize) + */ +#define ZAP_LEAF_NUMCHUNKS(bs) \ + (((1<l_hash + ZAP_LEAF_HASH_NUMENTRIES(bs)))[idx] +#define ZAP_LEAF_ENTRY(l, bs, idx) (&ZAP_LEAF_CHUNK(l, bs, idx).l_entry) + + +/* + * Decompression Entry - lzjb + */ +#ifndef NBBY +#define NBBY 8 +#endif + +extern void fletcher_2_native(const void *, grub_uint64_t, zio_cksum_t *); +extern void fletcher_2_byteswap(const void *, grub_uint64_t, zio_cksum_t *); +extern void fletcher_4_native(const void *, grub_uint64_t, zio_cksum_t *); +extern void fletcher_4_byteswap(const void *, grub_uint64_t, zio_cksum_t *); +extern int lzjb_decompress(void *, void *, grub_size_t, grub_size_t); +extern void zio_checksum_SHA256(const void *, grub_uint64_t, zio_cksum_t *); + +typedef int zfs_decomp_func_t(void *s_start, void *d_start, grub_size_t s_len, + grub_size_t d_len); +typedef struct decomp_entry { + char *name; + zfs_decomp_func_t *decomp_func; +} decomp_entry_t; + +struct grub_zfs_data +{ + /* cache for a file block of the currently zfs_open()-ed file */ + void *file_buf; + char current_rootpool[MAXNAMELEN]; + grub_uint64_t file_start; + grub_uint64_t file_end; + +/* cache for a dnode block */ + dnode_phys_t *dnode_buf; + dnode_phys_t *dnode_mdn; + grub_uint64_t dnode_start; + grub_uint64_t dnode_end; + + uberblock_t current_uberblock; + char *stackbase; + grub_disk_t disk; + + dnode_phys_t mos; + dnode_phys_t dnode; + + char current_bootfs[MAXNAMELEN]; + unsigned long long current_bootfs_obj; + char current_bootpath[MAXPATHLEN]; + char current_devid[MAXPATHLEN]; + int is_zfs_mount; + int filepos; + int filemax; + + char scratch[2000000]; +}; + +decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] = +{ + {"inherit", 0}, /* ZIO_COMPRESS_INHERIT */ + {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */ + {"off", 0}, /* ZIO_COMPRESS_OFF */ + {"lzjb", lzjb_decompress}, /* ZIO_COMPRESS_LZJB */ + {"empty", 0} /* ZIO_COMPRESS_EMPTY */ +}; + +static int zio_read_data(blkptr_t *bp, void *buf, char *stack, + struct grub_zfs_data *data); + +/* + * Our own version of bcmp(). + */ +static int +zfs_bcmp(const void *s1, const void *s2, grub_size_t n) +{ + const grub_uint8_t *ps1 = s1; + const grub_uint8_t *ps2 = s2; + + if (s1 != s2 && n != 0) { + do { + if (*ps1++ != *ps2++) + return (1); + } while (--n != 0); + } + + return (0); +} + +/* + * Our own version of log2(). Same thing as highbit()-1. + */ +static int +zfs_log2(grub_uint64_t num) +{ + int i = 0; + + while (num > 1) { + i++; + num = num >> 1; + } + + return (i); +} + +/* Checksum Functions */ +static void +zio_checksum_off(const void *buf __attribute__ ((unused)), + grub_uint64_t size __attribute__ ((unused)), + zio_cksum_t *zcp) +{ + ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); +} + +/* Checksum Table and Values */ +zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { + { {NULL, NULL}, 0, 0, "inherit"}, + { {NULL, NULL}, 0, 0, "on"}, + { {zio_checksum_off, zio_checksum_off}, 0, 0, "off"}, + { {zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"}, + { {zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"}, + { {fletcher_2_native, fletcher_2_byteswap}, 0, 1, "zilog"}, + { {fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"}, + { {fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"}, + { {zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"}, +}; + +/* + * zio_checksum_verify: Provides support for checksum verification. + * + * Fletcher2, Fletcher4, and SHA256 are supported. + * + * Return: + * -1 = Failure + * 0 = Success + */ +static int +zio_checksum_verify(blkptr_t *bp, char *buf, int size) +{ + zio_cksum_t zc = bp->blk_cksum; + grub_uint32_t checksum = BP_GET_CHECKSUM(bp); + int byteswap = BP_SHOULD_BYTESWAP(bp); + zio_block_tail_t *zbt = (zio_block_tail_t *)(buf + size) - 1; + zio_checksum_info_t *ci = &zio_checksum_table[checksum]; + zio_cksum_t actual_cksum, expected_cksum; + + /* byteswap is not supported */ + if (byteswap) + return (-1); + + if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) + return (-1); + + if (ci->ci_zbt) { + expected_cksum = zbt->zbt_cksum; + zbt->zbt_cksum = zc; + ci->ci_func[0](buf, size, &actual_cksum); + zbt->zbt_cksum = expected_cksum; + zc = expected_cksum; + + } else { + ci->ci_func[byteswap](buf, size, &actual_cksum); + } + + if ((actual_cksum.zc_word[0] - zc.zc_word[0]) | + (actual_cksum.zc_word[1] - zc.zc_word[1]) | + (actual_cksum.zc_word[2] - zc.zc_word[2]) | + (actual_cksum.zc_word[3] - zc.zc_word[3])) + return (-1); + + return (0); +} + +/* + * vdev_label_offset takes "offset" (the offset within a vdev_label) and + * returns its physical disk offset (starting from the beginning of the vdev). + * + * Input: + * psize : Physical size of this vdev + * l : Label Number (0-3) + * offset : The offset with a vdev_label in which we want the physical + * address + * Return: + * Success : physical disk offset + * Failure : grub_errno = GRUB_ERR_BAD_ARGUMENT, return value is meaningless + */ +static grub_uint64_t +vdev_label_offset(grub_uint64_t psize, int l, grub_uint64_t offset) +{ + /* XXX Need to add back label support! */ + if (l >= VDEV_LABELS/2 || offset > sizeof (vdev_label_t)) { + grub_errno = GRUB_ERR_BAD_ARGUMENT; + return (0); + } + + return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? + 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); + +} + +/* + * vdev_uberblock_compare takes two uberblock structures and returns an integer + * indicating the more recent of the two. + * Return Value = 1 if ub2 is more recent + * Return Value = -1 if ub1 is more recent + * The most recent uberblock is determined using its transaction number and + * timestamp. The uberblock with the highest transaction number is + * considered "newer". If the transaction numbers of the two blocks match, the + * timestamps are compared to determine the "newer" of the two. + */ +static int +vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) +{ + if (ub1->ub_txg < ub2->ub_txg) + return (-1); + if (ub1->ub_txg > ub2->ub_txg) + return (1); + + if (ub1->ub_timestamp < ub2->ub_timestamp) + return (-1); + if (ub1->ub_timestamp > ub2->ub_timestamp) + return (1); + + return (0); +} + +/* + * Three pieces of information are needed to verify an uberblock: the magic + * number, the version number, and the checksum. + * + * Currently Implemented: version number, magic number + * Need to Implement: checksum + * + * Return: + * 0 - Success + * -1 - Failure + */ +static int +uberblock_verify(uberblock_phys_t *ub, int offset) +{ + + uberblock_t *uber = &ub->ubp_uberblock; + blkptr_t bp; + + BP_ZERO(&bp); + BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); + BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER); + ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0); + + if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0) + return (-1); + + if (uber->ub_magic == UBERBLOCK_MAGIC && + uber->ub_version > 0 && uber->ub_version <= SPA_VERSION) + return (0); + + return (-1); +} + +/* + * Find the best uberblock. + * Return: + * Success - Pointer to the best uberblock. + * Failure - NULL + */ +static uberblock_phys_t * +find_bestub(uberblock_phys_t *ub_array, int label) +{ + uberblock_phys_t *ubbest = NULL; + int i, offset; + + for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) { + offset = vdev_label_offset(0, label, VDEV_UBERBLOCK_OFFSET(i)); + if (grub_errno == GRUB_ERR_BAD_ARGUMENT) + return (NULL); + if (uberblock_verify(&ub_array[i], offset) == 0) { + if (ubbest == NULL) { + ubbest = &ub_array[i]; + } else if (vdev_uberblock_compare( + &(ub_array[i].ubp_uberblock), + &(ubbest->ubp_uberblock)) > 0) { + ubbest = &ub_array[i]; + } + } + } + + return (ubbest); +} + +/* + * Read a block of data based on the gang block address dva, + * and put its data in buf. + * + * Return: + * 0 - success + * 1 - failure + */ +static int +zio_read_gang(blkptr_t *bp, dva_t *dva, void *buf, char *stack, + struct grub_zfs_data *data) +{ + zio_gbh_phys_t *zio_gb; + grub_uint64_t offset, sector; + blkptr_t tmpbp; + unsigned i; + + zio_gb = (zio_gbh_phys_t *)stack; + stack += SPA_GANGBLOCKSIZE; + offset = DVA_GET_OFFSET(dva); + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); + + /* read in the gang block header */ + if (grub_disk_read(data->disk,sector, 0, SPA_GANGBLOCKSIZE, + (char *)zio_gb)) { + grub_printf("failed to read in a gang block header\n"); + return (1); + } + + /* self checksuming the gang block header */ + BP_ZERO(&tmpbp); + BP_SET_CHECKSUM(&tmpbp, ZIO_CHECKSUM_GANG_HEADER); + BP_SET_BYTEORDER(&tmpbp, ZFS_HOST_BYTEORDER); + ZIO_SET_CHECKSUM(&tmpbp.blk_cksum, DVA_GET_VDEV(dva), + DVA_GET_OFFSET(dva), bp->blk_birth, 0); + if (zio_checksum_verify(&tmpbp, (char *)zio_gb, SPA_GANGBLOCKSIZE)) { + grub_printf("failed to checksum a gang block header\n"); + return (1); + } + + for (i = 0; i < SPA_GBH_NBLKPTRS; i++) { + if (zio_gb->zg_blkptr[i].blk_birth == 0) + continue; + + if (zio_read_data(&zio_gb->zg_blkptr[i], buf, stack, data)) + return (1); + buf += BP_GET_PSIZE(&zio_gb->zg_blkptr[i]); + } + + return (0); +} + +/* + * Read in a block of raw data to buf. + * + * Return: + * 0 - success + * 1 - failure + */ +static int +zio_read_data(blkptr_t *bp, void *buf, char *stack, + struct grub_zfs_data *data) +{ + int i, psize; + + psize = BP_GET_PSIZE(bp); + + /* pick a good dva from the block pointer */ + for (i = 0; i < SPA_DVAS_PER_BP; i++) { + grub_uint64_t offset, sector; + + if (bp->blk_dva[i].dva_word[0] == 0 && + bp->blk_dva[i].dva_word[1] == 0) + continue; + + if (DVA_GET_GANG(&bp->blk_dva[i])) { + if (zio_read_gang(bp, &bp->blk_dva[i], buf, stack, data) == 0) + return (0); + } else { + /* read in a data block */ + offset = DVA_GET_OFFSET(&bp->blk_dva[i]); + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); + if (!grub_disk_read(data->disk, sector, 0, psize, buf)) + return (0); + } + } + + return (1); +} + +/* + * Read in a block of data, verify its checksum, decompress if needed, + * and put the uncompressed data in buf. + * + * Return: + * 0 - success + * grub_errno - failure + */ +static int +zio_read(blkptr_t *bp, void *buf, char *stack, struct grub_zfs_data *data) +{ + int lsize, psize, comp; + char *retbuf; + + comp = BP_GET_COMPRESS(bp); + lsize = BP_GET_LSIZE(bp); + psize = BP_GET_PSIZE(bp); + + if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS || + (comp != ZIO_COMPRESS_OFF && + decomp_table[comp].decomp_func == NULL)) { + grub_printf("compression algorithm not supported\n"); + return (GRUB_ERR_BAD_FS); + } + + if ((char *)buf < stack && ((char *)buf) + lsize > stack) { + grub_printf("not enough memory allocated\n"); + return (GRUB_ERR_OUT_OF_MEMORY); + } + + retbuf = buf; + if (comp != ZIO_COMPRESS_OFF) { + buf = stack; + stack += psize; + } + + if (zio_read_data(bp, buf, stack, data)) { + grub_printf("zio_read_data failed\n"); + return (GRUB_ERR_BAD_FS); + } + + if (zio_checksum_verify(bp, buf, psize) != 0) { + grub_printf("checksum verification failed\n"); + return (GRUB_ERR_BAD_FS); + } + + if (comp != ZIO_COMPRESS_OFF) + decomp_table[comp].decomp_func(buf, retbuf, psize, lsize); + + return (0); +} + +/* + * Get the block from a block id. + * push the block onto the stack. + * + * Return: + * 0 - success + * grub_errno - failure + */ +static int +dmu_read(dnode_phys_t *dn, grub_uint64_t blkid, void *buf, char *stack, + struct grub_zfs_data *data) +{ + int idx, level; + blkptr_t *bp_array = dn->dn_blkptr; + int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + blkptr_t *bp, *tmpbuf; + + bp = (blkptr_t *)stack; + stack += sizeof (blkptr_t); + + tmpbuf = (blkptr_t *)stack; + stack += 1<dn_indblkshift; + + for (level = dn->dn_nlevels - 1; level >= 0; level--) { + idx = (blkid >> (epbs * level)) & ((1<dn_datablkszsec << SPA_MINBLOCKSHIFT); + break; + } else if ((grub_errno = zio_read(bp, tmpbuf, stack, data))) { + return (grub_errno); + } + + bp_array = tmpbuf; + } + + return (0); +} + +/* + * mzap_lookup: Looks up property described by "name" and returns the value + * in "value". + * + * Return: + * 0 - success + * grub_errno - failure + */ +static int +mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name, + grub_uint64_t *value) +{ + int i, chunks; + mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; + + chunks = objsize/MZAP_ENT_LEN - 1; + for (i = 0; i < chunks; i++) + grub_dprintf ("zfs", "mzap: %d:%s\n", i, mzap_ent[i].mze_name); + for (i = 0; i < chunks; i++) { + + if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) { + *value = mzap_ent[i].mze_value; + return (0); + } + } + + return (GRUB_ERR_BAD_FS); +} + +static grub_uint64_t +zap_hash(grub_uint64_t salt, const char *name) +{ + static grub_uint64_t table[256]; + const grub_uint8_t *cp; + grub_uint8_t c; + grub_uint64_t crc = salt; + + if (table[128] == 0) { + grub_uint64_t *ct; + int i, j; + for (i = 0; i < 256; i++) { + for (ct = table + i, *ct = i, j = 8; j > 0; j--) + *ct = (*ct >> 1) ^ (-(*ct & 1) & + ZFS_CRC64_POLY); + } + } + + if (crc == 0 || table[128] != ZFS_CRC64_POLY) { + grub_errno = GRUB_ERR_BAD_FS; + return (0); + } + + for (cp = (const grub_uint8_t *)name; (c = *cp) != '\0'; cp++) + crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF]; + + /* + * Only use 28 bits, since we need 4 bits in the cookie for the + * collision differentiator. We MUST use the high bits, since + * those are the onces that we first pay attention to when + * chosing the bucket. + */ + crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1); + + return (crc); +} + +/* + * Only to be used on 8-bit arrays. + * array_len is actual len in bytes (not encoded le_value_length). + * buf is null-terminated. + */ +static int +zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk, + int array_len, const char *buf) +{ + int bseen = 0; + + while (bseen < array_len) { + struct zap_leaf_array *la = + &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array; + int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES); + + if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) + return (0); + + if (zfs_bcmp(la->la_array, buf + bseen, toread) != 0) + break; + chunk = la->la_next; + bseen += toread; + } + return (bseen == array_len); +} + +/* + * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the + * value for the property "name". + * + * Return: + * 0 - success + * grub_errno - failure + */ +static int +zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, grub_uint64_t h, + const char *name, grub_uint64_t *value) +{ + grub_uint16_t chunk; + struct zap_leaf_entry *le; + + /* Verify if this is a valid leaf block */ + if (l->l_hdr.lh_block_type != ZBT_LEAF) + return (GRUB_ERR_BAD_FS); + if (l->l_hdr.lh_magic != ZAP_LEAF_MAGIC) + return (GRUB_ERR_BAD_FS); + + for (chunk = l->l_hash[LEAF_HASH(blksft, h)]; + chunk != CHAIN_END; chunk = le->le_next) { + + if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) + return (GRUB_ERR_BAD_FS); + + le = ZAP_LEAF_ENTRY(l, blksft, chunk); + + /* Verify the chunk entry */ + if (le->le_type != ZAP_CHUNK_ENTRY) + return (GRUB_ERR_BAD_FS); + + if (le->le_hash != h) + continue; + + if (zap_leaf_array_equal(l, blksft, le->le_name_chunk, + le->le_name_length, name)) { + + struct zap_leaf_array *la; + grub_uint8_t *ip; + + if (le->le_int_size != 8 || le->le_value_length != 1) + return (GRUB_ERR_BAD_FS); + + /* get the uint64_t property value */ + la = &ZAP_LEAF_CHUNK(l, blksft, + le->le_value_chunk).l_array; + ip = la->la_array; + + *value = (grub_uint64_t)ip[0] << 56 + | (grub_uint64_t)ip[1] << 48 + | (grub_uint64_t)ip[2] << 40 + | (grub_uint64_t)ip[3] << 32 + | (grub_uint64_t)ip[4] << 24 + | (grub_uint64_t)ip[5] << 16 + | (grub_uint64_t)ip[6] << 8 + | (grub_uint64_t)ip[7]; + + return (0); + } + } + + return (GRUB_ERR_BAD_FS); +} + +/* + * Fat ZAP lookup + * + * Return: + * 0 - success + * grub_errno - failure + */ +static int +fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap, + char *name, grub_uint64_t *value, char *stack, + struct grub_zfs_data *data) +{ + zap_leaf_phys_t *l; + grub_uint64_t hash, idx, blkid; + int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT); + + /* Verify if this is a fat zap header block */ + if (zap->zap_magic != (grub_uint64_t)ZAP_MAGIC) + return (GRUB_ERR_BAD_FS); + + hash = zap_hash(zap->zap_salt, name); + if (grub_errno) + return (grub_errno); + + /* get block id from index */ + if (zap->zap_ptrtbl.zt_numblks != 0) { + /* external pointer tables not supported */ + return (GRUB_ERR_BAD_FS); + } + idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift); + blkid = ((grub_uint64_t *)zap)[idx + (1<<(blksft-3-1))]; + + /* Get the leaf block */ + l = (zap_leaf_phys_t *)stack; + stack += 1<dn_datablkszsec << SPA_MINBLOCKSHIFT; + stack += size; + if ((grub_errno = dmu_read(zap_dnode, 0, zapbuf, stack, data))) + return (grub_errno); + + block_type = *((grub_uint64_t *)zapbuf); + + if (block_type == ZBT_MICRO) { + return (mzap_lookup(zapbuf, size, name, val)); + } else if (block_type == ZBT_HEADER) { + /* this is a fat zap */ + return (fzap_lookup(zap_dnode, zapbuf, name, + val, stack, data)); + } + + return (GRUB_ERR_BAD_FS); +} + +/* + * Get the dnode of an object number from the metadnode of an object set. + * + * Input + * mdn - metadnode to get the object dnode + * objnum - object number for the object dnode + * buf - data buffer that holds the returning dnode + * stack - scratch area + * + * Return: + * 0 - success + * grub_errno - failure + */ +static int +dnode_get(dnode_phys_t *mdn, grub_uint64_t objnum, grub_uint8_t type, + dnode_phys_t *buf, char *stack, struct grub_zfs_data *data) +{ + grub_uint64_t blkid, blksz; /* the block id this object dnode is in */ + int epbs; /* shift of number of dnodes in a block */ + int idx; /* index within a block */ + dnode_phys_t *dnbuf; + + blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT; + epbs = zfs_log2(blksz) - DNODE_SHIFT; + blkid = objnum >> epbs; + idx = objnum & ((1<dnode_buf != NULL && data->dnode_mdn == mdn && + objnum >= data->dnode_start && objnum < data->dnode_end) { + grub_memmove(buf, &(data->dnode_buf)[idx], DNODE_SIZE); + VERIFY_DN_TYPE(buf, type); + return (0); + } + + if (data->dnode_buf && blksz == 1<dnode_buf; + data->dnode_mdn = mdn; + data->dnode_start = blkid << epbs; + data->dnode_end = (blkid + 1) << epbs; + } else { + dnbuf = (dnode_phys_t *)stack; + stack += blksz; + } + + if ((grub_errno = dmu_read(mdn, blkid, (char *)dnbuf, stack, data))) + return (grub_errno); + + grub_memmove(buf, &dnbuf[idx], DNODE_SIZE); + VERIFY_DN_TYPE(buf, type); + + return (0); +} + +#if 0 + +/* + * Check if this is a special file that resides at the top + * dataset of the pool. Currently this is the GRUB menu, + * boot signature and boot signature backup. + * str starts with '/'. + */ +static int +is_top_dataset_file(char *str) +{ + char *tptr; + + if ((tptr = grub_strstr(str, "menu.lst")) && + (tptr[8] == '\0' || tptr[8] == ' ') && + *(tptr-1) == '/') + return (1); + + if (grub_strncmp(str, BOOTSIGN_DIR"/", + grub_strlen(BOOTSIGN_DIR) + 1) == 0) + return (1); + + if (grub_strcmp(str, BOOTSIGN_BACKUP) == 0) + return (1); + + return (0); +} +#endif + +/* + * Get the file dnode for a given file name where mdn is the meta dnode + * for this ZFS object set. When found, place the file dnode in dn. + * The 'path' argument will be mangled. + * + * Return: + * 0 - success + * grub_errno - failure + */ +static int +dnode_get_path(dnode_phys_t *mdn, char *path, dnode_phys_t *dn, + char *stack, struct grub_zfs_data *data) +{ + grub_uint64_t objnum, version; + char *cname, ch; + + if ((grub_errno = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, + dn, stack, data))) + return (grub_errno); + + if ((grub_errno = zap_lookup(dn, ZPL_VERSION_STR, &version, + stack, data))) + return (grub_errno); + if (version > ZPL_VERSION) + return (-1); + + if ((grub_errno = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack, data))) + return (grub_errno); + + if ((grub_errno = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, + dn, stack, data))) + return (grub_errno); + + /* skip leading slashes */ + while (*path == '/') + path++; + + while (*path && ! grub_isspace(*path)) { + + /* get the next component name */ + cname = path; + while (*path && ! grub_isspace(*path) && *path != '/') + path++; + ch = *path; + *path = 0; /* ensure null termination */ + + if ((grub_errno = zap_lookup(dn, cname, &objnum, stack, data))) + { + grub_dprintf ("zfs", "zap lookup %s failed", cname); + return (grub_errno); + } + + objnum = ZFS_DIRENT_OBJ(objnum); + if ((grub_errno = dnode_get(mdn, objnum, 0, dn, stack, data))) + return (grub_errno); + + *path = ch; + while (*path == '/') + path++; + } + + /* We found the dnode for this file. Verify if it is a plain file. */ + VERIFY_DN_TYPE(dn, DMU_OT_PLAIN_FILE_CONTENTS); + + return (0); +} + +/* + * Get the default 'bootfs' property value from the rootpool. + * + * Return: + * 0 - success + * grub_errno -failure + */ +static int +get_default_bootfsobj(dnode_phys_t *mosmdn, grub_uint64_t *obj, char *stack, + struct grub_zfs_data *data) +{ + grub_uint64_t objnum = 0; + dnode_phys_t *dn = (dnode_phys_t *)stack; + stack += DNODE_SIZE; + + if ((grub_errno = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, dn, stack, data))) + return (grub_errno); + + /* + * find the object number for 'pool_props', and get the dnode + * of the 'pool_props'. + */ + if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack, data)) + return (GRUB_ERR_BAD_FS); + + if ((grub_errno = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, + stack, data))) + return (grub_errno); + + if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack, data)) + return (GRUB_ERR_BAD_FS); + + if (!objnum) + return (GRUB_ERR_BAD_FS); + + *obj = objnum; + return (0); +} + +/* + * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname), + * e.g. pool/rootfs, or a given object number (obj), e.g. the object number + * of pool/rootfs. + * + * If no fsname and no obj are given, return the DSL_DIR metadnode. + * If fsname is given, return its metadnode and its matching object number. + * If only obj is given, return the metadnode for this object number. + * + * Return: + * 0 - success + * grub_errno - failure + */ +static int +get_objset_mdn(dnode_phys_t *mosmdn, char *fsname, grub_uint64_t *obj, + dnode_phys_t *mdn, char *stack, struct grub_zfs_data *data) +{ + grub_uint64_t objnum, headobj; + char *cname, ch; + blkptr_t *bp; + objset_phys_t *osp; + int issnapshot = 0; + char *snapname; + + if (fsname == NULL && obj) { + headobj = *obj; + goto skip; + } + + if ((grub_errno = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, mdn, stack, data))) + return (grub_errno); + + if ((grub_errno = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, + stack, data))) + return (grub_errno); + + if ((grub_errno = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, + mdn, stack, data))) + return (grub_errno); + + if (fsname == NULL) { + headobj = + ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; + goto skip; + } + + /* take out the pool name */ + while (*fsname && ! grub_isspace(*fsname) && *fsname != '/') + fsname++; + + while (*fsname && ! grub_isspace(*fsname)) { + grub_uint64_t childobj; + + while (*fsname == '/') + fsname++; + + cname = fsname; + while (*fsname && ! grub_isspace(*fsname) && *fsname != '/') + fsname++; + ch = *fsname; + *fsname = 0; + + snapname = cname; + while (*snapname && ! grub_isspace(*snapname) + && *snapname != '@') + snapname++; + if (*snapname == '@') { + issnapshot = 1; + *snapname = 0; + } + childobj = + ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj; + if ((grub_errno = dnode_get(mosmdn, childobj, + DMU_OT_DSL_DIR_CHILD_MAP, mdn, + stack, data))) + return (grub_errno); + + if (zap_lookup(mdn, cname, &objnum, stack, data)) + return (GRUB_ERR_BAD_FS); + + if ((grub_errno = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, + mdn, stack, data))) + return (grub_errno); + + *fsname = ch; + if (issnapshot) + *snapname = '@'; + } + headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; + if (obj) + *obj = headobj; + +skip: + if ((grub_errno = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, + mdn, stack, data))) + return (grub_errno); + if (issnapshot) { + grub_uint64_t snapobj; + + snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))-> + ds_snapnames_zapobj; + + if ((grub_errno = dnode_get(mosmdn, snapobj, + DMU_OT_DSL_DS_SNAP_MAP, + mdn, stack, data))) + return (grub_errno); + if (zap_lookup(mdn, snapname + 1, &headobj, stack, data)) + return (GRUB_ERR_BAD_FS); + if ((grub_errno = dnode_get(mosmdn, headobj, + DMU_OT_DSL_DATASET, mdn, + stack, data))) + return (grub_errno); + if (obj) + *obj = headobj; + } + + bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp; + osp = (objset_phys_t *)stack; + stack += sizeof (objset_phys_t); + if ((grub_errno = zio_read(bp, osp, stack, data))) + return (grub_errno); + + grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE); + + return (0); +} + +/* + * For a given XDR packed nvlist, verify the first 4 bytes and move on. + * + * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) : + * + * encoding method/host endian (4 bytes) + * nvl_version (4 bytes) + * nvl_nvflag (4 bytes) + * encoded nvpairs: + * encoded size of the nvpair (4 bytes) + * decoded size of the nvpair (4 bytes) + * name string size (4 bytes) + * name string data (sizeof(NV_ALIGN4(string)) + * data type (4 bytes) + * # of elements in the nvpair (4 bytes) + * data + * 2 zero's for the last nvpair + * (end of the entire list) (8 bytes) + * + * Return: + * 0 - success + * 1 - failure + */ +static int +nvlist_unpack(char *nvlist, char **out) +{ + /* Verify if the 1st and 2nd byte in the nvlist are valid. */ + if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN) + return (1); + + nvlist += 4; + *out = nvlist; + return (0); +} + +static char * +nvlist_array(char *nvlist, int index) +{ + int i, encode_size; + + for (i = 0; i < index; i++) { + /* skip the header, nvl_version, and nvl_nvflag */ + nvlist = nvlist + 4 * 2; + + while ((encode_size = BSWAP_32(*(grub_uint32_t *)nvlist))) + nvlist += encode_size; /* goto the next nvpair */ + + nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */ + } + + return (nvlist); +} + +static int +nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype, + int *nelmp) +{ + int name_len, type, slen, encode_size; + char *nvpair, *nvp_name, *strval = val; + grub_uint64_t *intval = val; + + /* skip the header, nvl_version, and nvl_nvflag */ + nvlist = nvlist + 4 * 2; + + /* + * Loop thru the nvpair list + * The XDR representation of an integer is in big-endian byte order. + */ + while ((encode_size = BSWAP_32(*(grub_uint32_t *)nvlist))) { + + nvpair = nvlist + 4 * 2; /* skip the encode/decode size */ + + name_len = BSWAP_32(*(grub_uint32_t *)nvpair); + nvpair += 4; + + nvp_name = nvpair; + nvpair = nvpair + ((name_len + 3) & ~3); /* align */ + + type = BSWAP_32(*(grub_uint32_t *)nvpair); + nvpair += 4; + + if ((grub_strncmp(nvp_name, name, name_len) == 0) && + type == valtype) { + int nelm; + + if ((nelm = BSWAP_32(*(grub_uint32_t *)nvpair)) < 1) + return (1); + nvpair += 4; + + switch (valtype) { + case DATA_TYPE_STRING: + slen = BSWAP_32(*(grub_uint32_t *)nvpair); + nvpair += 4; + grub_memmove(strval, nvpair, slen); + strval[slen] = '\0'; + return (0); + + case DATA_TYPE_UINT64: + *intval = BSWAP_64(*(grub_uint64_t *)nvpair); + return (0); + + case DATA_TYPE_NVLIST: + *(void **)val = (void *)nvpair; + return (0); + + case DATA_TYPE_NVLIST_ARRAY: + *(void **)val = (void *)nvpair; + if (nelmp) + *nelmp = nelm; + return (0); + } + } + + nvlist += encode_size; /* goto the next nvpair */ + } + + return (1); +} + +/* + * Check if this vdev is online and is in a good state. + */ +static int +vdev_validate(char *nv) +{ + grub_uint64_t ival; + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival, + DATA_TYPE_UINT64, NULL) == 0 || + nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival, + DATA_TYPE_UINT64, NULL) == 0 || + nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival, + DATA_TYPE_UINT64, NULL) == 0) + return (GRUB_ERR_BAD_FS); + + return (0); +} + +/* + * Get a list of valid vdev pathname from the boot device. + * The caller should already allocate MAXPATHLEN memory for bootpath and devid. + */ +int +vdev_get_bootpath(char *nv, grub_uint64_t inguid, char *devid, char *bootpath) +{ + char type[16]; + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING, + NULL)) + return (GRUB_ERR_BAD_FS); + + if (grub_strcmp(type, VDEV_TYPE_DISK) == 0) { + grub_uint64_t guid; + + if (vdev_validate(nv) != 0) + return (GRUB_ERR_FILE_NOT_FOUND); + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_GUID, + &guid, DATA_TYPE_UINT64, NULL) != 0) + return (GRUB_ERR_FILE_NOT_FOUND); + + if (guid != inguid) + return (GRUB_ERR_FILE_NOT_FOUND); + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, + bootpath, DATA_TYPE_STRING, NULL) != 0) + bootpath[0] = '\0'; + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID, + devid, DATA_TYPE_STRING, NULL) != 0) + devid[0] = '\0'; + + if (grub_strlen(bootpath) >= MAXPATHLEN || + grub_strlen(devid) >= MAXPATHLEN) + return (GRUB_ERR_OUT_OF_MEMORY); + + return (0); + + } else if (grub_strcmp(type, VDEV_TYPE_MIRROR) == 0) { + int nelm, i; + char *child; + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child, + DATA_TYPE_NVLIST_ARRAY, &nelm)) + return (GRUB_ERR_BAD_FS); + + for (i = 0; i < nelm; i++) { + char *child_i; + + child_i = nvlist_array(child, i); + if (vdev_get_bootpath(child_i, inguid, devid, + bootpath) == 0) + return (0); + } + } + + return (GRUB_ERR_FILE_NOT_FOUND); +} + +/* + * Check the disk label information and retrieve needed vdev name-value pairs. + * + * Return: + * 0 - success + * GRUB_ERR_* - failure + */ +int +check_pool_label(int label, char *stack, char *outdevid, char *outpath, + struct grub_zfs_data *data) +{ + vdev_phys_t *vdev; + grub_uint64_t sector, pool_state, txg = 0; + char *nvlist, *nv; + grub_uint64_t diskguid; + grub_uint64_t version; + + sector = (label * sizeof (vdev_label_t) + VDEV_SKIP_SIZE + + VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT; + + vdev = grub_malloc (VDEV_PHYS_SIZE); + /* Read in the vdev name-value pair list (112K). */ + if (grub_disk_read(data->disk, sector, 0, VDEV_PHYS_SIZE, vdev)) + { + grub_free (vdev); + return (GRUB_ERR_IO); + } + grub_dprintf ("zfs", "check 1 passed\n"); + + if (nvlist_unpack(vdev->vp_nvlist, &nvlist)) + { + grub_free (vdev); + return (GRUB_ERR_BAD_FS); + } + grub_dprintf ("zfs", "check 2 passed\n"); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state, + DATA_TYPE_UINT64, NULL)) + { + grub_free (vdev); + return (GRUB_ERR_BAD_FS); + } + grub_dprintf ("zfs", "check 3 passed\n"); + + if (pool_state == POOL_STATE_DESTROYED) + { + grub_free (vdev); + return (GRUB_ERR_FILE_NOT_FOUND); + } + grub_dprintf ("zfs", "check 4 passed\n"); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME, + data->current_rootpool, DATA_TYPE_STRING, NULL)) + { + grub_free (vdev); + return (GRUB_ERR_BAD_FS); + } + + grub_dprintf ("zfs", "check 5 passed\n"); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg, + DATA_TYPE_UINT64, NULL)) + { + grub_free (vdev); + return (GRUB_ERR_BAD_FS); + } + grub_dprintf ("zfs", "check 6 passed\n"); + + /* not an active device */ + if (txg == 0) + { + grub_free (vdev); + return (GRUB_ERR_FILE_NOT_FOUND); + } + grub_dprintf ("zfs", "check 7 passed\n"); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version, + DATA_TYPE_UINT64, NULL)) + { + grub_free (vdev); + return (GRUB_ERR_BAD_FS); + } + grub_dprintf ("zfs", "check 8 passed\n"); + + if (version > SPA_VERSION) + { + grub_free (vdev); + return grub_error (GRUB_ERR_BAD_FS, "too new version"); + } + grub_dprintf ("zfs", "check 9 passed\n"); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv, + DATA_TYPE_NVLIST, NULL)) + { + grub_free (vdev); + return (GRUB_ERR_BAD_FS); + } + grub_dprintf ("zfs", "check 10 passed\n"); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid, + DATA_TYPE_UINT64, NULL)) + { + grub_free (vdev); + return (GRUB_ERR_BAD_FS); + } + grub_dprintf ("zfs", "check 11 passed\n"); + + if (0 && vdev_get_bootpath(nv, diskguid, outdevid, outpath)) + { + grub_free (vdev); + return (GRUB_ERR_FILE_NOT_FOUND); + } + grub_dprintf ("zfs", "check 12 passed\n"); + grub_free (vdev); + + return (0); +} + +/* + * zfs_mount() locates a valid uberblock of the root pool and read in its MOS + * to the memory address MOS. + * + * Return: + * 1 - success + * 0 - failure + */ +static int +zfs_mount(struct grub_zfs_data *data) +{ + char *stack; + int label = 0; + uberblock_phys_t *ub_array, *ubbest = NULL; + vdev_boot_header_t *bh; + objset_phys_t *osp; + char tmp_bootpath[MAXNAMELEN]; + char tmp_devid[MAXNAMELEN]; +#if 0 + /* if it's our first time here, zero the best uberblock out */ + if (data->best_drive == 0 && data->best_part == 0 && find_best_root) + grub_memset(¤t_uberblock, 0, sizeof (uberblock_t)); +#endif + grub_memset (data, 0, sizeof (data)); + data->stackbase = (data->scratch); + stack = data->stackbase; + ub_array = (uberblock_phys_t *)stack; + stack += VDEV_UBERBLOCK_RING; + + bh = (vdev_boot_header_t *)stack; + stack += VDEV_BOOT_HEADER_SIZE; + + osp = (objset_phys_t *)stack; + stack += sizeof (objset_phys_t); + + /* XXX add back labels support? */ + for (label = 0; ubbest == NULL && label < (VDEV_LABELS/2); label++) { + grub_uint64_t sector = (label * sizeof (vdev_label_t) + + VDEV_SKIP_SIZE) >> SPA_MINBLOCKSHIFT; + grub_dprintf ("zfs", "label %d\n", label); + if (grub_disk_read(data->disk, sector, 0, VDEV_BOOT_HEADER_SIZE, + (char *)bh)) + continue; + if ((bh->vb_magic != VDEV_BOOT_MAGIC) || + (bh->vb_version != VDEV_BOOT_VERSION)) { + continue; + } + sector += (VDEV_BOOT_HEADER_SIZE + + VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT; + + /* Read in the uberblock ring (128K). */ + if (grub_disk_read(data->disk, sector, 0, VDEV_UBERBLOCK_RING, + (char *)ub_array)) + continue; + grub_dprintf ("zfs", "label ok %d\n", label); + + if ((ubbest = find_bestub(ub_array, label)) != NULL && + zio_read(&ubbest->ubp_uberblock.ub_rootbp, osp, stack, data) + == 0) { + grub_dprintf ("zfs", "ubbest %p\n", ubbest); + + /* VERIFY_OS_TYPE(osp, DMU_OST_META);*/ + + if (check_pool_label(label, stack, tmp_devid, + tmp_bootpath, data)) + return (0); +#if 0 + if (find_best_root && + vdev_uberblock_compare(&ubbest->ubp_uberblock, + &(current_uberblock)) <= 0) + continue; +#endif + /* Got the MOS. Save it at the memory addr MOS. */ + grub_memmove(&(data->mos), &osp->os_meta_dnode, + DNODE_SIZE); + grub_memmove(&(data->current_uberblock), + &ubbest->ubp_uberblock, sizeof (uberblock_t)); + grub_memmove(data->current_bootpath, tmp_bootpath, + MAXNAMELEN); + grub_memmove(data->current_devid, tmp_devid, + grub_strlen(tmp_devid)); + data->is_zfs_mount = 1; + return (1); + } + } + + return (0); +} + +/* + * zfs_open() locates a file in the rootpool by following the + * MOS and places the dnode of the file in the memory address DNODE. + * + * Return: + * 1 - success + * 0 - failure + */ +static grub_err_t +zfs_open(char *filename, struct grub_zfs_data *data) +{ + char *stack; + char *fsname; + dnode_phys_t *mdn; + + data->file_buf = NULL; + data->stackbase = data->scratch; + stack = data->stackbase; + + mdn = (dnode_phys_t *) grub_malloc (sizeof (dnode_phys_t)); + + data->dnode_mdn = NULL; + data->dnode_buf = (dnode_phys_t *)stack; + stack += 1<mos), + fsname, NULL, + mdn, stack, data))) + { + grub_free (mdn); + return grub_errno; + } + + data->current_bootfs_obj = 0; + + if (dnode_get_path(mdn, filename, &(data->dnode), stack, data)) { + grub_free (mdn); + return grub_error (GRUB_ERR_FILE_NOT_FOUND, "couldn't find %s", + filename); + } + + /* get the file size and set the file position to 0 */ + data->filemax = ((znode_phys_t *)DN_BONUS(&(data->dnode)))->zp_size; + data->filepos = 0; + + data->dnode_buf = NULL; + grub_free (mdn); + return GRUB_ERR_NONE; +} + +/* + * zfs_read reads in the data blocks pointed by the DNODE. + * + * Return: + * len - the length successfully read in to the buffer + * 0 - failure + */ +static int +zfs_read(char *buf, int len, struct grub_zfs_data *data) +{ + char *stack; + char *tmpbuf; + int blksz, length, movesize; + + if (data->file_buf == NULL) { + data->file_buf = data->stackbase; + data->stackbase += SPA_MAXBLOCKSIZE; + data->file_start = data->file_end = 0; + } + stack = data->stackbase; + + /* + * If offset is in memory, move it into the buffer provided and return. + */ + if (data->filepos >= data->file_start + && data->filepos+len <= data->file_end) { + grub_memmove(buf, data->file_buf + data->filepos - data->file_start, + len); + data->filepos += len; + return (len); + } + + blksz = data->dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT; + + /* + * Entire Dnode is too big to fit into the space available. We + * will need to read it in chunks. This could be optimized to + * read in as large a chunk as there is space available, but for + * now, this only reads in one data block at a time. + */ + length = len; + while (length) { + /* + * Find requested blkid and the offset within that block. + */ + grub_uint64_t blkid = data->filepos / blksz; + + if ((grub_errno = dmu_read(&(data->dnode), blkid, + data->file_buf, stack, data))) + return (0); + + data->file_start = blkid * blksz; + data->file_end = data->file_start + blksz; + + movesize = MIN(length, data->file_end - data->filepos); + + grub_memmove(buf, data->file_buf + data->filepos + - data->file_start, movesize); + buf += movesize; + length -= movesize; + data->filepos += movesize; + } + + return (len); +} + +static grub_err_t +grub_zfs_open (struct grub_file *file, const char *name) +{ + struct grub_zfs_data *data; + +#ifndef GRUB_UTIL + grub_dl_ref (my_mod); +#endif + + data = grub_malloc (sizeof (*data)); + + data->disk = file->device->disk; + + if (! zfs_mount (data)) + { + grub_free (data); + return grub_errno; + } + + if (zfs_open (name, data)) + { + grub_free (data); + return grub_errno; + } + file->data = data; + file->offset = 0; + file->size = data->filemax; + return GRUB_ERR_NONE; +} + +/* Read LEN bytes data from FILE into BUF. */ +static grub_ssize_t +grub_zfs_read (grub_file_t file, char *buf, grub_size_t len) +{ + struct grub_zfs_data *data = (struct grub_zfs_data *) file->data; + + data->filepos = file->offset; + + return zfs_read(buf, len, data); +} + + +static grub_err_t +grub_zfs_close (grub_file_t file) +{ + grub_free (file->data); + +#ifndef GRUB_UTIL + grub_dl_unref (my_mod); +#endif + + return GRUB_ERR_NONE; +} + +static grub_err_t +grub_zfs_dir (grub_device_t device, const char *path, + int (*hook) (const char *filename, + const struct grub_dirhook_info *info)) +{ + struct grub_zfs_data *data; + + data = grub_malloc (sizeof (*data)); + + data->disk = device->disk; + + if (! zfs_mount (data)) + { + if (! grub_errno) + grub_errno = GRUB_ERR_BAD_FS; + grub_free (data); + return grub_errno; + } + + grub_free (data); + return 0; +} + +static struct grub_fs grub_zfs_fs = + { + .name = "zfs", + .dir = grub_zfs_dir, + .open = grub_zfs_open, + .read = grub_zfs_read, + .close = grub_zfs_close, + .label = 0, + .uuid = 0, + .mtime = 0, + .next = 0 + }; + +GRUB_MOD_INIT(zfs) +{ + grub_fs_register (&grub_zfs_fs); +#ifndef GRUB_UTIL + my_mod = mod; +#endif +} + +GRUB_MOD_FINI(zfs) +{ + grub_fs_unregister (&grub_zfs_fs); +} diff --git a/fs/zfs_fletcher.c b/fs/zfs_fletcher.c new file mode 100644 index 0000000..e062cf7 --- /dev/null +++ b/fs/zfs_fletcher.c @@ -0,0 +1,111 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void +fletcher_2_native(const void *buf, grub_uint64_t size, zio_cksum_t *zcp) +{ + const grub_uint64_t *ip = buf; + const grub_uint64_t *ipend = ip + (size / sizeof (grub_uint64_t)); + grub_uint64_t a0, b0, a1, b1; + + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { + a0 += ip[0]; + a1 += ip[1]; + b0 += a0; + b1 += a1; + } + + ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); +} + +void +fletcher_2_byteswap(const void *buf, grub_uint64_t size, zio_cksum_t *zcp) +{ + const grub_uint64_t *ip = buf; + const grub_uint64_t *ipend = ip + (size / sizeof (grub_uint64_t)); + grub_uint64_t a0, b0, a1, b1; + + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { + a0 += BSWAP_64(ip[0]); + a1 += BSWAP_64(ip[1]); + b0 += a0; + b1 += a1; + } + + ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); +} + +void +fletcher_4_native(const void *buf, grub_uint64_t size, zio_cksum_t *zcp) +{ + const grub_uint32_t *ip = buf; + const grub_uint32_t *ipend = ip + (size / sizeof (grub_uint32_t)); + grub_uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += ip[0]; + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_byteswap(const void *buf, grub_uint64_t size, zio_cksum_t *zcp) +{ + const grub_uint32_t *ip = buf; + const grub_uint32_t *ipend = ip + (size / sizeof (grub_uint32_t)); + grub_uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += BSWAP_32(ip[0]); + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} diff --git a/fs/zfs_lzjb.c b/fs/zfs_lzjb.c new file mode 100644 index 0000000..7aaa941 --- /dev/null +++ b/fs/zfs_lzjb.c @@ -0,0 +1,87 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MATCH_BITS 6 +#define MATCH_MIN 3 +#define OFFSET_MASK ((1 << (16 - MATCH_BITS)) - 1) + +/* + * Decompression Entry - lzjb + */ +#ifndef NBBY +#define NBBY 8 +#endif + + +/*ARGSUSED*/ +int +lzjb_decompress(void *s_start, void *d_start, grub_size_t s_len, + grub_size_t d_len) +{ + grub_uint8_t *src = s_start; + grub_uint8_t *dst = d_start; + grub_uint8_t *d_end = (grub_uint8_t *)d_start + d_len; + grub_uint8_t *cpy, copymap; + int copymask = 1 << (NBBY - 1); + + while (dst < d_end) { + if ((copymask <<= 1) == (1 << NBBY)) { + copymask = 1; + copymap = *src++; + } + if (copymap & copymask) { + int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; + int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; + src += 2; + if ((cpy = dst - offset) < (grub_uint8_t *)d_start) + return (-1); + while (--mlen >= 0 && dst < d_end) + *dst++ = *cpy++; + } else { + *dst++ = *src++; + } + } + return (0); +} diff --git a/fs/zfs_sha256.c b/fs/zfs_sha256.c new file mode 100644 index 0000000..909cbc3 --- /dev/null +++ b/fs/zfs_sha256.c @@ -0,0 +1,143 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * SHA-256 checksum, as specified in FIPS 180-2, available at: + * http://csrc.nist.gov/cryptval + * + * This is a very compact implementation of SHA-256. + * It is designed to be simple and portable, not to be fast. + */ + +/* + * The literal definitions according to FIPS180-2 would be: + * + * Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) + * Maj(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) + * + * We use logical equivalents which require one less op. + */ +#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define Maj(x, y, z) (((x) & (y)) ^ ((z) & ((x) ^ (y)))) +#define Rot32(x, s) (((x) >> s) | ((x) << (32 - s))) +#define SIGMA0(x) (Rot32(x, 2) ^ Rot32(x, 13) ^ Rot32(x, 22)) +#define SIGMA1(x) (Rot32(x, 6) ^ Rot32(x, 11) ^ Rot32(x, 25)) +#define sigma0(x) (Rot32(x, 7) ^ Rot32(x, 18) ^ ((x) >> 3)) +#define sigma1(x) (Rot32(x, 17) ^ Rot32(x, 19) ^ ((x) >> 10)) + +static const grub_uint32_t SHA256_K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +static void +SHA256Transform(grub_uint32_t *H, const grub_uint8_t *cp) +{ + grub_uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64]; + + for (t = 0; t < 16; t++, cp += 4) + W[t] = (cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) | cp[3]; + + for (t = 16; t < 64; t++) + W[t] = sigma1(W[t - 2]) + W[t - 7] + + sigma0(W[t - 15]) + W[t - 16]; + + a = H[0]; b = H[1]; c = H[2]; d = H[3]; + e = H[4]; f = H[5]; g = H[6]; h = H[7]; + + for (t = 0; t < 64; t++) { + T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t]; + T2 = SIGMA0(a) + Maj(a, b, c); + h = g; g = f; f = e; e = d + T1; + d = c; c = b; b = a; a = T1 + T2; + } + + H[0] += a; H[1] += b; H[2] += c; H[3] += d; + H[4] += e; H[5] += f; H[6] += g; H[7] += h; +} + +void +zio_checksum_SHA256(const void *buf, grub_uint64_t size, zio_cksum_t *zcp) +{ + grub_uint32_t H[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 }; + grub_uint8_t pad[128]; + int padsize = size & 63; + int i; + + for (i = 0; i < size - padsize; i += 64) + SHA256Transform(H, (grub_uint8_t *)buf + i); + + for (i = 0; i < padsize; i++) + pad[i] = ((grub_uint8_t *)buf)[i]; + + for (pad[padsize++] = 0x80; (padsize & 63) != 56; padsize++) + pad[padsize] = 0; + + for (i = 0; i < 8; i++) + pad[padsize++] = (size << 3) >> (56 - 8 * i); + + for (i = 0; i < padsize; i += 64) + SHA256Transform(H, pad + i); + + ZIO_SET_CHECKSUM(zcp, + (grub_uint64_t)H[0] << 32 | H[1], + (grub_uint64_t)H[2] << 32 | H[3], + (grub_uint64_t)H[4] << 32 | H[5], + (grub_uint64_t)H[6] << 32 | H[7]); +} diff --git a/include/grub/zfs/dmu.h b/include/grub/zfs/dmu.h new file mode 100644 index 0000000..ba51e63 --- /dev/null +++ b/include/grub/zfs/dmu.h @@ -0,0 +1,105 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DMU_H +#define _SYS_DMU_H + +/* + * This file describes the interface that the DMU provides for its + * consumers. + * + * The DMU also interacts with the SPA. That interface is described in + * dmu_spa.h. + */ +typedef enum dmu_object_type { + DMU_OT_NONE, + /* general: */ + DMU_OT_OBJECT_DIRECTORY, /* ZAP */ + DMU_OT_OBJECT_ARRAY, /* UINT64 */ + DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ + DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ + DMU_OT_BPLIST, /* UINT64 */ + DMU_OT_BPLIST_HDR, /* UINT64 */ + /* spa: */ + DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ + DMU_OT_SPACE_MAP, /* UINT64 */ + /* zil: */ + DMU_OT_INTENT_LOG, /* UINT64 */ + /* dmu: */ + DMU_OT_DNODE, /* DNODE */ + DMU_OT_OBJSET, /* OBJSET */ + /* dsl: */ + DMU_OT_DSL_DIR, /* UINT64 */ + DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */ + DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */ + DMU_OT_DSL_PROPS, /* ZAP */ + DMU_OT_DSL_DATASET, /* UINT64 */ + /* zpl: */ + DMU_OT_ZNODE, /* ZNODE */ + DMU_OT_ACL, /* ACL */ + DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ + DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ + DMU_OT_MASTER_NODE, /* ZAP */ + DMU_OT_UNLINKED_SET, /* ZAP */ + /* zvol: */ + DMU_OT_ZVOL, /* UINT8 */ + DMU_OT_ZVOL_PROP, /* ZAP */ + /* other; for testing only! */ + DMU_OT_PLAIN_OTHER, /* UINT8 */ + DMU_OT_UINT64_OTHER, /* UINT64 */ + DMU_OT_ZAP_OTHER, /* ZAP */ + /* new object types: */ + DMU_OT_ERROR_LOG, /* ZAP */ + DMU_OT_SPA_HISTORY, /* UINT8 */ + DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */ + DMU_OT_POOL_PROPS, /* ZAP */ + + DMU_OT_NUMTYPES +} dmu_object_type_t; + +typedef enum dmu_objset_type { + DMU_OST_NONE, + DMU_OST_META, + DMU_OST_ZFS, + DMU_OST_ZVOL, + DMU_OST_OTHER, /* For testing only! */ + DMU_OST_ANY, /* Be careful! */ + DMU_OST_NUMTYPES +} dmu_objset_type_t; + +/* + * The names of zap entries in the DIRECTORY_OBJECT of the MOS. + */ +#define DMU_POOL_DIRECTORY_OBJECT 1 +#define DMU_POOL_CONFIG "config" +#define DMU_POOL_ROOT_DATASET "root_dataset" +#define DMU_POOL_SYNC_BPLIST "sync_bplist" +#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" +#define DMU_POOL_ERRLOG_LAST "errlog_last" +#define DMU_POOL_SPARES "spares" +#define DMU_POOL_DEFLATE "deflate" +#define DMU_POOL_HISTORY "history" +#define DMU_POOL_PROPS "pool_props" +#define DMU_POOL_L2CACHE "l2cache" + +#endif /* _SYS_DMU_H */ diff --git a/include/grub/zfs/dmu_objset.h b/include/grub/zfs/dmu_objset.h new file mode 100644 index 0000000..3261e18 --- /dev/null +++ b/include/grub/zfs/dmu_objset.h @@ -0,0 +1,37 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DMU_OBJSET_H +#define _SYS_DMU_OBJSET_H + +#include + +typedef struct objset_phys { + dnode_phys_t os_meta_dnode; + zil_header_t os_zil_header; + grub_uint64_t os_type; + char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) - + sizeof (grub_uint64_t)]; +} objset_phys_t; + +#endif /* _SYS_DMU_OBJSET_H */ diff --git a/include/grub/zfs/dnode.h b/include/grub/zfs/dnode.h new file mode 100644 index 0000000..07666ca --- /dev/null +++ b/include/grub/zfs/dnode.h @@ -0,0 +1,78 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DNODE_H +#define _SYS_DNODE_H + +#include + +/* + * Fixed constants. + */ +#define DNODE_SHIFT 9 /* 512 bytes */ +#define DN_MIN_INDBLKSHIFT 10 /* 1k */ +#define DN_MAX_INDBLKSHIFT 14 /* 16k */ +#define DNODE_BLOCK_SHIFT 14 /* 16k */ +#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */ +#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */ +#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */ + +/* + * Derived constants. + */ +#define DNODE_SIZE (1 << DNODE_SHIFT) +#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT) +#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT)) +#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT) + +#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT) +#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT) +#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT) + +#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ + (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) + +typedef struct dnode_phys { + grub_uint8_t dn_type; /* dmu_object_type_t */ + grub_uint8_t dn_indblkshift; /* ln2(indirect block size) */ + grub_uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */ + grub_uint8_t dn_nblkptr; /* length of dn_blkptr */ + grub_uint8_t dn_bonustype; /* type of data in bonus buffer */ + grub_uint8_t dn_checksum; /* ZIO_CHECKSUM type */ + grub_uint8_t dn_compress; /* ZIO_COMPRESS type */ + grub_uint8_t dn_flags; /* DNODE_FLAG_* */ + grub_uint16_t dn_datablkszsec; /* data block size in 512b sectors */ + grub_uint16_t dn_bonuslen; /* length of dn_bonus */ + grub_uint8_t dn_pad2[4]; + + /* accounting is protected by dn_dirty_mtx */ + grub_uint64_t dn_maxblkid; /* largest allocated block ID */ + grub_uint64_t dn_used; /* bytes (or sectors) of disk space */ + + grub_uint64_t dn_pad3[4]; + + blkptr_t dn_blkptr[1]; + grub_uint8_t dn_bonus[DN_MAX_BONUSLEN]; +} dnode_phys_t; + +#endif /* _SYS_DNODE_H */ diff --git a/include/grub/zfs/dsl_dataset.h b/include/grub/zfs/dsl_dataset.h new file mode 100644 index 0000000..de0dc54 --- /dev/null +++ b/include/grub/zfs/dsl_dataset.h @@ -0,0 +1,53 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DSL_DATASET_H +#define _SYS_DSL_DATASET_H + +typedef struct dsl_dataset_phys { + grub_uint64_t ds_dir_obj; + grub_uint64_t ds_prev_snap_obj; + grub_uint64_t ds_prev_snap_txg; + grub_uint64_t ds_next_snap_obj; + grub_uint64_t ds_snapnames_zapobj; /* zap obj of snaps; ==0 for snaps */ + grub_uint64_t ds_num_children; /* clone/snap children; ==0 for head */ + grub_uint64_t ds_creation_time; /* seconds since 1970 */ + grub_uint64_t ds_creation_txg; + grub_uint64_t ds_deadlist_obj; + grub_uint64_t ds_used_bytes; + grub_uint64_t ds_compressed_bytes; + grub_uint64_t ds_uncompressed_bytes; + grub_uint64_t ds_unique_bytes; /* only relevant to snapshots */ + /* + * The ds_fsid_guid is a 56-bit ID that can change to avoid + * collisions. The ds_guid is a 64-bit ID that will never + * change, so there is a small probability that it will collide. + */ + grub_uint64_t ds_fsid_guid; + grub_uint64_t ds_guid; + grub_uint64_t ds_flags; + blkptr_t ds_bp; + grub_uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */ +} dsl_dataset_phys_t; + +#endif /* _SYS_DSL_DATASET_H */ diff --git a/include/grub/zfs/dsl_dir.h b/include/grub/zfs/dsl_dir.h new file mode 100644 index 0000000..717f359 --- /dev/null +++ b/include/grub/zfs/dsl_dir.h @@ -0,0 +1,49 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DSL_DIR_H +#define _SYS_DSL_DIR_H + +typedef struct dsl_dir_phys { + grub_uint64_t dd_creation_time; /* not actually used */ + grub_uint64_t dd_head_dataset_obj; + grub_uint64_t dd_parent_obj; + grub_uint64_t dd_clone_parent_obj; + grub_uint64_t dd_child_dir_zapobj; + /* + * how much space our children are accounting for; for leaf + * datasets, == physical space used by fs + snaps + */ + grub_uint64_t dd_used_bytes; + grub_uint64_t dd_compressed_bytes; + grub_uint64_t dd_uncompressed_bytes; + /* Administrative quota setting */ + grub_uint64_t dd_quota; + /* Administrative reservation setting */ + grub_uint64_t dd_reserved; + grub_uint64_t dd_props_zapobj; + grub_uint64_t dd_deleg_zapobj; /* dataset permissions */ + grub_uint64_t dd_pad[20]; /* pad out to 256 bytes for good measure */ +} dsl_dir_phys_t; + +#endif /* _SYS_DSL_DIR_H */ diff --git a/include/grub/zfs/spa.h b/include/grub/zfs/spa.h new file mode 100644 index 0000000..54de8f0 --- /dev/null +++ b/include/grub/zfs/spa.h @@ -0,0 +1,283 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_SPA_H +#define _SYS_SPA_H + +/* + * General-purpose 32-bit and 64-bit bitfield encodings. + */ +#define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len)) +#define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len)) +#define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low)) +#define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low)) + +#define BF32_GET(x, low, len) BF32_DECODE(x, low, len) +#define BF64_GET(x, low, len) BF64_DECODE(x, low, len) + +#define BF32_SET(x, low, len, val) \ + ((x) ^= BF32_ENCODE((x >> low) ^ (val), low, len)) +#define BF64_SET(x, low, len, val) \ + ((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len)) + +#define BF32_GET_SB(x, low, len, shift, bias) \ + ((BF32_GET(x, low, len) + (bias)) << (shift)) +#define BF64_GET_SB(x, low, len, shift, bias) \ + ((BF64_GET(x, low, len) + (bias)) << (shift)) + +#define BF32_SET_SB(x, low, len, shift, bias, val) \ + BF32_SET(x, low, len, ((val) >> (shift)) - (bias)) +#define BF64_SET_SB(x, low, len, shift, bias, val) \ + BF64_SET(x, low, len, ((val) >> (shift)) - (bias)) + +/* + * We currently support nine block sizes, from 512 bytes to 128K. + * We could go higher, but the benefits are near-zero and the cost + * of COWing a giant block to modify one byte would become excessive. + */ +#define SPA_MINBLOCKSHIFT 9 +#define SPA_MAXBLOCKSHIFT 17 +#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) +#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT) + +#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1) + +/* + * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. + * The ASIZE encoding should be at least 64 times larger (6 more bits) + * to support up to 4-way RAID-Z mirror mode with worst-case gang block + * overhead, three DVAs per bp, plus one more bit in case we do anything + * else that expands the ASIZE. + */ +#define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */ +#define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */ +#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */ + +/* + * All SPA data is represented by 128-bit data virtual addresses (DVAs). + * The members of the dva_t should be considered opaque outside the SPA. + */ +typedef struct dva { + grub_uint64_t dva_word[2]; +} dva_t; + +/* + * Each block has a 256-bit checksum -- strong enough for cryptographic hashes. + */ +typedef struct zio_cksum { + grub_uint64_t zc_word[4]; +} zio_cksum_t; + +/* + * Each block is described by its DVAs, time of birth, checksum, etc. + * The word-by-word, bit-by-bit layout of the blkptr is as follows: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 0 | vdev1 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 1 |G| offset1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 2 | vdev2 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 3 |G| offset2 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 4 | vdev3 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 5 |G| offset3 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 7 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 8 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 9 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * a | birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * b | fill count | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * c | checksum[0] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * d | checksum[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * e | checksum[2] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * f | checksum[3] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Legend: + * + * vdev virtual device ID + * offset offset into virtual device + * LSIZE logical size + * PSIZE physical size (after compression) + * ASIZE allocated size (including RAID-Z parity and gang block headers) + * GRID RAID-Z layout information (reserved for future use) + * cksum checksum function + * comp compression function + * G gang block indicator + * E endianness + * type DMU object type + * lvl level of indirection + * birth txg transaction group in which the block was born + * fill count number of non-zero blocks under this bp + * checksum[4] 256-bit checksum of the data this bp describes + */ +typedef struct blkptr { + dva_t blk_dva[3]; /* 128-bit Data Virtual Address */ + grub_uint64_t blk_prop; /* size, compression, type, etc */ + grub_uint64_t blk_pad[3]; /* Extra space for the future */ + grub_uint64_t blk_birth; /* transaction group at birth */ + grub_uint64_t blk_fill; /* fill count */ + zio_cksum_t blk_cksum; /* 256-bit checksum */ +} blkptr_t; + +#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ +#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ + +/* + * Macros to get and set fields in a bp or DVA. + */ +#define DVA_GET_ASIZE(dva) \ + BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0) +#define DVA_SET_ASIZE(dva, x) \ + BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x) + +#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8) +#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x) + +#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32) +#define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x) + +#define DVA_GET_OFFSET(dva) \ + BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0) +#define DVA_SET_OFFSET(dva, x) \ + BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x) + +#define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1) +#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) + +#define BP_GET_LSIZE(bp) \ + (BP_IS_HOLE(bp) ? 0 : \ + BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)) +#define BP_SET_LSIZE(bp, x) \ + BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) + +#define BP_GET_PSIZE(bp) \ + BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1) +#define BP_SET_PSIZE(bp, x) \ + BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) + +#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) +#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) + +#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) +#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) + +#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) +#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) + +#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) +#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) + +#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1)) +#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) + +#define BP_GET_ASIZE(bp) \ + (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ + DVA_GET_ASIZE(&(bp)->blk_dva[2])) + +#define BP_GET_UCSIZE(bp) \ + ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \ + BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)); + +#define BP_GET_NDVAS(bp) \ + (!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ + !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ + !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) + +#define BP_COUNT_GANG(bp) \ + (DVA_GET_GANG(&(bp)->blk_dva[0]) + \ + DVA_GET_GANG(&(bp)->blk_dva[1]) + \ + DVA_GET_GANG(&(bp)->blk_dva[2])) + +#define DVA_EQUAL(dva1, dva2) \ + ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ + (dva1)->dva_word[0] == (dva2)->dva_word[0]) + +#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \ + (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ + ((zc1).zc_word[1] - (zc2).zc_word[1]) | \ + ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ + ((zc1).zc_word[3] - (zc2).zc_word[3]))) + + +#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0) + +#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \ +{ \ + (zcp)->zc_word[0] = w0; \ + (zcp)->zc_word[1] = w1; \ + (zcp)->zc_word[2] = w2; \ + (zcp)->zc_word[3] = w3; \ +} + +#define BP_IDENTITY(bp) (&(bp)->blk_dva[0]) +#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp)) +#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0) +#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg)) + +#define BP_ZERO(bp) \ +{ \ + (bp)->blk_dva[0].dva_word[0] = 0; \ + (bp)->blk_dva[0].dva_word[1] = 0; \ + (bp)->blk_dva[1].dva_word[0] = 0; \ + (bp)->blk_dva[1].dva_word[1] = 0; \ + (bp)->blk_dva[2].dva_word[0] = 0; \ + (bp)->blk_dva[2].dva_word[1] = 0; \ + (bp)->blk_prop = 0; \ + (bp)->blk_pad[0] = 0; \ + (bp)->blk_pad[1] = 0; \ + (bp)->blk_pad[2] = 0; \ + (bp)->blk_birth = 0; \ + (bp)->blk_fill = 0; \ + ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ +} + +/* + * Note: the byteorder is either 0 or -1, both of which are palindromes. + * This simplifies the endianness handling a bit. + */ +#ifdef _BIG_ENDIAN +#define ZFS_HOST_BYTEORDER (0ULL) +#else +#define ZFS_HOST_BYTEORDER (-1ULL) +#endif + +#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER) + +#define BP_SPRINTF_LEN 320 + +#endif /* _SYS_SPA_H */ diff --git a/include/grub/zfs/uberblock_impl.h b/include/grub/zfs/uberblock_impl.h new file mode 100644 index 0000000..3bed5c0 --- /dev/null +++ b/include/grub/zfs/uberblock_impl.h @@ -0,0 +1,66 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_UBERBLOCK_IMPL_H +#define _SYS_UBERBLOCK_IMPL_H + +/* + * The uberblock version is incremented whenever an incompatible on-disk + * format change is made to the SPA, DMU, or ZAP. + * + * Note: the first two fields should never be moved. When a storage pool + * is opened, the uberblock must be read off the disk before the version + * can be checked. If the ub_version field is moved, we may not detect + * version mismatch. If the ub_magic field is moved, applications that + * expect the magic number in the first word won't work. + */ +#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */ +#define UBERBLOCK_SHIFT 10 /* up to 1K */ + +typedef struct uberblock { + grub_uint64_t ub_magic; /* UBERBLOCK_MAGIC */ + grub_uint64_t ub_version; /* ZFS_VERSION */ + grub_uint64_t ub_txg; /* txg of last sync */ + grub_uint64_t ub_guid_sum; /* sum of all vdev guids */ + grub_uint64_t ub_timestamp; /* UTC time of last sync */ + blkptr_t ub_rootbp; /* MOS objset_phys_t */ +} uberblock_t; + +#define UBERBLOCK_SIZE (1ULL << UBERBLOCK_SHIFT) +#define VDEV_UBERBLOCK_SHIFT UBERBLOCK_SHIFT + +#undef offsetof +#define offsetof(t, m) ((int)&(((t *)0)->m)) +#define VDEV_UBERBLOCK_OFFSET(n) \ +offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT]) + +/* XXX Uberblock_phys_t is no longer in the kernel zfs */ +typedef struct uberblock_phys { + uberblock_t ubp_uberblock; + char ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) - + sizeof (zio_block_tail_t)]; + zio_block_tail_t ubp_zbt; +} uberblock_phys_t; + + +#endif /* _SYS_UBERBLOCK_IMPL_H */ diff --git a/include/grub/zfs/vdev_impl.h b/include/grub/zfs/vdev_impl.h new file mode 100644 index 0000000..d9290d8 --- /dev/null +++ b/include/grub/zfs/vdev_impl.h @@ -0,0 +1,70 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_VDEV_IMPL_H +#define _SYS_VDEV_IMPL_H + +#define VDEV_SKIP_SIZE (8 << 10) +#define VDEV_BOOT_HEADER_SIZE (8 << 10) +#define VDEV_PHYS_SIZE (112 << 10) +#define VDEV_UBERBLOCK_RING (128 << 10) + +/* ZFS boot block */ +#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL +#define VDEV_BOOT_VERSION 1 /* version number */ + +typedef struct vdev_boot_header { + grub_uint64_t vb_magic; /* VDEV_BOOT_MAGIC */ + grub_uint64_t vb_version; /* VDEV_BOOT_VERSION */ + grub_uint64_t vb_offset; /* start offset (bytes) */ + grub_uint64_t vb_size; /* size (bytes) */ + char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (grub_uint64_t)]; +} vdev_boot_header_t; + +typedef struct vdev_phys { + char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)]; + zio_block_tail_t vp_zbt; +} vdev_phys_t; + +typedef struct vdev_label { + char vl_pad[VDEV_SKIP_SIZE]; /* 8K */ + vdev_boot_header_t vl_boot_header; /* 8K */ + vdev_phys_t vl_vdev_phys; /* 112K */ + char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ +} vdev_label_t; /* 256K total */ + +/* + * Size and offset of embedded boot loader region on each label. + * The total size of the first two labels plus the boot area is 4MB. + */ +#define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t)) +#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */ + +/* + * Size of label regions at the start and end of each leaf device. + */ +#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE) +#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t)) +#define VDEV_LABELS 4 + +#endif /* _SYS_VDEV_IMPL_H */ diff --git a/include/grub/zfs/zap_impl.h b/include/grub/zfs/zap_impl.h new file mode 100644 index 0000000..2ad2933 --- /dev/null +++ b/include/grub/zfs/zap_impl.h @@ -0,0 +1,110 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZAP_IMPL_H +#define _SYS_ZAP_IMPL_H + +#define ZAP_MAGIC 0x2F52AB2ABULL + +#define ZAP_HASHBITS 28 +#define MZAP_ENT_LEN 64 +#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2) +#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT +#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT) + +typedef struct mzap_ent_phys { + grub_uint64_t mze_value; + grub_uint32_t mze_cd; + grub_uint16_t mze_pad; /* in case we want to chain them someday */ + char mze_name[MZAP_NAME_LEN]; +} mzap_ent_phys_t; + +typedef struct mzap_phys { + grub_uint64_t mz_block_type; /* ZBT_MICRO */ + grub_uint64_t mz_salt; + grub_uint64_t mz_pad[6]; + mzap_ent_phys_t mz_chunk[1]; + /* actually variable size depending on block size */ +} mzap_phys_t; + +/* + * The (fat) zap is stored in one object. It is an array of + * 1<= 6] [zap_leaf_t] [ptrtbl] ... + * + */ + +#define ZBT_LEAF ((1ULL << 63) + 0) +#define ZBT_HEADER ((1ULL << 63) + 1) +#define ZBT_MICRO ((1ULL << 63) + 3) +/* any other values are ptrtbl blocks */ + +/* + * the embedded pointer table takes up half a block: + * block size / entry size (2^3) / 2 + */ +#define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1) + +/* + * The embedded pointer table starts half-way through the block. Since + * the pointer table itself is half the block, it starts at (64-bit) + * word number (1<zap_f.zap_phys) \ + [(idx) + (1<> 8)) +#define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16)) +#define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) +#define P2ROUNDUP(x, align) (-(-(x) & -(align))) + +#endif /* _SYS_FS_ZFS_H */ diff --git a/include/grub/zfs/zfs_acl.h b/include/grub/zfs/zfs_acl.h new file mode 100644 index 0000000..65d3fda --- /dev/null +++ b/include/grub/zfs/zfs_acl.h @@ -0,0 +1,60 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_FS_ZFS_ACL_H +#define _SYS_FS_ZFS_ACL_H + +#ifndef _UID_T +#define _UID_T +typedef unsigned int uid_t; /* UID type */ +#endif /* _UID_T */ + +typedef struct zfs_oldace { + grub_uint32_t z_fuid; /* "who" */ + grub_uint32_t z_access_mask; /* access mask */ + grub_uint16_t z_flags; /* flags, i.e inheritance */ + grub_uint16_t z_type; /* type of entry allow/deny */ +} zfs_oldace_t; + +#define ACE_SLOT_CNT 6 + +typedef struct zfs_znode_acl_v0 { + grub_uint64_t z_acl_extern_obj; /* ext acl pieces */ + grub_uint32_t z_acl_count; /* Number of ACEs */ + grub_uint16_t z_acl_version; /* acl version */ + grub_uint16_t z_acl_pad; /* pad */ + zfs_oldace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */ +} zfs_znode_acl_v0_t; + +#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT) + +typedef struct zfs_znode_acl { + grub_uint64_t z_acl_extern_obj; /* ext acl pieces */ + grub_uint32_t z_acl_size; /* Number of bytes in ACL */ + grub_uint16_t z_acl_version; /* acl version */ + grub_uint16_t z_acl_count; /* ace count */ + grub_uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */ +} zfs_znode_acl_t; + + +#endif /* _SYS_FS_ZFS_ACL_H */ diff --git a/include/grub/zfs/zfs_znode.h b/include/grub/zfs/zfs_znode.h new file mode 100644 index 0000000..2512ccd --- /dev/null +++ b/include/grub/zfs/zfs_znode.h @@ -0,0 +1,70 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_FS_ZFS_ZNODE_H +#define _SYS_FS_ZFS_ZNODE_H + +#include + +#define MASTER_NODE_OBJ 1 +#define ZFS_ROOT_OBJ "ROOT" +#define ZPL_VERSION_STR "VERSION" + +#define ZPL_VERSION 3ULL + +#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) + +/* + * This is the persistent portion of the znode. It is stored + * in the "bonus buffer" of the file. Short symbolic links + * are also stored in the bonus buffer. + */ +typedef struct znode_phys { + grub_uint64_t zp_atime[2]; /* 0 - last file access time */ + grub_uint64_t zp_mtime[2]; /* 16 - last file modification time */ + grub_uint64_t zp_ctime[2]; /* 32 - last file change time */ + grub_uint64_t zp_crtime[2]; /* 48 - creation time */ + grub_uint64_t zp_gen; /* 64 - generation (txg of creation) */ + grub_uint64_t zp_mode; /* 72 - file mode bits */ + grub_uint64_t zp_size; /* 80 - size of file */ + grub_uint64_t zp_parent; /* 88 - directory parent (`..') */ + grub_uint64_t zp_links; /* 96 - number of links to file */ + grub_uint64_t zp_xattr; /* 104 - DMU object for xattrs */ + grub_uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */ + grub_uint64_t zp_flags; /* 120 - persistent flags */ + grub_uint64_t zp_uid; /* 128 - file owner */ + grub_uint64_t zp_gid; /* 136 - owning group */ + grub_uint64_t zp_pad[4]; /* 144 - future */ + zfs_znode_acl_t zp_acl; /* 176 - 263 ACL */ + /* + * Data may pad out any remaining bytes in the znode buffer, eg: + * + * |<---------------------- dnode_phys (512) ------------------------>| + * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->| + * |<---- znode (264) ---->|<---- data (56) ---->| + * + * At present, we only use this space to store symbolic links. + */ +} znode_phys_t; + +#endif /* _SYS_FS_ZFS_ZNODE_H */ diff --git a/include/grub/zfs/zil.h b/include/grub/zfs/zil.h new file mode 100644 index 0000000..2d927de --- /dev/null +++ b/include/grub/zfs/zil.h @@ -0,0 +1,51 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZIL_H +#define _SYS_ZIL_H + +/* + * Intent log format: + * + * Each objset has its own intent log. The log header (zil_header_t) + * for objset N's intent log is kept in the Nth object of the SPA's + * intent_log objset. The log header points to a chain of log blocks, + * each of which contains log records (i.e., transactions) followed by + * a log block trailer (zil_trailer_t). The format of a log record + * depends on the record (or transaction) type, but all records begin + * with a common structure that defines the type, length, and txg. + */ + +/* + * Intent log header - this on disk structure holds fields to manage + * the log. All fields are 64 bit to easily handle cross architectures. + */ +typedef struct zil_header { + grub_uint64_t zh_claim_txg; /* txg in which log blocks were claimed */ + grub_uint64_t zh_replay_seq; /* highest replayed sequence number */ + blkptr_t zh_log; /* log chain */ + grub_uint64_t zh_claim_seq; /* highest claimed sequence number */ + grub_uint64_t zh_pad[5]; +} zil_header_t; + +#endif /* _SYS_ZIL_H */ diff --git a/include/grub/zfs/zio.h b/include/grub/zfs/zio.h new file mode 100644 index 0000000..a7d663a --- /dev/null +++ b/include/grub/zfs/zio.h @@ -0,0 +1,83 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZIO_H +#define _ZIO_H + +#include + +#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */ + +typedef struct zio_block_tail { + grub_uint64_t zbt_magic; /* for validation, endianness */ + zio_cksum_t zbt_cksum; /* 256-bit checksum */ +} zio_block_tail_t; + +/* + * Gang block headers are self-checksumming and contain an array + * of block pointers. + */ +#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE +#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ + sizeof (zio_block_tail_t)) / sizeof (blkptr_t)) +#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ + sizeof (zio_block_tail_t) - \ + (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ + sizeof (grub_uint64_t)) + +#define ZIO_GET_IOSIZE(zio) \ + (BP_IS_GANG((zio)->io_bp) ? \ + SPA_GANGBLOCKSIZE : BP_GET_PSIZE((zio)->io_bp)) + +typedef struct zio_gbh { + blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; + grub_uint64_t zg_filler[SPA_GBH_FILLER]; + zio_block_tail_t zg_tail; +} zio_gbh_phys_t; + +enum zio_checksum { + ZIO_CHECKSUM_INHERIT = 0, + ZIO_CHECKSUM_ON, + ZIO_CHECKSUM_OFF, + ZIO_CHECKSUM_LABEL, + ZIO_CHECKSUM_GANG_HEADER, + ZIO_CHECKSUM_ZILOG, + ZIO_CHECKSUM_FLETCHER_2, + ZIO_CHECKSUM_FLETCHER_4, + ZIO_CHECKSUM_SHA256, + ZIO_CHECKSUM_FUNCTIONS +}; + +#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2 +#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON + +enum zio_compress { + ZIO_COMPRESS_INHERIT = 0, + ZIO_COMPRESS_ON, + ZIO_COMPRESS_OFF, + ZIO_COMPRESS_LZJB, + ZIO_COMPRESS_EMPTY, + ZIO_COMPRESS_FUNCTIONS +}; + +#endif /* _ZIO_H */ diff --git a/include/grub/zfs/zio_checksum.h b/include/grub/zfs/zio_checksum.h new file mode 100644 index 0000000..144e4a3 --- /dev/null +++ b/include/grub/zfs/zio_checksum.h @@ -0,0 +1,43 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZIO_CHECKSUM_H +#define _SYS_ZIO_CHECKSUM_H + +/* + * Signature for checksum functions. + */ +typedef void zio_checksum_t(const void *data, grub_uint64_t size, + zio_cksum_t *zcp); + +/* + * Information about each checksum function. + */ +typedef struct zio_checksum_info { + zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */ + int ci_correctable; /* number of correctable bits */ + int ci_zbt; /* uses zio block tail? */ + char *ci_name; /* descriptive name */ +} zio_checksum_info_t; + +#endif /* _SYS_ZIO_CHECKSUM_H */