grub-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 4/5] zfs com.delphix:embedded_data feature support


From: Toomas Soome
Subject: Re: [PATCH 4/5] zfs com.delphix:embedded_data feature support
Date: Sun, 19 Apr 2015 20:43:01 +0300

> On 19.04.2015, at 18:37, Andrei Borzenkov <address@hidden> wrote:
> 
> В Thu, 16 Apr 2015 08:23:22 +0300
> Toomas Soome <address@hidden> пишет:
> 
>> 
>> ---
>> grub-core/fs/zfs/zfs.c |   84 
>> ++++++++++++++++++++++++++++++++++++++++--------
>> include/grub/zfs/spa.h |   27 +++++++++++++---
>> 2 files changed, 93 insertions(+), 18 deletions(-)
>> 
>> diff --git a/grub-core/fs/zfs/zfs.c b/grub-core/fs/zfs/zfs.c
>> index a731c3d..da44131 100644
>> --- a/grub-core/fs/zfs/zfs.c
>> +++ b/grub-core/fs/zfs/zfs.c
>> @@ -282,6 +282,7 @@ grub_crypto_cipher_handle_t (*grub_zfs_load_key) (const 
>> struct grub_zfs_key *key
>> static const char *spa_feature_names[] = {
>>   "org.illumos:lz4_compress",
>>   "com.delphix:hole_birth",
>> +  "com.delphix:embedded_data",
>>   NULL
>> };
>> 
>> @@ -1803,6 +1804,39 @@ zio_read_data (blkptr_t * bp, grub_zfs_endian_t 
>> endian, void *buf,
>> }
>> 
>> /*
>> + * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be
>> + * more than BPE_PAYLOAD_SIZE bytes).
>> + */
>> +static grub_err_t
>> +decode_embedded_bp_compressed(const blkptr_t *bp, void *buf)
>> +{
>> +  grub_size_t psize, i;
>> +  grub_uint8_t *buf8 = buf;
>> +  grub_uint64_t w = 0;
>> +  const grub_uint64_t *bp64 = (const grub_uint64_t *)bp;
>> +
>> +  psize = BPE_GET_PSIZE(bp);
>> +
> 
> This needs check that it is not more than BPE_PAYLOAD_SIZE bytes.


in theory yes, in practice the BP is protected by checksum which is verified 
when BP was read, the values should be valid there.


> 
>> +  /*
>> +   * Decode the words of the block pointer into the byte array.
>> +   * Low bits of first word are the first byte (little endian).
>> +   */
>> +  for (i = 0; i < psize; i++)
>> +    {
>> +      if (i % sizeof (w) == 0)
>> +       {
>> +         /* beginning of a word */
>> +         w = *bp64;
>> +         bp64++;
>> +         if (!BPE_IS_PAYLOADWORD(bp, bp64))
>> +         bp64++;
>> +       }
>> +      buf8[i] = BF64_GET(w, (i % sizeof (w)) * 8, 8);
>> +    }
>> +  return GRUB_ERR_NONE;
>> +}
>> +
>> +/*
>>  * Read in a block of data, verify its checksum, decompress if needed,
>>  * and put the uncompressed data in buf.
>>  */
>> @@ -1820,12 +1854,26 @@ zio_read (blkptr_t *bp, grub_zfs_endian_t endian, 
>> void **buf,
>>   *buf = NULL;
>> 
>>   checksum = (grub_zfs_to_cpu64((bp)->blk_prop, endian) >> 40) & 0xff;
>> -  comp = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>32) & 0xff;
>> +  comp = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>32) & 0x7f;
>>   encrypted = ((grub_zfs_to_cpu64((bp)->blk_prop, endian) >> 60) & 3);
>> -  lsize = (BP_IS_HOLE(bp) ? 0 :
>> -       (((grub_zfs_to_cpu64 ((bp)->blk_prop, endian) & 0xffff) + 1)
>> -        << SPA_MINBLOCKSHIFT));
>> -  psize = get_psize (bp, endian);
>> +  if (BP_IS_EMBEDDED(bp))
>> +    {
>> +      if (BPE_GET_ETYPE(bp) != BP_EMBEDDED_TYPE_DATA)
>> +    return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET,
>> +                       "unsupported embedded BP (type=%u)\n",
>> +                       BPE_GET_ETYPE(bp));
>> +      lsize = BPE_GET_LSIZE(bp);
>> +      psize = BF64_GET_SB(grub_zfs_to_cpu64 ((bp)->blk_prop, endian), 25, 
>> 7, 0, 1);
>> +    }
>> +  else
>> +    {
>> +      lsize = (BP_IS_HOLE(bp) ? 0 :
>> +           (((grub_zfs_to_cpu64 ((bp)->blk_prop, endian) & 0xffff) + 1)
>> +            << SPA_MINBLOCKSHIFT));
>> +      psize = get_psize (bp, endian);
>> +    }
>> +  grub_dprintf("zfs", "zio_read: E %d: size %" PRIdGRUB_SSIZE "/%"
>> +           PRIdGRUB_SSIZE "\n", (int)BP_IS_EMBEDDED(bp), lsize, psize);
>> 
>>   if (size)
>>     *size = lsize;
>> @@ -1849,23 +1897,31 @@ zio_read (blkptr_t *bp, grub_zfs_endian_t endian, 
>> void **buf,
>>     compbuf = *buf = grub_malloc (lsize);
>> 
> 
> I'll commit NULL check

ok.

> 
>>   grub_dprintf ("zfs", "endian = %d\n", endian);
>> -  err = zio_read_data (bp, endian, compbuf, data);
>> +  if (BP_IS_EMBEDDED(bp))
>> +    err = decode_embedded_bp_compressed(bp, compbuf);
>> +  else
>> +    {
>> +      err = zio_read_data (bp, endian, compbuf, data);
>> +      grub_memset (compbuf, 0, ALIGN_UP (psize, 16) - psize);
>> +    }
> 
> Something is fishy around this place (it is not about your patch but
> existing code as well). It allocates combuf but never checks for error,
> it allocates lsize but reads psize and never really verifies that
> lsize is < than psize .
> 

again, the lsize can not be smaller than psize, as lsize is uncompressed and 
psize is compressed and the values came up from disk verified. well, of course 
it does not hurt to have such checks in code anyhow.

> What do you say about attached patch? Is there any reason it should be
> complicated by allocating different sizes?
> 
> It also sounds like grub_memset should really be
> 
> grub_memset (compbuf + psize, 0, ALIGN_UP (psize, 16) - psize);
> 
> but I'm not sure here.

tbh, i was thinking, it should use grub_zalloc instead, only concern was that 
it may slow things down too much. 


well, if the compression is off, then lsize == psize, so in that sense the 
overhead is possible alignment.  im even not sure where is the ALIGN_UP to 16 
coming from; the only alignment what can happen is to the 1<<vdev_ashift in 
case of raidz (to split block between children dev on write and to collect the 
pieces on read). but again, it shouldn’t harm to have few extra bytes:D even 
zeroing out shouldnt matter there, as checksum is located at the end of the 
psize size memory area - so you get memory block with data+zc and the block 
size is psize. 

the extra patches should be tested with different pool types tho.

rgds,
toomas

> 
>>   if (err)
>>     {
>>       grub_free (compbuf);
>>       *buf = NULL;
>>       return err;
>>     }
>> -  grub_memset (compbuf, 0, ALIGN_UP (psize, 16) - psize);
>> 
>> -  err = zio_checksum_verify (zc, checksum, endian,
>> -                         compbuf, psize);
>> -  if (err)
>> +  if (!BP_IS_EMBEDDED(bp))
>>     {
>> -      grub_dprintf ("zfs", "incorrect checksum\n");
>> -      grub_free (compbuf);
>> -      *buf = NULL;
>> -      return err;
>> +      err = zio_checksum_verify (zc, checksum, endian,
>> +                             compbuf, psize);
>> +      if (err)
>> +        {
>> +          grub_dprintf ("zfs", "incorrect checksum\n");
>> +          grub_free (compbuf);
>> +          *buf = NULL;
>> +          return err;
>> +        }
>>     }
>> 
>>   if (encrypted)
>> diff --git a/include/grub/zfs/spa.h b/include/grub/zfs/spa.h
>> index df43b6b..5d89250 100644
>> --- a/include/grub/zfs/spa.h
>> +++ b/include/grub/zfs/spa.h
>> @@ -126,7 +126,7 @@ typedef struct zio_cksum {
>>  *   +-------+-------+-------+-------+-------+-------+-------+-------+
>>  * 5 |G|                      offset3                                |
>>  *   +-------+-------+-------+-------+-------+-------+-------+-------+
>> - * 6        |BDX|lvl| type  | cksum | comp  |     PSIZE     |     LSIZE     
>> |
>> + * 6        |BDX|lvl| type  | cksum |E| comp|    PSIZE      |     LSIZE     
>> |
>>  *   +-------+-------+-------+-------+-------+-------+-------+-------+
>>  * 7 |                       padding                                 |
>>  *   +-------+-------+-------+-------+-------+-------+-------+-------+
>> @@ -160,7 +160,8 @@ typedef struct zio_cksum {
>>  * G         gang block indicator
>>  * B         byteorder (endianness)
>>  * D         dedup
>> - * X                unused
>> + * X                encryption
>> + * E                blkptr_t contains embedded data
>>  * lvl               level of indirection
>>  * type              DMU object type
>>  * phys birth        txg of block allocation; zero if same as logical birth 
>> txg
>> @@ -203,8 +204,8 @@ typedef struct blkptr {
>> #define      BP_SET_LSIZE(bp, x)     \
>>      BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
>> 
>> -#define     BP_GET_COMPRESS(bp)             BF64_GET((bp)->blk_prop, 32, 8)
>> -#define     BP_SET_COMPRESS(bp, x)          BF64_SET((bp)->blk_prop, 32, 8, 
>> x)
>> +#define     BP_GET_COMPRESS(bp)             BF64_GET((bp)->blk_prop, 32, 7)
>> +#define     BP_SET_COMPRESS(bp, x)          BF64_SET((bp)->blk_prop, 32, 7, 
>> x)
>> 
>> #define      BP_GET_CHECKSUM(bp)             BF64_GET((bp)->blk_prop, 40, 8)
>> #define      BP_SET_CHECKSUM(bp, x)          BF64_SET((bp)->blk_prop, 40, 8, 
>> x)
>> @@ -215,6 +216,8 @@ typedef struct blkptr {
>> #define      BP_GET_LEVEL(bp)                BF64_GET((bp)->blk_prop, 56, 5)
>> #define      BP_SET_LEVEL(bp, x)             BF64_SET((bp)->blk_prop, 56, 5, 
>> x)
>> 
>> +#define     BP_IS_EMBEDDED(bp)              BF64_GET((bp)->blk_prop, 39, 1)
>> +
>> #define      BP_GET_PROP_BIT_61(bp)          BF64_GET((bp)->blk_prop, 61, 1)
>> #define      BP_SET_PROP_BIT_61(bp, x)       BF64_SET((bp)->blk_prop, 61, 1, 
>> x)
>> 
>> @@ -277,6 +280,22 @@ typedef struct blkptr {
>>      (zcp)->zc_word[3] = w3;                 \
>> }
>> 
>> +#define     BPE_GET_ETYPE(bp)       BP_GET_CHECKSUM(bp)
>> +#define     BPE_GET_LSIZE(bp)       \
>> +    BF64_GET_SB((bp)->blk_prop, 0, 25, 0, 1)
>> +#define     BPE_GET_PSIZE(bp)       \
>> +    BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1)
>> +
>> +typedef enum bp_embedded_type {
>> +  BP_EMBEDDED_TYPE_DATA,
>> +  NUM_BP_EMBEDDED_TYPES
>> +} bp_embedded_type_t;
>> +
>> +#define     BPE_NUM_WORDS   14
>> +#define     BPE_PAYLOAD_SIZE        (BPE_NUM_WORDS * sizeof(grub_uint64_t))
>> +#define     BPE_IS_PAYLOADWORD(bp, wp)      \
>> +    ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth)
>> +
>> #define      BP_IDENTITY(bp)         (&(bp)->blk_dva[0])
>> #define      BP_IS_GANG(bp)          DVA_GET_GANG(BP_IDENTITY(bp))
>> #define      DVA_IS_EMPTY(dva)       ((dva)->dva_word[0] == 0ULL && \
> 
> <zio_read_lsize_vs_psize.patch>




reply via email to

[Prev in Thread] Current Thread [Next in Thread]