qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters


From: Vladimir Sementsov-Ogievskiy
Subject: Re: [Qemu-devel] [PATCH v6 25/42] mirror: Deal with filters
Date: Tue, 3 Sep 2019 08:32:50 +0000

02.09.2019 17:35, Max Reitz wrote:
> On 31.08.19 11:57, Vladimir Sementsov-Ogievskiy wrote:
>> 09.08.2019 19:13, Max Reitz wrote:
>>> This includes some permission limiting (for example, we only need to
>>> take the RESIZE permission for active commits where the base is smaller
>>> than the top).
>>>
>>> Signed-off-by: Max Reitz <address@hidden>
>>> ---
>>>    block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++-----------
>>>    blockdev.c     |  47 +++++++++++++++++---
>>>    2 files changed, 131 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/block/mirror.c b/block/mirror.c
>>> index 54bafdf176..6ddbfb9708 100644
>>> --- a/block/mirror.c
>>> +++ b/block/mirror.c
>>> @@ -42,6 +42,7 @@ typedef struct MirrorBlockJob {
>>>        BlockBackend *target;
>>>        BlockDriverState *mirror_top_bs;
>>>        BlockDriverState *base;
>>> +    BlockDriverState *base_overlay;
>>>    
>>>        /* The name of the graph node to replace */
>>>        char *replaces;
>>> @@ -665,8 +666,10 @@ static int mirror_exit_common(Job *job)
>>>                                 &error_abort);
>>>        if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
>>>            BlockDriverState *backing = s->is_none_mode ? src : s->base;
>>> -        if (backing_bs(target_bs) != backing) {
>>> -            bdrv_set_backing_hd(target_bs, backing, &local_err);
>>> +        BlockDriverState *unfiltered_target = 
>>> bdrv_skip_rw_filters(target_bs);
>>> +
>>> +        if (bdrv_filtered_cow_bs(unfiltered_target) != backing) {
>>> +            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
>>>                if (local_err) {
>>>                    error_report_err(local_err);
>>>                    ret = -EPERM;
>>> @@ -715,7 +718,7 @@ static int mirror_exit_common(Job *job)
>>>         * valid.
>>>         */
>>>        block_job_remove_all_bdrv(bjob);
>>> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), 
>>> &error_abort);
>>> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, 
>>> &error_abort);
>>>    
>>>        /* We just changed the BDS the job BB refers to (with either or both 
>>> of the
>>>         * bdrv_replace_node() calls), so switch the BB back so the cleanup 
>>> does
>>> @@ -812,7 +815,8 @@ static int coroutine_fn 
>>> mirror_dirty_init(MirrorBlockJob *s)
>>>                return 0;
>>>            }
>>>    
>>> -        ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, 
>>> &count);
>>> +        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, 
>>> bytes,
>>> +                                      &count);
>>>            if (ret < 0) {
>>>                return ret;
>>>            }
>>> @@ -908,7 +912,7 @@ static int coroutine_fn mirror_run(Job *job, Error 
>>> **errp)
>>>        } else {
>>>            s->target_cluster_size = BDRV_SECTOR_SIZE;
>>>        }
>>> -    if (backing_filename[0] && !target_bs->backing &&
>>> +    if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
>>>            s->granularity < s->target_cluster_size) {
>>>            s->buf_size = MAX(s->buf_size, s->target_cluster_size);
>>>            s->cow_bitmap = bitmap_new(length);
>>> @@ -1088,8 +1092,9 @@ static void mirror_complete(Job *job, Error **errp)
>>>        if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
>>>            int ret;
>>>    
>>> -        assert(!target->backing);
>>> -        ret = bdrv_open_backing_file(target, NULL, "backing", errp);
>>> +        assert(!bdrv_backing_chain_next(target));
>>
>> Preexisting, but seems we may crash here, I don't see where it is checked 
>> before, to
>> return error if there is some backing. And even if we do so, we don't 
>> prevent appearing
>> of target backing during mirror operation.
> 
> The idea is that MIRROR_OPEN_BACKING_CHAIN is set only when using
> drive-mirror with mode=existing.  In this case, we also set
> BDRV_O_NO_BACKING for the target.
> 
> You’re right that a user could add a backing chain to the target during
> the operation.  They really have to make an effort to shoot themselves
> in the foot for this because the target must have an auto-generated node
> name.
> 
> I suppose the best would be not to open the backing chain if the target
> node already has a backing child?

Hmm, but we still should generate an error, as we can't do what was requested.

> 
>>> +        ret = bdrv_open_backing_file(bdrv_skip_rw_filters(target), NULL,
>>> +                                     "backing", errp);
>>>            if (ret < 0) {
>>>                return;
>>>            }
>>> @@ -1531,8 +1536,8 @@ static BlockJob *mirror_start_job(
>>>        MirrorBlockJob *s;
>>>        MirrorBDSOpaque *bs_opaque;
>>>        BlockDriverState *mirror_top_bs;
>>> -    bool target_graph_mod;
>>>        bool target_is_backing;
>>> +    uint64_t target_perms, target_shared_perms;
>>>        Error *local_err = NULL;
>>>        int ret;
>>>    
>>> @@ -1551,7 +1556,7 @@ static BlockJob *mirror_start_job(
>>>            buf_size = DEFAULT_MIRROR_BUF_SIZE;
>>>        }
>>>    
>>> -    if (bs == target) {
>>> +    if (bdrv_skip_rw_filters(bs) == bdrv_skip_rw_filters(target)) {
>>>            error_setg(errp, "Can't mirror node into itself");
>>>            return NULL;
>>>        }
>>> @@ -1615,15 +1620,50 @@ static BlockJob *mirror_start_job(
>>>         * In the case of active commit, things look a bit different, though,
>>>         * because the target is an already populated backing file in active 
>>> use.
>>>         * We can allow anything except resize there.*/
>>> +
>>> +    target_perms = BLK_PERM_WRITE;
>>> +    target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
>>> +
>>>        target_is_backing = bdrv_chain_contains(bs, target);
>>> -    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
>>> +    if (target_is_backing) {
>>> +        int64_t bs_size, target_size;
>>
>> <empty after definitions>
> 
> Is that part of any of our guidelines? :-)
> 
> Sure, will add.

Not sure. Someone asked me about it on list in past and I'm used to.

> 
>>> +        bs_size = bdrv_getlength(bs);
>>> +        if (bs_size < 0) {
>>> +            error_setg_errno(errp, -bs_size,
>>> +                             "Could not inquire top image size");
>>> +            goto fail;
>>> +        }
>>> +
>>> +        target_size = bdrv_getlength(target);
>>> +        if (target_size < 0) {
>>> +            error_setg_errno(errp, -target_size,
>>> +                             "Could not inquire base image size");
>>> +            goto fail;
>>> +        }
>>> +
>>> +        if (target_size < bs_size) {
>>> +            target_perms |= BLK_PERM_RESIZE;
>>> +        }
>>> +
>>> +        target_shared_perms |= BLK_PERM_CONSISTENT_READ
>>> +                            |  BLK_PERM_WRITE
>>> +                            |  BLK_PERM_GRAPH_MOD;
>>> +    } else if (bdrv_chain_contains(bs, bdrv_skip_rw_filters(target))) {
>>> +        /*
>>> +         * We may want to allow this in the future, but it would
>>> +         * require taking some extra care.
>>> +         */
>>> +        error_setg(errp, "Cannot mirror to a filter on top of a node in 
>>> the "
>>> +                   "source's backing chain");
>>> +        goto fail;
>>> +    }
>>> +
>>> +    if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
>>> +        target_perms |= BLK_PERM_GRAPH_MOD;
>>> +    }
>>> +
>>>        s->target = blk_new(s->common.job.aio_context,
>>> -                        BLK_PERM_WRITE | BLK_PERM_RESIZE |
>>> -                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
>>> -                        BLK_PERM_WRITE_UNCHANGED |
>>> -                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
>>> -                                             BLK_PERM_WRITE |
>>> -                                             BLK_PERM_GRAPH_MOD : 0));
>>> +                        target_perms, target_shared_perms);
>>>        ret = blk_insert_bs(s->target, target, errp);
>>>        if (ret < 0) {
>>>            goto fail;
>>> @@ -1647,6 +1687,7 @@ static BlockJob *mirror_start_job(
>>>        s->backing_mode = backing_mode;
>>>        s->copy_mode = copy_mode;
>>>        s->base = base;
>>> +    s->base_overlay = bdrv_find_overlay(bs, base);
>>>        s->granularity = granularity;
>>>        s->buf_size = ROUND_UP(buf_size, granularity);
>>>        s->unmap = unmap;
>>> @@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job(
>>>        /* In commit_active_start() all intermediate nodes disappear, so
>>>         * any jobs in them must be blocked */
>>>        if (target_is_backing) {
>>> -        BlockDriverState *iter;
>>> -        for (iter = backing_bs(bs); iter != target; iter = 
>>> backing_bs(iter)) {
>>> -            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
>>> -             * ourselves at s->base (if writes are blocked for a node, 
>>> they are
>>> -             * also blocked for its backing file). The other options would 
>>> be a
>>> -             * second filter driver above s->base (== target). */
>>> +        BlockDriverState *iter, *filtered_target;
>>> +        uint64_t iter_shared_perms;
>>> +
>>> +        /*
>>> +         * The topmost node with
>>> +         * bdrv_skip_rw_filters(filtered_target) == 
>>> bdrv_skip_rw_filters(target)
>>> +         */
>>> +        filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, 
>>> target));
>>> +
>>> +        assert(bdrv_skip_rw_filters(filtered_target) ==
>>> +               bdrv_skip_rw_filters(target));
>>> +
>>> +        /*
>>> +         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
>>> +         * ourselves at s->base (if writes are blocked for a node, they are
>>> +         * also blocked for its backing file). The other options would be a
>>> +         * second filter driver above s->base (== target).
>>> +         */
>>> +        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
>>> +
>>> +        for (iter = bdrv_filtered_bs(bs); iter != target;
>>> +             iter = bdrv_filtered_bs(iter))
>>> +        {
>>> +            if (iter == filtered_target) {
>>> +                /*
>>> +                 * From here on, all nodes are filters on the base.
>>> +                 * This allows us to share BLK_PERM_CONSISTENT_READ.
>>
>> I'd prefere to add something like: "because we share it on target (see 
>> target BlockBackend creation
>> and corresponding comment above)".
> 
> I’d rather not refer to other comments in case they change…  Maybe just
> “This allows us to share BLK_PERM_CONSISTENT_READ, as we do on the
> target.”?  I think if someone is interested, they will scan the file for
> what permissions are shared on the target anyway.

OK. Yes I just wanted to stress that we just duplicate behavior about target, 
as it helped
me to understand.

> 
>>> +                 */
>>> +                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
>>> +            }
>>> +
>>>                ret = block_job_add_bdrv(&s->common, "intermediate node", 
>>> iter, 0,
>>> -                                     BLK_PERM_WRITE_UNCHANGED | 
>>> BLK_PERM_WRITE,
>>> -                                     errp);
>>> +                                     iter_shared_perms, errp);
>>>                if (ret < 0) {
>>>                    goto fail;
>>>                }
>>> @@ -1737,7 +1802,7 @@ fail:
>>>        bs_opaque->stop = true;
>>>        bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
>>>                                 &error_abort);
>>> -    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), 
>>> &error_abort);
>>> +    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, 
>>> &error_abort);
>>>    
>>>        bdrv_unref(mirror_top_bs);
>>>    
>>> @@ -1764,7 +1829,7 @@ void mirror_start(const char *job_id, 
>>> BlockDriverState *bs,
>>>            return;
>>>        }
>>>        is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
>>> -    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
>>> +    base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : 
>>> NULL;
>>>        mirror_start_job(job_id, bs, creation_flags, target, replaces,
>>>                         speed, granularity, buf_size, backing_mode,
>>>                         on_source_error, on_target_error, unmap, NULL, NULL,
>>> diff --git a/blockdev.c b/blockdev.c
>>> index c540802127..c451f553f7 100644
>>
>>
>> block/mirroc.c is OK for me. Continue with blockdev.c...
>>
>>> --- a/blockdev.c
>>> +++ b/blockdev.c
>>> @@ -3851,7 +3851,7 @@ static void blockdev_mirror_common(const char 
>>> *job_id, BlockDriverState *bs,
>>>            return;
>>>        }
>>>    
>>> -    if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
>>> +    if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
>>>            sync = MIRROR_SYNC_MODE_FULL;
>>>        }
>>>    
>>> @@ -3900,7 +3900,7 @@ static void blockdev_mirror_common(const char 
>>> *job_id, BlockDriverState *bs,
>>>    
>>>    void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>    {
>>> -    BlockDriverState *bs;
>>> +    BlockDriverState *bs, *unfiltered_bs;
>>>        BlockDriverState *source, *target_bs;
>>>        AioContext *aio_context;
>>>        BlockMirrorBackingMode backing_mode;
>>> @@ -3909,6 +3909,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>        int flags;
>>>        int64_t size;
>>>        const char *format = arg->format;
>>> +    const char *replaces_node_name = NULL;
>>>        int ret;
>>>    
>>>        bs = qmp_get_root_bs(arg->device, errp);
>>> @@ -3921,6 +3922,16 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>            return;
>>>        }
>>>    
>>> +    /*
>>> +     * If the user has not instructed us otherwise, we should let the
>>> +     * block job run from @bs (thus taking into account all filters on
>>> +     * it) but replace @unfiltered_bs when it finishes (thus not
>>> +     * removing those filters).
>>> +     * (And if there are any explicit filters, we should assume the
>>> +     *  user knows how to use the @replaces option.)
>>> +     */
>>> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);
>>> +
>>>        aio_context = bdrv_get_aio_context(bs);
>>>        aio_context_acquire(aio_context);
>>>    
>>> @@ -3934,8 +3945,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>        }
>>>    
>>>        flags = bs->open_flags | BDRV_O_RDWR;
>>> -    source = backing_bs(bs);
>>> +    source = bdrv_filtered_cow_bs(unfiltered_bs);
>>>        if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {
>>
>>
>> Hmm, you handle this case a bit differently here and in 
>> blockdev_mirror_common..
>> Can we handle it only in blockdev_mirror_common, to be consistent with 
>> qmp_blockdev_mirror?
> 
> What exactly do you mean?  The difference between skipping all filters
> and just skipping implicit filters?  Hm.
> 
> First, the check in blockdev_mirror_common() should actually be
> unnecessary.  In qmp_{blockdev,drive}_mirror(), we do nearly the same
> check anyway (and then force sync=full if there is no backing file).

Hmm, I see it only in qmp_drive_mirror, not in _blockdev_

>  So
> if all three functions did the same check, we wouldn’t need it in
> blockdev_mirror_common().

And if it was so, better have one check in _common than two duplicated.

> 
> Second, let’s look at the difference in an example: One where
> blockdev_mirror_common() would not decide to enforce mode=full, but
> qmp_{blockdev,drive}_mirror() would.
> This happens when @bs is an explicit filter over some overlay with a
> backing file, e.g.:
> 
> throttle --file--> qcow2 --backing--> raw
> 
> It’s correct to run the mirror job from the throttle node; but @source
> should be bdrv_backing_chain_next() so it will point to the raw node.
> Currently, it is NULL (because the throttle node does not have a COW child).
> 
> But then again, I’ve made qmp_{blockdev,drive}_mirror() throw an error
> in such a case:
> 
>>> +        if (bdrv_filtered_bs(unfiltered_bs)) {
>>> +            /* @unfiltered_bs is an explicit filter */
>>> +            error_setg(errp, "Cannot perform sync=top mirror through an "
>>> +                       "explicitly added filter node on the source");
>>> +            goto out;
>>> +        }
> 
> So it isn’t really a problem.  Still, does the error make sense?  Should
> we just allow that case by letting source be
> bdrv_filtered_cow_bs(bdrv_skip_rw_filters(bs))?

Looks good. As I understand, you have the test (40/42) for this case and it
works for _blockdev_ and for for _drive_ version of command. Of course, they'd
better behave in same manner.

> 
> (BTW, I just noticed that @base seems to be pretty much unused in
> block/mirror.c.  It only really uses @base_overlay now.  So I suppose it
> makes sense to remove it in v7.)
> 
>>>            arg->sync = MIRROR_SYNC_MODE_FULL;
>>>        }
>>>        if (arg->sync == MIRROR_SYNC_MODE_NONE) {
>>> @@ -3954,6 +3971,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>                                 " named node of the graph");
>>>                goto out;
>>>            }
>>> +        replaces_node_name = arg->replaces;
>>> +    } else if (unfiltered_bs != bs) {
>>> +        replaces_node_name = unfiltered_bs->node_name;
>>>        }
>>>    
>>>        if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
>>> @@ -3973,6 +3993,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>            bdrv_img_create(arg->target, format,
>>>                            NULL, NULL, NULL, size, flags, false, 
>>> &local_err);
>>>        } else {
>>> +        /* Implicit filters should not appear in the filename */
>>> +        BlockDriverState *explicit_backing = 
>>> bdrv_skip_implicit_filters(source);
>>> +
>>>            switch (arg->mode) {
>>>            case NEW_IMAGE_MODE_EXISTING:
>>>                break;
>>> @@ -3980,8 +4003,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>                /* create new image with backing file */
>>>                bdrv_refresh_filename(source);
>>>                bdrv_img_create(arg->target, format,
>>> -                            source->filename,
>>> -                            source->drv->format_name,
>>> +                            explicit_backing->filename,
>>> +                            explicit_backing->drv->format_name,
>>>                                NULL, size, flags, false, &local_err);
>>>                break;
>>>            default:
>>> @@ -4017,7 +4040,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
>>>        }
>>>    
>>>        blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, 
>>> target_bs,
>>> -                           arg->has_replaces, arg->replaces, arg->sync,
>>> +                           !!replaces_node_name, replaces_node_name, 
>>> arg->sync,
>>>                               backing_mode, arg->has_speed, arg->speed,
>>>                               arg->has_granularity, arg->granularity,
>>>                               arg->has_buf_size, arg->buf_size,
>>> @@ -4053,7 +4076,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char 
>>> *job_id,
>>>                             bool has_auto_dismiss, bool auto_dismiss,
>>>                             Error **errp)
>>>    {
>>> -    BlockDriverState *bs;
>>> +    BlockDriverState *bs, *unfiltered_bs;
>>>        BlockDriverState *target_bs;
>>>        AioContext *aio_context;
>>>        BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
>>> @@ -4065,6 +4088,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char 
>>> *job_id,
>>>            return;
>>>        }
>>>    
>>> +    /*
>>> +     * Same as in qmp_drive_mirror():
>>
>> Then, may be better do it in blockdev_mirror_common ?
> 
> Hm, maybe.  Should we decide to let @source be
> bdrv_filtered_cow_bs(bdrv_skip_rw_filters(bs)) in qmp_drive_mirror(), I
> don’t think we need @unfiltered_bs there to determine @source.
> 
> Max
> 
>>> We want to run the job from @bs,
>>> +     * but we want to replace @unfiltered_bs on completion.
>>> +     */
>>> +    unfiltered_bs = bdrv_skip_implicit_filters(bs);
>>> +    if (!has_replaces && unfiltered_bs != bs) {
>>> +        replaces = unfiltered_bs->node_name;
>>> +        has_replaces = true;
>>> +    }
>>> +
>>>        target_bs = bdrv_lookup_bs(target, target, errp);
>>>        if (!target_bs) {
>>>            return;
>>>
>>
>>
> 
> 


-- 
Best regards,
Vladimir

reply via email to

[Prev in Thread] Current Thread [Next in Thread]