gluster-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Gluster-devel] lookup caching


From: Olivier Le Cam
Subject: Re: [Gluster-devel] lookup caching
Date: Sun, 11 Apr 2010 10:42:04 +0200
User-agent: Thunderbird 2.0.0.24 (Macintosh/20100228)

Hi -

Raghavendra G wrote:

you can do that by sending the cached stats (here stat of file, stat of parent directory) through STACK_UNWIND.

STACK_UNWIND_STRICT (lookup, frame, 0, 0, loc->inode, cached_stat, NULL, parent_stat);

you can look into libglusterfs/src/defaults.c for default definitions of each of fop (file operations) and their call backs.

Thank you. I have been able to get a (quick and dirty) stat-lookup caching translator working. I still do not fully understand the GlusterFS internal library, so most of the caching work is done by my own code.

Anyway, it is enough at this stage to run some benchmarks with it and to see whether performance can be improved significantly.

My first impression is rather mixed. I can indeed see some improvement when accessing small files: stat caching does its job. But for some reason, io-cache still talks to the servers before delivering a file, even if that file is available in its cache.

I can see three protocol calls:
        - client_open()         (to both servers)
        - client_stat()         (to one server only: load balancing?)
        - client_flush()        (to both servers)

This might be a problem with the implementation of my translator, which only overrides "lookup" calls for now.

My source code is attached (please be soft on it: as said before, it's a quick and dirty hack of the rot-13 translator).

I'd like to get rid of any traffic with the servers when the file is available in the io-cache. That way I could really see if such a translator can be of any interest.

Do you have any idea for achieving this goal?

Thanks and best regards,
--
Olivier
/*
  Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
  This file is part of GlusterFS.

  GlusterFS is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published
  by the Free Software Foundation; either version 3 of the License,
  or (at your option) any later version.

  GlusterFS is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see
  <http://www.gnu.org/licenses/>.
*/

#include <ctype.h>
#include <sys/uio.h>

#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif

#include "glusterfs.h"
#include "xlator.h"
#include "logging.h"
#include <sys/time.h>

#include "rot-13.h"

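/*
 * This is a metadata (stat) caching xlator, hacked from the rot-13
 * example xlator. It remembers the stat and parent-directory stat
 * handed back by lookup and answers later lookups on the same inode
 * number from that cache until the entry is older than
 * "cache-timeout" seconds.
 * This xlator is an experiment, NOT FOR PRODUCTION
 * USE ;) (hence little error-checking)
 */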

/*
 * Unlink one entry from its doubly linked bucket chain, release it and
 * decrement the entry counter kept in priv.
 *
 * Returns the entry that followed the removed one (NULL when it was the
 * last), so a caller walking the chain can carry on from the return value.
 */
mdc_inode_cache_t *
mdc_inode_cache_delete(mdc_private_t *priv, mdc_inode_cache_t *cache)
{
        mdc_inode_cache_t *before = cache->previous;
        mdc_inode_cache_t *after  = cache->next;

        if (before != NULL)
                before->next = after;
        if (after != NULL)
                after->previous = before;

        /* neighbours were saved above, so the entry can go now */
        FREE (cache);
        priv->count--;

        return after;
}

/*
 * Store (or refresh) the stat pair for an inode number.
 *
 * this        xlator whose private data holds the hash buckets
 * ino         inode number used as the key; 0 is never cached
 * stbuf       stat of the file, copied into the entry
 * postparent  stat of the parent directory, copied into the entry
 *
 * Returns 0 when the entry was stored, refreshed, or when there was
 * nothing to cache (ino == 0 or a NULL stat), and -1 when the bucket is
 * unavailable or memory could not be allocated.
 */
int32_t
mdc_inode_cache_set(xlator_t *this, ino_t ino, const struct stat *stbuf,
                    const struct stat *postparent)
{
        mdc_private_t           *priv   = NULL;
        mdc_inode_cache_t       *tail   = NULL;
        mdc_inode_cache_t       *entry  = NULL;

        if (ino == 0 || stbuf == NULL || postparent == NULL)
                return 0;

        priv = (mdc_private_t *) this->private;
        if (priv == NULL)
                return -1;

        /* init() may have failed to allocate this bucket's sentinel head */
        tail = priv->inode_cache_head[ino % HASH_POS];
        if (tail == NULL)
                return -1;

        /* Walk to the end of the chain, looking for the inode on the way. */
        while (tail->next != NULL) {
                tail = tail->next;
                if (tail->ino == ino) {
                        /*
                         * Already cached: keep the newest reply instead of
                         * dropping it, and restart the entry's lifetime.
                         */
                        memcpy (&tail->stbuf, stbuf, sizeof (tail->stbuf));
                        memcpy (&tail->postparent, postparent,
                                sizeof (tail->postparent));
                        gettimeofday (&tail->tv, NULL);
                        return 0;
                }
        }

        entry = CALLOC (1, sizeof (*entry));
        if (entry == NULL)
                return -1;

        entry->ino = ino;
        memcpy (&entry->stbuf, stbuf, sizeof (entry->stbuf));
        memcpy (&entry->postparent, postparent, sizeof (entry->postparent));
        gettimeofday (&entry->tv, NULL);  /* creation time, used for expiry */

        /* append after the last element of the chain */
        entry->previous = tail;
        entry->next = NULL;
        tail->next = entry;
        priv->count++;

        return 0;
}

/*
 * Find the cached entry for an inode number.
 *
 * While walking the bucket chain, every real entry (ino != 0) whose
 * timestamp is older than now - cache_timeout is removed; the sentinel
 * head (ino == 0) is never removed.
 *
 * Returns the matching entry, or NULL when ino is 0 or nothing usable
 * is cached for it.
 */
mdc_inode_cache_t *
mdc_inode_cache_get(xlator_t *this, ino_t ino)
{
        mdc_private_t           *priv = (mdc_private_t*) this->private;
        mdc_inode_cache_t       *entry = priv->inode_cache_head[ino % HASH_POS];
        struct timeval          current = {0,};
        time_t                  oldest = 0;

        if (ino == 0)
                return NULL;

        gettimeofday(&current, NULL);
        oldest = current.tv_sec - priv->cache_timeout;

        while (entry != NULL) {
                if (entry->ino && entry->tv.tv_sec < oldest) {
                        /* expired: delete hands back the successor */
                        entry = mdc_inode_cache_delete (priv, entry);
                } else if (entry->ino == ino) {
                        return entry;
                } else {
                        entry = entry->next;
                }
        }

        return NULL;
}

/*
 * Lookup callback: remember the stats of a successful lookup reply, then
 * pass the reply on unchanged.
 *
 * Nothing is cached when the lookup failed (op_ret != 0), when inode is
 * NULL, or when stbuf is NULL or carries inode number 0. A caching
 * failure is only logged; the reply is unwound regardless.
 *
 * Always returns 0.
 */
int32_t
mdc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                int32_t op_ret, int32_t op_errno, inode_t *inode,
                struct stat *stbuf, dict_t *dict, struct stat *postparent)
{
        int32_t ret = 0;

        /* the stats of a failed lookup are not meaningful */
        if (op_ret != 0 || inode == NULL)
                goto out;

        if (stbuf && stbuf->st_ino) {
                ret = mdc_inode_cache_set (this, stbuf->st_ino, stbuf,
                                           postparent);
                if (ret != 0) {
                        /* report the key that was actually being cached */
                        gf_log (this->name, GF_LOG_WARNING,
                                "Could not cache metadata (ino=%"PRIu64")",
                                (uint64_t) stbuf->st_ino);
                }
        }

out :
        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf,
                dict, postparent);
        return 0;
}

/*
 * Lookup fop: answer from the metadata cache when possible, otherwise
 * forward the request to the first child with mdc_lookup_cbk as callback.
 *
 * The cache is only consulted when both loc and loc->inode are non-NULL;
 * it is keyed by loc->inode->ino.
 *
 * NOTE(review): a cache hit unwinds with a NULL dict even when xattr_req
 * is set -- confirm the translators above tolerate that.
 *
 * Always returns 0.
 */
int32_t
mdc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
            dict_t *xattr_req)
{
        mdc_inode_cache_t       *hit            = NULL;

        if (loc != NULL && loc->inode != NULL) {
                hit = mdc_inode_cache_get(this, loc->inode->ino);
        }

        if (hit == NULL) {
                /* miss (or nothing to key on): ask the child */
                STACK_WIND (frame, mdc_lookup_cbk, FIRST_CHILD (this),
                        FIRST_CHILD (this)->fops->lookup, loc, xattr_req);
                return 0;
        }

        STACK_UNWIND_STRICT (lookup, frame, 0, 0, loc->inode,
                &hit->stbuf, NULL, &hit->postparent);
        return 0;
}

/*
 * Translator constructor: validate the volume graph, allocate the private
 * state with one sentinel head per hash bucket, and read "cache-timeout".
 *
 * Returns 0 on success. Returns -1 when the xlator does not have exactly
 * one child or when a bucket head cannot be allocated; in the latter case
 * everything allocated so far is released and this->private is left
 * untouched.
 */
int32_t
init (xlator_t *this)
{
        int             i       = 0;
        data_t          *data   = NULL;
        mdc_private_t   *priv   = NULL;

        if (!this->children || this->children->next) {
                gf_log ("mdc-cache", GF_LOG_ERROR, 
                        "FATAL: mdc-cache should have exactly one child");
                return -1;
        }

        if (!this->parents) {
                gf_log (this->name, GF_LOG_WARNING,
                        "dangling volume. check volfile ");
        }

        priv = CALLOC (1, sizeof (*priv));
        ERR_ABORT (priv);

        /*
         * Every bucket starts with a sentinel head. CALLOC leaves it
         * zeroed, i.e. ino == 0 with NULL previous/next links.
         * The cache helpers dereference these heads, so a missing one
         * is fatal rather than silently skipped.
         */
        for (i = 0; i < HASH_POS; i++) {
                priv->inode_cache_head[i] =
                        CALLOC (1, sizeof (mdc_inode_cache_t));
                if (priv->inode_cache_head[i] == NULL) {
                        gf_log (this->name, GF_LOG_ERROR,
                                "out of memory");
                        while (i-- > 0) {
                                FREE (priv->inode_cache_head[i]);
                        }
                        FREE (priv);
                        return -1;
                }
        }

        /* done after the allocations so the error path has no lock to undo */
        LOCK_INIT (&priv->lock);

        /* default lifetime of a cached entry, in seconds */
        priv->cache_timeout = 1;
        data = dict_get (this->options, "cache-timeout");
        if (data) {
                priv->cache_timeout = data_to_uint32 (data);
                gf_log (this->name, GF_LOG_TRACE,
                        "Using %"PRIu32" seconds to revalidate cache",
                        priv->cache_timeout);
        }

        priv->count = 0;
        this->private = priv;

        gf_log ("mdc-cache", GF_LOG_WARNING,
                "metadata caching (mdc-cache) xlator loaded");
        return 0;
}

/*
 * Translator destructor: release every cached entry, every bucket's
 * sentinel head and finally the private state itself.
 *
 * Safe to call when this->private is NULL.
 *
 * NOTE(review): priv->lock is initialised in init() but not destroyed
 * here -- add the matching destroy call if the lock type requires one.
 */
void 
fini (xlator_t *this)
{
        mdc_private_t           *priv   = this->private;
        mdc_inode_cache_t       *entry  = NULL;
        mdc_inode_cache_t       *next   = NULL;
        int                     i       = 0;

        if (priv == NULL)
                return;

        for (i = 0; i < HASH_POS; i++) {
                /* the chain starts at the sentinel head, which is freed too */
                entry = priv->inode_cache_head[i];
                while (entry != NULL) {
                        next = entry->next;
                        FREE (entry);
                        entry = next;
                }
                priv->inode_cache_head[i] = NULL;
        }

        FREE (priv);
        this->private = NULL;

        return;
}

/* File-operation table: lookup is the only fop this xlator sets. */
struct xlator_fops fops = {
        .lookup       = mdc_lookup
};

/* Management-operation table: no entries are set. */
struct xlator_mops mops = {
};

/* Callback table: no entries are set. */
struct xlator_cbks cbks = {
};

/*
 * Volume options accepted by this xlator.
 *
 * cache-timeout: integer in the range 1..900. init() reads it as the
 * number of seconds a cached entry stays valid and uses 1 when the
 * option is not given. The table ends with a NULL key.
 */
struct volume_options options[] = {
        { .key  = {"cache-timeout"},
          .type = GF_OPTION_TYPE_INT,
          .min  = 1,
          .max  = 900
        },
        { .key  = {NULL} }
};
/*
   Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
   This file is part of GlusterFS.

   GlusterFS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3 of the License,
   or (at your option) any later version.

   GlusterFS is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see
   <http://www.gnu.org/licenses/>.
*/

#ifndef __ROT_13_H__
#define __ROT_13_H__

#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif

#include <sys/uio.h>
#include "call-stub.h"

/* Number of hash buckets; an inode number maps to bucket (ino % HASH_POS). */
#define HASH_POS        1699

/*
 * One element of a bucket's doubly linked chain. Each chain starts with
 * a sentinel element whose ino is 0; real entries follow it.
 */
struct mdc_inode_cache {
        ino_t           ino;            /* key: inode number (0 for the sentinel) */
        struct stat     stbuf;          /* cached stat of the file */
        struct stat     postparent;     /* cached stat of the parent directory */
        struct timeval  tv;             /* time the entry was stored, for expiry */
        struct mdc_inode_cache  *previous;      /* NULL for the sentinel head */
        struct mdc_inode_cache  *next;          /* NULL for the last element */
};
typedef struct mdc_inode_cache mdc_inode_cache_t;

/* Per-xlator private state, stored in this->private by init(). */
struct mdc_private {
        uint32_t        cache_timeout;  /* seconds an entry stays valid */
        uint32_t        max_entries;    /* NOTE(review): not read or written by the visible code */
        uint32_t        count;          /* number of cached entries (sentinels excluded) */
        struct mdc_inode_cache *inode_cache_head[HASH_POS];     /* sentinel head of each bucket */
        gf_lock_t       lock;           /* initialised in init(); NOTE(review): never taken in the visible code */
};
typedef struct mdc_private mdc_private_t;

#endif /* __ROT_13_H__ */

reply via email to

[Prev in Thread] Current Thread [Next in Thread]