bug-gawk
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: extract hash for stand-alone use


From: david kerns
Subject: Re: extract hash for stand-alone use
Date: Thu, 20 May 2021 07:25:37 -0700

this is a bit off topic for this mailing list... but here:

$ cat ah.c
#include <stdio.h>
#include <string.h>

static unsigned long
awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code)
{
        unsigned long h = 0;
        unsigned long htmp;

        /*
         * Ozan Yigit's original sdbm hash, copied from Margo Seltzers
         * db package.
         *
         * This is INCREDIBLY ugly, but fast.  We break the string up into
         * 8 byte units.  On the first time through the loop we get the
         * "leftover bytes" (strlen % 8).  On every other iteration, we
         * perform 8 HASHC's so we handle all 8 bytes.  Essentially, this
         * saves us 7 cmp & branch instructions.  If this routine is
         * heavily used enough, it's worth the ugly coding.
         */

        /*
         * Even more speed:
         * #define HASHC   h = *s++ + 65599 * h
         * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
         *
         * 4/2011: Force the results to 32 bits, to get the same
         * result on both 32- and 64-bit systems. This may be a
         * bad idea.
         */
#define HASHC   htmp = (h << 6);  \
                h = *s++ + htmp + (htmp << 10) - h ; \
                htmp &= 0xFFFFFFFF; \
                h &= 0xFFFFFFFF

        h = 0;

        /* "Duff's Device" */
        if (len > 0) {
                size_t loop = (len + 8 - 1) >> 3;

                switch (len & (8 - 1)) {
                case 0:
                        do {    /* All fall throughs */
                                HASHC;
                case 7:         HASHC;
                case 6:         HASHC;
                case 5:         HASHC;
                case 4:         HASHC;
                case 3:         HASHC;
                case 2:         HASHC;
                case 1:         HASHC;
                        } while (--loop);
                }
        }

        if (code != NULL)
                *code = h;

        if (h >= hsize)
                h %= hsize;
        return h;
}

int main(int argc, char **argv)
{
  printf("%08x\n", awk_hash(argv[1], strlen(argv[1]), 0xffffffff, NULL));
  return 0;
}
$ make ah
cc     ah.c   -o ah
$ ./ah "this is a test"
62a9aba5

On Thu, May 20, 2021 at 6:37 AM Peng Yu <pengyu.ut@gmail.com> wrote:

> > assuming you mean the hash for associative arrays...
>
> Yes.
>
> > look in str_array.c for awk_hash
>
> I see these functions defined in str_array.c. What functions are
> relevant? (I assume env_* are not relevant.)
>
> Is there any minimal working example on how to use the hash related
> functions?
>
> $ grep str_array tags  | awk -v FS='\t' -e '$4 == "f"'
> awk_hash        str_array.c     /^awk_hash(const char *s, size_t len,
> unsigned
> long hsize, size_t *code)$/;"   f       typeref:typename:unsigned long
> file:
> env_clear       str_array.c     /^env_clear(NODE *symbol, NODE
> *subs)$/;"      f       typeref:typename:NODE **        file:
> env_remove      str_array.c     /^env_remove(NODE *symbol, NODE
> *subs)$/;"      f       typeref:typename:NODE **        file:
> env_store       str_array.c     /^env_store(NODE *symbol, NODE
> *subs)$/;"      f       typeref:typename:NODE **        file:
> fnv1a_hash_string       str_array.c     /^fnv1a_hash_string(const char
> *str,
> size_t len, unsigned long hsize, size_t
> *code)$/;"      f       typeref:typename:unsigned long  file:
> grow_table      str_array.c     /^grow_table(NODE
> *symbol)$/;"    f       typeref:typename:void   file:
> gst_hash_string str_array.c     /^gst_hash_string(const char *str, size_t
> len, unsigned long hsize, size_t
> *code)$/;"      f       typeref:typename:unsigned long  file:
> init_env_array  str_array.c     /^init_env_array(NODE
> *env_node)$/;"  f       typeref:typename:void
> scramble        str_array.c     /^scramble(unsigned long
> x)$/;"  f       typeref:typename:unsigned long  file:
> str_array_init  str_array.c     /^str_array_init(NODE *symbol
> ATTRIBUTE_UNUSED, NODE *subs
> ATTRIBUTE_UNUSED)$/;"   f       typeref:typename:NODE **        file:
> str_clear       str_array.c     /^str_clear(NODE *symbol, NODE *subs
> ATTRIBUTE_UNUSED)$/;"   f       typeref:typename:NODE **        file:
> str_copy        str_array.c     /^str_copy(NODE *symbol, NODE
> *newsymb)$/;"   f       typeref:typename:NODE **        file:
> str_dump        str_array.c     /^str_dump(NODE *symbol, NODE
> *ndump)$/;"     f       typeref:typename:NODE **        file:
> str_exists      str_array.c     /^str_exists(NODE *symbol, NODE
> *subs)$/;"      f       typeref:typename:NODE **        file:
> str_find        str_array.c     /^str_find(NODE *symbol, NODE *s1, size_t
> code1,
> unsigned long hash1)$/;"        f       typeref:typename:NODE **
> file:
> str_kilobytes   str_array.c     /^str_kilobytes(NODE
> *symbol)$/;"    f       typeref:typename:AWKNUM
> str_list        str_array.c     /^str_list(NODE *symbol, NODE
> *t)$/;" f       typeref:typename:NODE **        file:
> str_lookup      str_array.c     /^str_lookup(NODE *symbol, NODE
> *subs)$/;"      f       typeref:typename:NODE **        file:
> str_remove      str_array.c     /^str_remove(NODE *symbol, NODE
> *subs)$/;"      f       typeref:typename:NODE **        file:
>
> --
> Regards,
> Peng
>


reply via email to

[Prev in Thread] Current Thread [Next in Thread]