#include #include #include #include #include using std::tr1::unordered_set; typedef unsigned long int u4; /* unsigned 4-byte type */ typedef unsigned char u1; /* unsigned 1-byte type */ /* The mixing step */ #define mix(a,b,c) \ { \ a=a-b; a=a-c; a=a^(c>>13); \ b=b-c; b=b-a; b=b^(a<<8); \ c=c-a; c=c-b; c=c^(b>>13); \ a=a-b; a=a-c; a=a^(c>>12); \ b=b-c; b=b-a; b=b^(a<<16); \ c=c-a; c=c-b; c=c^(b>>5); \ a=a-b; a=a-c; a=a^(c>>3); \ b=b-c; b=b-a; b=b^(a<<10); \ c=c-a; c=c-b; c=c^(b>>15); \ } /* The whole new hash function */ u4 hash( register u1 *k, u4 length, u4 initval) { register u4 a,b,c; /* the internal state */ u4 len; /* how many key bytes still need mixing */ /* Set up the internal state */ len = length; a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ c = initval; /* variable initialization of internal state */ /*---------------------------------------- handle most of the key */ while (len >= 12) { a=a+(k[0]+((u4)k[1]<<8)+((u4)k[2]<<16) +((u4)k[3]<<24)); b=b+(k[4]+((u4)k[5]<<8)+((u4)k[6]<<16) +((u4)k[7]<<24)); c=c+(k[8]+((u4)k[9]<<8)+((u4)k[10]<<16)+((u4)k[11]<<24)); mix(a,b,c); k = k+12; len = len-12; } /*------------------------------------- handle the last 11 bytes */ c = c+length; switch(len) /* all the case statements fall through */ { case 11: c=c+((u4)k[10]<<24); case 10: c=c+((u4)k[9]<<16); case 9 : c=c+((u4)k[8]<<8); /* the first byte of c is reserved for the length */ case 8 : b=b+((u4)k[7]<<24); case 7 : b=b+((u4)k[6]<<16); case 6 : b=b+((u4)k[5]<<8); case 5 : b=b+k[4]; case 4 : a=a+((u4)k[3]<<24); case 3 : a=a+((u4)k[2]<<16); case 2 : a=a+((u4)k[1]<<8); case 1 : a=a+k[0]; /* case 0: nothing left to add */ } mix(a,b,c); /*-------------------------------------------- report the result */ return c; } void gen_random(char *s, const int len) { static unordered_set ht1; static const char alphanum[] = "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; for (int i = 0; i < len; ++i) { s[i] = alphanum[rand() % (sizeof(alphanum) - 1)]; } s[len] = 0; if (ht1.insert(strdup(s)).second == false) return gen_random(s, len); } int main (int argc, char* argv[]) { unordered_set ht1, ht2; char hostname[255]; gethostname(hostname, 254); int len = strlen(hostname); for (int j = 0; j < 1000; j++) { for (int i = 0; i < 16384; i++) { unsigned int res = hash((u1*)hostname, len, i); ht1.insert(res); res = hash((u1*)hostname, len, 0); ht2.insert(res^i); } gen_random(hostname, len); } printf ("%i duplicates\n", (int)(16384*1000 - ht1.size())); printf ("%i duplicates\n", (int)(16384*1000 - ht2.size())); return 0; }