#include #include #include #include #include /* * FNV-1 hash implementation (32-bit aka FNV32_1). * * See http://isthe.com/chongo/tech/comp/fnv/ */ uint32_t fnv32_1(uint8_t * data, size_t data_len) { uint32_t hash; /* Constants, for 32-bit only. */ uint32_t offset_basis = (uint32_t) 2166136261UL; uint32_t FNV_prime = (uint32_t) 16777619UL; hash = offset_basis; for (size_t i = 0; i < data_len; i++) { hash = hash * FNV_prime; /* implictly modulo 2^32 */ hash = hash ^ data[i]; /* implictly only on lower octet. */ } return hash; } /* Compute FNV-1 hash for the given string (without NUL byte). */ uint32_t fnv32_1_str(char *string) { return fnv32_1((uint8_t *) string, strlen(string)); } /* http://en.wikipedia.org/wiki/MurmurHash */ uint32_t Murmur3_32(uint8_t * key, size_t len, uint32_t seed) { // Note: In this version, all integer arithmetic is performed with unsigned 32 bit integers. // In the case of overflow, the result is constrained by the application of modulo 2^{32} arithmetic. const uint32_t c1 = 0xcc9e2d51UL; uint32_t c2 = 0x1b873593UL; uint32_t r1 = 15; uint32_t r2 = 13; uint32_t m = 5; uint32_t n = 0xe6546b64UL; uint32_t hash = seed; size_t i = 0; /* For each four-byte chunk of key */ for (i = 0; i < len / 4; i++) { /* FIXME endianness */ uint32_t k = (key[i + 0] << 0) | (key[i + 1] << 8) | (key[i + 2] << 16) | (key[i + 3] << 24); k = k * c1; k = (k << r1) | (k >> (32 - r1)); k = k * c2; hash = hash ^ k; hash = (hash << r2) | (hash >> (32 - r2)); hash = hash * m + n; } /* With any remaining bytes: */ if (len > i * 4) { size_t remaininglen = len - i * 4; uint32_t remainingbytes = 0; for (size_t j = 0; j < remaininglen; j++) { remainingbytes = remainingbytes << 8; remainingbytes |= key[len - 1 - j]; } // remainingbytes \gets SwapEndianOrderOf(remainingbytesInKey) // Note: Endian swapping is only necessary on big-endian machines. // The purpose is to place the meaningful digits towards the low end of the value, // so that these digits have the greatest potential to affect the low range digits // in the subsequent multiplication. Consider that locating the meaningful digits // in the high range would produce a greater effect upon the high digits of the // multiplication, and notably, that such high digits are likely to be discarded // by the modulo arithmetic under overflow. We don't want that. remainingbytes = remainingbytes * c1; remainingbytes = (remainingbytes << r1) | (remainingbytes >> (32 - r1)); remainingbytes = remainingbytes * c2; hash = hash ^ remainingbytes; } hash = hash ^ len; hash = hash ^ (hash >> 16); hash = hash * 0x85ebca6b; hash = hash ^ (hash >> 13); hash = hash * 0xc2b2ae35; hash = hash ^ (hash >> 16); return hash; } uint32_t Murmur3_32_str(char *string) { return Murmur3_32((uint8_t *) string, strlen(string), 0); } int main(void) { /* Test FNV32_1 */ assert(fnv32_1_str("03SB[") == 0x00000000UL); assert(fnv32_1_str("") == 0x811c9dc5UL); assert(fnv32_1_str("a") == 0x050c5d7eUL); assert(fnv32_1_str("b") == 0x050c5d7dUL); assert(fnv32_1_str("c") == 0x050c5d7cUL); assert(fnv32_1_str("d") == 0x050c5d7bUL); assert(fnv32_1_str("e") == 0x050c5d7aUL); assert(fnv32_1_str("f") == 0x050c5d79UL); assert(fnv32_1_str("fo") == 0x6b772514UL); assert(fnv32_1_str("foo") == 0x408f5e13UL); assert(fnv32_1_str("foob") == 0xb4b1178bUL); assert(fnv32_1_str("fooba") == 0xfdc80fb0UL); assert(fnv32_1_str("foobar") == 0x31f0b262UL); /* Test MurmurHash3 for x86, 32-bit */ /* FIXME */ printf("%x\n", Murmur3_32_str("The quick brown fox jumps over the lazy dog")); /* FIXME: bloom filter... */ }