You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
123 lines
3.8 KiB
C
123 lines
3.8 KiB
C
#include <assert.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/*
 * 32-bit FNV-1 hash (FNV32_1).
 *
 * Starting from the 32-bit offset basis, every input octet is folded in by
 * first multiplying the running hash by the FNV prime and then XOR-ing the
 * octet into it.
 *
 * data:     octets to hash; not dereferenced when data_len is 0.
 * data_len: number of octets readable at data.
 *
 * Returns the 32-bit hash; for empty input this is the offset basis.
 *
 * See http://isthe.com/chongo/tech/comp/fnv/
 */
uint32_t fnv32_1(uint8_t * data, size_t data_len) {
    /* Parameters of the 32-bit flavour of the algorithm. */
    const uint32_t basis = UINT32_C(2166136261);
    const uint32_t prime = UINT32_C(16777619);

    uint32_t acc = basis;
    const uint8_t *cursor = data;
    size_t left = data_len;

    while (left-- > 0) {
        acc *= prime;       /* implicitly reduced modulo 2^32 */
        acc ^= *cursor++;   /* only the lowest octet is affected */
    }

    return acc;
}
|
|
|
|
/*
 * FNV-1 (32-bit) hash of a NUL-terminated string.
 *
 * The length is taken with strlen(), so the terminating NUL byte is not part
 * of the hashed data. The argument must be a valid C string.
 */
uint32_t fnv32_1_str(char *string) {
    const size_t length = strlen(string);
    uint8_t *octets = (uint8_t *) string;

    return fnv32_1(octets, length);
}
|
|
|
|
/* Rotate the 32-bit value x left by r bits; r must be in the range 1..31. */
static uint32_t murmur3_rotl32(uint32_t x, unsigned r) {
    return (x << r) | (x >> (32 - r));
}

/*
 * MurmurHash3, 32-bit result (the "x86_32" variant).
 *
 * See http://en.wikipedia.org/wiki/MurmurHash
 *
 * key:  bytes to hash; not dereferenced when len is 0.
 * len:  number of bytes readable at key.
 * seed: initial hash state.
 *
 * Returns the 32-bit hash of key[0..len-1].
 *
 * All arithmetic is done on unsigned 32-bit integers, so overflow wraps
 * modulo 2^32. Input words are assembled byte by byte in little-endian
 * order, so the result does not depend on the byte order of the host.
 */
uint32_t Murmur3_32(uint8_t * key, size_t len, uint32_t seed) {
    const uint32_t c1 = 0xcc9e2d51UL;
    const uint32_t c2 = 0x1b873593UL;
    const unsigned r1 = 15;
    const unsigned r2 = 13;
    const uint32_t m = 5;
    const uint32_t n = 0xe6546b64UL;

    const size_t nblocks = len / 4;
    uint32_t hash = seed;

    /* For each complete four-byte chunk of key. */
    for (size_t i = 0; i < nblocks; i++) {
        /*
         * Chunk i starts at byte offset 4 * i. The casts keep the shifts in
         * unsigned 32-bit arithmetic: a uint8_t would otherwise be promoted
         * to int, and shifting a byte >= 0x80 left by 24 overflows int.
         */
        const size_t base = i * 4;
        uint32_t k = ((uint32_t) key[base + 0] << 0)
                   | ((uint32_t) key[base + 1] << 8)
                   | ((uint32_t) key[base + 2] << 16)
                   | ((uint32_t) key[base + 3] << 24);

        k *= c1;
        k = murmur3_rotl32(k, r1);
        k *= c2;

        hash ^= k;
        hash = murmur3_rotl32(hash, r2);
        hash = hash * m + n;
    }

    /* Fold in the 1..3 bytes left over after the last complete chunk. */
    const size_t tail_len = len - nblocks * 4;
    if (tail_len > 0) {
        uint32_t tail = 0;

        /*
         * Walk the tail from its last byte to its first so that the first
         * tail byte ends up in the lowest octet (little-endian), keeping the
         * meaningful bits at the low end where the following multiplication
         * spreads them the most.
         */
        for (size_t j = 0; j < tail_len; j++) {
            tail = (tail << 8) | key[len - 1 - j];
        }

        tail *= c1;
        tail = murmur3_rotl32(tail, r1);
        tail *= c2;

        hash ^= tail;
    }

    /* Only the low 32 bits of the length take part in the hash. */
    hash ^= (uint32_t) len;

    /* Final avalanche mix. */
    hash ^= hash >> 16;
    hash *= 0x85ebca6bUL;
    hash ^= hash >> 13;
    hash *= 0xc2b2ae35UL;
    hash ^= hash >> 16;

    return hash;
}
|
|
|
|
/*
 * MurmurHash3 (32-bit) of a NUL-terminated string, using seed 0.
 *
 * The length is taken with strlen(), so the terminating NUL byte is not part
 * of the hashed data. The argument must be a valid C string.
 */
uint32_t Murmur3_32_str(char *string) {
    const uint32_t seed = 0;
    const size_t length = strlen(string);
    uint8_t *bytes = (uint8_t *) string;

    return Murmur3_32(bytes, length, seed);
}
|
|
|
|
/*
 * Self-test driver: checks fnv32_1_str() against fixed FNV-1 values with
 * assert() and prints one MurmurHash3 value in hexadecimal.
 *
 * Returns 0 when it runs to completion; a failing assert() aborts the
 * program instead (unless NDEBUG is defined, which disables the checks).
 */
int main(void) {
    /* Test FNV32_1 */
    assert(fnv32_1_str("03SB[") == 0x00000000UL);
    assert(fnv32_1_str("") == 0x811c9dc5UL);
    assert(fnv32_1_str("a") == 0x050c5d7eUL);
    assert(fnv32_1_str("b") == 0x050c5d7dUL);
    assert(fnv32_1_str("c") == 0x050c5d7cUL);
    assert(fnv32_1_str("d") == 0x050c5d7bUL);
    assert(fnv32_1_str("e") == 0x050c5d7aUL);
    assert(fnv32_1_str("f") == 0x050c5d79UL);
    assert(fnv32_1_str("fo") == 0x6b772514UL);
    assert(fnv32_1_str("foo") == 0x408f5e13UL);
    assert(fnv32_1_str("foob") == 0xb4b1178bUL);
    assert(fnv32_1_str("fooba") == 0xfdc80fb0UL);
    assert(fnv32_1_str("foobar") == 0x31f0b262UL);

    /* Test MurmurHash3 for x86, 32-bit */
    /* FIXME: the value is only printed, not checked against a reference. */
    /*
     * uint32_t is not guaranteed to be unsigned int, so "%x" is not a
     * portable match for it. unsigned long is at least 32 bits wide and
     * "%lx" prints the same digits.
     */
    printf("%lx\n",
           (unsigned long) Murmur3_32_str("The quick brown fox jumps over the lazy dog"));

    /* FIXME: bloom filter... */

    return 0;
}
|