add an untested MurmurHash3 implementation
This commit is contained in:
		
							parent
							
								
									db68f3d0ce
								
							
						
					
					
						commit
						882ccc6dab
					
				
					 1 changed files with 76 additions and 0 deletions
				
			
		
							
								
								
									
										76
									
								
								bloom.c
									
										
									
									
									
								
							
							
						
						
									
										76
									
								
								bloom.c
									
										
									
									
									
								
							| 
						 | 
					@ -1,5 +1,6 @@
 | 
				
			||||||
#include <assert.h>
 | 
					#include <assert.h>
 | 
				
			||||||
#include <stdint.h>
 | 
					#include <stdint.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
#include <stdlib.h>
 | 
					#include <stdlib.h>
 | 
				
			||||||
#include <string.h>
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -28,6 +29,75 @@ uint32_t fnv32_1_str(char *string) {
 | 
				
			||||||
  return fnv32_1((uint8_t *) string, strlen(string));
 | 
					  return fnv32_1((uint8_t *) string, strlen(string));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* http://en.wikipedia.org/wiki/MurmurHash */
 | 
				
			||||||
 | 
					/*
					 * MurmurHash3, x86 32-bit variant.
					 *
					 * key:  bytes to hash; only read, never written. Not dereferenced when
					 *       len is 0.
					 * len:  number of bytes in key.
					 * seed: initial hash state; different seeds give independent hashes.
					 *
					 * Returns the 32-bit hash. All arithmetic is unsigned 32-bit and wraps
					 * modulo 2^32. Input bytes are always assembled in little-endian order,
					 * so the result does not depend on the host's byte order.
					 */
					uint32_t Murmur3_32(uint8_t * key, size_t len, uint32_t seed) {
					  const uint32_t c1 = 0xcc9e2d51UL;
					  const uint32_t c2 = 0x1b873593UL;
					  const uint32_t r1 = 15;
					  const uint32_t r2 = 13;
					  const uint32_t m = 5;
					  const uint32_t n = 0xe6546b64UL;
					  const uint32_t fmix1 = 0x85ebca6bUL;
					  const uint32_t fmix2 = 0xc2b2ae35UL;

					  const size_t nblocks = len / 4;
					  const size_t tailstart = nblocks * 4;
					  const size_t taillen = len - tailstart; /* always 0..3 */

					  uint32_t hash = seed;

					  /* Body: mix in each complete four-byte chunk of key. */
					  for (size_t i = 0; i < nblocks; i++) {
					    /* Chunk i starts at byte offset 4 * i, so chunks never overlap. */
					    const size_t base = i * 4;

					    /* Widen each byte to uint32_t before shifting: a uint8_t promotes to
					     * signed int, and shifting a value >= 0x80 left by 24 would overflow. */
					    uint32_t k = ((uint32_t) key[base + 0] << 0)
					               | ((uint32_t) key[base + 1] << 8)
					               | ((uint32_t) key[base + 2] << 16)
					               | ((uint32_t) key[base + 3] << 24);

					    k *= c1;
					    k = (k << r1) | (k >> (32 - r1)); /* rotate left by r1 */
					    k *= c2;

					    hash ^= k;
					    hash = (hash << r2) | (hash >> (32 - r2)); /* rotate left by r2 */
					    hash = hash * m + n;
					  }

					  /* Tail: fold the 1..3 leftover bytes in, first tail byte lowest. */
					  if (taillen > 0) {
					    uint32_t k = 0;

					    /* Walk the tail backwards so the first tail byte ends up in the low
					     * eight bits, keeping the meaningful bits at the low end of the word
					     * where they influence the multiplication the most. */
					    for (size_t j = taillen; j > 0; j--) {
					      k = (k << 8) | key[tailstart + j - 1];
					    }

					    k *= c1;
					    k = (k << r1) | (k >> (32 - r1));
					    k *= c2;

					    hash ^= k;
					  }

					  /* Finalization: mix in the length (truncated to 32 bits), then avalanche. */
					  hash ^= (uint32_t) len;

					  hash ^= hash >> 16;
					  hash *= fmix1;
					  hash ^= hash >> 13;
					  hash *= fmix2;
					  hash ^= hash >> 16;

					  return hash;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Convenience wrapper: hash a NUL-terminated string with Murmur3_32
					 * using a seed of 0. The terminating NUL is not part of the hashed data.
					 */
					uint32_t Murmur3_32_str(char *string) {
					  size_t length = strlen(string);
					  uint8_t *bytes = (uint8_t *) string;

					  return Murmur3_32(bytes, length, 0);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int main(void) {
 | 
					int main(void) {
 | 
				
			||||||
  /* Test FNV32_1 */
 | 
					  /* Test FNV32_1 */
 | 
				
			||||||
  assert(fnv32_1_str("03SB[") == 0x00000000UL);
 | 
					  assert(fnv32_1_str("03SB[") == 0x00000000UL);
 | 
				
			||||||
| 
						 | 
					@ -43,4 +113,10 @@ int main(void) {
 | 
				
			||||||
  assert(fnv32_1_str("foob") == 0xb4b1178bUL);
 | 
					  assert(fnv32_1_str("foob") == 0xb4b1178bUL);
 | 
				
			||||||
  assert(fnv32_1_str("fooba") == 0xfdc80fb0UL);
 | 
					  assert(fnv32_1_str("fooba") == 0xfdc80fb0UL);
 | 
				
			||||||
  assert(fnv32_1_str("foobar") == 0x31f0b262UL);
 | 
					  assert(fnv32_1_str("foobar") == 0x31f0b262UL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /* Test MurmurHash3 for x86, 32-bit */
 | 
				
			||||||
 | 
					  /* FIXME */
 | 
				
			||||||
 | 
					  printf("%x\n", Murmur3_32_str("The quick brown fox jumps over the lazy dog"));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /* FIXME: bloom filter... */
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue