//----------------------------------------------------------------------------- // MurmurHash3 was written by Austin Appleby, and is placed in the public // domain. The author hereby disclaims copyright to this source code. // Note - The x86 and x64 versions do _not_ produce the same results, as the // algorithms are optimized for their respective platforms. You can still // compile and run any of them on any platform, but your performance with the // non-native version will be less than optimal. /* * This version is taken from https://github.com/PeterScott/murmur3 * and modified to work with CUDA. */ // Including stdint.h is a pain in cupy, so just put the declarations in. // Beware that long int is not 64bit on all platforms! // e.g. Windows requires a 'long long' to get to 64 bit. typedef unsigned char uint8_t; typedef unsigned int uint32_t; typedef signed char int8_t; typedef int int32_t; const unsigned long int test_var = 0; const int size = sizeof(test_var); #if size == 64 typedef unsigned long int uint64_t; typedef long int int64_t; #else typedef unsigned long long uint64_t; typedef long long int64_t; #endif //----------------------------------------------------------------------------- // Platform-specific functions and macros #define FORCE_INLINE __device__ static inline FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r ) { return (x << r) | (x >> (64 - r)); } #define ROTL64(x,y) rotl64(x,y) #define BIG_CONSTANT(x) (x##LLU) //----------------------------------------------------------------------------- // Block read - if your platform needs to do endian-swapping or can only // handle aligned reads, do the conversion here #define getblock(p, i) (p[i]) //----------------------------------------------------------------------------- // Finalization mix - force all bits of a hash block to avalanche __device__ static inline FORCE_INLINE uint32_t fmix32 ( uint32_t h ) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; return h; } //---------- __device__ static inline FORCE_INLINE uint64_t fmix64 ( uint64_t k ) { k ^= k >> 33; k *= BIG_CONSTANT(0xff51afd7ed558ccd); k ^= k >> 33; k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); k ^= k >> 33; return k; } //----------------------------------------------------------------------------- __device__ void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; const int nblocks = len / 16; int i; uint64_t h1 = seed; uint64_t h2 = seed; uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); //---------- // body const uint64_t * blocks = (const uint64_t *)(data); for(i = 0; i < nblocks; i++) { uint64_t k1 = getblock(blocks,i*2+0); uint64_t k2 = getblock(blocks,i*2+1); k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; } //---------- // tail const uint8_t * tail = (const uint8_t*)(data + nblocks*16); uint64_t k1 = 0; uint64_t k2 = 0; switch(len & 15) { case 15: k2 ^= (uint64_t)(tail[14]) << 48; case 14: k2 ^= (uint64_t)(tail[13]) << 40; case 13: k2 ^= (uint64_t)(tail[12]) << 32; case 12: k2 ^= (uint64_t)(tail[11]) << 24; case 11: k2 ^= (uint64_t)(tail[10]) << 16; case 10: k2 ^= (uint64_t)(tail[ 9]) << 8; case 9: k2 ^= (uint64_t)(tail[ 8]) << 0; k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; case 8: k1 ^= (uint64_t)(tail[ 7]) << 56; case 7: k1 ^= (uint64_t)(tail[ 6]) << 48; case 6: k1 ^= (uint64_t)(tail[ 5]) << 40; case 5: k1 ^= (uint64_t)(tail[ 4]) << 32; case 4: k1 ^= (uint64_t)(tail[ 3]) << 24; case 3: k1 ^= (uint64_t)(tail[ 2]) << 16; case 2: k1 ^= (uint64_t)(tail[ 1]) << 8; case 1: k1 ^= (uint64_t)(tail[ 0]) << 0; k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; }; //---------- // finalization h1 ^= len; h2 ^= len; h1 += h2; h2 += h1; h1 = fmix64(h1); h2 = fmix64(h2); h1 += h2; h2 += h1; ((uint64_t*)out)[0] = h1; ((uint64_t*)out)[1] = h2; } //----------------------------------------------------------------------------- // Write a stream of hash values for an input stream. Each output may have up to 128 bits // of entropy. Input size should be specified in bytes. extern "C" __global__ void hash_data(char* dest, const char* src, size_t out_size, size_t in_size, size_t n_items, uint32_t seed) { char entropy[16]; // 128/8=16 int _loop_start = blockIdx.x * blockDim.x + threadIdx.x; int _loop_stride = blockDim.x * gridDim.x; for (int i = _loop_start; i < n_items; i += _loop_stride) { const char* src_i = &src[i*in_size]; char* dest_i = &dest[i*out_size]; MurmurHash3_x64_128(src_i, in_size, seed, entropy); for (int j=0; j < out_size; ++j) dest_i[j] = entropy[j]; } }