From 5d6d07a006117b91930b6b9b7fb23bc2d7e42fdb Mon Sep 17 00:00:00 2001 From: Oleksandr Melnyk Date: Sat, 11 Jun 2022 17:21:10 +0300 Subject: [PATCH] Inline xxHash64 --- .../__inline__xxHash64.cs | 247 ++++++++++++++++ src/Standart.Hash.xxHash/xxHash64.XXH64.cs | 267 ++++-------------- src/Standart.Hash.xxHash/xxHash64.cs | 13 +- 3 files changed, 315 insertions(+), 212 deletions(-) create mode 100644 src/Standart.Hash.xxHash/__inline__xxHash64.cs diff --git a/src/Standart.Hash.xxHash/__inline__xxHash64.cs b/src/Standart.Hash.xxHash/__inline__xxHash64.cs new file mode 100644 index 0000000..fdbbf38 --- /dev/null +++ b/src/Standart.Hash.xxHash/__inline__xxHash64.cs @@ -0,0 +1,247 @@ +/* +* This is auto-generated code. +* Most XXH64 helper calls (round, merge-round, avalanche) are inlined here. +* Please don't try to analyze it. +*/ + +using System.Runtime.CompilerServices; + +namespace Standart.Hash.xxHash; + +public partial class xxHash64 +{ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe ulong __inline__XXH64(byte* input, int len, ulong seed) + { + ulong h64; + + if (len >= 32) + { + byte* end = input + len; + byte* limit = end - 31; + + ulong v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2; + ulong v2 = seed + XXH_PRIME64_2; + ulong v3 = seed + 0; + ulong v4 = seed - XXH_PRIME64_1; + + do + { + // XXH64_round + v1 += *((ulong*) input) * XXH_PRIME64_2; + v1 = (v1 << 31) | (v1 >> (64 - 31)); + v1 *= XXH_PRIME64_1; + input += 8; + + // XXH64_round + v2 += *((ulong*) input) * XXH_PRIME64_2; + v2 = (v2 << 31) | (v2 >> (64 - 31)); + v2 *= XXH_PRIME64_1; + input += 8; + + // XXH64_round + v3 += *((ulong*) input) * XXH_PRIME64_2; + v3 = (v3 << 31) | (v3 >> (64 - 31)); + v3 *= XXH_PRIME64_1; + input += 8; + + // XXH64_round + v4 += *((ulong*) input) * XXH_PRIME64_2; + v4 = (v4 << 31) | (v4 >> (64 - 31)); + v4 *= XXH_PRIME64_1; + input += 8; + } while (input < limit); + + h64 = ((v1 << 1) | (v1 >> (64 - 1))) + + ((v2 << 7) | (v2 >> (64 - 7))) + + ((v3 << 12) | (v3 >> (64 - 12))) + + 
((v4 << 18) | (v4 >> (64 - 18))); + + // XXH64_mergeRound + v1 *= XXH_PRIME64_2; + v1 = (v1 << 31) | (v1 >> (64 - 31)); + v1 *= XXH_PRIME64_1; + h64 ^= v1; + h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; + + // XXH64_mergeRound + v2 *= XXH_PRIME64_2; + v2 = (v2 << 31) | (v2 >> (64 - 31)); + v2 *= XXH_PRIME64_1; + h64 ^= v2; + h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; + + // XXH64_mergeRound + v3 *= XXH_PRIME64_2; + v3 = (v3 << 31) | (v3 >> (64 - 31)); + v3 *= XXH_PRIME64_1; + h64 ^= v3; + h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; + + // XXH64_mergeRound + v4 *= XXH_PRIME64_2; + v4 = (v4 << 31) | (v4 >> (64 - 31)); + v4 *= XXH_PRIME64_1; + h64 ^= v4; + h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; + } + else + { + h64 = seed + XXH_PRIME64_5; + } + + h64 += (ulong) len; + + // XXH64_finalize + len &= 31; + while (len >= 8) { + ulong k1 = XXH64_round(0, *(ulong*)input); + input += 8; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4; + len -= 8; + } + if (len >= 4) { + h64 ^= *(uint*)input * XXH_PRIME64_1; + input += 4; + h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; + len -= 4; + } + while (len > 0) { + h64 ^= (*input++) * XXH_PRIME64_5; + h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1; + --len; + } + + // XXH64_avalanche + h64 ^= h64 >> 33; + h64 *= XXH_PRIME64_2; + h64 ^= h64 >> 29; + h64 *= XXH_PRIME64_3; + h64 ^= h64 >> 32; + + return h64; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe void __inline__XXH64_stream_process(byte[] input, int len, ref ulong v1, ref ulong v2, ref ulong v3, + ref ulong v4) + { + fixed (byte* pData = &input[0]) + { + byte* ptr = pData; + byte* limit = ptr + len; + + do + { + // XXH64_round + v1 += *((ulong*) ptr) * XXH_PRIME64_2; + v1 = (v1 << 31) | (v1 >> (64 - 31)); + v1 *= XXH_PRIME64_1; + ptr += 8; + + // XXH64_round + v2 += *((ulong*) ptr) * XXH_PRIME64_2; + v2 = (v2 << 31) | (v2 >> (64 - 31)); + v2 *= XXH_PRIME64_1; + ptr += 8; + + // XXH64_round + v3 += *((ulong*) ptr) 
* XXH_PRIME64_2; + v3 = (v3 << 31) | (v3 >> (64 - 31)); + v3 *= XXH_PRIME64_1; + ptr += 8; + + // XXH64_round + v4 += *((ulong*) ptr) * XXH_PRIME64_2; + v4 = (v4 << 31) | (v4 >> (64 - 31)); + v4 *= XXH_PRIME64_1; + ptr += 8; + } while (ptr < limit); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe ulong __inline__XXH64_stream_finalize(byte[] input, int len, ref ulong v1, ref ulong v2, ref ulong v3, + ref ulong v4, long length, ulong seed) + { + fixed (byte* pData = &input[0]) + { + byte* ptr = pData; + byte* end = pData + len; + ulong h64; + + if (length >= 32) + { + h64 = ((v1 << 1) | (v1 >> (64 - 1))) + + ((v2 << 7) | (v2 >> (64 - 7))) + + ((v3 << 12) | (v3 >> (64 - 12))) + + ((v4 << 18) | (v4 >> (64 - 18))); + + // XXH64_mergeRound + v1 *= XXH_PRIME64_2; + v1 = (v1 << 31) | (v1 >> (64 - 31)); + v1 *= XXH_PRIME64_1; + h64 ^= v1; + h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; + + // XXH64_mergeRound + v2 *= XXH_PRIME64_2; + v2 = (v2 << 31) | (v2 >> (64 - 31)); + v2 *= XXH_PRIME64_1; + h64 ^= v2; + h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; + + // XXH64_mergeRound + v3 *= XXH_PRIME64_2; + v3 = (v3 << 31) | (v3 >> (64 - 31)); + v3 *= XXH_PRIME64_1; + h64 ^= v3; + h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; + + // XXH64_mergeRound + v4 *= XXH_PRIME64_2; + v4 = (v4 << 31) | (v4 >> (64 - 31)); + v4 *= XXH_PRIME64_1; + h64 ^= v4; + h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; + } + else + { + h64 = seed + XXH_PRIME64_5; + } + + h64 += (ulong) length; + + // XXH64_finalize + len &= 31; + while (len >= 8) { + ulong k1 = XXH64_round(0, *(ulong*)ptr); + ptr += 8; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4; + len -= 8; + } + if (len >= 4) { + h64 ^= *(uint*)ptr * XXH_PRIME64_1; + ptr += 4; + h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; + len -= 4; + } + while (len > 0) { + h64 ^= (*ptr++) * XXH_PRIME64_5; + h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1; + --len; + } + + // XXH64_avalanche + h64 ^= h64 >> 
33; + h64 *= XXH_PRIME64_2; + h64 ^= h64 >> 29; + h64 *= XXH_PRIME64_3; + h64 ^= h64 >> 32; + + return h64; + } + } +} \ No newline at end of file diff --git a/src/Standart.Hash.xxHash/xxHash64.XXH64.cs b/src/Standart.Hash.xxHash/xxHash64.XXH64.cs index 7bc1c8e..5c04f8f 100644 --- a/src/Standart.Hash.xxHash/xxHash64.XXH64.cs +++ b/src/Standart.Hash.xxHash/xxHash64.XXH64.cs @@ -7,15 +7,14 @@ namespace Standart.Hash.xxHash public static partial class xxHash64 { [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe ulong XXH64_internal(byte* input, int len, ulong seed) + private static unsafe ulong XXH64(byte* input, int len, ulong seed) { - byte* end = input + len; ulong h64; if (len >= 32) - { - byte* limit = end - 32; + byte* end = input + len; + byte* limit = end - 31; ulong v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2; ulong v2 = seed + XXH_PRIME64_2; @@ -24,64 +23,21 @@ namespace Standart.Hash.xxHash do { - // XXH64_round - v1 += *((ulong*)input) * XXH_PRIME64_2; - v1 = XXH_rotl64(v1, 31); - v1 *= XXH_PRIME64_1; - input += 8; - - // XXH64_round - v2 += *((ulong*)input) * XXH_PRIME64_2; - v2 = XXH_rotl64(v2, 31); - v2 *= XXH_PRIME64_1; - input += 8; - - // XXH64_round - v3 += *((ulong*)input) * XXH_PRIME64_2; - v3 = XXH_rotl64(v3, 31); - v3 *= XXH_PRIME64_1; - input += 8; - - // XXH64_round - v4 += *((ulong*)input) * XXH_PRIME64_2; - v4 = XXH_rotl64(v4, 31); - v4 *= XXH_PRIME64_1; - input += 8; - - } while (input <= limit); + v1 = XXH64_round(v1, *(ulong*) input); input += 8; + v2 = XXH64_round(v2, *(ulong*) input); input += 8; + v3 = XXH64_round(v3, *(ulong*) input); input += 8; + v4 = XXH64_round(v4, *(ulong*) input); input += 8; + } while (input < limit); h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - - // XXH64_mergeRound - v1 *= XXH_PRIME64_2; - v1 = XXH_rotl64(v1, 31); - v1 *= XXH_PRIME64_1; - h64 ^= v1; - h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; - - // XXH64_mergeRound - v2 *= XXH_PRIME64_2; - 
v2 = XXH_rotl64(v2, 31); - v2 *= XXH_PRIME64_1; - h64 ^= v2; - h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; - - // XXH64_mergeRound - v3 *= XXH_PRIME64_2; - v3 = XXH_rotl64(v3, 31); - v3 *= XXH_PRIME64_1; - h64 ^= v3; - h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; - - // XXH64_mergeRound - v4 *= XXH_PRIME64_2; - v4 = XXH_rotl64(v4, 31); - v4 *= XXH_PRIME64_1; - h64 ^= v4; - h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; + + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); } else { @@ -89,165 +45,62 @@ namespace Standart.Hash.xxHash } h64 += (ulong)len; - - // XXH64_finalize - while (input <= end - 8) - { - ulong t1 = *((ulong*)input) * XXH_PRIME64_2; - t1 = XXH_rotl64(t1, 31); - t1 *= XXH_PRIME64_1; - h64 ^= t1; - h64 = XXH_rotl64(h64, 27) * XXH_PRIME64_1 + XXH_PRIME64_4; - input += 8; - } - - if (input <= end - 4) - { - h64 ^= *((uint*)input) * XXH_PRIME64_1; - h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; - input += 4; - } - - while (input < end) - { - h64 ^= *((byte*)input) * XXH_PRIME64_5; - h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1; - input += 1; - } - - // XXH64_avalanche - h64 ^= h64 >> 33; - h64 *= XXH_PRIME64_2; - h64 ^= h64 >> 29; - h64 *= XXH_PRIME64_3; - h64 ^= h64 >> 32; - - return h64; + + return XXH64_finalize(h64, input, len); } - + [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe void __XXH64_stream_align(byte[] input, int len, ref ulong v1, ref ulong v2, ref ulong v3, ref ulong v4) + private static ulong XXH64_round(ulong acc, ulong input) { - fixed (byte* pData = &input[0]) - { - byte* ptr = pData; - byte* limit = ptr + len; - - do - { - // XXH64_round - v1 += *((ulong*)ptr) * XXH_PRIME64_2; - v1 = XXH_rotl64(v1, 31); - v1 *= XXH_PRIME64_1; - ptr += 8; - - // XXH64_round - v2 += *((ulong*)ptr) * XXH_PRIME64_2; - v2 = XXH_rotl64(v2, 31); - v2 *= XXH_PRIME64_1; - ptr += 8; - - // XXH64_round - v3 += *((ulong*)ptr) * XXH_PRIME64_2; 
- v3 = XXH_rotl64(v3, 31); - v3 *= XXH_PRIME64_1; - ptr += 8; - - // XXH64_round - v4 += *((ulong*)ptr) * XXH_PRIME64_2; - v4 = XXH_rotl64(v4, 31); - v4 *= XXH_PRIME64_1; - ptr += 8; - - } while (ptr < limit); - } + acc += input * XXH_PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= XXH_PRIME64_1; + return acc; } - + [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe ulong __XXH64_stream_finalize(byte[] input, int len, ref ulong v1, ref ulong v2, ref ulong v3, ref ulong v4, long length, ulong seed) + private static ulong XXH64_mergeRound(ulong acc, ulong val) { - fixed (byte* pData = &input[0]) - { - byte* ptr = pData; - byte* end = pData + len; - ulong h64; + val = XXH64_round(0, val); + acc ^= val; + acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4; + return acc; + } - if (length >= 32) - { - h64 = XXH_rotl64(v1, 1) + - XXH_rotl64(v2, 7) + - XXH_rotl64(v3, 12) + - XXH_rotl64(v4, 18); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ulong XXH64_avalanche(ulong hash) + { + hash ^= hash >> 33; + hash *= XXH_PRIME64_2; + hash ^= hash >> 29; + hash *= XXH_PRIME64_3; + hash ^= hash >> 32; + return hash; + } - // XXH64_mergeRound - v1 *= XXH_PRIME64_2; - v1 = XXH_rotl64(v1, 31); - v1 *= XXH_PRIME64_1; - h64 ^= v1; - h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; - - // XXH64_mergeRound - v2 *= XXH_PRIME64_2; - v2 = XXH_rotl64(v2, 31); - v2 *= XXH_PRIME64_1; - h64 ^= v2; - h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; - - // XXH64_mergeRound - v3 *= XXH_PRIME64_2; - v3 = XXH_rotl64(v3, 31); - v3 *= XXH_PRIME64_1; - h64 ^= v3; - h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; - - // XXH64_mergeRound - v4 *= XXH_PRIME64_2; - v4 = XXH_rotl64(v4, 31); - v4 *= XXH_PRIME64_1; - h64 ^= v4; - h64 = h64 * XXH_PRIME64_1 + XXH_PRIME64_4; - } - else - { - h64 = seed + XXH_PRIME64_5; - } - - h64 += (ulong)length; - - // XXH64_finalize - while (ptr <= end - 8) - { - ulong t1 = *((ulong*)ptr) * XXH_PRIME64_2; - t1 = XXH_rotl64(t1, 31); - t1 *= XXH_PRIME64_1; - 
h64 ^= t1; - h64 = XXH_rotl64(h64, 27) * XXH_PRIME64_1 + XXH_PRIME64_4; - ptr += 8; - } - - if (ptr <= end - 4) - { - h64 ^= *((uint*)ptr) * XXH_PRIME64_1; - h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; - ptr += 4; - } - - while (ptr < end) - { - h64 ^= *((byte*)ptr) * XXH_PRIME64_5; - h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1; - ptr += 1; - } - - // XXH64_avalanche - h64 ^= h64 >> 33; - h64 *= XXH_PRIME64_2; - h64 ^= h64 >> 29; - h64 *= XXH_PRIME64_3; - h64 ^= h64 >> 32; - - return h64; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe ulong XXH64_finalize(ulong hash, byte* ptr, int len) + { + len &= 31; + while (len >= 8) { + ulong k1 = XXH64_round(0, *(ulong*)ptr); + ptr += 8; + hash ^= k1; + hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4; + len -= 8; } + if (len >= 4) { + hash ^= *(uint*)ptr * XXH_PRIME64_1; + ptr += 4; + hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; + len -= 4; + } + while (len > 0) { + hash ^= (*ptr++) * XXH_PRIME64_5; + hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1; + --len; + } + return XXH64_avalanche(hash); } } } diff --git a/src/Standart.Hash.xxHash/xxHash64.cs b/src/Standart.Hash.xxHash/xxHash64.cs index 693b4bd..63becdb 100644 --- a/src/Standart.Hash.xxHash/xxHash64.cs +++ b/src/Standart.Hash.xxHash/xxHash64.cs @@ -119,7 +119,7 @@ public static partial class xxHash64 int l = offset - r; // length // Process the next chunk - __XXH64_stream_align(buffer, l, ref v1, ref v2, ref v3, ref v4); + __inline__XXH64_stream_process(buffer, l, ref v1, ref v2, ref v3, ref v4); // Put remaining bytes to buffer Utils.BlockCopy(buffer, l, buffer, 0, r); @@ -127,7 +127,7 @@ public static partial class xxHash64 } // Process the final chunk - ulong h64 = __XXH64_stream_finalize(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed); + ulong h64 = __inline__XXH64_stream_finalize(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed); return h64; } @@ -215,7 +215,7 @@ public 
static partial class xxHash64 int l = offset - r; // length // Process the next chunk - __XXH64_stream_align(buffer, l, ref v1, ref v2, ref v3, ref v4); + __inline__XXH64_stream_process(buffer, l, ref v1, ref v2, ref v3, ref v4); // Put remaining bytes to buffer Utils.BlockCopy(buffer, l, buffer, 0, r); @@ -223,7 +223,7 @@ public static partial class xxHash64 } // Process the final chunk - ulong h64 = __XXH64_stream_finalize(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed); + ulong h64 = __inline__XXH64_stream_finalize(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed); return h64; } @@ -256,6 +256,9 @@ public static partial class xxHash64 [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe ulong UnsafeComputeHash(byte* ptr, int length, ulong seed) { - return XXH64_internal(ptr, length, seed); + // Use inlined version + // return XXH64(ptr, length, seed); + + return __inline__XXH64(ptr, length, seed); } } \ No newline at end of file