Inline 32 & 128

This commit is contained in:
Oleksandr Melnyk 2022-06-09 23:22:22 +03:00
parent a551f65e09
commit d7cf848606
5 changed files with 239 additions and 137 deletions

View File

@ -13,7 +13,7 @@ namespace Standart.Hash.xxHash;
public static partial class xxHash128 public static partial class xxHash128
{ {
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe uint128 __XXH3_128bits_internal(byte* input, int len, ulong seed, byte* secret, int secretLen) private static unsafe uint128 __inline__XXH3_128bits_internal(byte* input, int len, ulong seed, byte* secret, int secretLen)
{ {
if (len <= 16) if (len <= 16)
{ {

View File

@ -0,0 +1,183 @@
/*
 * This is auto-generated code.
 * All function calls are inlined into XXH32.
 * Please don't try to analyze it.
*/
using System.Runtime.CompilerServices;
namespace Standart.Hash.xxHash;
public partial class xxHash32
{
    /// <summary>
    /// Computes the 32-bit xxHash (XXH32) of <paramref name="len"/> bytes starting at
    /// <paramref name="input"/>. The round, finalize, and avalanche steps are written
    /// out inline (no helper calls) — this file is auto-generated for that purpose.
    /// The XXH_PRIME32_* constants are declared elsewhere in this partial class.
    /// </summary>
    /// <param name="input">Pointer to the first byte of the data to hash.</param>
    /// <param name="len">Number of bytes to read from <paramref name="input"/>.</param>
    /// <param name="seed">Seed value that perturbs the resulting hash.</param>
    /// <returns>The 32-bit hash value.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe uint __inline__XXH32(byte* input, int len, uint seed)
    {
        uint h32;
        if (len >= 16)
        {
            byte* end = input + len;
            // Loop runs while input < limit, i.e. while at least 16 bytes remain.
            byte* limit = end - 15;
            // Four independent accumulator lanes, initialized per the XXH32 spec.
            uint v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
            uint v2 = seed + XXH_PRIME32_2;
            uint v3 = seed + 0;
            uint v4 = seed - XXH_PRIME32_1;
            do
            {
                // XXH32_round (lane 1): mix in 4 bytes, rotate left 13, multiply.
                v1 += *((uint*) input) * XXH_PRIME32_2;
                v1 = (v1 << 13) | (v1 >> (32 - 13));
                v1 *= XXH_PRIME32_1;
                input += 4;
                // XXH32_round (lane 2)
                v2 += *((uint*) input) * XXH_PRIME32_2;
                v2 = (v2 << 13) | (v2 >> (32 - 13));
                v2 *= XXH_PRIME32_1;
                input += 4;
                // XXH32_round (lane 3)
                v3 += *((uint*) input) * XXH_PRIME32_2;
                v3 = (v3 << 13) | (v3 >> (32 - 13));
                v3 *= XXH_PRIME32_1;
                input += 4;
                // XXH32_round (lane 4)
                v4 += *((uint*) input) * XXH_PRIME32_2;
                v4 = (v4 << 13) | (v4 >> (32 - 13));
                v4 *= XXH_PRIME32_1;
                input += 4;
            } while (input < limit);
            // Converge the four lanes using distinct left-rotations (1, 7, 12, 18).
            h32 = ((v1 << 1) | (v1 >> (32 - 1))) +
                  ((v2 << 7) | (v2 >> (32 - 7))) +
                  ((v3 << 12) | (v3 >> (32 - 12))) +
                  ((v4 << 18) | (v4 >> (32 - 18)));
        }
        else
        {
            // Input shorter than 16 bytes: no lane accumulation, start from seed + prime5.
            h32 = seed + XXH_PRIME32_5;
        }
        h32 += (uint) len;
        // XXH32_finalize: consume the 0..15 bytes left after the 16-byte stripes.
        len &= 15;
        while (len >= 4)
        {
            h32 += *((uint*) input) * XXH_PRIME32_3;
            input += 4;
            h32 = ((h32 << 17) | (h32 >> (32 - 17))) * XXH_PRIME32_4;
            len -= 4;
        }
        while (len > 0)
        {
            h32 += *((byte*) input) * XXH_PRIME32_5;
            ++input;
            h32 = ((h32 << 11) | (h32 >> (32 - 11))) * XXH_PRIME32_1;
            --len;
        }
        // XXH32_avalanche: final bit mixing so every input bit affects every output bit.
        h32 ^= h32 >> 15;
        h32 *= XXH_PRIME32_2;
        h32 ^= h32 >> 13;
        h32 *= XXH_PRIME32_3;
        h32 ^= h32 >> 16;
        return h32;
    }

    /// <summary>
    /// Streaming helper: folds <paramref name="len"/> bytes of <paramref name="input"/>
    /// into the four running accumulator lanes using the unrolled XXH32 round.
    /// Each do/while iteration consumes exactly 16 bytes; the length is not validated
    /// here — NOTE(review): callers appear expected to pass a multiple of 16, confirm
    /// at call sites, otherwise the loop reads past <paramref name="len"/>.
    /// </summary>
    /// <param name="input">Managed buffer holding the chunk to process (pinned inside).</param>
    /// <param name="len">Number of bytes of <paramref name="input"/> to consume.</param>
    /// <param name="v1">Accumulator lane 1, updated in place.</param>
    /// <param name="v2">Accumulator lane 2, updated in place.</param>
    /// <param name="v3">Accumulator lane 3, updated in place.</param>
    /// <param name="v4">Accumulator lane 4, updated in place.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe void __inline__XXH32_stream_process(byte[] input, int len, ref uint v1, ref uint v2, ref uint v3, ref uint v4)
    {
        fixed (byte* pData = &input[0])
        {
            byte* ptr = pData;
            byte* limit = ptr + len;
            do
            {
                // XXH32_round (lane 1)
                v1 += *((uint*)ptr) * XXH_PRIME32_2;
                v1 = (v1 << 13) | (v1 >> (32 - 13));
                v1 *= XXH_PRIME32_1;
                ptr += 4;
                // XXH32_round (lane 2)
                v2 += *((uint*)ptr) * XXH_PRIME32_2;
                v2 = (v2 << 13) | (v2 >> (32 - 13));
                v2 *= XXH_PRIME32_1;
                ptr += 4;
                // XXH32_round (lane 3)
                v3 += *((uint*)ptr) * XXH_PRIME32_2;
                v3 = (v3 << 13) | (v3 >> (32 - 13));
                v3 *= XXH_PRIME32_1;
                ptr += 4;
                // XXH32_round (lane 4)
                v4 += *((uint*)ptr) * XXH_PRIME32_2;
                v4 = (v4 << 13) | (v4 >> (32 - 13));
                v4 *= XXH_PRIME32_1;
                ptr += 4;
            } while (ptr < limit);
        }
    }

    /// <summary>
    /// Streaming helper: produces the final 32-bit hash from the accumulator lanes and
    /// the buffered, not-yet-processed tail bytes in <paramref name="input"/>.
    /// The lanes are passed by ref but are only read here (never written).
    /// </summary>
    /// <param name="input">Buffer whose leading bytes are the unprocessed tail.</param>
    /// <param name="len">Tail byte count; only the low 4 bits (len mod 16) are consumed.</param>
    /// <param name="v1">Accumulator lane 1 (read-only here).</param>
    /// <param name="v2">Accumulator lane 2 (read-only here).</param>
    /// <param name="v3">Accumulator lane 3 (read-only here).</param>
    /// <param name="v4">Accumulator lane 4 (read-only here).</param>
    /// <param name="length">Total number of bytes hashed across the whole stream;
    /// selects the lane-merge vs. short-input path and is folded into the hash.</param>
    /// <param name="seed">Seed used for the short-input (length &lt; 16) path.</param>
    /// <returns>The 32-bit hash value.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe uint __inline__XXH32_stream_finalize(byte[] input, int len, ref uint v1, ref uint v2, ref uint v3, ref uint v4, long length, uint seed)
    {
        fixed (byte* pData = &input[0])
        {
            byte* ptr = pData;
            uint h32;
            if (length >= 16)
            {
                // Converge the four lanes using distinct left-rotations (1, 7, 12, 18).
                h32 = ((v1 << 1) | (v1 >> (32 - 1))) +
                      ((v2 << 7) | (v2 >> (32 - 7))) +
                      ((v3 << 12) | (v3 >> (32 - 12))) +
                      ((v4 << 18) | (v4 >> (32 - 18)));
            }
            else
            {
                // Whole stream was shorter than 16 bytes: lanes never ran.
                h32 = seed + XXH_PRIME32_5;
            }
            // Note: the TOTAL stream length is mixed in, not the tail length.
            h32 += (uint)length;
            // XXH32_finalize: consume the 0..15 buffered tail bytes.
            len &= 15;
            while (len >= 4)
            {
                h32 += *((uint*)ptr) * XXH_PRIME32_3;
                ptr += 4;
                h32 = ((h32 << 17) | (h32 >> (32 - 17))) * XXH_PRIME32_4;
                len -= 4;
            }
            while (len > 0)
            {
                h32 += *((byte*)ptr) * XXH_PRIME32_5;
                ptr++;
                h32 = ((h32 << 11) | (h32 >> (32 - 11))) * XXH_PRIME32_1;
                len--;
            }
            // XXH32_avalanche: final bit mixing so every input bit affects every output bit.
            h32 ^= h32 >> 15;
            h32 *= XXH_PRIME32_2;
            h32 ^= h32 >> 13;
            h32 *= XXH_PRIME32_3;
            h32 ^= h32 >> 16;
            return h32;
        }
    }
}

View File

@ -166,7 +166,10 @@ namespace Standart.Hash.xxHash
{ {
fixed (byte* secret = &XXH3_SECRET[0]) fixed (byte* secret = &XXH3_SECRET[0])
{ {
return __XXH3_128bits_internal(input, len, seed, secret, XXH3_SECRET_DEFAULT_SIZE); // Use inlined version
// return XXH3_128bits_internal(input, len, seed, secret, XXH3_SECRET_DEFAULT_SIZE);
return __inline__XXH3_128bits_internal(input, len, seed, secret, XXH3_SECRET_DEFAULT_SIZE);
} }
} }
} }

View File

@ -7,14 +7,14 @@ namespace Standart.Hash.xxHash
public static partial class xxHash32 public static partial class xxHash32
{ {
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe uint XXH32_internal(byte* input, int len, uint seed) private static unsafe uint XXH32(byte* input, int len, uint seed)
{ {
byte* end = input + len;
uint h32; uint h32;
if (len >= 16) if (len >= 16)
{ {
byte* limit = end - 16; byte* end = input + len;
byte* limit = end - 15;
uint v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; uint v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
uint v2 = seed + XXH_PRIME32_2; uint v2 = seed + XXH_PRIME32_2;
@ -23,31 +23,11 @@ namespace Standart.Hash.xxHash
do do
{ {
// XXH32_round v1 = XXH32_round(v1, *(uint*) input); input += 4;
v1 += *((uint*)input) * XXH_PRIME32_2; v2 = XXH32_round(v2, *(uint*) input); input += 4;
v1 = XXH_rotl32(v1, 13); v3 = XXH32_round(v3, *(uint*) input); input += 4;
v1 *= XXH_PRIME32_1; v4 = XXH32_round(v4, *(uint*) input); input += 4;
input += 4; } while (input < limit);
// XXH32_round
v2 += *((uint*)input) * XXH_PRIME32_2;
v2 = XXH_rotl32(v2, 13);
v2 *= XXH_PRIME32_1;
input += 4;
// XXH32_round
v3 += *((uint*)input) * XXH_PRIME32_2;
v3 = XXH_rotl32(v3, 13);
v3 *= XXH_PRIME32_1;
input += 4;
// XXH32_round
v4 += *((uint*)input) * XXH_PRIME32_2;
v4 = XXH_rotl32(v4, 13);
v4 *= XXH_PRIME32_1;
input += 4;
} while (input <= limit);
h32 = XXH_rotl32(v1, 1) + h32 = XXH_rotl32(v1, 1) +
XXH_rotl32(v2, 7) + XXH_rotl32(v2, 7) +
@ -61,116 +41,50 @@ namespace Standart.Hash.xxHash
h32 += (uint)len; h32 += (uint)len;
// XXH32_finalize return XXH32_finalize(h32, input, len);
while (input <= end - 4)
{
h32 += *((uint*)input) * XXH_PRIME32_3;
h32 = XXH_rotl32(h32, 17) * XXH_PRIME32_4;
input += 4;
}
while (input < end)
{
h32 += *((byte*)input) * XXH_PRIME32_5;
h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1;
input += 1;
}
// XXH32_avalanche
h32 ^= h32 >> 15;
h32 *= XXH_PRIME32_2;
h32 ^= h32 >> 13;
h32 *= XXH_PRIME32_3;
h32 ^= h32 >> 16;
return h32;
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void __XXH32_stream_align(byte[] input, int len, ref uint v1, ref uint v2, ref uint v3, ref uint v4) private static uint XXH32_round(uint acc, uint input)
{ {
fixed (byte* pData = &input[0]) acc += input * XXH_PRIME32_2;
{ acc = XXH_rotl32(acc, 13);
byte* ptr = pData; acc *= XXH_PRIME32_1;
byte* limit = ptr + len; return acc;
do
{
// XXH32_round
v1 += *((uint*)ptr) * XXH_PRIME32_2;
v1 = XXH_rotl32(v1, 13);
v1 *= XXH_PRIME32_1;
ptr += 4;
// XXH32_round
v2 += *((uint*)ptr) * XXH_PRIME32_2;
v2 = XXH_rotl32(v2, 13);
v2 *= XXH_PRIME32_1;
ptr += 4;
// XXH32_round
v3 += *((uint*)ptr) * XXH_PRIME32_2;
v3 = XXH_rotl32(v3, 13);
v3 *= XXH_PRIME32_1;
ptr += 4;
// XXH32_round
v4 += *((uint*)ptr) * XXH_PRIME32_2;
v4 = XXH_rotl32(v4, 13);
v4 *= XXH_PRIME32_1;
ptr += 4;
} while (ptr < limit);
}
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe uint __XXH32_stream_finalize(byte[] input, int len, ref uint v1, ref uint v2, ref uint v3, ref uint v4, long length, uint seed) private static uint XXH32_avalanche(uint hash)
{ {
fixed (byte* pData = &input[0]) hash ^= hash >> 15;
{ hash *= XXH_PRIME32_2;
byte* ptr = pData; hash ^= hash >> 13;
byte* end = pData + len; hash *= XXH_PRIME32_3;
uint h32; hash ^= hash >> 16;
return hash;
if (length >= 16)
{
h32 = XXH_rotl32(v1, 1) +
XXH_rotl32(v2, 7) +
XXH_rotl32(v3, 12) +
XXH_rotl32(v4, 18);
}
else
{
h32 = seed + XXH_PRIME32_5;
} }
h32 += (uint)length; [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe uint XXH32_finalize(uint hash, byte* ptr, int len)
// XXH32_finalize
while (ptr <= end - 4)
{ {
h32 += *((uint*)ptr) * XXH_PRIME32_3; len &= 15;
h32 = XXH_rotl32(h32, 17) * XXH_PRIME32_4; while (len >= 4)
{
hash += *((uint*)ptr) * XXH_PRIME32_3;
ptr += 4; ptr += 4;
hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4;
len -= 4;
} }
while (ptr < end) while (len > 0)
{ {
h32 += *((byte*)ptr) * XXH_PRIME32_5; hash += *((byte*)ptr) * XXH_PRIME32_5;
h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1; ptr++;
ptr += 1; hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1;
len--;
} }
// XXH32_avalanche return XXH32_avalanche(hash);
h32 ^= h32 >> 15;
h32 *= XXH_PRIME32_2;
h32 ^= h32 >> 13;
h32 *= XXH_PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
} }
} }
} }

View File

@ -86,8 +86,7 @@ public static partial class xxHash32
/// <param name="seed">The seed number</param> /// <param name="seed">The seed number</param>
/// <param name="cancellationToken">The cancellation token</param> /// <param name="cancellationToken">The cancellation token</param>
/// <returns>The hash</returns> /// <returns>The hash</returns>
public static async ValueTask<uint> ComputeHashAsync(Stream stream, int bufferSize, uint seed, public static async ValueTask<uint> ComputeHashAsync(Stream stream, int bufferSize, uint seed, CancellationToken cancellationToken)
CancellationToken cancellationToken)
{ {
Debug.Assert(stream != null); Debug.Assert(stream != null);
Debug.Assert(bufferSize > 16); Debug.Assert(bufferSize > 16);
@ -120,7 +119,7 @@ public static partial class xxHash32
int l = offset - r; // length int l = offset - r; // length
// Process the next chunk // Process the next chunk
__XXH32_stream_align(buffer, l, ref v1, ref v2, ref v3, ref v4); __inline__XXH32_stream_process(buffer, l, ref v1, ref v2, ref v3, ref v4);
// Put remaining bytes to buffer // Put remaining bytes to buffer
Utils.BlockCopy(buffer, l, buffer, 0, r); Utils.BlockCopy(buffer, l, buffer, 0, r);
@ -128,7 +127,7 @@ public static partial class xxHash32
} }
// Process the final chunk // Process the final chunk
uint h32 = __XXH32_stream_finalize(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed); uint h32 = __inline__XXH32_stream_finalize(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed);
return h32; return h32;
} }
@ -216,7 +215,7 @@ public static partial class xxHash32
int l = offset - r; // length int l = offset - r; // length
// Process the next chunk // Process the next chunk
__XXH32_stream_align(buffer, l, ref v1, ref v2, ref v3, ref v4); __inline__XXH32_stream_process(buffer, l, ref v1, ref v2, ref v3, ref v4);
// Put remaining bytes to buffer // Put remaining bytes to buffer
Utils.BlockCopy(buffer, l, buffer, 0, r); Utils.BlockCopy(buffer, l, buffer, 0, r);
@ -224,7 +223,7 @@ public static partial class xxHash32
} }
// Process the last chunk // Process the last chunk
uint h32 = __XXH32_stream_finalize(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed); uint h32 = __inline__XXH32_stream_finalize(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed);
return h32; return h32;
} }
@ -257,6 +256,9 @@ public static partial class xxHash32
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe uint UnsafeComputeHash(byte* ptr, int length, uint seed) private static unsafe uint UnsafeComputeHash(byte* ptr, int length, uint seed)
{ {
return XXH32_internal(ptr, length, seed); // Use inlined version
// return XXH32(ptr, length, seed);
return __inline__XXH32(ptr, length, seed);
} }
} }