Improve performance for XXH32 and XXH64

This commit is contained in:
Oleksandr Melnyk 2022-07-16 12:28:06 +03:00
parent 9ab0dd4290
commit 91b72f21db
5 changed files with 56 additions and 42 deletions

View File

@ -36,12 +36,12 @@ Runtime=.NET 6.0
| Method | x64 | | Method | x64 |
|:---------------|-----------:| |:---------------|-----------:|
| Hash32 Array | 5.87 GB/s | | Hash32 Array | 6.65 GB/s |
| Hash64 Array | 9.07 GB/s | | Hash64 Array | 12.28 GB/s |
| Hash128 Array | 12.04 GB/s | | Hash128 Array | 12.04 GB/s |
| Hash3 Array | 12.08 GB/s | | Hash3 Array | 12.08 GB/s |
| Hash32 Span | 5.87 GB/s | | Hash32 Span | 6.65 GB/s |
| Hash64 Span | 9.07 GB/s | | Hash64 Span | 12.28 GB/s |
| Hash128 Span | 12.04 GB/s | | Hash128 Span | 12.04 GB/s |
| Hash3 Span | 12.08 GB/s | | Hash3 Span | 12.08 GB/s |
| Hash32 Stream | 3.22 GB/s | | Hash32 Stream | 3.22 GB/s |
@ -51,10 +51,10 @@ Runtime=.NET 6.0
| Method | Platform | Language | 1KB Time | 1MB Time | 1GB Time | Speed | | Method | Platform | Language | 1KB Time | 1MB Time | 1GB Time | Speed |
|:-------------------|---------:|---------:|----------:|----------:|----------:|-----------:| |:-------------------|---------:|---------:|----------:|----------:|----------:|-----------:|
| Hash32 | x64 | C# | 151.5 ns | 143.4 us | 170.3 ms | 5.87 GB/s | | Hash32 | x64 | C# | 138.0 ns | 130.2 us | 150.3 ms | 6.65 GB/s |
| Hash32 | x64 | C | 138.5 ns | 129.5 us | 152.4 ms | 6.56 GB/s | | Hash32 | x64 | C | 140.2 ns | 129.6 us | 150.3 ms | 6.65 GB/s |
| Hash64 | x64 | C# | 84.6 ns | 77.9 us | 110.2 ms | 9.07 GB/s | | Hash64 | x64 | C# | 73.9 ns | 64.6 us | 81.4 ms | 12.28 GB/s |
| Hash64 | x64 | C | 74.2 ns | 64.8 us | 83.0 ms | 12.04 GB/s | | Hash64 | x64 | C | 75.5 ns | 65.2 us | 84.5 ms | 11.83 GB/s |
| Hash128 (SSE2/AVX2)| x64 | C# | 151.6 ns | 64.5 us | 80.5 ms | 12.04 GB/s | | Hash128 (SSE2/AVX2)| x64 | C# | 151.6 ns | 64.5 us | 80.5 ms | 12.04 GB/s |
| Hash128 (SSE2/AVX2)| x64 | C | 84.4 ns | 38.3 us | 57.4 ms | 17.42 GB/s | | Hash128 (SSE2/AVX2)| x64 | C | 84.4 ns | 38.3 us | 57.4 ms | 17.42 GB/s |
| Hash3 (SSE2/AVX2)| x64 | C# | 77.6 ns | 62.1 us | 78.5 ms | 12.08 GB/s | | Hash3 (SSE2/AVX2)| x64 | C# | 77.6 ns | 62.1 us | 78.5 ms | 12.08 GB/s |

View File

@ -27,23 +27,28 @@ public partial class xxHash32
do do
{ {
var reg1 = *((uint*)(input + 0));
var reg2 = *((uint*)(input + 4));
var reg3 = *((uint*)(input + 8));
var reg4 = *((uint*)(input + 12));
// XXH32_round // XXH32_round
v1 += *((uint*) (input+0)) * XXH_PRIME32_2; v1 += reg1 * XXH_PRIME32_2;
v1 = (v1 << 13) | (v1 >> (32 - 13)); v1 = (v1 << 13) | (v1 >> (32 - 13));
v1 *= XXH_PRIME32_1; v1 *= XXH_PRIME32_1;
// XXH32_round // XXH32_round
v2 += *((uint*) (input+4)) * XXH_PRIME32_2; v2 += reg2 * XXH_PRIME32_2;
v2 = (v2 << 13) | (v2 >> (32 - 13)); v2 = (v2 << 13) | (v2 >> (32 - 13));
v2 *= XXH_PRIME32_1; v2 *= XXH_PRIME32_1;
// XXH32_round // XXH32_round
v3 += *((uint*) (input+8)) * XXH_PRIME32_2; v3 += reg3 * XXH_PRIME32_2;
v3 = (v3 << 13) | (v3 >> (32 - 13)); v3 = (v3 << 13) | (v3 >> (32 - 13));
v3 *= XXH_PRIME32_1; v3 *= XXH_PRIME32_1;
// XXH32_round // XXH32_round
v4 += *((uint*) (input+12)) * XXH_PRIME32_2; v4 += reg4 * XXH_PRIME32_2;
v4 = (v4 << 13) | (v4 >> (32 - 13)); v4 = (v4 << 13) | (v4 >> (32 - 13));
v4 *= XXH_PRIME32_1; v4 *= XXH_PRIME32_1;
@ -100,23 +105,28 @@ public partial class xxHash32
do do
{ {
var reg1 = *((uint*)(ptr + 0));
var reg2 = *((uint*)(ptr + 4));
var reg3 = *((uint*)(ptr + 8));
var reg4 = *((uint*)(ptr + 12));
// XXH32_round // XXH32_round
v1 += *((uint*)(ptr + 0)) * XXH_PRIME32_2; v1 += reg1 * XXH_PRIME32_2;
v1 = (v1 << 13) | (v1 >> (32 - 13)); v1 = (v1 << 13) | (v1 >> (32 - 13));
v1 *= XXH_PRIME32_1; v1 *= XXH_PRIME32_1;
// XXH32_round // XXH32_round
v2 += *((uint*)(ptr + 4)) * XXH_PRIME32_2; v2 += reg2 * XXH_PRIME32_2;
v2 = (v2 << 13) | (v2 >> (32 - 13)); v2 = (v2 << 13) | (v2 >> (32 - 13));
v2 *= XXH_PRIME32_1; v2 *= XXH_PRIME32_1;
// XXH32_round // XXH32_round
v3 += *((uint*)(ptr + 8)) * XXH_PRIME32_2; v3 += reg3 * XXH_PRIME32_2;
v3 = (v3 << 13) | (v3 >> (32 - 13)); v3 = (v3 << 13) | (v3 >> (32 - 13));
v3 *= XXH_PRIME32_1; v3 *= XXH_PRIME32_1;
// XXH32_round // XXH32_round
v4 += *((uint*)(ptr + 12)) * XXH_PRIME32_2; v4 += reg4 * XXH_PRIME32_2;
v4 = (v4 << 13) | (v4 >> (32 - 13)); v4 = (v4 << 13) | (v4 >> (32 - 13));
v4 *= XXH_PRIME32_1; v4 *= XXH_PRIME32_1;

View File

@ -27,29 +27,31 @@ public partial class xxHash64
do do
{ {
var reg1 = *((ulong*)(input + 0));
var reg2 = *((ulong*)(input + 8));
var reg3 = *((ulong*)(input + 16));
var reg4 = *((ulong*)(input + 24));
// XXH64_round // XXH64_round
v1 += *((ulong*) input) * XXH_PRIME64_2; v1 += reg1 * XXH_PRIME64_2;
v1 = (v1 << 31) | (v1 >> (64 - 31)); v1 = (v1 << 31) | (v1 >> (64 - 31));
v1 *= XXH_PRIME64_1; v1 *= XXH_PRIME64_1;
input += 8;
// XXH64_round // XXH64_round
v2 += *((ulong*) input) * XXH_PRIME64_2; v2 += reg2 * XXH_PRIME64_2;
v2 = (v2 << 31) | (v2 >> (64 - 31)); v2 = (v2 << 31) | (v2 >> (64 - 31));
v2 *= XXH_PRIME64_1; v2 *= XXH_PRIME64_1;
input += 8;
// XXH64_round // XXH64_round
v3 += *((ulong*) input) * XXH_PRIME64_2; v3 += reg3 * XXH_PRIME64_2;
v3 = (v3 << 31) | (v3 >> (64 - 31)); v3 = (v3 << 31) | (v3 >> (64 - 31));
v3 *= XXH_PRIME64_1; v3 *= XXH_PRIME64_1;
input += 8;
// XXH64_round // XXH64_round
v4 += *((ulong*) input) * XXH_PRIME64_2; v4 += reg4 * XXH_PRIME64_2;
v4 = (v4 << 31) | (v4 >> (64 - 31)); v4 = (v4 << 31) | (v4 >> (64 - 31));
v4 *= XXH_PRIME64_1; v4 *= XXH_PRIME64_1;
input += 8; input += 32;
} while (input < limit); } while (input < limit);
h64 = ((v1 << 1) | (v1 >> (64 - 1))) + h64 = ((v1 << 1) | (v1 >> (64 - 1))) +
@ -134,29 +136,31 @@ public partial class xxHash64
do do
{ {
var reg1 = *((ulong*)(ptr + 0));
var reg2 = *((ulong*)(ptr + 8));
var reg3 = *((ulong*)(ptr + 16));
var reg4 = *((ulong*)(ptr + 24));
// XXH64_round // XXH64_round
v1 += *((ulong*) ptr) * XXH_PRIME64_2; v1 += reg1 * XXH_PRIME64_2;
v1 = (v1 << 31) | (v1 >> (64 - 31)); v1 = (v1 << 31) | (v1 >> (64 - 31));
v1 *= XXH_PRIME64_1; v1 *= XXH_PRIME64_1;
ptr += 8;
// XXH64_round // XXH64_round
v2 += *((ulong*) ptr) * XXH_PRIME64_2; v2 += reg2 * XXH_PRIME64_2;
v2 = (v2 << 31) | (v2 >> (64 - 31)); v2 = (v2 << 31) | (v2 >> (64 - 31));
v2 *= XXH_PRIME64_1; v2 *= XXH_PRIME64_1;
ptr += 8;
// XXH64_round // XXH64_round
v3 += *((ulong*) ptr) * XXH_PRIME64_2; v3 += reg3 * XXH_PRIME64_2;
v3 = (v3 << 31) | (v3 >> (64 - 31)); v3 = (v3 << 31) | (v3 >> (64 - 31));
v3 *= XXH_PRIME64_1; v3 *= XXH_PRIME64_1;
ptr += 8;
// XXH64_round // XXH64_round
v4 += *((ulong*) ptr) * XXH_PRIME64_2; v4 += reg4 * XXH_PRIME64_2;
v4 = (v4 << 31) | (v4 >> (64 - 31)); v4 = (v4 << 31) | (v4 >> (64 - 31));
v4 *= XXH_PRIME64_1; v4 *= XXH_PRIME64_1;
ptr += 8; ptr += 32;
} while (ptr < limit); } while (ptr < limit);
} }
} }

View File

@ -6,11 +6,11 @@ namespace Standart.Hash.xxHash;
public static partial class xxHash32 public static partial class xxHash32
{ {
private const uint XXH_PRIME32_1 = 2654435761U; private static readonly uint XXH_PRIME32_1 = 2654435761U;
private const uint XXH_PRIME32_2 = 2246822519U; private static readonly uint XXH_PRIME32_2 = 2246822519U;
private const uint XXH_PRIME32_3 = 3266489917U; private static readonly uint XXH_PRIME32_3 = 3266489917U;
private const uint XXH_PRIME32_4 = 668265263U; private static readonly uint XXH_PRIME32_4 = 668265263U;
private const uint XXH_PRIME32_5 = 374761393U; private static readonly uint XXH_PRIME32_5 = 374761393U;
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint XXH_rotl32(uint x, int r) private static uint XXH_rotl32(uint x, int r)

View File

@ -6,11 +6,11 @@ namespace Standart.Hash.xxHash;
public static partial class xxHash64 public static partial class xxHash64
{ {
private const ulong XXH_PRIME64_1 = 11400714785074694791UL; private static readonly ulong XXH_PRIME64_1 = 11400714785074694791UL;
private const ulong XXH_PRIME64_2 = 14029467366897019727UL; private static readonly ulong XXH_PRIME64_2 = 14029467366897019727UL;
private const ulong XXH_PRIME64_3 = 1609587929392839161UL; private static readonly ulong XXH_PRIME64_3 = 1609587929392839161UL;
private const ulong XXH_PRIME64_4 = 9650029242287828579UL; private static readonly ulong XXH_PRIME64_4 = 9650029242287828579UL;
private const ulong XXH_PRIME64_5 = 2870177450012600261UL; private static readonly ulong XXH_PRIME64_5 = 2870177450012600261UL;
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ulong XXH_rotl64(ulong x, int r) private static ulong XXH_rotl64(ulong x, int r)