Add SSE2
This commit is contained in:
parent
0a27c518a7
commit
ab1a2055b7
@ -1,6 +1,7 @@
|
|||||||
// ReSharper disable InconsistentNaming
|
// ReSharper disable InconsistentNaming
|
||||||
|
|
||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
|
using System.Runtime.Intrinsics.X86;
|
||||||
|
|
||||||
namespace Standart.Hash.xxHash
|
namespace Standart.Hash.xxHash
|
||||||
{
|
{
|
||||||
@ -86,11 +87,23 @@ namespace Standart.Hash.xxHash
|
|||||||
/// <summary>
/// Multiplies two 64-bit values and returns the full 128-bit product,
/// dispatching to the BMI2 implementation when it is usable and to the
/// scalar implementation otherwise.
/// </summary>
/// <param name="lhs">Left operand.</param>
/// <param name="rhs">Right operand.</param>
/// <returns>The product as a <c>uint128</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint128 XXH_mult64to128(ulong lhs, ulong rhs)
{
    // The BMI2 helper calls Bmi2.X64.MultiplyNoFlags, so the X64 nested class
    // is the one that must be supported. Bmi2.IsSupported alone can be true in
    // a 32-bit process, where the 64-bit form throws PlatformNotSupportedException.
    if (Bmi2.X64.IsSupported)
    {
        return XXH_mult64to128_bmi2(lhs, rhs);
    }

    return XXH_mult64to128_scalar(lhs, rhs);
}
|
||||||
|
|
||||||
|
/// <summary>
/// BMI2 implementation of the 64x64 multiply: one
/// <c>Bmi2.X64.MultiplyNoFlags</c> call returns the low half of the product
/// and writes the high half through the pointer argument.
/// </summary>
/// <param name="lhs">Left operand.</param>
/// <param name="rhs">Right operand.</param>
/// <returns>A <c>uint128</c> whose <c>low64</c>/<c>high64</c> hold the two halves of the product.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe uint128 XXH_mult64to128_bmi2(ulong lhs, ulong rhs)
{
    ulong upper;
    ulong lower = Bmi2.X64.MultiplyNoFlags(lhs, rhs, &upper);

    return new uint128
    {
        low64 = lower,
        high64 = upper
    };
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private static uint128 XXH_mult64to128_scalar(ulong lhs, ulong rhs)
|
private static uint128 XXH_mult64to128_scalar(ulong lhs, ulong rhs)
|
||||||
{
|
{
|
||||||
@ -114,5 +127,11 @@ namespace Standart.Hash.xxHash
|
|||||||
{
|
{
|
||||||
*(ulong*) dst = v64;
|
*(ulong*) dst = v64;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Packs four 2-bit lane selectors into a single shuffle control byte,
/// with <paramref name="p0"/> in the lowest two bits and
/// <paramref name="p3"/> in the highest two.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static byte _MM_SHUFFLE(byte p3, byte p2, byte p1, byte p0)
    => (byte)(p0 | (p1 << 2) | (p2 << 4) | (p3 << 6));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
using System.Diagnostics;
|
using System.Diagnostics;
|
||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
|
using System.Runtime.Intrinsics;
|
||||||
|
using System.Runtime.Intrinsics.X86;
|
||||||
|
|
||||||
namespace Standart.Hash.xxHash
|
namespace Standart.Hash.xxHash
|
||||||
{
|
{
|
||||||
@ -377,9 +379,33 @@ namespace Standart.Hash.xxHash
|
|||||||
/// <summary>
/// Runs one accumulate step over a stripe, using the SSE2 implementation
/// when the hardware supports it and the scalar implementation otherwise.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void XXH3_accumulate_512(ulong* acc, byte* input, byte* secret)
{
    if (Sse2.IsSupported)
    {
        XXH3_accumulate_512_sse2(acc, input, secret);
        return;
    }

    XXH3_accumulate_512_scalar(acc, input, secret);
}
|
/// <summary>
/// SSE2 implementation of the accumulate step. Walks the stripe in 16-byte
/// vectors; for each one it XORs input with secret, multiplies the even
/// 32-bit lanes by the odd ones, and adds that product plus the half-swapped
/// input into the matching pair of accumulators.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void XXH3_accumulate_512_sse2(ulong* acc, byte* input, byte* secret)
{
    const int vectorBytes = 16;

    // Same values as _MM_SHUFFLE(0, 3, 0, 1) and _MM_SHUFFLE(1, 0, 3, 2).
    const byte oddLanesToEven = (0 << 6) | (3 << 4) | (0 << 2) | 1;
    const byte swapHalves = (1 << 6) | (0 << 4) | (3 << 2) | 2;

    for (int v = 0; v < XXH_STRIPE_LEN / vectorBytes; v++)
    {
        int byteOffset = v * vectorBytes;
        ulong* accPair = acc + v * 2; // two 64-bit accumulators per vector

        Vector128<uint> data = Sse2.LoadVector128((uint*) (input + byteOffset));
        Vector128<uint> key = Sse2.LoadVector128((uint*) (secret + byteOffset));
        Vector128<uint> mixed = Sse2.Xor(data, key);

        // Move 32-bit lanes 1 and 3 into lanes 0 and 2 so the widening
        // multiply pairs the low and high half of each 64-bit lane.
        Vector128<uint> mixedOdd = Sse2.Shuffle(mixed, oddLanesToEven);
        Vector128<ulong> product = Sse2.Multiply(mixed, mixedOdd);

        // Swap the two 64-bit halves of the raw input before adding it in.
        Vector128<ulong> swapped = Sse2.Shuffle(data, swapHalves).AsUInt64();
        Vector128<ulong> current = Sse2.LoadVector128(accPair);

        Sse2.Store(accPair, Sse2.Add(product, Sse2.Add(current, swapped)));
    }
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
@ -407,9 +433,34 @@ namespace Standart.Hash.xxHash
|
|||||||
/// <summary>
/// Scrambles the accumulators, using the SSE2 implementation when the
/// hardware supports it and the scalar implementation otherwise.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void XXH3_scrambleAcc(ulong* acc, byte* secret)
{
    if (Sse2.IsSupported)
    {
        XXH3_scrambleAcc_sse2(acc, secret);
        return;
    }

    XXH3_scrambleAcc_scalar(acc, secret);
}
|
/// <summary>
/// SSE2 implementation of the accumulator scramble. For each pair of 64-bit
/// accumulators it applies <c>acc ^= acc &gt;&gt; 47</c>, XORs with the secret,
/// and multiplies by <c>XXH_PRIME32_1</c>, writing the result back in place.
/// </summary>
/// <param name="acc">Accumulators, read and overwritten.</param>
/// <param name="secret">Secret bytes, read at the same byte offsets as the accumulators.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void XXH3_scrambleAcc_sse2(ulong* acc, byte* secret)
{
    const int m128i_size = 16;

    var prime32 = Vector128.Create(XXH_PRIME32_1);

    for (int i = 0; i < XXH_STRIPE_LEN / m128i_size; i++)
    {
        int uint32_offset = i * 4;
        int uint64_offset = i * 2;

        // The 47-bit shift must act on 64-bit lanes. On 32-bit lanes a shift
        // count above 31 produces zero and the xorshift would be lost.
        Vector128<ulong> acc_vec = Sse2.LoadVector128(acc + uint64_offset);
        Vector128<ulong> shifted = Sse2.ShiftRightLogical(acc_vec, 47);
        Vector128<uint> data_vec = Sse2.Xor(acc_vec, shifted).AsUInt32();

        var key_vec = Sse2.LoadVector128((uint*) secret + uint32_offset);
        var data_key = Sse2.Xor(data_vec, key_vec);

        // 64-bit multiply by a 32-bit prime, built from two 32x32->64 multiplies:
        // low halves directly, high halves shuffled down and shifted back up.
        var data_key_hi = Sse2.Shuffle(data_key, _MM_SHUFFLE(0, 3, 0, 1));
        var prod_lo = Sse2.Multiply(data_key, prime32);
        var prod_hi = Sse2.Multiply(data_key_hi, prime32);
        var result = Sse2.Add(prod_lo, Sse2.ShiftLeftLogical(prod_hi, 32));

        Sse2.Store(acc + uint64_offset, result);
    }
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
@ -438,11 +489,33 @@ namespace Standart.Hash.xxHash
|
|||||||
/// <summary>
/// Fills <paramref name="customSecret"/> from the seed, using the SSE2
/// implementation when the hardware supports it and the scalar
/// implementation otherwise.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void XXH3_initCustomSecret(byte* customSecret, ulong seed)
{
    if (Sse2.IsSupported)
    {
        XXH3_initCustomSecret_sse2(customSecret, seed);
        return;
    }

    XXH3_initCustomSecret_scalar(customSecret, seed);
}
|
||||||
|
|
||||||
|
/// <summary>
/// SSE2 implementation of custom-secret initialisation. Reads
/// <c>XXH3_SECRET</c> 16 bytes at a time, adds <c>seed64</c> to the first
/// 64-bit lane and <c>-seed64</c> to the second, and writes each result to
/// the same offset in <paramref name="customSecret"/>.
/// </summary>
/// <param name="customSecret">Destination buffer; <c>XXH_SECRET_DEFAULT_SIZE</c> bytes are written.</param>
/// <param name="seed64">Seed mixed into the secret.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void XXH3_initCustomSecret_sse2(byte* customSecret, ulong seed64)
{
    const int m128i_size = 16;

    // Wrap-around is intended for both the negation and the ulong -> long
    // reinterpretation, so make it explicit in case the build enables checked arithmetic.
    var seed = unchecked(Vector128.Create((long) seed64, (long) (0U - seed64)));

    fixed (byte* secret = &XXH3_SECRET[0])
    {
        for (int i = 0; i < XXH_SECRET_DEFAULT_SIZE / m128i_size; ++i)
        {
            int uint64_offset = i * 2;

            var src16 = Sse2.LoadVector128((long*) secret + uint64_offset);
            var dst16 = Sse2.Add(src16, seed);

            // Advance the destination together with the source; storing to the
            // base pointer would overwrite the first 16 bytes on every iteration.
            Sse2.Store((long*) customSecret + uint64_offset, dst16);
        }
    }
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private static unsafe void XXH3_initCustomSecret_scalar(byte* customSecret, ulong seed)
|
private static unsafe void XXH3_initCustomSecret_scalar(byte* customSecret, ulong seed)
|
||||||
{
|
{
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user