Add support for AVX2
This commit is contained in:
parent
a19b68185e
commit
d64922c63b
@ -379,12 +379,39 @@ namespace Standart.Hash.xxHash
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private static unsafe void XXH3_accumulate_512(ulong* acc, byte* input, byte* secret)
|
private static unsafe void XXH3_accumulate_512(ulong* acc, byte* input, byte* secret)
|
||||||
{
|
{
|
||||||
if (Sse2.IsSupported)
|
if (Avx2.IsSupported)
|
||||||
|
XXH3_accumulate_512_avx2(acc, input, secret);
|
||||||
|
else if (Sse2.IsSupported)
|
||||||
XXH3_accumulate_512_sse2(acc, input, secret);
|
XXH3_accumulate_512_sse2(acc, input, secret);
|
||||||
else
|
else
|
||||||
XXH3_accumulate_512_scalar(acc, input, secret);
|
XXH3_accumulate_512_scalar(acc, input, secret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
|
private static unsafe void XXH3_accumulate_512_avx2(ulong* acc, byte* input, byte* secret)
|
||||||
|
{
|
||||||
|
const int m256i_size = 32;
|
||||||
|
const byte _MM_SHUFFLE_0_3_0_1 = 0b0011_0001;
|
||||||
|
const byte _MM_SHUFFLE_1_0_3_2 = 0b0100_1110;
|
||||||
|
|
||||||
|
for (int i = 0; i < XXH_STRIPE_LEN / m256i_size; i++)
|
||||||
|
{
|
||||||
|
int uint32_offset = i * 8;
|
||||||
|
int uint64_offset = i * 4;
|
||||||
|
|
||||||
|
var acc_vec = Avx2.LoadVector256(acc + uint64_offset);
|
||||||
|
var data_vec = Avx2.LoadVector256((uint*)input + uint32_offset);
|
||||||
|
var key_vec = Avx2.LoadVector256((uint*)secret + uint32_offset);
|
||||||
|
var data_key = Avx2.Xor(data_vec, key_vec);
|
||||||
|
var data_key_lo = Avx2.Shuffle(data_key, _MM_SHUFFLE_0_3_0_1);
|
||||||
|
var product = Avx2.Multiply(data_key, data_key_lo);
|
||||||
|
var data_swap = Avx2.Shuffle(data_vec, _MM_SHUFFLE_1_0_3_2).AsUInt64();
|
||||||
|
var sum = Avx2.Add(acc_vec, data_swap);
|
||||||
|
var result = Avx2.Add(product, sum);
|
||||||
|
Avx2.Store(acc + uint64_offset, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private static unsafe void XXH3_accumulate_512_sse2(ulong* acc, byte* input, byte* secret)
|
private static unsafe void XXH3_accumulate_512_sse2(ulong* acc, byte* input, byte* secret)
|
||||||
{
|
{
|
||||||
@ -435,12 +462,38 @@ namespace Standart.Hash.xxHash
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private static unsafe void XXH3_scrambleAcc(ulong* acc, byte* secret)
|
private static unsafe void XXH3_scrambleAcc(ulong* acc, byte* secret)
|
||||||
{
|
{
|
||||||
if (Sse2.IsSupported)
|
if (Avx2.IsSupported)
|
||||||
|
XXH3_scrambleAcc_avx2(acc, secret);
|
||||||
|
else if (Sse2.IsSupported)
|
||||||
XXH3_scrambleAcc_sse2(acc, secret);
|
XXH3_scrambleAcc_sse2(acc, secret);
|
||||||
else
|
else
|
||||||
XXH3_scrambleAcc_scalar(acc, secret);
|
XXH3_scrambleAcc_scalar(acc, secret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static unsafe void XXH3_scrambleAcc_avx2(ulong* acc, byte* secret)
|
||||||
|
{
|
||||||
|
const int m256i_size = 32;
|
||||||
|
const byte _MM_SHUFFLE_0_3_0_1 = 0b0011_0001;
|
||||||
|
|
||||||
|
var prime32 = Vector256.Create(XXH_PRIME32_1);
|
||||||
|
|
||||||
|
for (int i = 0; i < XXH_STRIPE_LEN / m256i_size; i++)
|
||||||
|
{
|
||||||
|
int uint64_offset = i * 4;
|
||||||
|
|
||||||
|
var acc_vec = Avx2.LoadVector256(acc + uint64_offset);
|
||||||
|
var shifted = Avx2.ShiftRightLogical(acc_vec, 47);
|
||||||
|
var data_vec = Avx2.Xor(acc_vec, shifted);
|
||||||
|
var key_vec = Avx2.LoadVector256((ulong*) secret + uint64_offset);
|
||||||
|
var data_key = Avx2.Xor(data_vec, key_vec).AsUInt32();
|
||||||
|
var data_key_hi = Avx2.Shuffle(data_key, _MM_SHUFFLE_0_3_0_1);
|
||||||
|
var prod_lo = Avx2.Multiply(data_key, prime32);
|
||||||
|
var prod_hi = Avx2.Multiply(data_key_hi, prime32);
|
||||||
|
var result = Avx2.Add(prod_lo, Avx2.ShiftLeftLogical(prod_hi, 32));
|
||||||
|
Avx2.Store(acc + uint64_offset, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static unsafe void XXH3_scrambleAcc_sse2(ulong* acc, byte* secret)
|
private static unsafe void XXH3_scrambleAcc_sse2(ulong* acc, byte* secret)
|
||||||
{
|
{
|
||||||
const int m128i_size = 16;
|
const int m128i_size = 16;
|
||||||
@ -492,12 +545,35 @@ namespace Standart.Hash.xxHash
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private static unsafe void XXH3_initCustomSecret(byte* customSecret, ulong seed)
|
private static unsafe void XXH3_initCustomSecret(byte* customSecret, ulong seed)
|
||||||
{
|
{
|
||||||
if (Sse2.IsSupported)
|
if (Avx2.IsSupported)
|
||||||
|
XXH3_initCustomSecret_avx2(customSecret, seed);
|
||||||
|
else if (Sse2.IsSupported)
|
||||||
XXH3_initCustomSecret_sse2(customSecret, seed);
|
XXH3_initCustomSecret_sse2(customSecret, seed);
|
||||||
else
|
else
|
||||||
XXH3_initCustomSecret_scalar(customSecret, seed);
|
XXH3_initCustomSecret_scalar(customSecret, seed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
|
private static unsafe void XXH3_initCustomSecret_avx2(byte* customSecret, ulong seed64)
|
||||||
|
{
|
||||||
|
const int m256i_size = 32;
|
||||||
|
|
||||||
|
var seed = Vector256.Create((ulong)seed64, (ulong)(0U - seed64), (ulong)seed64, (ulong)(0U - seed64));
|
||||||
|
|
||||||
|
fixed (byte* secret = &XXH3_SECRET[0])
|
||||||
|
{
|
||||||
|
for (int i = 0; i < XXH_SECRET_DEFAULT_SIZE / m256i_size; i++)
|
||||||
|
{
|
||||||
|
int uint64_offset = i * 4;
|
||||||
|
|
||||||
|
var src32 = Avx2.LoadVector256(((ulong*)secret) + uint64_offset);
|
||||||
|
var dst32 = Avx2.Add(src32, seed);
|
||||||
|
Avx2.Store((ulong*) customSecret + uint64_offset, dst32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private static unsafe void XXH3_initCustomSecret_sse2(byte* customSecret, ulong seed64)
|
private static unsafe void XXH3_initCustomSecret_sse2(byte* customSecret, ulong seed64)
|
||||||
{
|
{
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user