diff --git a/nuget.props b/nuget.props index a7ef620..148981f 100644 --- a/nuget.props +++ b/nuget.props @@ -3,7 +3,7 @@ netstandard2.0 Standart.Hash.xxHash - 1.0.2 + 1.0.3 Standart.Hash.xxHash Standart.Hash.xxHash Alexander Melnik diff --git a/src/Standart.Hash.xxHash.Perf/xxHashBenchmark.cs b/src/Standart.Hash.xxHash.Perf/xxHashBenchmark.cs index b06f32b..5450743 100644 --- a/src/Standart.Hash.xxHash.Perf/xxHashBenchmark.cs +++ b/src/Standart.Hash.xxHash.Perf/xxHashBenchmark.cs @@ -40,12 +40,14 @@ [Benchmark] public uint Hash32_Stream() { + stream.Seek(0, SeekOrigin.Begin); return xxHash32.ComputeHash(stream); } [Benchmark] public async Task Hash32_StreamAsync() { + stream.Seek(0, SeekOrigin.Begin); return await xxHash32.ComputeHashAsync(stream); } @@ -58,12 +60,14 @@ [Benchmark] public ulong Hash64_Stream() { + stream.Seek(0, SeekOrigin.Begin); return xxHash64.ComputeHash(stream); } [Benchmark] public async Task Hash64_StreamAsync() { + stream.Seek(0, SeekOrigin.Begin); return await xxHash64.ComputeHashAsync(stream); } } diff --git a/src/Standart.Hash.xxHash/xxHash32.Stream.cs b/src/Standart.Hash.xxHash/xxHash32.Stream.cs index 3f26550..af31af1 100644 --- a/src/Standart.Hash.xxHash/xxHash32.Stream.cs +++ b/src/Standart.Hash.xxHash/xxHash32.Stream.cs @@ -1,14 +1,93 @@ namespace Standart.Hash.xxHash { + using System; using System.Buffers; using System.IO; using System.Threading.Tasks; public static partial class xxHash32 { - private const int min32 = 256; - private const int div16 = 0x7FFFFFF0; - + private static unsafe void Shift(byte[] data, int l, ref uint v1, ref uint v2, ref uint v3, ref uint v4) + { + fixed (byte* pData = &data[0]) + { + byte* ptr = pData; + byte* limit = ptr + l; + + do + { + v1 += *((uint*) ptr) * p2; + v1 = (v1 << 13) | (v1 >> (32 - 13)); // rotl 13 + v1 *= p1; + ptr += 4; + + v2 += *((uint*) ptr) * p2; + v2 = (v2 << 13) | (v2 >> (32 - 13)); // rotl 13 + v2 *= p1; + ptr += 4; + + v3 += *((uint*) ptr) * p2; + v3 = (v3 << 13) | (v3 >> (32 - 13)); // rotl 13 + v3 *= p1; + ptr += 4; + + v4 += *((uint*) ptr) * p2; + v4 = (v4 << 13) | (v4 >> (32 - 13)); // rotl 13 + v4 *= p1; + ptr += 4; + + } while (ptr < limit); + } + } + + private static unsafe uint Final(byte[] data, int l, ref uint v1, ref uint v2, ref uint v3, ref uint v4, long length, uint seed) + { + fixed (byte* pData = &data[0]) + { + byte* ptr = pData; + byte* end = pData + l; + uint h32; + + if (length >= 16) + { + h32 = ((v1 << 1) | (v1 >> (32 - 1))) + // rotl 1 + ((v2 << 7) | (v2 >> (32 - 7))) + // rotl 7 + ((v3 << 12) | (v3 >> (32 - 12))) + // rotl 12 + ((v4 << 18) | (v4 >> (32 - 18))); // rotl 18 + } + else + { + h32 = seed + p5; + } + + h32 += (uint) length; + + // finalize + while (ptr <= end - 4) + { + h32 += *((uint*)ptr) * p3; + h32 = ((h32 << 17) | (h32 >> (32 - 17))) * p4; // (rotl 17) * p4 + ptr += 4; + } + + while (ptr < end) + { + h32 += *((byte*)ptr) * p5; + h32 = ((h32 << 11) | (h32 >> (32 - 11))) * p1; // (rotl 11) * p1 + ptr += 1; + } + + // avalanche + h32 ^= h32 >> 15; + h32 *= p2; + h32 ^= h32 >> 13; + h32 *= p3; + h32 ^= h32 >> 16; + + return h32; + } + } + /// /// Compute xxHash for the stream /// @@ -18,163 +97,52 @@ /// The hash public static uint ComputeHash(Stream stream, int bufferSize = 4096, uint seed = 0) { - // Go to the beginning of the stream - stream.Seek(0, SeekOrigin.Begin); - - // Get length of the stream - long length = stream.Length; - - // The buffer size can't be less than 256 bytes - if (bufferSize < min32) bufferSize = min32; - else bufferSize &= div16; - - // Calculate the number of chunks and the remain - int chunks = (int) length / bufferSize; - int remain = (int) length % bufferSize; - int offset = bufferSize; - - // Calculate the offset - if (remain != 0) chunks++; - if (remain != 0 && remain < 16) offset -= 16; - // Optimizing memory allocation - byte[] buffer = ArrayPool.Shared.Rent(bufferSize); + byte[] buffer = ArrayPool.Shared.Rent(bufferSize + 16); + + int readBytes; + int offset = 0; + long length = 0; + + // Prepare the seed vector + uint v1 = seed + p1 + p2; + uint v2 = seed + p2; + uint v3 = seed + 0; + uint v4 = seed - p1; try { - // Prepare the seed vector - uint v1 = seed + p1 + p2; - uint v2 = seed + p2; - uint v3 = seed + 0; - uint v4 = seed - p1; - - // Process chunks - // Skip the last chunk. It will processed a little bit later - for (int i = 2; i <= chunks; i++) - { - // Change bufferSize for the last read - if (i == chunks) bufferSize = offset; - - // Read the next chunk - stream.Read(buffer, 0, bufferSize); - - unsafe - { - fixed (byte* pData = &buffer[0]) - { - byte* ptr = pData; - byte* end = pData + bufferSize; - - do - { - v1 += *((uint*)ptr) * p2; - v1 = (v1 << 13) | (v1 >> (32 - 13)); // rotl 13 - v1 *= p1; - ptr += 4; - - v2 += *((uint*)ptr) * p2; - v2 = (v2 << 13) | (v2 >> (32 - 13)); // rotl 13 - v2 *= p1; - ptr += 4; - - v3 += *((uint*)ptr) * p2; - v3 = (v3 << 13) | (v3 >> (32 - 13)); // rotl 13 - v3 *= p1; - ptr += 4; - - v4 += *((uint*)ptr) * p2; - v4 = (v4 << 13) | (v4 >> (32 - 13)); // rotl 13 - v4 *= p1; - ptr += 4; - - } while (ptr < end); - } - } - } - - // Read the last chunk - offset = stream.Read(buffer, 0, bufferSize); - - // Process the last chunk - unsafe + // Read flow of bytes + while ((readBytes = stream.Read(buffer, offset, bufferSize)) > 0) { - fixed (byte* pData = &buffer[0]) - { - byte* ptr = pData; - byte* end = pData + offset; - uint h32; - - if (length >= 16) - { - byte* limit = end - 16; - - do - { - v1 += *((uint*) ptr) * p2; - v1 = (v1 << 13) | (v1 >> (32 - 13)); // rotl 13 - v1 *= p1; - ptr += 4; - - v2 += *((uint*) ptr) * p2; - v2 = (v2 << 13) | (v2 >> (32 - 13)); // rotl 13 - v2 *= p1; - ptr += 4; - - v3 += *((uint*) ptr) * p2; - v3 = (v3 << 13) | (v3 >> (32 - 13)); // rotl 13 - v3 *= p1; - ptr += 4; - - v4 += *((uint*) ptr) * p2; - v4 = (v4 << 13) | (v4 >> (32 - 13)); // rotl 13 - v4 *= p1; - ptr += 4; - - } while (ptr <= limit); - - h32 = ((v1 << 1) | (v1 >> (32 - 1))) + // rotl 1 - ((v2 << 7) | (v2 >> (32 - 7))) + // rotl 7 - ((v3 << 12) | (v3 >> (32 - 12))) + // rotl 12 - ((v4 << 18) | (v4 >> (32 - 18))); // rotl 18 - } - else - { - h32 = seed + p5; - } - - h32 += (uint) length; - - while (ptr <= end - 4) - { - h32 += *((uint*) ptr) * p3; - h32 = ((h32 << 17) | (h32 >> (32 - 17))) * p4; // (rotl 17) * p4 - ptr += 4; - } - - while (ptr < end) - { - h32 += *((byte*) ptr) * p5; - h32 = ((h32 << 11) | (h32 >> (32 - 11))) * p1; // (rotl 11) * p1 - ptr += 1; - } - - h32 ^= h32 >> 15; - h32 *= p2; - h32 ^= h32 >> 13; - h32 *= p3; - h32 ^= h32 >> 16; - - return h32; - } + length = length + readBytes; + offset = offset + readBytes; + + if (offset < 16) continue; + + int r = offset % 16; // remain + int l = offset - r; // length + + // Process the next chunk + Shift(buffer, l, ref v1, ref v2, ref v3, ref v4); + + // Put remaining bytes to buffer + Array.Copy(buffer, l, buffer, 0, r); + offset = r; } + + // Process the final chunk + uint h32 = Final(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed); + + return h32; } - finally + finally { // Free memory ArrayPool.Shared.Return(buffer); } } - + /// /// Compute xxHash for the async stream /// @@ -184,155 +152,44 @@ /// The hash public static async Task ComputeHashAsync(Stream stream, int bufferSize = 4096, uint seed = 0) { - // Go to the beginning of the stream - stream.Seek(0, SeekOrigin.Begin); - - // Get length of the stream - long length = stream.Length; - - // The buffer size can't be less than 256 bytes - if (bufferSize < min32) bufferSize = min32; - else bufferSize &= div16; - - // Calculate the number of chunks and the remain - int chunks = (int) length / bufferSize; - int remain = (int) length % bufferSize; - int offset = bufferSize; - - // Calculate the offset - if (remain != 0) chunks++; - if (remain != 0 && remain < 16) offset -= 16; - // Optimizing memory allocation - byte[] buffer = ArrayPool.Shared.Rent(bufferSize); + byte[] buffer = ArrayPool.Shared.Rent(bufferSize + 16); + + int readBytes; + int offset = 0; + long length = 0; + + // Prepare the seed vector + uint v1 = seed + p1 + p2; + uint v2 = seed + p2; + uint v3 = seed + 0; + uint v4 = seed - p1; try { - // Prepare the seed vector - uint v1 = seed + p1 + p2; - uint v2 = seed + p2; - uint v3 = seed + 0; - uint v4 = seed - p1; - - // Process chunks - // Skip the last chunk. It will processed a little bit later - for (int i = 2; i <= chunks; i++) - { - // Change bufferSize for the last read - if (i == chunks) bufferSize = offset; - - // Read the next chunk - await stream.ReadAsync(buffer, 0, bufferSize).ConfigureAwait(false);; - - unsafe - { - fixed (byte* pData = &buffer[0]) - { - byte* ptr = pData; - byte* end = pData + bufferSize; - - do - { - v1 += *((uint*)ptr) * p2; - v1 = (v1 << 13) | (v1 >> (32 - 13)); // rotl 13 - v1 *= p1; - ptr += 4; - - v2 += *((uint*)ptr) * p2; - v2 = (v2 << 13) | (v2 >> (32 - 13)); // rotl 13 - v2 *= p1; - ptr += 4; - - v3 += *((uint*)ptr) * p2; - v3 = (v3 << 13) | (v3 >> (32 - 13)); // rotl 13 - v3 *= p1; - ptr += 4; - - v4 += *((uint*)ptr) * p2; - v4 = (v4 << 13) | (v4 >> (32 - 13)); // rotl 13 - v4 *= p1; - ptr += 4; - - } while (ptr < end); - } - } - } - - // Read the last chunk - offset = await stream.ReadAsync(buffer, 0, bufferSize).ConfigureAwait(false); - - // Process the last chunk - unsafe + // Read flow of bytes + while ((readBytes = await stream.ReadAsync(buffer, offset, bufferSize)) > 0) { - fixed (byte* pData = &buffer[0]) - { - byte* ptr = pData; - byte* end = pData + offset; - uint h32; - - if (length >= 16) - { - byte* limit = end - 16; - - do - { - v1 += *((uint*) ptr) * p2; - v1 = (v1 << 13) | (v1 >> (32 - 13)); // rotl 13 - v1 *= p1; - ptr += 4; - - v2 += *((uint*) ptr) * p2; - v2 = (v2 << 13) | (v2 >> (32 - 13)); // rotl 13 - v2 *= p1; - ptr += 4; - - v3 += *((uint*) ptr) * p2; - v3 = (v3 << 13) | (v3 >> (32 - 13)); // rotl 13 - v3 *= p1; - ptr += 4; - - v4 += *((uint*) ptr) * p2; - v4 = (v4 << 13) | (v4 >> (32 - 13)); // rotl 13 - v4 *= p1; - ptr += 4; - - } while (ptr <= limit); - - h32 = ((v1 << 1) | (v1 >> (32 - 1))) + // rotl 1 - ((v2 << 7) | (v2 >> (32 - 7))) + // rotl 7 - ((v3 << 12) | (v3 >> (32 - 12))) + // rotl 12 - ((v4 << 18) | (v4 >> (32 - 18))); // rotl 18 - } - else - { - h32 = seed + p5; - } - - h32 += (uint) length; - - while (ptr <= end - 4) - { - h32 += *((uint*) ptr) * p3; - h32 = ((h32 << 17) | (h32 >> (32 - 17))) * p4; // (rotl 17) * p4 - ptr += 4; - } - - while (ptr < end) - { - h32 += *((byte*) ptr) * p5; - h32 = ((h32 << 11) | (h32 >> (32 - 11))) * p1; // (rotl 11) * p1 - ptr += 1; - } - - h32 ^= h32 >> 15; - h32 *= p2; - h32 ^= h32 >> 13; - h32 *= p3; - h32 ^= h32 >> 16; - - return h32; - } + length = length + readBytes; + offset = offset + readBytes; + + if (offset < 16) continue; + + int r = offset % 16; // remain + int l = offset - r; // length + + // Process the next chunk + Shift(buffer, l, ref v1, ref v2, ref v3, ref v4); + + // Put remaining bytes to buffer + Array.Copy(buffer, l, buffer, 0, r); + offset = r; } + + // Process the final chunk + uint h32 = Final(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed); + + return h32; } finally { diff --git a/src/Standart.Hash.xxHash/xxHash64.Stream.cs b/src/Standart.Hash.xxHash/xxHash64.Stream.cs index 2020dda..fc1adad 100644 --- a/src/Standart.Hash.xxHash/xxHash64.Stream.cs +++ b/src/Standart.Hash.xxHash/xxHash64.Stream.cs @@ -1,14 +1,133 @@ namespace Standart.Hash.xxHash { + using System; using System.Buffers; using System.IO; using System.Threading.Tasks; - public static partial class xxHash64 - { - private const int min64 = 1024; - private const int div32 = 0x7FFFFFE0; + public static partial class xxHash64 + { + private static unsafe void Shift(byte[] data, int l, ref ulong v1, ref ulong v2, ref ulong v3, ref ulong v4) + { + fixed (byte* pData = &data[0]) + { + byte* ptr = pData; + byte* limit = ptr + l; + + do + { + v1 += *((ulong*)ptr) * p2; + v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31 + v1 *= p1; + ptr += 8; + + v2 += *((ulong*)ptr) * p2; + v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31 + v2 *= p1; + ptr += 8; + + v3 += *((ulong*)ptr) * p2; + v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31 + v3 *= p1; + ptr += 8; + + v4 += *((ulong*)ptr) * p2; + v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31 + v4 *= p1; + ptr += 8; + + } while (ptr < limit); + } + } + + private static unsafe ulong Final(byte[] data, int l, ref ulong v1, ref ulong v2, ref ulong v3, ref ulong v4, long length, ulong seed) + { + fixed (byte* pData = &data[0]) + { + byte* ptr = pData; + byte* end = pData + l; + ulong h64; + + if (length >= 16) + { + h64 = ((v1 << 1) | (v1 >> (64 - 1))) + // rotl 1 + ((v2 << 7) | (v2 >> (64 - 7))) + // rotl 7 + ((v3 << 12) | (v3 >> (64 - 12))) + // rotl 12 + ((v4 << 18) | (v4 >> (64 - 18))); // rotl 18 + + // merge round + v1 *= p2; + v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31 + v1 *= p1; + h64 ^= v1; + h64 = h64 * p1 + p4; + + // merge round + v2 *= p2; + v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31 + v2 *= p1; + h64 ^= v2; + h64 = h64 * p1 + p4; + + // merge round + v3 *= p2; + v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31 + v3 *= p1; + h64 ^= v3; + h64 = h64 * p1 + p4; + + // merge round + v4 *= p2; + v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31 + v4 *= p1; + h64 ^= v4; + h64 = h64 * p1 + p4; + + } + else + { + h64 = seed + p5; + } + + h64 += (ulong) length; + + // finalize + while (ptr <= end - 8) + { + ulong t1 = *((ulong*)ptr) * p2; + t1 = (t1 << 31) | (t1 >> (64 - 31)); // rotl 31 + t1 *= p1; + h64 ^= t1; + h64 = ((h64 << 27) | (h64 >> (64 - 27))) * p1 + p4; // (rotl 27) * p1 + p4 + ptr += 8; + } + + if (ptr <= end - 4) + { + h64 ^= *((uint*)ptr) * p1; + h64 = ((h64 << 23) | (h64 >> (64 - 23))) * p2 + p3; // (rotl 27) * p2 + p3 + ptr += 4; + } + + while (ptr < end) + { + h64 ^= *((byte*)ptr) * p5; + h64 = ((h64 << 11) | (h64 >> (64 - 11))) * p1; // (rotl 11) * p1 + ptr += 1; + } + + // avalanche + h64 ^= h64 >> 33; + h64 *= p2; + h64 ^= h64 >> 29; + h64 *= p3; + h64 ^= h64 >> 32; + + return h64; + } + } + /// /// Compute xxHash for the stream /// @@ -18,201 +137,50 @@ /// The hash public static ulong ComputeHash(Stream stream, int bufferSize = 8192, ulong seed = 0) { - // Go to the beginning of the stream - stream.Seek(0, SeekOrigin.Begin); - - // Get length of the stream - long length = stream.Length; - - // The buffer can't be less than 1024 bytes - if (bufferSize < min64) bufferSize = min64; - else bufferSize &= div32; - - // Calculate the number of chunks and the remain - int chunks = (int) length / bufferSize; - int remain = (int) length % bufferSize; - int offset = bufferSize; - - // Calculate the offset - if (remain != 0) chunks++; - if (remain != 0 && remain < 32) offset -= 32; - // Optimizing memory allocation - byte[] buffer = ArrayPool.Shared.Rent(bufferSize); + byte[] buffer = ArrayPool.Shared.Rent(bufferSize + 32); + int readBytes; + int offset = 0; + long length = 0; + + // Prepare the seed vector + ulong v1 = seed + p1 + p2; + ulong v2 = seed + p2; + ulong v3 = seed + 0; + ulong v4 = seed - p1; + try { - // Prepare the seed vector - ulong v1 = seed + p1 + p2; - ulong v2 = seed + p2; - ulong v3 = seed + 0; - ulong v4 = seed - p1; - - // Process chunks - // Skip the last chunk. It will processed a little bit later - for (int i = 2; i <= chunks; i++) + // Read flow of bytes + while ((readBytes = stream.Read(buffer, offset, bufferSize)) > 0) { - // Change bufferSize for the last read - if (i == chunks) bufferSize = offset; - - // Read the next chunk - stream.Read(buffer, 0, bufferSize); - - unsafe - { - fixed (byte* pData = &buffer[0]) - { - byte* ptr = pData; - byte* end = pData + bufferSize; - - do - { - v1 += *((ulong*) ptr) * p2; - v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31 - v1 *= p1; - ptr += 8; - - v2 += *((ulong*) ptr) * p2; - v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31 - v2 *= p1; - ptr += 8; - - v3 += *((ulong*) ptr) * p2; - v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31 - v3 *= p1; - ptr += 8; - - v4 += *((ulong*) ptr) * p2; - v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31 - v4 *= p1; - ptr += 8; - - } while (ptr < end); - } - } - } - - // Read the last chunk - offset = stream.Read(buffer, 0, bufferSize); - - // Process the last chunk - unsafe - { - fixed (byte* pData = &buffer[0]) - { - byte* ptr = pData; - byte* end = pData + offset; - ulong h64; - - if (length >= 32) - { - byte* limit = end - 32; - - do - { - v1 += *((ulong*)ptr) * p2; - v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31 - v1 *= p1; - ptr += 8; - - v2 += *((ulong*)ptr) * p2; - v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31 - v2 *= p1; - ptr += 8; - - v3 += *((ulong*)ptr) * p2; - v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31 - v3 *= p1; - ptr += 8; - - v4 += *((ulong*)ptr) * p2; - v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31 - v4 *= p1; - ptr += 8; - - } while (ptr <= limit); - - h64 = ((v1 << 1) | (v1 >> (64 - 1))) + // rotl 1 - ((v2 << 7) | (v2 >> (64 - 7))) + // rotl 7 - ((v3 << 12) | (v3 >> (64 - 12))) + // rotl 12 - ((v4 << 18) | (v4 >> (64 - 18))); // rotl 18 - - // merge round - v1 *= p2; - v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31 - v1 *= p1; - h64 ^= v1; - h64 = h64 * p1 + p4; - - // merge round - v2 *= p2; - v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31 - v2 *= p1; - h64 ^= v2; - h64 = h64 * p1 + p4; - - // merge round - v3 *= p2; - v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31 - v3 *= p1; - h64 ^= v3; - h64 = h64 * p1 + p4; - - // merge round - v4 *= p2; - v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31 - v4 *= p1; - h64 ^= v4; - h64 = h64 * p1 + p4; - } - else - { - h64 = seed + p5; - } - - h64 += (ulong) length; - - // finalize - while (ptr <= end - 8) - { - ulong t1 = *((ulong*)ptr) * p2; - t1 = (t1 << 31) | (t1 >> (64 - 31)); // rotl 31 - t1 *= p1; - h64 ^= t1; - h64 = ((h64 << 27) | (h64 >> (64 - 27))) * p1 + p4; // (rotl 27) * p1 + p4 - ptr += 8; - } - - if (ptr <= end - 4) - { - h64 ^= *((uint*)ptr) * p1; - h64 = ((h64 << 23) | (h64 >> (64 - 23))) * p2 + p3; // (rotl 27) * p2 + p3 - ptr += 4; - } - - while (ptr < end) - { - h64 ^= *((byte*)ptr) * p5; - h64 = ((h64 << 11) | (h64 >> (64 - 11))) * p1; // (rotl 11) * p1 - ptr += 1; - } - - // avalanche - h64 ^= h64 >> 33; - h64 *= p2; - h64 ^= h64 >> 29; - h64 *= p3; - h64 ^= h64 >> 32; - - return h64; - } + length = length + readBytes; + offset = offset + readBytes; + + if (offset < 32) continue; + + int r = offset % 32; // remain + int l = offset - r; // length + + // Process the next chunk + Shift(buffer, l, ref v1, ref v2, ref v3, ref v4); + + // Put remaining bytes to buffer + Array.Copy(buffer, l, buffer, 0, r); + offset = r; } + + // Process the final chunk + ulong h64 = Final(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed); + + return h64; } finally { // Free memory ArrayPool.Shared.Return(buffer); - } + } } /// @@ -224,201 +192,50 @@ /// The hash public static async Task ComputeHashAsync(Stream stream, int bufferSize = 8192, ulong seed = 0) { - // Go to the beginning of the stream - stream.Seek(0, SeekOrigin.Begin); - - // Get length of the stream - long length = stream.Length; - - // The buffer can't be less than 1024 bytes - if (bufferSize < min64) bufferSize = min64; - else bufferSize &= div32; - - // Calculate the number of chunks and the remain - int chunks = (int) length / bufferSize; - int remain = (int) length % bufferSize; - int offset = bufferSize; - - // Calculate the offset - if (remain != 0) chunks++; - if (remain != 0 && remain < 32) offset -= 32; - // Optimizing memory allocation - byte[] buffer = ArrayPool.Shared.Rent(bufferSize); + byte[] buffer = ArrayPool.Shared.Rent(bufferSize + 32); + int readBytes; + int offset = 0; + long length = 0; + + // Prepare the seed vector + ulong v1 = seed + p1 + p2; + ulong v2 = seed + p2; + ulong v3 = seed + 0; + ulong v4 = seed - p1; + try { - // Prepare the seed vector - ulong v1 = seed + p1 + p2; - ulong v2 = seed + p2; - ulong v3 = seed + 0; - ulong v4 = seed - p1; - - // Process chunks - // Skip the last chunk. It will processed a little bit later - for (int i = 2; i <= chunks; i++) + // Read flow of bytes + while ((readBytes = await stream.ReadAsync(buffer, offset, bufferSize)) > 0) { - // Change bufferSize for the last read - if (i == chunks) bufferSize = offset; - - // Read the next chunk - await stream.ReadAsync(buffer, 0, bufferSize).ConfigureAwait(false);; - - unsafe - { - fixed (byte* pData = &buffer[0]) - { - byte* ptr = pData; - byte* end = pData + bufferSize; - - do - { - v1 += *((ulong*) ptr) * p2; - v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31 - v1 *= p1; - ptr += 8; - - v2 += *((ulong*) ptr) * p2; - v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31 - v2 *= p1; - ptr += 8; - - v3 += *((ulong*) ptr) * p2; - v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31 - v3 *= p1; - ptr += 8; - - v4 += *((ulong*) ptr) * p2; - v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31 - v4 *= p1; - ptr += 8; - - } while (ptr < end); - } - } - } - - // Read the last chunk - offset = await stream.ReadAsync(buffer, 0, bufferSize).ConfigureAwait(false);; - - // Process the last chunk - unsafe - { - fixed (byte* pData = &buffer[0]) - { - byte* ptr = pData; - byte* end = pData + offset; - ulong h64; - - if (length >= 32) - { - byte* limit = end - 32; - - do - { - v1 += *((ulong*) ptr) * p2; - v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31 - v1 *= p1; - ptr += 8; - - v2 += *((ulong*) ptr) * p2; - v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31 - v2 *= p1; - ptr += 8; - - v3 += *((ulong*) ptr) * p2; - v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31 - v3 *= p1; - ptr += 8; - - v4 += *((ulong*) ptr) * p2; - v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31 - v4 *= p1; - ptr += 8; - - } while (ptr <= limit); - - h64 = ((v1 << 1) | (v1 >> (64 - 1))) + // rotl 1 - ((v2 << 7) | (v2 >> (64 - 7))) + // rotl 7 - ((v3 << 12) | (v3 >> (64 - 12))) + // rotl 12 - ((v4 << 18) | (v4 >> (64 - 18))); // rotl 18 - - // merge round - v1 *= p2; - v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31 - v1 *= p1; - h64 ^= v1; - h64 = h64 * p1 + p4; - - // merge round - v2 *= p2; - v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31 - v2 *= p1; - h64 ^= v2; - h64 = h64 * p1 + p4; - - // merge round - v3 *= p2; - v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31 - v3 *= p1; - h64 ^= v3; - h64 = h64 * p1 + p4; - - // merge round - v4 *= p2; - v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31 - v4 *= p1; - h64 ^= v4; - h64 = h64 * p1 + p4; - } - else - { - h64 = seed + p5; - } - - h64 += (ulong) length; - - // finalize - while (ptr <= end - 8) - { - ulong t1 = *((ulong*) ptr) * p2; - t1 = (t1 << 31) | (t1 >> (64 - 31)); // rotl 31 - t1 *= p1; - h64 ^= t1; - h64 = ((h64 << 27) | (h64 >> (64 - 27))) * p1 + p4; // (rotl 27) * p1 + p4 - ptr += 8; - } - - if (ptr <= end - 4) - { - h64 ^= *((uint*) ptr) * p1; - h64 = ((h64 << 23) | (h64 >> (64 - 23))) * p2 + p3; // (rotl 27) * p2 + p3 - ptr += 4; - } - - while (ptr < end) - { - h64 ^= *((byte*) ptr) * p5; - h64 = ((h64 << 11) | (h64 >> (64 - 11))) * p1; // (rotl 11) * p1 - ptr += 1; - } - - // avalanche - h64 ^= h64 >> 33; - h64 *= p2; - h64 ^= h64 >> 29; - h64 *= p3; - h64 ^= h64 >> 32; - - return h64; - } + length = length + readBytes; + offset = offset + readBytes; + + if (offset < 32) continue; + + int r = offset % 32; // remain + int l = offset - r; // length + + // Process the next chunk + Shift(buffer, l, ref v1, ref v2, ref v3, ref v4); + + // Put remaining bytes to buffer + Array.Copy(buffer, l, buffer, 0, r); + offset = r; } + + // Process the final chunk + ulong h64 = Final(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed); + + return h64; } finally { // Free memory ArrayPool.Shared.Return(buffer); - } + } } } } \ No newline at end of file