Fix stream api

Melnik Alexander 2018-09-27 18:56:49 +03:00
parent f5d67a126d
commit 8a489d5a37
4 changed files with 342 additions and 664 deletions

View File

@@ -3,7 +3,7 @@
<PropertyGroup>
<TargetFrameworks>netstandard2.0</TargetFrameworks>
<PackageId>Standart.Hash.xxHash</PackageId>
<VersionPrefix>1.0.2</VersionPrefix>
<VersionPrefix>1.0.3</VersionPrefix>
<AssemblyName>Standart.Hash.xxHash</AssemblyName>
<AssemblyTitle>Standart.Hash.xxHash</AssemblyTitle>
<Authors>Alexander Melnik</Authors>

View File

@@ -40,12 +40,14 @@
[Benchmark]
public uint Hash32_Stream()
{
stream.Seek(0, SeekOrigin.Begin);
return xxHash32.ComputeHash(stream);
}
[Benchmark]
public async Task<uint> Hash32_StreamAsync()
{
stream.Seek(0, SeekOrigin.Begin);
return await xxHash32.ComputeHashAsync(stream);
}
@@ -58,12 +60,14 @@
[Benchmark]
public ulong Hash64_Stream()
{
stream.Seek(0, SeekOrigin.Begin);
return xxHash64.ComputeHash(stream);
}
[Benchmark]
public async Task<ulong> Hash64_StreamAsync()
{
stream.Seek(0, SeekOrigin.Begin);
return await xxHash64.ComputeHashAsync(stream);
}
}
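
Note on the benchmark change above: the reworked stream overloads no longer rewind the stream themselves but hash from the current position to the end, which is why the benchmarks now call Seek before every run. A minimal usage sketch under that assumption (not part of this commit):

// Minimal usage sketch, assuming the package is referenced as Standart.Hash.xxHash.
// After this change a reused stream has to be rewound by the caller, exactly as
// the benchmarks above now do.
using System;
using System.IO;
using Standart.Hash.xxHash;

public static class StreamHashExample
{
    public static void Main()
    {
        var data = new byte[100_000];
        new Random(42).NextBytes(data);

        using (var stream = new MemoryStream(data))
        {
            uint h32 = xxHash32.ComputeHash(stream);   // reads the stream to its end
            stream.Seek(0, SeekOrigin.Begin);          // rewind before hashing again
            ulong h64 = xxHash64.ComputeHash(stream);

            Console.WriteLine($"xxHash32: {h32:x8}");
            Console.WriteLine($"xxHash64: {h64:x16}");
        }
    }
}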

View File

@@ -1,69 +1,18 @@
namespace Standart.Hash.xxHash
{
using System;
using System.Buffers;
using System.IO;
using System.Threading.Tasks;
public static partial class xxHash32
{
private const int min32 = 256;
private const int div16 = 0x7FFFFFF0;
/// <summary>
/// Compute xxHash for the stream
/// </summary>
/// <param name="stream">The stream of data</param>
/// <param name="bufferSize">The buffer size</param>
/// <param name="seed">The seed number</param>
/// <returns>The hash</returns>
public static uint ComputeHash(Stream stream, int bufferSize = 4096, uint seed = 0)
private static unsafe void Shift(byte[] data, int l, ref uint v1, ref uint v2, ref uint v3, ref uint v4)
{
// Go to the beginning of the stream
stream.Seek(0, SeekOrigin.Begin);
// Get length of the stream
long length = stream.Length;
// The buffer size can't be less than 256 bytes
if (bufferSize < min32) bufferSize = min32;
else bufferSize &= div16;
// Calculate the number of chunks and the remain
int chunks = (int) length / bufferSize;
int remain = (int) length % bufferSize;
int offset = bufferSize;
// Calculate the offset
if (remain != 0) chunks++;
if (remain != 0 && remain < 16) offset -= 16;
// Optimizing memory allocation
byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize);
try
{
// Prepare the seed vector
uint v1 = seed + p1 + p2;
uint v2 = seed + p2;
uint v3 = seed + 0;
uint v4 = seed - p1;
// Process chunks
// Skip the last chunk. It will processed a little bit later
for (int i = 2; i <= chunks; i++)
{
// Change bufferSize for the last read
if (i == chunks) bufferSize = offset;
// Read the next chunk
stream.Read(buffer, 0, bufferSize);
unsafe
{
fixed (byte* pData = &buffer[0])
fixed (byte* pData = &data[0])
{
byte* ptr = pData;
byte* end = pData + bufferSize;
byte* limit = ptr + l;
do
{
@@ -87,51 +36,20 @@
v4 *= p1;
ptr += 4;
} while (ptr < end);
}
} while (ptr < limit);
}
}
// Read the last chunk
offset = stream.Read(buffer, 0, bufferSize);
// Process the last chunk
unsafe
private static unsafe uint Final(byte[] data, int l, ref uint v1, ref uint v2, ref uint v3, ref uint v4, long length, uint seed)
{
fixed (byte* pData = &buffer[0])
fixed (byte* pData = &data[0])
{
byte* ptr = pData;
byte* end = pData + offset;
byte* end = pData + l;
uint h32;
if (length >= 16)
{
byte* limit = end - 16;
do
{
v1 += *((uint*) ptr) * p2;
v1 = (v1 << 13) | (v1 >> (32 - 13)); // rotl 13
v1 *= p1;
ptr += 4;
v2 += *((uint*) ptr) * p2;
v2 = (v2 << 13) | (v2 >> (32 - 13)); // rotl 13
v2 *= p1;
ptr += 4;
v3 += *((uint*) ptr) * p2;
v3 = (v3 << 13) | (v3 >> (32 - 13)); // rotl 13
v3 *= p1;
ptr += 4;
v4 += *((uint*) ptr) * p2;
v4 = (v4 << 13) | (v4 >> (32 - 13)); // rotl 13
v4 *= p1;
ptr += 4;
} while (ptr <= limit);
h32 = ((v1 << 1) | (v1 >> (32 - 1))) + // rotl 1
((v2 << 7) | (v2 >> (32 - 7))) + // rotl 7
((v3 << 12) | (v3 >> (32 - 12))) + // rotl 12
@@ -144,6 +62,7 @@
h32 += (uint) length;
// finalize
while (ptr <= end - 4)
{
h32 += *((uint*)ptr) * p3;
@@ -158,6 +77,7 @@
ptr += 1;
}
// avalanche
h32 ^= h32 >> 15;
h32 *= p2;
h32 ^= h32 >> 13;
@@ -167,6 +87,54 @@
return h32;
}
}
/// <summary>
/// Compute xxHash for the stream
/// </summary>
/// <param name="stream">The stream of data</param>
/// <param name="bufferSize">The buffer size</param>
/// <param name="seed">The seed number</param>
/// <returns>The hash</returns>
public static uint ComputeHash(Stream stream, int bufferSize = 4096, uint seed = 0)
{
// Optimizing memory allocation
byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize + 16);
int readBytes;
int offset = 0;
long length = 0;
// Prepare the seed vector
uint v1 = seed + p1 + p2;
uint v2 = seed + p2;
uint v3 = seed + 0;
uint v4 = seed - p1;
try
{
// Read flow of bytes
while ((readBytes = stream.Read(buffer, offset, bufferSize)) > 0)
{
length = length + readBytes;
offset = offset + readBytes;
if (offset < 16) continue;
int r = offset % 16; // remain
int l = offset - r; // length
// Process the next chunk
Shift(buffer, l, ref v1, ref v2, ref v3, ref v4);
// Put remaining bytes to buffer
Array.Copy(buffer, l, buffer, 0, r);
offset = r;
}
// Process the final chunk
uint h32 = Final(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed);
return h32;
}
finally
{
@@ -184,156 +152,45 @@
/// <returns>The hash</returns>
public static async Task<uint> ComputeHashAsync(Stream stream, int bufferSize = 4096, uint seed = 0)
{
// Go to the beginning of the stream
stream.Seek(0, SeekOrigin.Begin);
// Get length of the stream
long length = stream.Length;
// The buffer size can't be less than 256 bytes
if (bufferSize < min32) bufferSize = min32;
else bufferSize &= div16;
// Calculate the number of chunks and the remain
int chunks = (int) length / bufferSize;
int remain = (int) length % bufferSize;
int offset = bufferSize;
// Calculate the offset
if (remain != 0) chunks++;
if (remain != 0 && remain < 16) offset -= 16;
// Optimizing memory allocation
byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize);
byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize + 16);
int readBytes;
int offset = 0;
long length = 0;
try
{
// Prepare the seed vector
uint v1 = seed + p1 + p2;
uint v2 = seed + p2;
uint v3 = seed + 0;
uint v4 = seed - p1;
// Process chunks
// Skip the last chunk. It will processed a little bit later
for (int i = 2; i <= chunks; i++)
try
{
// Change bufferSize for the last read
if (i == chunks) bufferSize = offset;
// Read the next chunk
await stream.ReadAsync(buffer, 0, bufferSize).ConfigureAwait(false);;
unsafe
// Read flow of bytes
while ((readBytes = await stream.ReadAsync(buffer, offset, bufferSize)) > 0)
{
fixed (byte* pData = &buffer[0])
{
byte* ptr = pData;
byte* end = pData + bufferSize;
length = length + readBytes;
offset = offset + readBytes;
do
{
v1 += *((uint*)ptr) * p2;
v1 = (v1 << 13) | (v1 >> (32 - 13)); // rotl 13
v1 *= p1;
ptr += 4;
if (offset < 16) continue;
v2 += *((uint*)ptr) * p2;
v2 = (v2 << 13) | (v2 >> (32 - 13)); // rotl 13
v2 *= p1;
ptr += 4;
int r = offset % 16; // remain
int l = offset - r; // length
v3 += *((uint*)ptr) * p2;
v3 = (v3 << 13) | (v3 >> (32 - 13)); // rotl 13
v3 *= p1;
ptr += 4;
// Process the next chunk
Shift(buffer, l, ref v1, ref v2, ref v3, ref v4);
v4 += *((uint*)ptr) * p2;
v4 = (v4 << 13) | (v4 >> (32 - 13)); // rotl 13
v4 *= p1;
ptr += 4;
} while (ptr < end);
}
}
// Put remaining bytes to buffer
Array.Copy(buffer, l, buffer, 0, r);
offset = r;
}
// Read the last chunk
offset = await stream.ReadAsync(buffer, 0, bufferSize).ConfigureAwait(false);
// Process the last chunk
unsafe
{
fixed (byte* pData = &buffer[0])
{
byte* ptr = pData;
byte* end = pData + offset;
uint h32;
if (length >= 16)
{
byte* limit = end - 16;
do
{
v1 += *((uint*) ptr) * p2;
v1 = (v1 << 13) | (v1 >> (32 - 13)); // rotl 13
v1 *= p1;
ptr += 4;
v2 += *((uint*) ptr) * p2;
v2 = (v2 << 13) | (v2 >> (32 - 13)); // rotl 13
v2 *= p1;
ptr += 4;
v3 += *((uint*) ptr) * p2;
v3 = (v3 << 13) | (v3 >> (32 - 13)); // rotl 13
v3 *= p1;
ptr += 4;
v4 += *((uint*) ptr) * p2;
v4 = (v4 << 13) | (v4 >> (32 - 13)); // rotl 13
v4 *= p1;
ptr += 4;
} while (ptr <= limit);
h32 = ((v1 << 1) | (v1 >> (32 - 1))) + // rotl 1
((v2 << 7) | (v2 >> (32 - 7))) + // rotl 7
((v3 << 12) | (v3 >> (32 - 12))) + // rotl 12
((v4 << 18) | (v4 >> (32 - 18))); // rotl 18
}
else
{
h32 = seed + p5;
}
h32 += (uint) length;
while (ptr <= end - 4)
{
h32 += *((uint*) ptr) * p3;
h32 = ((h32 << 17) | (h32 >> (32 - 17))) * p4; // (rotl 17) * p4
ptr += 4;
}
while (ptr < end)
{
h32 += *((byte*) ptr) * p5;
h32 = ((h32 << 11) | (h32 >> (32 - 11))) * p1; // (rotl 11) * p1
ptr += 1;
}
h32 ^= h32 >> 15;
h32 *= p2;
h32 ^= h32 >> 13;
h32 *= p3;
h32 ^= h32 >> 16;
// Process the final chunk
uint h32 = Final(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed);
return h32;
}
}
}
finally
{
// Free memory
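
The rewritten xxHash32 stream methods above drop the old seek-and-chunk-count bookkeeping in favour of a plain read loop: bytes accumulate in a pooled buffer, every complete 16-byte stripe is consumed by Shift, the sub-16-byte remainder is copied back to the front of the buffer, and Final folds the carried tail plus the total length into the hash. A self-contained sketch of that buffering pattern, with hypothetical process/finish delegates standing in for the xxHash32 Shift/Final helpers:

// Sketch of the read-loop buffering pattern used above. The process/finish
// delegates are stand-ins for illustration only; the real code calls the
// private Shift and Final helpers with a 16-byte stripe size.
using System;
using System.Buffers;
using System.IO;

public static class BlockStreamReader
{
    public static void Consume(
        Stream stream, int bufferSize, int stripeSize,
        Action<byte[], int> process,          // handles the stripe-aligned prefix
        Action<byte[], int, long> finish)     // handles the tail and total length
    {
        // Rent stripeSize extra bytes so a carried remainder plus a full read always fits.
        byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize + stripeSize);
        int offset = 0;
        long length = 0;
        try
        {
            int readBytes;
            while ((readBytes = stream.Read(buffer, offset, bufferSize)) > 0)
            {
                length += readBytes;
                offset += readBytes;
                if (offset < stripeSize) continue;     // not enough for a full stripe yet

                int r = offset % stripeSize;           // bytes that do not fill a stripe
                int l = offset - r;                    // largest stripe-aligned prefix

                process(buffer, l);                    // consume the aligned prefix
                Array.Copy(buffer, l, buffer, 0, r);   // carry the remainder to the front
                offset = r;
            }
            finish(buffer, offset, length);            // fold in the tail and total length
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(buffer);     // free the pooled buffer
        }
    }
}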

View File

@@ -1,69 +1,19 @@
namespace Standart.Hash.xxHash
{
using System;
using System.Buffers;
using System.IO;
using System.Threading.Tasks;
public static partial class xxHash64
{
private const int min64 = 1024;
private const int div32 = 0x7FFFFFE0;
/// <summary>
/// Compute xxHash for the stream
/// </summary>
/// <param name="stream">The stream of data</param>
/// <param name="bufferSize">The buffer size</param>
/// <param name="seed">The seed number</param>
/// <returns>The hash</returns>
public static ulong ComputeHash(Stream stream, int bufferSize = 8192, ulong seed = 0)
private static unsafe void Shift(byte[] data, int l, ref ulong v1, ref ulong v2, ref ulong v3, ref ulong v4)
{
// Go to the beginning of the stream
stream.Seek(0, SeekOrigin.Begin);
// Get length of the stream
long length = stream.Length;
// The buffer can't be less than 1024 bytes
if (bufferSize < min64) bufferSize = min64;
else bufferSize &= div32;
// Calculate the number of chunks and the remain
int chunks = (int) length / bufferSize;
int remain = (int) length % bufferSize;
int offset = bufferSize;
// Calculate the offset
if (remain != 0) chunks++;
if (remain != 0 && remain < 32) offset -= 32;
// Optimizing memory allocation
byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize);
try
{
// Prepare the seed vector
ulong v1 = seed + p1 + p2;
ulong v2 = seed + p2;
ulong v3 = seed + 0;
ulong v4 = seed - p1;
// Process chunks
// Skip the last chunk. It will processed a little bit later
for (int i = 2; i <= chunks; i++)
{
// Change bufferSize for the last read
if (i == chunks) bufferSize = offset;
// Read the next chunk
stream.Read(buffer, 0, bufferSize);
unsafe
{
fixed (byte* pData = &buffer[0])
fixed (byte* pData = &data[0])
{
byte* ptr = pData;
byte* end = pData + bufferSize;
byte* limit = ptr + l;
do
{
@@ -87,51 +37,20 @@
v4 *= p1;
ptr += 8;
} while (ptr < end);
}
} while (ptr < limit);
}
}
// Read the last chunk
offset = stream.Read(buffer, 0, bufferSize);
// Process the last chunk
unsafe
private static unsafe ulong Final(byte[] data, int l, ref ulong v1, ref ulong v2, ref ulong v3, ref ulong v4, long length, ulong seed)
{
fixed (byte* pData = &buffer[0])
fixed (byte* pData = &data[0])
{
byte* ptr = pData;
byte* end = pData + offset;
byte* end = pData + l;
ulong h64;
if (length >= 32)
if (length >= 16)
{
byte* limit = end - 32;
do
{
v1 += *((ulong*)ptr) * p2;
v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31
v1 *= p1;
ptr += 8;
v2 += *((ulong*)ptr) * p2;
v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31
v2 *= p1;
ptr += 8;
v3 += *((ulong*)ptr) * p2;
v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31
v3 *= p1;
ptr += 8;
v4 += *((ulong*)ptr) * p2;
v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31
v4 *= p1;
ptr += 8;
} while (ptr <= limit);
h64 = ((v1 << 1) | (v1 >> (64 - 1))) + // rotl 1
((v2 << 7) | (v2 >> (64 - 7))) + // rotl 7
((v3 << 12) | (v3 >> (64 - 12))) + // rotl 12
@@ -164,6 +83,7 @@
v4 *= p1;
h64 ^= v4;
h64 = h64 * p1 + p4;
}
else
{
@@ -207,6 +127,54 @@
return h64;
}
}
/// <summary>
/// Compute xxHash for the stream
/// </summary>
/// <param name="stream">The stream of data</param>
/// <param name="bufferSize">The buffer size</param>
/// <param name="seed">The seed number</param>
/// <returns>The hash</returns>
public static ulong ComputeHash(Stream stream, int bufferSize = 8192, ulong seed = 0)
{
// Optimizing memory allocation
byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize + 32);
int readBytes;
int offset = 0;
long length = 0;
// Prepare the seed vector
ulong v1 = seed + p1 + p2;
ulong v2 = seed + p2;
ulong v3 = seed + 0;
ulong v4 = seed - p1;
try
{
// Read flow of bytes
while ((readBytes = stream.Read(buffer, offset, bufferSize)) > 0)
{
length = length + readBytes;
offset = offset + readBytes;
if (offset < 32) continue;
int r = offset % 32; // remain
int l = offset - r; // length
// Process the next chunk
Shift(buffer, l, ref v1, ref v2, ref v3, ref v4);
// Put remaining bytes to buffer
Array.Copy(buffer, l, buffer, 0, r);
offset = r;
}
// Process the final chunk
ulong h64 = Final(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed);
return h64;
}
finally
{
@@ -224,196 +192,45 @@
/// <returns>The hash</returns>
public static async Task<ulong> ComputeHashAsync(Stream stream, int bufferSize = 8192, ulong seed = 0)
{
// Go to the beginning of the stream
stream.Seek(0, SeekOrigin.Begin);
// Get length of the stream
long length = stream.Length;
// The buffer can't be less than 1024 bytes
if (bufferSize < min64) bufferSize = min64;
else bufferSize &= div32;
// Calculate the number of chunks and the remain
int chunks = (int) length / bufferSize;
int remain = (int) length % bufferSize;
int offset = bufferSize;
// Calculate the offset
if (remain != 0) chunks++;
if (remain != 0 && remain < 32) offset -= 32;
// Optimizing memory allocation
byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize);
byte[] buffer = ArrayPool<byte>.Shared.Rent(bufferSize + 32);
int readBytes;
int offset = 0;
long length = 0;
try
{
// Prepare the seed vector
ulong v1 = seed + p1 + p2;
ulong v2 = seed + p2;
ulong v3 = seed + 0;
ulong v4 = seed - p1;
// Process chunks
// Skip the last chunk. It will processed a little bit later
for (int i = 2; i <= chunks; i++)
try
{
// Change bufferSize for the last read
if (i == chunks) bufferSize = offset;
// Read the next chunk
await stream.ReadAsync(buffer, 0, bufferSize).ConfigureAwait(false);;
unsafe
// Read flow of bytes
while ((readBytes = await stream.ReadAsync(buffer, offset, bufferSize)) > 0)
{
fixed (byte* pData = &buffer[0])
{
byte* ptr = pData;
byte* end = pData + bufferSize;
length = length + readBytes;
offset = offset + readBytes;
do
{
v1 += *((ulong*) ptr) * p2;
v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31
v1 *= p1;
ptr += 8;
if (offset < 32) continue;
v2 += *((ulong*) ptr) * p2;
v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31
v2 *= p1;
ptr += 8;
int r = offset % 32; // remain
int l = offset - r; // length
v3 += *((ulong*) ptr) * p2;
v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31
v3 *= p1;
ptr += 8;
// Process the next chunk
Shift(buffer, l, ref v1, ref v2, ref v3, ref v4);
v4 += *((ulong*) ptr) * p2;
v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31
v4 *= p1;
ptr += 8;
} while (ptr < end);
}
}
// Put remaining bytes to buffer
Array.Copy(buffer, l, buffer, 0, r);
offset = r;
}
// Read the last chunk
offset = await stream.ReadAsync(buffer, 0, bufferSize).ConfigureAwait(false);;
// Process the last chunk
unsafe
{
fixed (byte* pData = &buffer[0])
{
byte* ptr = pData;
byte* end = pData + offset;
ulong h64;
if (length >= 32)
{
byte* limit = end - 32;
do
{
v1 += *((ulong*) ptr) * p2;
v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31
v1 *= p1;
ptr += 8;
v2 += *((ulong*) ptr) * p2;
v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31
v2 *= p1;
ptr += 8;
v3 += *((ulong*) ptr) * p2;
v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31
v3 *= p1;
ptr += 8;
v4 += *((ulong*) ptr) * p2;
v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31
v4 *= p1;
ptr += 8;
} while (ptr <= limit);
h64 = ((v1 << 1) | (v1 >> (64 - 1))) + // rotl 1
((v2 << 7) | (v2 >> (64 - 7))) + // rotl 7
((v3 << 12) | (v3 >> (64 - 12))) + // rotl 12
((v4 << 18) | (v4 >> (64 - 18))); // rotl 18
// merge round
v1 *= p2;
v1 = (v1 << 31) | (v1 >> (64 - 31)); // rotl 31
v1 *= p1;
h64 ^= v1;
h64 = h64 * p1 + p4;
// merge round
v2 *= p2;
v2 = (v2 << 31) | (v2 >> (64 - 31)); // rotl 31
v2 *= p1;
h64 ^= v2;
h64 = h64 * p1 + p4;
// merge round
v3 *= p2;
v3 = (v3 << 31) | (v3 >> (64 - 31)); // rotl 31
v3 *= p1;
h64 ^= v3;
h64 = h64 * p1 + p4;
// merge round
v4 *= p2;
v4 = (v4 << 31) | (v4 >> (64 - 31)); // rotl 31
v4 *= p1;
h64 ^= v4;
h64 = h64 * p1 + p4;
}
else
{
h64 = seed + p5;
}
h64 += (ulong) length;
// finalize
while (ptr <= end - 8)
{
ulong t1 = *((ulong*) ptr) * p2;
t1 = (t1 << 31) | (t1 >> (64 - 31)); // rotl 31
t1 *= p1;
h64 ^= t1;
h64 = ((h64 << 27) | (h64 >> (64 - 27))) * p1 + p4; // (rotl 27) * p1 + p4
ptr += 8;
}
if (ptr <= end - 4)
{
h64 ^= *((uint*) ptr) * p1;
h64 = ((h64 << 23) | (h64 >> (64 - 23))) * p2 + p3; // (rotl 27) * p2 + p3
ptr += 4;
}
while (ptr < end)
{
h64 ^= *((byte*) ptr) * p5;
h64 = ((h64 << 11) | (h64 >> (64 - 11))) * p1; // (rotl 11) * p1
ptr += 1;
}
// avalanche
h64 ^= h64 >> 33;
h64 *= p2;
h64 ^= h64 >> 29;
h64 *= p3;
h64 ^= h64 >> 32;
// Process the final chunk
ulong h64 = Final(buffer, offset, ref v1, ref v2, ref v3, ref v4, length, seed);
return h64;
}
}
}
finally
{
// Free memory
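
The xxHash64 async overload above follows the same pattern with ReadAsync and 32-byte stripes. A minimal async usage sketch; the file name is a placeholder and not part of this commit:

// Minimal async usage sketch. "data.bin" is a hypothetical path used only for
// illustration; any readable stream positioned where hashing should start works.
using System;
using System.IO;
using System.Threading.Tasks;
using Standart.Hash.xxHash;

public static class AsyncHashExample
{
    public static async Task Main()
    {
        using (var stream = new FileStream("data.bin", FileMode.Open, FileAccess.Read,
                                           FileShare.Read, bufferSize: 4096, useAsync: true))
        {
            ulong h64 = await xxHash64.ComputeHashAsync(stream, bufferSize: 8192);
            Console.WriteLine($"xxHash64: {h64:x16}");
        }
    }
}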