From 4b019f5c48e1548b1ea045910db52bf592259080 Mon Sep 17 00:00:00 2001
From: Melnik Alexander
Date: Thu, 27 Sep 2018 22:24:14 +0300
Subject: [PATCH] Added a fast copy operation for small buffers

---
 README.md                                     |   2 +-
 nuget.props                                   |   2 +-
 .../xxBufferBenchmark.cs                      |  41 ++++
 src/Standart.Hash.xxHash.Test/xxBufferTest.cs |  39 +++
 src/Standart.Hash.xxHash/xxBuffer.cs          | 281 ++++++++++++++++++
 src/Standart.Hash.xxHash/xxHash32.Stream.cs   |   4 +-
 src/Standart.Hash.xxHash/xxHash64.Stream.cs   |   4 +-
 7 files changed, 367 insertions(+), 6 deletions(-)
 create mode 100644 src/Standart.Hash.xxHash.Perf/xxBufferBenchmark.cs
 create mode 100644 src/Standart.Hash.xxHash.Test/xxBufferTest.cs
 create mode 100644 src/Standart.Hash.xxHash/xxBuffer.cs

diff --git a/README.md b/README.md
index 7a9570f..9e7ac50 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@
 coverage
- nuget
+ nuget
 platform
diff --git a/nuget.props b/nuget.props
index 148981f..cc75e4d 100644
--- a/nuget.props
+++ b/nuget.props
@@ -3,7 +3,7 @@
 netstandard2.0
 Standart.Hash.xxHash
- 1.0.3
+ 1.0.4
 Standart.Hash.xxHash
 Standart.Hash.xxHash
 Alexander Melnik
diff --git a/src/Standart.Hash.xxHash.Perf/xxBufferBenchmark.cs b/src/Standart.Hash.xxHash.Perf/xxBufferBenchmark.cs
new file mode 100644
index 0000000..a25018c
--- /dev/null
+++ b/src/Standart.Hash.xxHash.Perf/xxBufferBenchmark.cs
@@ -0,0 +1,41 @@
+namespace Standart.Hash.xxHash.Perf
+{
+    using System;
+    using BenchmarkDotNet.Attributes;
+    using BenchmarkDotNet.Attributes.Columns;
+    using BenchmarkDotNet.Attributes.Exporters;
+
+    [RPlotExporter, RankColumn]
+    [MinColumn, MaxColumn]
+    [MemoryDiagnoser]
+    public class xxBufferBenchmark
+    {
+        private byte[] src;
+        private byte[] des;
+
+        [GlobalSetup]
+        public void Setup()
+        {
+            src = new byte[32];
+            des = new byte[32];
+        }
+
+        [Benchmark]
+        public void ArrayCopy()
+        {
+            Array.Copy(src, 0, des, 0, 32);
+        }
+
+        [Benchmark]
+        public void BufferCopy()
+        {
+            Buffer.BlockCopy(src, 0, des, 0, 32);
+        }
+
+        [Benchmark]
+        public void xxBufferCopy()
+        {
+            xxBuffer.BlockCopy(src, 0, des, 0, 32);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Standart.Hash.xxHash.Test/xxBufferTest.cs b/src/Standart.Hash.xxHash.Test/xxBufferTest.cs
new file mode 100644
index 0000000..6ca6a42
--- /dev/null
+++ b/src/Standart.Hash.xxHash.Test/xxBufferTest.cs
@@ -0,0 +1,39 @@
+namespace Standart.Hash.xxHash.Test
+{
+    using System;
+    using Xunit;
+
+    public class xxBufferTest
+    {
+        [Fact]
+        public void Copy_different_blocks()
+        {
+            // Arrange
+            byte[] src = new byte[100];
+            byte[] dst = new byte[100];
+
+            int[] counts = new int[]
+            {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+                16, 17, 18, 19, 20, 21, 22, 23,
+                24, 25, 26, 27, 28, 29, 30, 31,
+                32, 33, 34, 64, 65, 66, 96, 99
+            };
+
+            var rand = new Random(42);
+            rand.NextBytes(src);
+
+            // Act, Assert
+            foreach (int count in counts)
+            {
+                xxBuffer.BlockCopy(src, 0, dst, 0, count);
+
+                for (int i = 0; i < count; i++)
+                {
+                    Assert.Equal(src[i], dst[i]);
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Standart.Hash.xxHash/xxBuffer.cs b/src/Standart.Hash.xxHash/xxBuffer.cs
new file mode 100644
index 0000000..fb65929
--- /dev/null
+++ b/src/Standart.Hash.xxHash/xxBuffer.cs
@@ -0,0 +1,281 @@
+namespace Standart.Hash.xxHash
+{
+    using System;
+    using System.Runtime.CompilerServices;
+
+    /// <summary>
+    /// Byte-array copy helpers optimized for small blocks.
+    /// </summary>
+    public static class xxBuffer
+    {
+        /// <summary>
+        /// Copies <paramref name="count"/> bytes from <paramref name="src"/> to <paramref name="dst"/>.
+        /// </summary>
+        /// <param name="src">The source array.</param>
+        /// <param name="srcOffset">The zero-based byte offset into <paramref name="src"/>.</param>
+        /// <param name="dst">The destination array.</param>
+        /// <param name="dstOffset">The zero-based byte offset into <paramref name="dst"/>.</param>
+        /// <param name="count">The number of bytes to copy.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="src"/> or <paramref name="dst"/> is null.</exception>
+        /// <exception cref="ArgumentOutOfRangeException">An offset or <paramref name="count"/> is negative.</exception>
+        /// <exception cref="ArgumentException">The requested range does not fit in one of the arrays.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void BlockCopy(this byte[] src, int srcOffset, byte[] dst, int dstOffset, int count)
+        {
+            if (src == null)
+                throw new ArgumentNullException(nameof(src));
+            if (dst == null)
+                throw new ArgumentNullException(nameof(dst));
+            if (srcOffset < 0)
+                throw new ArgumentOutOfRangeException(nameof(srcOffset));
+            if (dstOffset < 0)
+                throw new ArgumentOutOfRangeException(nameof(dstOffset));
+            if (count < 0)
+                throw new ArgumentOutOfRangeException(nameof(count));
+            if (srcOffset > src.Length - count || dstOffset > dst.Length - count)
+                throw new ArgumentException("Offset and count exceed the bounds of the array.");
+
+            // An empty copy is a no-op; returning here also avoids taking the address of
+            // an element one past the end when an offset equals the array length
+            if (count == 0)
+                return;
+
+            // The unrolled copy runs front to back, which is only safe for overlapping ranges
+            // when the destination starts first; let Buffer.BlockCopy handle the other case
+            if (ReferenceEquals(src, dst) && dstOffset > srcOffset && dstOffset - srcOffset < count)
+            {
+                Buffer.BlockCopy(src, srcOffset, dst, dstOffset, count);
+                return;
+            }
+
+            fixed (byte* ptrSrc = &src[srcOffset])
+            fixed (byte* ptrDst = &dst[dstOffset])
+            {
+                UnsafeBlockCopy(ptrSrc, ptrDst, count);
+            }
+        }
+
+        /// <summary>
+        /// Copies <paramref name="count"/> bytes from <paramref name="src"/> to <paramref name="dst"/>, front to back.
+        /// </summary>
+        /// <remarks>
+        /// <paramref name="count"/> must not be negative: such a value matches no case and
+        /// enters neither block below, so the jump back to the table would never terminate.
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe void UnsafeBlockCopy(byte* src, byte* dst, int count)
+        {
+            SMALLTABLE:
+            switch (count)
+            {
+                case 0:
+                    return;
+                case 1:
+                    *dst = *src;
+                    return;
+                case 2:
+                    *(short*)dst = *(short*)src;
+                    return;
+                case 3:
+                    *(short*)(dst + 0) = *(short*)(src + 0);
+                    *(dst + 2) = *(src + 2);
+                    return;
+                case 4:
+                    *(int*)dst = *(int*)src;
+                    return;
+                case 5:
+                    *(int*)(dst + 0) = *(int*)(src + 0);
+                    *(dst + 4) = *(src + 4);
+                    return;
+                case 6:
+                    *(int*)(dst + 0) = *(int*)(src + 0);
+                    *(short*)(dst + 4) = *(short*)(src + 4);
+                    return;
+                case 7:
+                    *(int*)(dst + 0) = *(int*)(src + 0);
+                    *(short*)(dst + 4) = *(short*)(src + 4);
+                    *(dst + 6) = *(src + 6);
+                    return;
+                case 8:
+                    *(long*)dst = *(long*)src;
+                    return;
+                case 9:
+                    *(long*)(dst + 0) = *(long*)(src + 0);
+                    *(dst + 8) = *(src + 8);
+                    return;
+                case 10:
+                    *(long*)(dst + 0) = *(long*)(src + 0);
+                    *(short*)(dst + 8) = *(short*)(src + 8);
+                    return;
+                case 11:
+                    *(long*)(dst + 0) = *(long*)(src + 0);
+                    *(short*)(dst + 8) = *(short*)(src + 8);
+                    *(dst + 10) = *(src + 10);
+                    return;
+                case 12:
+                    *(long*)dst = *(long*)src;
+                    *(int*)(dst + 8) = *(int*)(src + 8);
+                    return;
+                case 13:
+                    *(long*)(dst + 0) = *(long*)(src + 0);
+                    *(int*)(dst + 8) = *(int*)(src + 8);
+                    *(dst + 12) = *(src + 12);
+                    return;
+                case 14:
+                    *(long*)(dst + 0) = *(long*)(src + 0);
+                    *(int*)(dst + 8) = *(int*)(src + 8);
+                    *(short*)(dst + 12) = *(short*)(src + 12);
+                    return;
+                case 15:
+                    *(long*)(dst + 0) = *(long*)(src + 0);
+                    *(int*)(dst + 8) = *(int*)(src + 8);
+                    *(short*)(dst + 12) = *(short*)(src + 12);
+                    *(dst + 14) = *(src + 14);
+                    return;
+                case 16:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    return;
+                case 17:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(dst + 16) = *(src + 16);
+                    return;
+                case 18:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(short*)(dst + 16) = *(short*)(src + 16);
+                    return;
+                case 19:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(short*)(dst + 16) = *(short*)(src + 16);
+                    *(dst + 18) = *(src + 18);
+                    return;
+                case 20:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(int*)(dst + 16) = *(int*)(src + 16);
+                    return;
+
+                case 21:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(int*)(dst + 16) = *(int*)(src + 16);
+                    *(dst + 20) = *(src + 20);
+                    return;
+                case 22:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(int*)(dst + 16) = *(int*)(src + 16);
+                    *(short*)(dst + 20) = *(short*)(src + 20);
+                    return;
+                case 23:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(int*)(dst + 16) = *(int*)(src + 16);
+                    *(short*)(dst + 20) = *(short*)(src + 20);
+                    *(dst + 22) = *(src + 22);
+                    return;
+                case 24:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(long*)(dst + 16) = *(long*)(src + 16);
+                    return;
+                case 25:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(long*)(dst + 16) = *(long*)(src + 16);
+                    *(dst + 24) = *(src + 24);
+                    return;
+                case 26:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(long*)(dst + 16) = *(long*)(src + 16);
+                    *(short*)(dst + 24) = *(short*)(src + 24);
+                    return;
+                case 27:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(long*)(dst + 16) = *(long*)(src + 16);
+                    *(short*)(dst + 24) = *(short*)(src + 24);
+                    *(dst + 26) = *(src + 26);
+                    return;
+                case 28:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(long*)(dst + 16) = *(long*)(src + 16);
+                    *(int*)(dst + 24) = *(int*)(src + 24);
+                    return;
+                case 29:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(long*)(dst + 16) = *(long*)(src + 16);
+                    *(int*)(dst + 24) = *(int*)(src + 24);
+                    *(dst + 28) = *(src + 28);
+                    return;
+                case 30:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(long*)(dst + 16) = *(long*)(src + 16);
+                    *(int*)(dst + 24) = *(int*)(src + 24);
+                    *(short*)(dst + 28) = *(short*)(src + 28);
+                    return;
+                case 31:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(long*)(dst + 16) = *(long*)(src + 16);
+                    *(int*)(dst + 24) = *(int*)(src + 24);
+                    *(short*)(dst + 28) = *(short*)(src + 28);
+                    *(dst + 30) = *(src + 30);
+                    return;
+                case 32:
+                    *(long*)dst = *(long*)src;
+                    *(long*)(dst + 8) = *(long*)(src + 8);
+                    *(long*)(dst + 16) = *(long*)(src + 16);
+                    *(long*)(dst + 24) = *(long*)(src + 24);
+                    return;
+                default:
+                    break;
+            }
+
+            long* lpSrc = (long*)src;
+            long* lpDst = (long*)dst;
+
+            // Bulk phase: move 64 bytes per iteration
+            while (count >= 64)
+            {
+                *(lpDst + 0) = *(lpSrc + 0);
+                *(lpDst + 1) = *(lpSrc + 1);
+                *(lpDst + 2) = *(lpSrc + 2);
+                *(lpDst + 3) = *(lpSrc + 3);
+                *(lpDst + 4) = *(lpSrc + 4);
+                *(lpDst + 5) = *(lpSrc + 5);
+                *(lpDst + 6) = *(lpSrc + 6);
+                *(lpDst + 7) = *(lpSrc + 7);
+                if (count == 64)
+                    return;
+                count -= 64;
+                lpSrc += 8;
+                lpDst += 8;
+            }
+
+            // One 32-byte step leaves at most 32 bytes for the table above
+            if (count > 32)
+            {
+                *(lpDst + 0) = *(lpSrc + 0);
+                *(lpDst + 1) = *(lpSrc + 1);
+                *(lpDst + 2) = *(lpSrc + 2);
+                *(lpDst + 3) = *(lpSrc + 3);
+                count -= 32;
+                lpSrc += 4;
+                lpDst += 4;
+            }
+
+            // Hand the remainder (now 1..32 bytes) back to the table
+            src = (byte*)lpSrc;
+            dst = (byte*)lpDst;
+            goto SMALLTABLE;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Standart.Hash.xxHash/xxHash32.Stream.cs b/src/Standart.Hash.xxHash/xxHash32.Stream.cs
index ae2a85a..77e1ef3 100644
--- a/src/Standart.Hash.xxHash/xxHash32.Stream.cs
+++ b/src/Standart.Hash.xxHash/xxHash32.Stream.cs
@@ -131,7 +131,7 @@
                 Shift(buffer, l, ref v1, ref v2, ref v3, ref v4);
 
                 // Put remaining bytes to buffer
-                Array.Copy(buffer, l, buffer, 0, r);
+                xxBuffer.BlockCopy(buffer, l, buffer, 0, r);
                 offset = r;
             }
 
@@ -186,7 +186,7 @@
                 Shift(buffer, l, ref v1, ref v2, ref v3, ref v4);
 
                 // Put remaining bytes to buffer
-                Array.Copy(buffer, l, buffer, 0, r);
+                xxBuffer.BlockCopy(buffer, l, buffer, 0, r);
                 offset = r;
             }
 
diff --git a/src/Standart.Hash.xxHash/xxHash64.Stream.cs b/src/Standart.Hash.xxHash/xxHash64.Stream.cs
index 4a25209..0cf3b88 100644
--- a/src/Standart.Hash.xxHash/xxHash64.Stream.cs
+++ b/src/Standart.Hash.xxHash/xxHash64.Stream.cs
@@ -170,7 +170,7 @@
                 Shift(buffer, l, ref v1, ref v2, ref v3, ref v4);
 
                 // Put remaining bytes to buffer
-                Array.Copy(buffer, l, buffer, 0, r);
+                xxBuffer.BlockCopy(buffer, l, buffer, 0, r);
                 offset = r;
             }
 
@@ -225,7 +225,7 @@
                 Shift(buffer, l, ref v1, ref v2, ref v3, ref v4);
 
                 // Put remaining bytes to buffer
-                Array.Copy(buffer, l, buffer, 0, r);
+                xxBuffer.BlockCopy(buffer, l, buffer, 0, r);
                 offset = r;
             }
 