diff --git a/X10D.Tests/src/Core/SpanTest.cs b/X10D.Tests/src/Core/SpanTest.cs
new file mode 100644
index 0000000..489ed18
--- /dev/null
+++ b/X10D.Tests/src/Core/SpanTest.cs
@@ -0,0 +1,103 @@
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using X10D.Collections;
+using X10D.Core;
+
+namespace X10D.Tests.Core;
+
+[TestClass]
+public class SpanTest
+{
+    [TestMethod]
+    public void Pack8Bit_Should_Pack_Correctly()
+    {
+        Span<bool> span = stackalloc bool[8] { true, true, false, false, true, true, false, false };
+        Assert.AreEqual(0b00110011, span.PackByte());
+    }
+
+    [TestMethod]
+    public void Pack8Bit_Should_Pack_Correctly_Randomize()
+    {
+        var value = new Random().NextByte();
+
+        Span<bool> unpacks = stackalloc bool[8];
+
+        value.Unpack(unpacks);
+
+        Assert.AreEqual(value, unpacks.PackByte());
+    }
+
+    [TestMethod]
+    public void Pack16Bit_Should_Pack_Correctly()
+    {
+        ReadOnlySpan<bool> span = stackalloc bool[16] {
+            false, false, true, false, true, false, true, true,
+            true, false, true, true, false, true, false, false,
+        };
+        Assert.AreEqual(0b00101101_11010100, span.PackInt16());
+    }
+
+    [TestMethod]
+    public void Pack16Bit_Should_Pack_Correctly_Randomize()
+    {
+        var value = new Random().NextInt16();
+
+        Span<bool> unpacks = stackalloc bool[16];
+
+        value.Unpack(unpacks);
+
+        Assert.AreEqual(value, unpacks.PackInt16());
+    }
+
+    [TestMethod]
+    public void Pack32Bit_Should_Pack_Correctly()
+    {
+        ReadOnlySpan<bool> span = stackalloc bool[] {
+            false, true, false, true, false, true, false, true,
+            true, false, true, false, true, false, true, false,
+            false, true, false, true, false, true, false, true,
+            true, false, true, false, true, false, true, false,
+        };
+        Assert.AreEqual(0b01010101_10101010_01010101_10101010, span.PackInt32());
+    }
+
+    [TestMethod]
+    public void Pack32Bit_Should_Pack_Correctly_Randomize()
+    {
+        var value = new Random().Next(int.MinValue, int.MaxValue);
+
+        Span<bool> unpacks = stackalloc bool[32];
+
+        value.Unpack(unpacks);
+
+        Assert.AreEqual(value, unpacks.PackInt32());
+    }
+
+    [TestMethod]
+    public void Pack64Bit_Should_Pack_Correctly()
+    {
+        ReadOnlySpan<bool> span = stackalloc bool[] {
+            true, false, true, false, false, true, false, true,
+            false, false, true, true, false, true, false, false,
+            true, true, true, false, true, false, false, true,
+            false, true, false, false, true, false, false, false,
+            false, true, true, false, true, false, true, true,
+            true, false, false, true, false, true, true, false,
+            false, true, true, false, true, false, true, true,
+            true, false, true, false, true, false, true, false,
+        };
+        Assert.AreEqual(0b01010101_11010110_01101001_11010110_00010010_10010111_00101100_10100101, span.PackInt64());
+    }
+
+    [TestMethod]
+    public void Pack64Bit_Should_Pack_Correctly_Randomize()
+    {
+        var rand = new Random();
+        // Next() yields a non-negative 31-bit value, so bits 31 and 63 are always clear;
+        // that is still sufficient for a pack/unpack round-trip check.
+        long value = ((long)rand.Next() << 32) | (long)rand.Next();
+
+        Span<bool> unpacks = stackalloc bool[64];
+
+        value.Unpack(unpacks);
+
+        Assert.AreEqual(value, unpacks.PackInt64());
+    }
+}
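These cases pin down the bit-order contract: element 0 of the span maps to the least significant bit of the packed value. As a standalone illustration (a sketch, assuming only the `PackByte`/`Unpack` extension signatures exercised by the tests above):

```csharp
using X10D.Collections;

// Pack eight flags into one byte; element 0 becomes the least significant bit.
Span<bool> bits = stackalloc bool[8] { true, true, false, false, true, true, false, false };
byte packed = bits.PackByte();                                   // 0b00110011
Console.WriteLine(Convert.ToString(packed, 2).PadLeft(8, '0'));  // prints "00110011"

// Unpack restores the original flags, one bool per bit.
Span<bool> restored = stackalloc bool[8];
packed.Unpack(restored);
```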
diff --git a/X10D/src/Core/IntrinsicExtensions.cs b/X10D/src/Core/IntrinsicExtensions.cs
new file mode 100644
index 0000000..f7f0313
--- /dev/null
+++ b/X10D/src/Core/IntrinsicExtensions.cs
@@ -0,0 +1,103 @@
+#if NETCOREAPP3_0_OR_GREATER
+using System;
+using System.Diagnostics.Contracts;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+namespace X10D.Core;
+
+/// <summary>
+///     Extension methods for SIMD vectors, namely <see cref="Vector64{T}"/>, <see cref="Vector128{T}"/> and
+///     <see cref="Vector256{T}"/>.
+/// </summary>
+public static class IntrinsicExtensions
+{
+    /// <summary>
+    ///     Normalizes a <see cref="Vector64{T}"/> of bytes into standard boolean values.
+    /// </summary>
+    /// <param name="vector">The vector of bytes to normalize.</param>
+    /// <returns>The normalized vector, as a <see cref="Vector64{T}"/> of bytes.</returns>
+    /// <remarks>
+    ///     This method ensures that every element is either 0 (representing <see langword="false"/>) or 1
+    ///     (representing <see langword="true"/>). Elements equal to 0 are kept; all other values are mapped to 1.
+    /// </remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector64<byte> CorrectBoolean(this Vector64<byte> vector)
+    {
+        if (AdvSimd.IsSupported)
+        {
+            // Untested since March 6th 2023 (no hardware available).
+            // CompareEqual yields an all-ones lane where the input is 0; clearing those lanes
+            // from a vector of 1s leaves 1 for every non-zero input and 0 otherwise.
+            var cmp = AdvSimd.CompareEqual(vector, Vector64<byte>.Zero);
+            return AdvSimd.BitwiseClear(Vector64.Create((byte)1), cmp);
+        }
+
+        if (Sse.IsSupported)
+        {
+            throw new PlatformNotSupportedException("Cannot correct boolean of Vector64 on the SSE intrinsic set.");
+        }
+
+        throw new PlatformNotSupportedException("Unknown intrinsic platform.");
+    }
+
+    /// <summary>
+    ///     Normalizes a <see cref="Vector128{T}"/> of bytes into standard boolean values.
+    /// </summary>
+    /// <param name="vector">The vector of bytes to normalize.</param>
+    /// <returns>The normalized vector, as a <see cref="Vector128{T}"/> of bytes.</returns>
+    /// <remarks>
+    ///     This method ensures that every element is either 0 (representing <see langword="false"/>) or 1
+    ///     (representing <see langword="true"/>). Elements equal to 0 are kept; all other values are mapped to 1.
+    /// </remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector128<byte> CorrectBoolean(this Vector128<byte> vector)
+    {
+        if (Sse2.IsSupported)
+        {
+            // ~(vector == 0) & 1: the comparison marks the zero lanes with all-ones,
+            // and AndNot clears those lanes from a vector of 1s.
+            var cmp = Sse2.CompareEqual(vector, Vector128<byte>.Zero);
+            return Sse2.AndNot(cmp, Vector128.Create((byte)1));
+        }
+
+        if (AdvSimd.IsSupported)
+        {
+            // Untested since March 6th 2023 (no hardware available).
+            var cmp = AdvSimd.CompareEqual(vector, Vector128<byte>.Zero);
+            return AdvSimd.BitwiseClear(Vector128.Create((byte)1), cmp);
+        }
+
+        throw new PlatformNotSupportedException("Unknown intrinsic platform.");
+    }
+
+    /// <summary>
+    ///     Normalizes a <see cref="Vector256{T}"/> of bytes into standard boolean values.
+    /// </summary>
+    /// <param name="vector">The vector of bytes to normalize.</param>
+    /// <returns>The normalized vector, as a <see cref="Vector256{T}"/> of bytes.</returns>
+    /// <remarks>
+    ///     This method ensures that every element is either 0 (representing <see langword="false"/>) or 1
+    ///     (representing <see langword="true"/>). Elements equal to 0 are kept; all other values are mapped to 1.
+    /// </remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector256<byte> CorrectBoolean(this Vector256<byte> vector)
+    {
+        if (Avx2.IsSupported)
+        {
+            var cmp = Avx2.CompareEqual(vector, Vector256<byte>.Zero);
+            return Avx2.AndNot(cmp, Vector256.Create((byte)1));
+        }
+
+        if (AdvSimd.IsSupported)
+        {
+            throw new PlatformNotSupportedException("Cannot correct boolean of Vector256 on the ARM intrinsic set.");
+        }
+
+        throw new PlatformNotSupportedException("Unknown intrinsic platform.");
+    }
+}
+#endif
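The effect of `CorrectBoolean` is easiest to see on concrete lanes. A hypothetical call (assuming an SSE2- or NEON-capable host, per the platform guards above):

```csharp
using System.Runtime.Intrinsics;
using X10D.Core;

Vector128<byte> raw = Vector128.Create((byte)0, 3, 0, 255, 1, 0, 7, 0,
                                       0, 64, 0, 2, 9, 0, 128, 0);

// Every non-zero lane becomes exactly 1; zero lanes stay 0:
// [0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0]
Vector128<byte> normalized = raw.CorrectBoolean();
```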
diff --git a/X10D/src/Core/IntrinsicUtility.cs b/X10D/src/Core/IntrinsicUtility.cs
new file mode 100644
index 0000000..3524088
--- /dev/null
+++ b/X10D/src/Core/IntrinsicUtility.cs
@@ -0,0 +1,222 @@
+#if NETCOREAPP3_0_OR_GREATER
+
+using System;
+using System.Diagnostics.Contracts;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+namespace X10D.Core;
+
+/// <summary>
+///     Provides utility methods for SIMD vectors that are currently missing from the common hardware intrinsics API.
+/// </summary>
+public static class IntrinsicUtility
+{
+    /// <summary>
+    ///     Multiplies packed 64-bit unsigned integer elements in <paramref name="lhs"/> and <paramref name="rhs"/>
+    ///     and truncates the results to 64 bits.<br/>
+    ///     Operation:<br/>
+    ///     <code>
+    ///     dest[0] = lhs[0] * rhs[0];
+    ///     dest[1] = lhs[1] * rhs[1];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>The truncated products of <paramref name="lhs"/> and <paramref name="rhs"/>.</returns>
+    /// <remarks>API available on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and ARM NEON (untested) hardware.</remarks>
+    [Pure]
+    [CLSCompliant(false)]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector128<ulong> Multiply(Vector128<ulong> lhs, Vector128<ulong> rhs)
+    {
+        if (Sse2.IsSupported)
+        {
+            // https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers
+            // Split each lane into 32-bit halves: the low 64 bits of the product are
+            // lo(a)*lo(b) + ((hi(a)*lo(b) + lo(a)*hi(b)) << 32).
+            Vector128<ulong> ac = Sse2.Multiply(lhs.AsUInt32(), rhs.AsUInt32());
+            Vector128<uint> b = Sse2.ShiftRightLogical(lhs, 32).AsUInt32();
+            Vector128<ulong> bc = Sse2.Multiply(b, rhs.AsUInt32());
+            Vector128<uint> d = Sse2.ShiftRightLogical(rhs, 32).AsUInt32();
+            Vector128<ulong> ad = Sse2.Multiply(lhs.AsUInt32(), d);
+            Vector128<ulong> high = Sse2.Add(bc, ad);
+            high = Sse2.ShiftLeftLogical(high, 32);
+
+            return Sse2.Add(high, ac);
+        }
+
+        if (AdvSimd.IsSupported)
+        {
+            // https://stackoverflow.com/questions/60236627/facing-problem-in-implementing-multiplication-of-64-bit-variables-using-arm-neon
+            // Untested since March 7th 2023 (no hardware available).
+            var a = AdvSimd.ExtractNarrowingLower(lhs);
+            var b = AdvSimd.ExtractNarrowingLower(rhs);
+
+            // Multiply each 32-bit half of lhs against the opposite half of rhs, pairwise-add the
+            // widened cross products to form the high halves, then accumulate lo(a)*lo(b) on top.
+            var mul = AdvSimd.Multiply(rhs.AsUInt32(), AdvSimd.ReverseElement32(lhs).AsUInt32());
+            var high = AdvSimd.ShiftLeftLogical(AdvSimd.AddPairwiseWidening(mul), 32);
+
+            return AdvSimd.MultiplyWideningLowerAndAdd(high, a, b);
+        }
+
+        throw new PlatformNotSupportedException("Unsupported SIMD platform.");
+    }
+
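+    // Scalar reference for the half-word decomposition above (illustrative only; this is a
+    // hypothetical helper, not part of the class):
+    //
+    //     static ulong MultiplyLow64(ulong a, ulong b)
+    //     {
+    //         ulong aLo = a & 0xFFFF_FFFFu, aHi = a >> 32;
+    //         ulong bLo = b & 0xFFFF_FFFFu, bHi = b >> 32;
+    //         return unchecked(aLo * bLo + ((aHi * bLo + aLo * bHi) << 32));
+    //     }
+    //
+    // The aHi * bHi term is absent because it contributes only above bit 63 and overflows away.
+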
+    /// <summary>
+    ///     Multiplies packed 64-bit unsigned integer elements in <paramref name="lhs"/> and <paramref name="rhs"/>
+    ///     and truncates the results to 64 bits.<br/>
+    ///     Operation:<br/>
+    ///     <code>
+    ///     dest[0] = lhs[0] * rhs[0];
+    ///     dest[1] = lhs[1] * rhs[1];
+    ///     dest[2] = lhs[2] * rhs[2];
+    ///     dest[3] = lhs[3] * rhs[3];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>The truncated products of <paramref name="lhs"/> and <paramref name="rhs"/>.</returns>
+    /// <remarks>API available on AVX2 hardware.</remarks>
+    [Pure]
+    [CLSCompliant(false)]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector256<ulong> Multiply(Vector256<ulong> lhs, Vector256<ulong> rhs)
+    {
+        if (Avx2.IsSupported)
+        {
+            // https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers
+            // Same half-word decomposition as the 128-bit overload, widened to four lanes.
+            Vector256<ulong> ac = Avx2.Multiply(lhs.AsUInt32(), rhs.AsUInt32());
+            Vector256<uint> b = Avx2.ShiftRightLogical(lhs, 32).AsUInt32();
+            Vector256<ulong> bc = Avx2.Multiply(b, rhs.AsUInt32());
+            Vector256<uint> d = Avx2.ShiftRightLogical(rhs, 32).AsUInt32();
+            Vector256<ulong> ad = Avx2.Multiply(lhs.AsUInt32(), d);
+            Vector256<ulong> high = Avx2.Add(bc, ad);
+            high = Avx2.ShiftLeftLogical(high, 32);
+
+            return Avx2.Add(high, ac);
+        }
+
+        throw new PlatformNotSupportedException("Unsupported SIMD platform.");
+    }
+
+    /// <summary>
+    ///     Multiplies packed 64-bit signed integer elements in <paramref name="lhs"/> and <paramref name="rhs"/>
+    ///     and truncates the results to 64 bits.<br/>
+    ///     Operation:<br/>
+    ///     <code>
+    ///     dest[0] = lhs[0] * rhs[0];
+    ///     dest[1] = lhs[1] * rhs[1];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>The truncated products of <paramref name="lhs"/> and <paramref name="rhs"/>.</returns>
+    /// <remarks>API available on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and ARM NEON (untested) hardware.</remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector128<long> Multiply(Vector128<long> lhs, Vector128<long> rhs)
+    {
+        return Multiply(lhs.AsUInt64(), rhs.AsUInt64()).AsInt64();
+    }
+
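+    // Note: in two's complement, the low 64 bits of a product do not depend on the operands'
+    // signedness, so the signed overloads can safely defer to the unsigned implementation;
+    // e.g. unchecked((long)((ulong)(-2L) * 3UL)) == -6L.
+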
+    /// <summary>
+    ///     Multiplies packed 64-bit signed integer elements in <paramref name="lhs"/> and <paramref name="rhs"/>
+    ///     and truncates the results to 64 bits.<br/>
+    ///     Operation:<br/>
+    ///     <code>
+    ///     dest[0] = lhs[0] * rhs[0];
+    ///     dest[1] = lhs[1] * rhs[1];
+    ///     dest[2] = lhs[2] * rhs[2];
+    ///     dest[3] = lhs[3] * rhs[3];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>The truncated products of <paramref name="lhs"/> and <paramref name="rhs"/>.</returns>
+    /// <remarks>API available on AVX2 hardware.</remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector256<long> Multiply(Vector256<long> lhs, Vector256<long> rhs)
+    {
+        return Multiply(lhs.AsUInt64(), rhs.AsUInt64()).AsInt64();
+    }
+
+    /// <summary>
+    ///     Horizontally applies a bitwise OR to adjacent pairs of single-precision (32-bit) floating-point elements
+    ///     in <paramref name="lhs"/> and <paramref name="rhs"/>.<br/>
+    ///     Operation:<br/>
+    ///     <code>
+    ///     dest[0] = lhs[0] | lhs[1];
+    ///     dest[1] = lhs[2] | lhs[3];
+    ///     dest[2] = rhs[0] | rhs[1];
+    ///     dest[3] = rhs[2] | rhs[3];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>The horizontal OR of <paramref name="lhs"/> and <paramref name="rhs"/>.</returns>
+    /// <remarks>API available on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and ARM64 NEON (untested) hardware.</remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector128<float> HorizontalOr(Vector128<float> lhs, Vector128<float> rhs)
+    {
+        if (Sse.IsSupported)
+        {
+            // s1 = [lhs[0], lhs[2], rhs[0], rhs[2]], s2 = [lhs[1], lhs[3], rhs[1], rhs[3]].
+            var s1 = Sse.Shuffle(lhs, rhs, 0b10_00_10_00);
+            var s2 = Sse.Shuffle(lhs, rhs, 0b11_01_11_01);
+
+            return Sse.Or(s1, s2);
+        }
+
+        if (AdvSimd.Arm64.IsSupported)
+        {
+            // Untested since March 7th 2023 (no hardware available).
+            var s1 = AdvSimd.Arm64.UnzipEven(lhs, rhs);
+            var s2 = AdvSimd.Arm64.UnzipOdd(lhs, rhs);
+
+            return AdvSimd.Or(s1, s2);
+        }
+
+        throw new PlatformNotSupportedException("Unsupported SIMD platform.");
+    }
+
+    /// <summary>
+    ///     Horizontally applies a bitwise OR to adjacent pairs of 32-bit integer elements in <paramref name="lhs"/>
+    ///     and <paramref name="rhs"/>.<br/>
+    ///     Operation:<br/>
+    ///     <code>
+    ///     dest[0] = lhs[0] | lhs[1];
+    ///     dest[1] = lhs[2] | lhs[3];
+    ///     dest[2] = rhs[0] | rhs[1];
+    ///     dest[3] = rhs[2] | rhs[3];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>The horizontal OR of <paramref name="lhs"/> and <paramref name="rhs"/>.</returns>
+    /// <remarks>API available on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and ARM64 NEON (untested) hardware.</remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector128<int> HorizontalOr(Vector128<int> lhs, Vector128<int> rhs)
+    {
+        return HorizontalOr(lhs.AsSingle(), rhs.AsSingle()).AsInt32();
+    }
+
+    /// <summary>
+    ///     Horizontally applies a bitwise OR to adjacent pairs of 32-bit unsigned integer elements in
+    ///     <paramref name="lhs"/> and <paramref name="rhs"/>.<br/>
+    ///     Operation:<br/>
+    ///     <code>
+    ///     dest[0] = lhs[0] | lhs[1];
+    ///     dest[1] = lhs[2] | lhs[3];
+    ///     dest[2] = rhs[0] | rhs[1];
+    ///     dest[3] = rhs[2] | rhs[3];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>The horizontal OR of <paramref name="lhs"/> and <paramref name="rhs"/>.</returns>
+    /// <remarks>API available on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and ARM64 NEON (untested) hardware.</remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    [CLSCompliant(false)]
+    public static Vector128<uint> HorizontalOr(Vector128<uint> lhs, Vector128<uint> rhs)
+    {
+        return HorizontalOr(lhs.AsSingle(), rhs.AsSingle()).AsUInt32();
+    }
+}
+
+#endif
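As a closing sanity check on the `HorizontalOr` lane arithmetic, a hypothetical usage (assuming an SSE- or ARM64 NEON-capable host; the values mirror the `dest[]` pseudo-code in the doc comments):

```csharp
using System.Runtime.Intrinsics;
using X10D.Core;

Vector128<int> lhs = Vector128.Create(0b0001, 0b0010, 0b0100, 0b1000);
Vector128<int> rhs = Vector128.Create(0b0011, 0b0000, 0b1111, 0b0101);

// result = [lhs[0]|lhs[1], lhs[2]|lhs[3], rhs[0]|rhs[1], rhs[2]|rhs[3]]
//        = [0b0011, 0b1100, 0b0011, 0b1111]
Vector128<int> result = IntrinsicUtility.HorizontalOr(lhs, rhs);
```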