diff --git a/X10D.Tests/src/Core/IntrinsicTests.cs b/X10D.Tests/src/Core/IntrinsicTests.cs index df5ce04..c1ea98f 100644 --- a/X10D.Tests/src/Core/IntrinsicTests.cs +++ b/X10D.Tests/src/Core/IntrinsicTests.cs @@ -78,6 +78,124 @@ public class IntrinsicTests Assert.AreEqual(expectedResult, result); } + [TestMethod] + public void HorizontalOr_ShouldReturnCombinedVector_GivenInputVector128OfUInt32() + { + Vector128 left = Vector128.Create(1U, 2U, 3U, 4U); + Vector128 right = Vector128.Create(5U, 6U, 7U, 8U); + + Vector128 expected = Vector128.Create(3U, 7U, 7U, 15U); + Vector128 actual = IntrinsicUtility.HorizontalOr(left, right); + + Assert.AreEqual(expected, actual); + } + + [TestMethod] + public void HorizontalOrInternal_Sse_ShouldReturnCombinedVector_GivenInputVector128OfInt32() + { + Vector128 left = Vector128.Create(1, 2, 3, 4); + Vector128 right = Vector128.Create(5, 6, 7, 8); + + Vector128 expected = Vector128.Create(3, 7, 7, 15); + Vector128 actual = IntrinsicUtility.HorizontalOr_Sse(left, right); + + Assert.AreEqual(expected, actual); + } + + [TestMethod] + public void HorizontalOrInternal_Fallback_ShouldReturnCombinedVector_GivenInputVector128OfInt32() + { + Vector128 left = Vector128.Create(1, 2, 3, 4); + Vector128 right = Vector128.Create(5, 6, 7, 8); + + Vector128 expected = Vector128.Create(3, 7, 7, 15); + Vector128 actual = IntrinsicUtility.HorizontalOrInternal_Fallback(left, right); + + Assert.AreEqual(expected, actual); + } + + [TestMethod] + public void Multiply_ShouldReturnMultipliedVector_GivenInputVector128OfInt64() + { + Vector128 left = Vector128.Create(6L, 4L); + Vector128 right = Vector128.Create(2L, 3L); + + Vector128 expected = Vector128.Create(12L, 12L); + Vector128 actual = IntrinsicUtility.Multiply(left, right); + + Assert.AreEqual(expected, actual); + } + + [TestMethod] + public void MultiplyInternal_Sse2_ShouldReturnMultipliedVector_GivenInputVector128OfUInt64() + { + if (!Sse2.IsSupported) + { + return; + } + + Vector128 left = Vector128.Create(6UL, 4UL); + Vector128 right = Vector128.Create(2UL, 3UL); + + Vector128 expected = Vector128.Create(12UL, 12UL); + Vector128 actual = IntrinsicUtility.MultiplyInternal_Sse2(left, right); + + Assert.AreEqual(expected, actual); + } + + [TestMethod] + public void MultiplyInternal_Fallback_ShouldReturnMultipliedVector_GivenInputVector128OfUInt64() + { + Vector128 left = Vector128.Create(6UL, 4UL); + Vector128 right = Vector128.Create(2UL, 3UL); + + Vector128 expected = Vector128.Create(12UL, 12UL); + Vector128 actual = IntrinsicUtility.MultiplyInternal_Fallback(left, right); + + Assert.AreEqual(expected, actual); + } + + [TestMethod] + public void Multiply_ShouldReturnMultipliedVector_GivenInputVector256OfInt64() + { + Vector256 left = Vector256.Create(4L, 6L, 8L, 10L); + Vector256 right = Vector256.Create(2L, 3L, 4L, 5L); + + Vector256 expected = Vector256.Create(8L, 18L, 32L, 50L); + Vector256 actual = IntrinsicUtility.Multiply(left, right); + + Assert.AreEqual(expected, actual); + } + + [TestMethod] + public void MultiplyInternal_Avx2_ShouldReturnMultipliedVector_GivenInputVector256OfUInt64() + { + if (!Avx2.IsSupported) + { + return; + } + + Vector256 left = Vector256.Create(4UL, 6UL, 8UL, 10UL); + Vector256 right = Vector256.Create(2UL, 3UL, 4UL, 5UL); + + Vector256 expected = Vector256.Create(8UL, 18UL, 32UL, 50UL); + Vector256 actual = IntrinsicUtility.MultiplyInternal_Avx2(left, right); + + Assert.AreEqual(expected, actual); + } + + [TestMethod] + public void MultiplyInternal_Fallback_ShouldReturnMultipliedVector_GivenInputVector256OfUInt64() + { + Vector256 left = Vector256.Create(4UL, 6UL, 8UL, 10UL); + Vector256 right = Vector256.Create(2UL, 3UL, 4UL, 5UL); + + Vector256 expected = Vector256.Create(8UL, 18UL, 32UL, 50UL); + Vector256 actual = IntrinsicUtility.MultiplyInternal_Fallback(left, right); + + Assert.AreEqual(expected, actual); + } + [TestMethod] public void ReverseElementsInternal_Fallback_ShouldReturnExpectedVector128Result_GivenInputVector() { diff --git a/X10D/src/Core/IntrinsicUtility.cs b/X10D/src/Core/IntrinsicUtility.cs index 005c735..2ea68ba 100644 --- a/X10D/src/Core/IntrinsicUtility.cs +++ b/X10D/src/Core/IntrinsicUtility.cs @@ -1,5 +1,6 @@ #if NETCOREAPP3_0_OR_GREATER +using System.Diagnostics.CodeAnalysis; using System.Diagnostics.Contracts; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; @@ -22,48 +23,25 @@ public static class IntrinsicUtility /// /// Operation:
/// - /// dest[0] = lhs[0] * rhs[0]; - /// dest[1] = lhs[1] * rhs[1]; + /// dest[0] = left[0] * right[0]; + /// dest[1] = left[1] * right[1]; /// /// - /// Left vector. - /// Right vector. - /// - /// A of whose elements is 64-bit truncated product of lhs and rhs. - /// + /// Left vector. + /// Right vector. + /// The truncated product vector. [Pure] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - public static Vector128 Multiply(Vector128 lhs, Vector128 rhs) + [ExcludeFromCodeCoverage] + public static Vector128 Multiply(Vector128 left, Vector128 right) { if (Sse2.IsSupported) { - // https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers - - Vector128 ac = Sse2.Multiply(lhs.AsUInt32(), rhs.AsUInt32()); - Vector128 b = Sse2.ShiftRightLogical(lhs, 32).AsUInt32(); - Vector128 bc = Sse2.Multiply(b, rhs.AsUInt32()); - Vector128 d = Sse2.ShiftRightLogical(rhs, 32).AsUInt32(); - Vector128 ad = Sse2.Multiply(lhs.AsUInt32(), d); - Vector128 high = Sse2.Add(bc, ad); - high = Sse2.ShiftLeftLogical(high, 32); - - return Sse2.Add(high, ac); + return MultiplyInternal_Sse2(left, right); } - // TODO: AdvSimd implementation. - // TODO: WasmSimd implementation. - - var output = GetUninitializedVector128(); - - Unsafe.As, ulong>(ref output) = - Unsafe.As, ulong>(ref lhs) * Unsafe.As, ulong>(ref rhs); - - Unsafe.Add(ref Unsafe.As, ulong>(ref output), 1) = - Unsafe.Add(ref Unsafe.As, ulong>(ref lhs), 1) * - Unsafe.Add(ref Unsafe.As, ulong>(ref rhs), 1); - - return output; + return MultiplyInternal_Fallback(left, right); } /// @@ -72,10 +50,10 @@ public static class IntrinsicUtility /// /// Operation:
/// - /// dest[0] = lhs[0] * rhs[0]; - /// dest[1] = lhs[1] * rhs[1]; - /// dest[2] = lhs[2] * rhs[2]; - /// dest[3] = lhs[3] * rhs[3]; + /// dest[0] = left[0] * right[0]; + /// dest[1] = left[1] * right[1]; + /// dest[2] = left[2] * right[2]; + /// dest[3] = left[3] * right[3]; /// ///
/// Left vector. @@ -86,33 +64,15 @@ public static class IntrinsicUtility [Pure] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + [ExcludeFromCodeCoverage] public static Vector256 Multiply(Vector256 lhs, Vector256 rhs) { if (Avx2.IsSupported) { - // https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers - - Vector256 ac = Avx2.Multiply(lhs.AsUInt32(), rhs.AsUInt32()); - Vector256 b = Avx2.ShiftRightLogical(lhs, 32).AsUInt32(); - Vector256 bc = Avx2.Multiply(b, rhs.AsUInt32()); - Vector256 d = Avx2.ShiftRightLogical(rhs, 32).AsUInt32(); - Vector256 ad = Avx2.Multiply(lhs.AsUInt32(), d); - Vector256 high = Avx2.Add(bc, ad); - high = Avx2.ShiftLeftLogical(high, 32); - - return Avx2.Add(high, ac); + return MultiplyInternal_Avx2(lhs, rhs); } - var output = GetUninitializedVector256(); - - for (int i = 0; i < Vector256.Count; i++) - { - Unsafe.Add(ref Unsafe.As, ulong>(ref output), i) = - Unsafe.Add(ref Unsafe.As, ulong>(ref lhs), i) * - Unsafe.Add(ref Unsafe.As, ulong>(ref rhs), i); - } - - return output; + return MultiplyInternal_Fallback(lhs, rhs); } /// @@ -121,8 +81,8 @@ public static class IntrinsicUtility /// /// Operation:
/// - /// dest[0] = lhs[0] * rhs[0]; - /// dest[1] = lhs[1] * rhs[1]; + /// dest[0] = left[0] * right[0]; + /// dest[1] = left[1] * right[1]; /// ///
/// Left vector. @@ -143,10 +103,10 @@ public static class IntrinsicUtility /// /// Operation:
/// - /// dest[0] = lhs[0] * rhs[0]; - /// dest[1] = lhs[1] * rhs[1]; - /// dest[2] = lhs[2] * rhs[2]; - /// dest[3] = lhs[3] * rhs[3]; + /// dest[0] = left[0] * right[0]; + /// dest[1] = left[1] * right[1]; + /// dest[2] = left[2] * right[2]; + /// dest[3] = left[3] * right[3]; /// /// /// Left vector. @@ -168,77 +128,32 @@ public static class IntrinsicUtility /// /// Operation:
/// - /// dest[0] = lhs[0] | lhs[1]; - /// dest[1] = lhs[2] | lhs[3]; - /// dest[2] = rhs[0] | rhs[1]; - /// dest[3] = rhs[2] | rhs[3]; + /// dest[0] = left[0] | left[1]; + /// dest[1] = left[2] | left[3]; + /// dest[2] = right[0] | right[1]; + /// dest[3] = right[2] | right[3]; /// /// - /// Left vector. - /// Right vector. + /// Left vector. + /// Right vector. /// /// A of with all elements is result of OR operation on adjacent pairs of /// elements in lhs and rhs. /// [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - public static Vector128 HorizontalOr(Vector128 lhs, Vector128 rhs) + [ExcludeFromCodeCoverage] + public static Vector128 HorizontalOr(Vector128 left, Vector128 right) { if (Sse.IsSupported) { - var s1 = Sse.Shuffle(lhs, rhs, 0b10_00_10_00); // s1 = { lhs[0] ; lhs[2] ; rhs[0] ; rhs[2] } - var s2 = Sse.Shuffle(lhs, rhs, 0b11_01_11_01); // s2 = { lhs[1] ; lhs[3] ; rhs[1] ; rhs[3] } - - return Sse.Or(s1, s2); + return HorizontalOr_Sse(left, right); } // TODO: AdvSimd implementation. // TODO: WasmSimd implementation. (?) - Vector128 output = GetUninitializedVector128(); - - Unsafe.As, uint>(ref output) = - Unsafe.As, uint>(ref lhs) | - Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 1); - - Unsafe.Add(ref Unsafe.As, uint>(ref output), 1) = - Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 2) | - Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 3); - - Unsafe.Add(ref Unsafe.As, uint>(ref output), 2) = - Unsafe.As, uint>(ref rhs) | - Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 1); - - Unsafe.Add(ref Unsafe.As, uint>(ref output), 3) = - Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 2) | - Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 3); - - return output; - } - - /// - /// - /// Horizontally apply OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs. - /// - /// Operation:
- /// - /// dest[0] = lhs[0] | lhs[1]; - /// dest[1] = lhs[2] | lhs[3]; - /// dest[2] = rhs[0] | rhs[1]; - /// dest[3] = rhs[2] | rhs[3]; - /// - ///
- /// Left vector. - /// Right vector. - /// - /// A of with all elements is result of OR operation on adjacent pairs of - /// elements in lhs and rhs. - /// - [Pure] - [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - public static Vector128 HorizontalOr(Vector128 lhs, Vector128 rhs) - { - return HorizontalOr(lhs.AsSingle(), rhs.AsSingle()).AsInt32(); + return HorizontalOrInternal_Fallback(left, right); } /// @@ -247,14 +162,14 @@ public static class IntrinsicUtility /// /// Operation:
/// - /// dest[0] = lhs[0] | lhs[1]; - /// dest[1] = lhs[2] | lhs[3]; - /// dest[2] = rhs[0] | rhs[1]; - /// dest[3] = rhs[2] | rhs[3]; + /// dest[0] = left[0] | left[1]; + /// dest[1] = left[2] | left[3]; + /// dest[2] = right[0] | right[1]; + /// dest[3] = right[2] | right[3]; /// ///
- /// Left vector. - /// Right vector. + /// Left vector. + /// Right vector. /// /// A of with all elements is result of OR operation on adjacent pairs of /// elements in lhs and rhs. @@ -262,9 +177,9 @@ public static class IntrinsicUtility [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] [CLSCompliant(false)] - public static Vector128 HorizontalOr(Vector128 lhs, Vector128 rhs) + public static Vector128 HorizontalOr(Vector128 left, Vector128 right) { - return HorizontalOr(lhs.AsSingle(), rhs.AsSingle()).AsUInt32(); + return HorizontalOr(left.AsInt32(), right.AsInt32()).AsUInt32(); } // Helper methods @@ -300,6 +215,109 @@ public static class IntrinsicUtility return default; #endif } + + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + internal static Vector128 HorizontalOr_Sse(Vector128 left, Vector128 right) + { + Vector128 leftSingle = left.AsSingle(); + Vector128 rightSingle = right.AsSingle(); + + // first = { left[0] ; left[2] ; right[0] ; right[2] } + // second = { left[1] ; left[3] ; right[1] ; right[3] } + Vector128 first = Sse.Shuffle(leftSingle, rightSingle, 0b10_00_10_00); + Vector128 second = Sse.Shuffle(leftSingle, rightSingle, 0b11_01_11_01); + + return Sse.Or(first, second).AsInt32(); + } + + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + internal static Vector128 HorizontalOrInternal_Fallback(Vector128 left, Vector128 right) + { + Vector128 output = GetUninitializedVector128(); + + ref int outputInteger = ref Unsafe.As, int>(ref output); + ref int leftInteger = ref Unsafe.As, int>(ref left); + ref int rightInteger = ref Unsafe.As, int>(ref right); + + outputInteger = leftInteger | Unsafe.Add(ref leftInteger, 1); + + Unsafe.Add(ref outputInteger, 1) = Unsafe.Add(ref leftInteger, 2) | Unsafe.Add(ref leftInteger, 3); + Unsafe.Add(ref outputInteger, 2) = rightInteger | Unsafe.Add(ref rightInteger, 1); + Unsafe.Add(ref outputInteger, 3) = Unsafe.Add(ref rightInteger, 2) | Unsafe.Add(ref rightInteger, 3); + + return output; + } + + [Pure] + [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + internal static Vector128 MultiplyInternal_Fallback(Vector128 left, Vector128 right) + { + ulong leftInteger1 = Unsafe.As, ulong>(ref left); + ulong rightInteger1 = Unsafe.As, ulong>(ref right); + ulong result1 = leftInteger1 * rightInteger1; + + ulong leftInteger2 = Unsafe.Add(ref Unsafe.As, ulong>(ref left), 1); + ulong rightInteger2 = Unsafe.Add(ref Unsafe.As, ulong>(ref right), 1); + ulong result2 = leftInteger2 * rightInteger2; + + Vector128 output = Vector128.Create(result1, result2); + + return output; + } + + [Pure] + [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + internal static Vector128 MultiplyInternal_Sse2(Vector128 left, Vector128 right) + { + // https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers + + Vector128 ac = Sse2.Multiply(left.AsUInt32(), right.AsUInt32()); + Vector128 b = Sse2.ShiftRightLogical(left, 32).AsUInt32(); + Vector128 bc = Sse2.Multiply(b, right.AsUInt32()); + Vector128 d = Sse2.ShiftRightLogical(right, 32).AsUInt32(); + Vector128 ad = Sse2.Multiply(left.AsUInt32(), d); + Vector128 high = Sse2.Add(bc, ad); + high = Sse2.ShiftLeftLogical(high, 32); + + return Sse2.Add(high, ac); + } + + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + internal static Vector256 MultiplyInternal_Fallback(Vector256 left, Vector256 right) + { + Vector256 output = GetUninitializedVector256(); + + for (var index = 0; index < Vector256.Count; index++) + { + Unsafe.Add(ref Unsafe.As, ulong>(ref output), index) = + Unsafe.Add(ref Unsafe.As, ulong>(ref left), index) * + Unsafe.Add(ref Unsafe.As, ulong>(ref right), index); + } + + return output; + } + + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + internal static Vector256 MultiplyInternal_Avx2(Vector256 left, Vector256 right) + { + // https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers + + Vector256 ac = Avx2.Multiply(left.AsUInt32(), right.AsUInt32()); + Vector256 b = Avx2.ShiftRightLogical(left, 32).AsUInt32(); + Vector256 bc = Avx2.Multiply(b, right.AsUInt32()); + Vector256 d = Avx2.ShiftRightLogical(right, 32).AsUInt32(); + Vector256 ad = Avx2.Multiply(left.AsUInt32(), d); + Vector256 high = Avx2.Add(bc, ad); + high = Avx2.ShiftLeftLogical(high, 32); + + return Avx2.Add(high, ac); + } } #endif