mirror of
https://github.com/oliverbooth/X10D
synced 2024-11-10 03:45:41 +00:00
test: 100% coverage on IntrinsicUtility (#73)
This commit is contained in:
parent
783c4b0f8e
commit
6ef48fc3b9
@ -78,6 +78,124 @@ public class IntrinsicTests
|
|||||||
Assert.AreEqual(expectedResult, result);
|
Assert.AreEqual(expectedResult, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[TestMethod]
public void HorizontalOr_ShouldReturnCombinedVector_GivenInputVector128OfUInt32()
{
    // Adjacent pairs are OR-ed together: 1|2 = 3, 3|4 = 7, 5|6 = 7, 7|8 = 15.
    var expected = Vector128.Create(3U, 7U, 7U, 15U);

    Vector128<uint> actual = IntrinsicUtility.HorizontalOr(
        Vector128.Create(1U, 2U, 3U, 4U),
        Vector128.Create(5U, 6U, 7U, 8U));

    Assert.AreEqual(expected, actual);
}
|
||||||
|
|
||||||
|
[TestMethod]
public void HorizontalOrInternal_Sse_ShouldReturnCombinedVector_GivenInputVector128OfInt32()
{
    // HorizontalOr_Sse invokes SSE intrinsics directly, without the IsSupported check that the public
    // HorizontalOr overload performs. Skip on hardware without SSE, exactly as the Sse2 and Avx2 tests
    // in this class do, so the test does not fail with PlatformNotSupportedException there.
    if (!Sse.IsSupported)
    {
        return;
    }

    Vector128<int> left = Vector128.Create(1, 2, 3, 4);
    Vector128<int> right = Vector128.Create(5, 6, 7, 8);

    // 1|2 = 3, 3|4 = 7, 5|6 = 7, 7|8 = 15.
    Vector128<int> expected = Vector128.Create(3, 7, 7, 15);
    Vector128<int> actual = IntrinsicUtility.HorizontalOr_Sse(left, right);

    Assert.AreEqual(expected, actual);
}
|
||||||
|
|
||||||
|
[TestMethod]
public void HorizontalOrInternal_Fallback_ShouldReturnCombinedVector_GivenInputVector128OfInt32()
{
    var lhs = Vector128.Create(1, 2, 3, 4);
    var rhs = Vector128.Create(5, 6, 7, 8);

    Vector128<int> actual = IntrinsicUtility.HorizontalOrInternal_Fallback(lhs, rhs);

    // First two lanes come from the left operand (1|2, 3|4), last two from the right (5|6, 7|8).
    Vector128<int> expected = Vector128.Create(3, 7, 7, 15);
    Assert.AreEqual(expected, actual);
}
|
||||||
|
|
||||||
|
[TestMethod]
public void Multiply_ShouldReturnMultipliedVector_GivenInputVector128OfInt64()
{
    // Element-wise products: 6 * 2 = 12 and 4 * 3 = 12.
    var expected = Vector128.Create(12L, 12L);

    var multiplicand = Vector128.Create(6L, 4L);
    var multiplier = Vector128.Create(2L, 3L);
    Vector128<long> actual = IntrinsicUtility.Multiply(multiplicand, multiplier);

    Assert.AreEqual(expected, actual);
}
|
||||||
|
|
||||||
|
[TestMethod]
public void MultiplyInternal_Sse2_ShouldReturnMultipliedVector_GivenInputVector128OfUInt64()
{
    // The SSE2 code path can only be exercised on hardware that supports it; otherwise there is nothing to verify.
    if (Sse2.IsSupported)
    {
        var multiplicand = Vector128.Create(6UL, 4UL);
        var multiplier = Vector128.Create(2UL, 3UL);

        Vector128<ulong> actual = IntrinsicUtility.MultiplyInternal_Sse2(multiplicand, multiplier);

        // Element-wise products: 6 * 2 = 12 and 4 * 3 = 12.
        Vector128<ulong> expected = Vector128.Create(12UL, 12UL);
        Assert.AreEqual(expected, actual);
    }
}
|
||||||
|
|
||||||
|
[TestMethod]
public void MultiplyInternal_Fallback_ShouldReturnMultipliedVector_GivenInputVector128OfUInt64()
{
    // Element-wise products: 6 * 2 = 12 and 4 * 3 = 12.
    var expected = Vector128.Create(12UL, 12UL);

    Vector128<ulong> actual = IntrinsicUtility.MultiplyInternal_Fallback(
        Vector128.Create(6UL, 4UL),
        Vector128.Create(2UL, 3UL));

    Assert.AreEqual(expected, actual);
}
|
||||||
|
|
||||||
|
[TestMethod]
public void Multiply_ShouldReturnMultipliedVector_GivenInputVector256OfInt64()
{
    var multiplicand = Vector256.Create(4L, 6L, 8L, 10L);
    var multiplier = Vector256.Create(2L, 3L, 4L, 5L);

    Vector256<long> actual = IntrinsicUtility.Multiply(multiplicand, multiplier);

    // Element-wise products: 4*2, 6*3, 8*4, 10*5.
    Vector256<long> expected = Vector256.Create(8L, 18L, 32L, 50L);
    Assert.AreEqual(expected, actual);
}
|
||||||
|
|
||||||
|
[TestMethod]
public void MultiplyInternal_Avx2_ShouldReturnMultipliedVector_GivenInputVector256OfUInt64()
{
    // The AVX2 code path can only be exercised on hardware that supports it; otherwise there is nothing to verify.
    if (Avx2.IsSupported)
    {
        // Element-wise products: 4*2, 6*3, 8*4, 10*5.
        var expected = Vector256.Create(8UL, 18UL, 32UL, 50UL);

        Vector256<ulong> actual = IntrinsicUtility.MultiplyInternal_Avx2(
            Vector256.Create(4UL, 6UL, 8UL, 10UL),
            Vector256.Create(2UL, 3UL, 4UL, 5UL));

        Assert.AreEqual(expected, actual);
    }
}
|
||||||
|
|
||||||
|
[TestMethod]
public void MultiplyInternal_Fallback_ShouldReturnMultipliedVector_GivenInputVector256OfUInt64()
{
    var multiplicand = Vector256.Create(4UL, 6UL, 8UL, 10UL);
    var multiplier = Vector256.Create(2UL, 3UL, 4UL, 5UL);

    // Element-wise products: 4*2, 6*3, 8*4, 10*5.
    var expected = Vector256.Create(8UL, 18UL, 32UL, 50UL);

    Vector256<ulong> actual = IntrinsicUtility.MultiplyInternal_Fallback(multiplicand, multiplier);

    Assert.AreEqual(expected, actual);
}
|
||||||
|
|
||||||
[TestMethod]
|
[TestMethod]
|
||||||
public void ReverseElementsInternal_Fallback_ShouldReturnExpectedVector128Result_GivenInputVector()
|
public void ReverseElementsInternal_Fallback_ShouldReturnExpectedVector128Result_GivenInputVector()
|
||||||
{
|
{
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#if NETCOREAPP3_0_OR_GREATER
|
#if NETCOREAPP3_0_OR_GREATER
|
||||||
|
|
||||||
|
using System.Diagnostics.CodeAnalysis;
|
||||||
using System.Diagnostics.Contracts;
|
using System.Diagnostics.Contracts;
|
||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
using System.Runtime.Intrinsics;
|
using System.Runtime.Intrinsics;
|
||||||
@ -22,48 +23,25 @@ public static class IntrinsicUtility
|
|||||||
/// </para>
|
/// </para>
|
||||||
/// Operation:<br/>
|
/// Operation:<br/>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] * rhs[0];
|
/// dest[0] = left[0] * right[0];
|
||||||
/// dest[1] = lhs[1] * rhs[1];
|
/// dest[1] = left[1] * right[1];
|
||||||
/// </code>
|
/// </code>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="left">Left vector.</param>
|
||||||
/// <param name="rhs">Right vector.</param>
|
/// <param name="right">Right vector.</param>
|
||||||
/// <returns>
|
/// <returns>The truncated product vector.</returns>
|
||||||
/// A <see cref="Vector128{T}"/> of <see langword="ulong"/> whose elements is 64-bit truncated product of lhs and rhs.
|
|
||||||
/// </returns>
|
|
||||||
[Pure]
|
[Pure]
|
||||||
[CLSCompliant(false)]
|
[CLSCompliant(false)]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
public static Vector128<ulong> Multiply(Vector128<ulong> lhs, Vector128<ulong> rhs)
|
[ExcludeFromCodeCoverage]
|
||||||
|
public static Vector128<ulong> Multiply(Vector128<ulong> left, Vector128<ulong> right)
|
||||||
{
|
{
|
||||||
if (Sse2.IsSupported)
|
if (Sse2.IsSupported)
|
||||||
{
|
{
|
||||||
// https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers
|
return MultiplyInternal_Sse2(left, right);
|
||||||
|
|
||||||
Vector128<ulong> ac = Sse2.Multiply(lhs.AsUInt32(), rhs.AsUInt32());
|
|
||||||
Vector128<uint> b = Sse2.ShiftRightLogical(lhs, 32).AsUInt32();
|
|
||||||
Vector128<ulong> bc = Sse2.Multiply(b, rhs.AsUInt32());
|
|
||||||
Vector128<uint> d = Sse2.ShiftRightLogical(rhs, 32).AsUInt32();
|
|
||||||
Vector128<ulong> ad = Sse2.Multiply(lhs.AsUInt32(), d);
|
|
||||||
Vector128<ulong> high = Sse2.Add(bc, ad);
|
|
||||||
high = Sse2.ShiftLeftLogical(high, 32);
|
|
||||||
|
|
||||||
return Sse2.Add(high, ac);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: AdvSimd implementation.
|
return MultiplyInternal_Fallback(left, right);
|
||||||
// TODO: WasmSimd implementation.
|
|
||||||
|
|
||||||
var output = GetUninitializedVector128<ulong>();
|
|
||||||
|
|
||||||
Unsafe.As<Vector128<ulong>, ulong>(ref output) =
|
|
||||||
Unsafe.As<Vector128<ulong>, ulong>(ref lhs) * Unsafe.As<Vector128<ulong>, ulong>(ref rhs);
|
|
||||||
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref output), 1) =
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref lhs), 1) *
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref rhs), 1);
|
|
||||||
|
|
||||||
return output;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@ -72,10 +50,10 @@ public static class IntrinsicUtility
|
|||||||
/// </para>
|
/// </para>
|
||||||
/// Operation:<br/>
|
/// Operation:<br/>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] * rhs[0];
|
/// dest[0] = left[0] * right[0];
|
||||||
/// dest[1] = lhs[1] * rhs[1];
|
/// dest[1] = left[1] * right[1];
|
||||||
/// dest[2] = lhs[2] * rhs[2];
|
/// dest[2] = left[2] * right[2];
|
||||||
/// dest[3] = lhs[3] * rhs[3];
|
/// dest[3] = left[3] * right[3];
|
||||||
/// </code>
|
/// </code>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="lhs">Left vector.</param>
|
||||||
@ -86,33 +64,15 @@ public static class IntrinsicUtility
|
|||||||
[Pure]
|
[Pure]
|
||||||
[CLSCompliant(false)]
|
[CLSCompliant(false)]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
|
[ExcludeFromCodeCoverage]
|
||||||
public static Vector256<ulong> Multiply(Vector256<ulong> lhs, Vector256<ulong> rhs)
|
public static Vector256<ulong> Multiply(Vector256<ulong> lhs, Vector256<ulong> rhs)
|
||||||
{
|
{
|
||||||
if (Avx2.IsSupported)
|
if (Avx2.IsSupported)
|
||||||
{
|
{
|
||||||
// https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers
|
return MultiplyInternal_Avx2(lhs, rhs);
|
||||||
|
|
||||||
Vector256<ulong> ac = Avx2.Multiply(lhs.AsUInt32(), rhs.AsUInt32());
|
|
||||||
Vector256<uint> b = Avx2.ShiftRightLogical(lhs, 32).AsUInt32();
|
|
||||||
Vector256<ulong> bc = Avx2.Multiply(b, rhs.AsUInt32());
|
|
||||||
Vector256<uint> d = Avx2.ShiftRightLogical(rhs, 32).AsUInt32();
|
|
||||||
Vector256<ulong> ad = Avx2.Multiply(lhs.AsUInt32(), d);
|
|
||||||
Vector256<ulong> high = Avx2.Add(bc, ad);
|
|
||||||
high = Avx2.ShiftLeftLogical(high, 32);
|
|
||||||
|
|
||||||
return Avx2.Add(high, ac);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var output = GetUninitializedVector256<ulong>();
|
return MultiplyInternal_Fallback(lhs, rhs);
|
||||||
|
|
||||||
for (int i = 0; i < Vector256<ulong>.Count; i++)
|
|
||||||
{
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref output), i) =
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref lhs), i) *
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref rhs), i);
|
|
||||||
}
|
|
||||||
|
|
||||||
return output;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@ -121,8 +81,8 @@ public static class IntrinsicUtility
|
|||||||
/// </para>
|
/// </para>
|
||||||
/// Operation:<br/>
|
/// Operation:<br/>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] * rhs[0];
|
/// dest[0] = left[0] * right[0];
|
||||||
/// dest[1] = lhs[1] * rhs[1];
|
/// dest[1] = left[1] * right[1];
|
||||||
/// </code>
|
/// </code>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="lhs">Left vector.</param>
|
||||||
@ -143,10 +103,10 @@ public static class IntrinsicUtility
|
|||||||
/// </para>
|
/// </para>
|
||||||
/// Operation:<br/>
|
/// Operation:<br/>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] * rhs[0];
|
/// dest[0] = left[0] * right[0];
|
||||||
/// dest[1] = lhs[1] * rhs[1];
|
/// dest[1] = left[1] * right[1];
|
||||||
/// dest[2] = lhs[2] * rhs[2];
|
/// dest[2] = left[2] * right[2];
|
||||||
/// dest[3] = lhs[3] * rhs[3];
|
/// dest[3] = left[3] * right[3];
|
||||||
/// </code>
|
/// </code>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="lhs">Left vector.</param>
|
||||||
@ -168,77 +128,32 @@ public static class IntrinsicUtility
|
|||||||
/// </para>
|
/// </para>
|
||||||
/// Operation:<br/>
|
/// Operation:<br/>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] | lhs[1];
|
/// dest[0] = left[0] | left[1];
|
||||||
/// dest[1] = lhs[2] | lhs[3];
|
/// dest[1] = left[2] | left[3];
|
||||||
/// dest[2] = rhs[0] | rhs[1];
|
/// dest[2] = right[0] | right[1];
|
||||||
/// dest[3] = rhs[2] | rhs[3];
|
/// dest[3] = right[2] | right[3];
|
||||||
/// </code>
|
/// </code>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="left">Left vector.</param>
|
||||||
/// <param name="rhs">Right vector.</param>
|
/// <param name="right">Right vector.</param>
|
||||||
/// <returns>
|
/// <returns>
|
||||||
/// A <see cref="Vector128{T}"/> of <see langword="float"/> with all elements is result of OR operation on adjacent pairs of
|
/// A <see cref="Vector128{T}"/> of <see langword="int"/> whose elements are the results of the OR operation on adjacent pairs of
|
||||||
/// elements in lhs and rhs.
|
/// elements in left and right.
|
||||||
/// </returns>
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
public static Vector128<float> HorizontalOr(Vector128<float> lhs, Vector128<float> rhs)
|
[ExcludeFromCodeCoverage]
|
||||||
|
public static Vector128<int> HorizontalOr(Vector128<int> left, Vector128<int> right)
|
||||||
{
|
{
|
||||||
if (Sse.IsSupported)
|
if (Sse.IsSupported)
|
||||||
{
|
{
|
||||||
var s1 = Sse.Shuffle(lhs, rhs, 0b10_00_10_00); // s1 = { lhs[0] ; lhs[2] ; rhs[0] ; rhs[2] }
|
return HorizontalOr_Sse(left, right);
|
||||||
var s2 = Sse.Shuffle(lhs, rhs, 0b11_01_11_01); // s2 = { lhs[1] ; lhs[3] ; rhs[1] ; rhs[3] }
|
|
||||||
|
|
||||||
return Sse.Or(s1, s2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: AdvSimd implementation.
|
// TODO: AdvSimd implementation.
|
||||||
// TODO: WasmSimd implementation. (?)
|
// TODO: WasmSimd implementation. (?)
|
||||||
|
|
||||||
Vector128<float> output = GetUninitializedVector128<float>();
|
return HorizontalOrInternal_Fallback(left, right);
|
||||||
|
|
||||||
Unsafe.As<Vector128<float>, uint>(ref output) =
|
|
||||||
Unsafe.As<Vector128<float>, uint>(ref lhs) |
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref lhs), 1);
|
|
||||||
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref output), 1) =
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref lhs), 2) |
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref lhs), 3);
|
|
||||||
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref output), 2) =
|
|
||||||
Unsafe.As<Vector128<float>, uint>(ref rhs) |
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref rhs), 1);
|
|
||||||
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref output), 3) =
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref rhs), 2) |
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref rhs), 3);
|
|
||||||
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// <para>
|
|
||||||
/// Horizontally apply OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs.
|
|
||||||
/// </para>
|
|
||||||
/// Operation:<br/>
|
|
||||||
/// <code>
|
|
||||||
/// dest[0] = lhs[0] | lhs[1];
|
|
||||||
/// dest[1] = lhs[2] | lhs[3];
|
|
||||||
/// dest[2] = rhs[0] | rhs[1];
|
|
||||||
/// dest[3] = rhs[2] | rhs[3];
|
|
||||||
/// </code>
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="lhs">Left vector.</param>
|
|
||||||
/// <param name="rhs">Right vector.</param>
|
|
||||||
/// <returns>
|
|
||||||
/// A <see cref="Vector128{T}"/> of <see langword="int"/> with all elements is result of OR operation on adjacent pairs of
|
|
||||||
/// elements in lhs and rhs.
|
|
||||||
/// </returns>
|
|
||||||
[Pure]
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
|
||||||
public static Vector128<int> HorizontalOr(Vector128<int> lhs, Vector128<int> rhs)
|
|
||||||
{
|
|
||||||
return HorizontalOr(lhs.AsSingle(), rhs.AsSingle()).AsInt32();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@ -247,14 +162,14 @@ public static class IntrinsicUtility
|
|||||||
/// </para>
|
/// </para>
|
||||||
/// Operation:<br/>
|
/// Operation:<br/>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] | lhs[1];
|
/// dest[0] = left[0] | left[1];
|
||||||
/// dest[1] = lhs[2] | lhs[3];
|
/// dest[1] = left[2] | left[3];
|
||||||
/// dest[2] = rhs[0] | rhs[1];
|
/// dest[2] = right[0] | right[1];
|
||||||
/// dest[3] = rhs[2] | rhs[3];
|
/// dest[3] = right[2] | right[3];
|
||||||
/// </code>
|
/// </code>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="left">Left vector.</param>
|
||||||
/// <param name="rhs">Right vector.</param>
|
/// <param name="right">Right vector.</param>
|
||||||
/// <returns>
|
/// <returns>
|
||||||
/// A <see cref="Vector128{T}"/> of <see langword="uint"/> with all elements is result of OR operation on adjacent pairs of
|
/// A <see cref="Vector128{T}"/> of <see langword="uint"/> with all elements is result of OR operation on adjacent pairs of
|
||||||
/// elements in lhs and rhs.
|
/// elements in left and right.
|
||||||
@ -262,9 +177,9 @@ public static class IntrinsicUtility
|
|||||||
[Pure]
|
[Pure]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
[CLSCompliant(false)]
|
[CLSCompliant(false)]
|
||||||
public static Vector128<uint> HorizontalOr(Vector128<uint> lhs, Vector128<uint> rhs)
|
public static Vector128<uint> HorizontalOr(Vector128<uint> left, Vector128<uint> right)
|
||||||
{
|
{
|
||||||
return HorizontalOr(lhs.AsSingle(), rhs.AsSingle()).AsUInt32();
|
return HorizontalOr(left.AsInt32(), right.AsInt32()).AsUInt32();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper methods
|
// Helper methods
|
||||||
@ -300,6 +215,109 @@ public static class IntrinsicUtility
|
|||||||
return default;
|
return default;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
///     SSE implementation of the horizontal OR: ORs adjacent pairs of 32-bit elements of <paramref name="left" /> and
///     <paramref name="right" />.
/// </summary>
/// <param name="left">Left vector; supplies the two low result elements.</param>
/// <param name="right">Right vector; supplies the two high result elements.</param>
/// <returns>
///     <c>{ left[0] | left[1], left[2] | left[3], right[0] | right[1], right[2] | right[3] }</c>.
/// </returns>
/// <remarks>This method performs no <c>Sse.IsSupported</c> check of its own.</remarks>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
internal static Vector128<int> HorizontalOr_Sse(Vector128<int> left, Vector128<int> right)
{
    // Shuffle controls. Each gathers two elements from the first operand, then two from the second.
    const byte evenElements = 0b10_00_10_00; // { left[0] ; left[2] ; right[0] ; right[2] }
    const byte oddElements = 0b11_01_11_01; // { left[1] ; left[3] ; right[1] ; right[3] }

    // The shuffle/OR intrinsics used here operate on float vectors, so the integer bits are reinterpreted (not converted).
    Vector128<float> lhs = left.AsSingle();
    Vector128<float> rhs = right.AsSingle();

    Vector128<float> evens = Sse.Shuffle(lhs, rhs, evenElements);
    Vector128<float> odds = Sse.Shuffle(lhs, rhs, oddElements);

    // OR-ing the even-indexed elements with their odd-indexed neighbours yields the pairwise result.
    Vector128<float> combined = Sse.Or(evens, odds);
    return combined.AsInt32();
}
|
||||||
|
|
||||||
|
/// <summary>
///     Software fallback for the horizontal OR: ORs adjacent pairs of 32-bit elements of <paramref name="left" /> and
///     <paramref name="right" /> without using platform-specific intrinsics.
/// </summary>
/// <param name="left">Left vector; supplies the two low result elements.</param>
/// <param name="right">Right vector; supplies the two high result elements.</param>
/// <returns>
///     <c>{ left[0] | left[1], left[2] | left[3], right[0] | right[1], right[2] | right[3] }</c>.
/// </returns>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
internal static Vector128<int> HorizontalOrInternal_Fallback(Vector128<int> left, Vector128<int> right)
{
    // Build the result from scalar values instead of writing through reinterpreted references into an
    // uninitialized vector; this mirrors how the Vector128 multiply fallback constructs its result.
    return Vector128.Create(
        left.GetElement(0) | left.GetElement(1),
        left.GetElement(2) | left.GetElement(3),
        right.GetElement(0) | right.GetElement(1),
        right.GetElement(2) | right.GetElement(3));
}
|
||||||
|
|
||||||
|
/// <summary>
///     Software fallback that multiplies the two 64-bit elements of <paramref name="left" /> by the corresponding
///     elements of <paramref name="right" /> using scalar arithmetic.
/// </summary>
/// <param name="left">Left vector.</param>
/// <param name="right">Right vector.</param>
/// <returns><c>{ left[0] * right[0], left[1] * right[1] }</c>.</returns>
[Pure]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
internal static Vector128<ulong> MultiplyInternal_Fallback(Vector128<ulong> left, Vector128<ulong> right)
{
    // View each (by-value) vector argument as a reference to its first 64-bit element.
    ref ulong leftElements = ref Unsafe.As<Vector128<ulong>, ulong>(ref left);
    ref ulong rightElements = ref Unsafe.As<Vector128<ulong>, ulong>(ref right);

    ulong product0 = leftElements * rightElements;
    ulong product1 = Unsafe.Add(ref leftElements, 1) * Unsafe.Add(ref rightElements, 1);

    return Vector128.Create(product0, product1);
}
|
||||||
|
|
||||||
|
/// <summary>
///     SSE2 implementation of the element-wise 64-bit multiply, assembled from 32-bit x 32-bit partial products.
/// </summary>
/// <param name="left">Left vector.</param>
/// <param name="right">Right vector.</param>
/// <returns>The sum of the low partial product and the cross partial products shifted left by 32 bits.</returns>
/// <remarks>This method performs no <c>Sse2.IsSupported</c> check of its own.</remarks>
[Pure]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
internal static Vector128<ulong> MultiplyInternal_Sse2(Vector128<ulong> left, Vector128<ulong> right)
{
    // https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers

    // Operands for the widening 32-bit multiply: the inputs as-is, and the inputs with each
    // 64-bit element shifted right by 32 bits.
    Vector128<uint> leftLow = left.AsUInt32();
    Vector128<uint> rightLow = right.AsUInt32();
    Vector128<uint> leftHigh = Sse2.ShiftRightLogical(left, 32).AsUInt32();
    Vector128<uint> rightHigh = Sse2.ShiftRightLogical(right, 32).AsUInt32();

    Vector128<ulong> lowProduct = Sse2.Multiply(leftLow, rightLow);
    Vector128<ulong> highTimesLow = Sse2.Multiply(leftHigh, rightLow);
    Vector128<ulong> lowTimesHigh = Sse2.Multiply(leftLow, rightHigh);

    // The cross terms are summed and moved into the upper 32 bits before being added to the low product.
    Vector128<ulong> crossTerms = Sse2.ShiftLeftLogical(Sse2.Add(highTimesLow, lowTimesHigh), 32);

    return Sse2.Add(crossTerms, lowProduct);
}
|
||||||
|
|
||||||
|
/// <summary>
///     Software fallback that multiplies each 64-bit element of <paramref name="left" /> by the corresponding element
///     of <paramref name="right" /> using scalar arithmetic.
/// </summary>
/// <param name="left">Left vector.</param>
/// <param name="right">Right vector.</param>
/// <returns>A vector whose element <c>i</c> is <c>left[i] * right[i]</c>.</returns>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
internal static Vector256<ulong> MultiplyInternal_Fallback(Vector256<ulong> left, Vector256<ulong> right)
{
    // Every element of the vector obtained here is overwritten by the loop below.
    Vector256<ulong> product = GetUninitializedVector256<ulong>();

    // View each vector as a reference to its first 64-bit element.
    ref ulong productElements = ref Unsafe.As<Vector256<ulong>, ulong>(ref product);
    ref ulong leftElements = ref Unsafe.As<Vector256<ulong>, ulong>(ref left);
    ref ulong rightElements = ref Unsafe.As<Vector256<ulong>, ulong>(ref right);

    int element = 0;
    while (element < Vector256<ulong>.Count)
    {
        Unsafe.Add(ref productElements, element) =
            Unsafe.Add(ref leftElements, element) * Unsafe.Add(ref rightElements, element);
        element++;
    }

    return product;
}
|
||||||
|
|
||||||
|
/// <summary>
///     AVX2 implementation of the element-wise 64-bit multiply, assembled from 32-bit x 32-bit partial products.
/// </summary>
/// <param name="left">Left vector.</param>
/// <param name="right">Right vector.</param>
/// <returns>The sum of the low partial product and the cross partial products shifted left by 32 bits.</returns>
/// <remarks>This method performs no <c>Avx2.IsSupported</c> check of its own.</remarks>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
internal static Vector256<ulong> MultiplyInternal_Avx2(Vector256<ulong> left, Vector256<ulong> right)
{
    // https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers

    // Operands for the widening 32-bit multiply: the inputs as-is, and the inputs with each
    // 64-bit element shifted right by 32 bits.
    Vector256<uint> leftLow = left.AsUInt32();
    Vector256<uint> rightLow = right.AsUInt32();
    Vector256<uint> leftHigh = Avx2.ShiftRightLogical(left, 32).AsUInt32();
    Vector256<uint> rightHigh = Avx2.ShiftRightLogical(right, 32).AsUInt32();

    Vector256<ulong> lowProduct = Avx2.Multiply(leftLow, rightLow);
    Vector256<ulong> highTimesLow = Avx2.Multiply(leftHigh, rightLow);
    Vector256<ulong> lowTimesHigh = Avx2.Multiply(leftLow, rightHigh);

    // The cross terms are summed and moved into the upper 32 bits before being added to the low product.
    Vector256<ulong> crossTerms = Avx2.ShiftLeftLogical(Avx2.Add(highTimesLow, lowTimesHigh), 32);

    return Avx2.Add(crossTerms, lowProduct);
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user