From e176f65e9730e20cf90bbc382b092d0d5a4fbade Mon Sep 17 00:00:00 2001
From: RealityProgrammer <DestinyHero789@gmail.com>
Date: Tue, 7 Mar 2023 16:50:04 +0700
Subject: [PATCH] Commit git's untracked files

---
 X10D.Tests/src/Core/SpanTest.cs      | 103 +++++++++++++
 X10D/src/Core/IntrinsicExtensions.cs | 103 +++++++++++++
 X10D/src/Core/IntrinsicUtility.cs    | 222 +++++++++++++++++++++++++++
 3 files changed, 428 insertions(+)
 create mode 100644 X10D.Tests/src/Core/SpanTest.cs
 create mode 100644 X10D/src/Core/IntrinsicExtensions.cs
 create mode 100644 X10D/src/Core/IntrinsicUtility.cs
diff --git a/X10D.Tests/src/Core/SpanTest.cs b/X10D.Tests/src/Core/SpanTest.cs
new file mode 100644
index 0000000..489ed18
--- /dev/null
+++ b/X10D.Tests/src/Core/SpanTest.cs
@@ -0,0 +1,103 @@
+﻿using Microsoft.VisualStudio.TestTools.UnitTesting;
+using X10D.Collections;
+using X10D.Core;
+
+namespace X10D.Tests.Core;
+
+[TestClass]
+public class SpanTest
+{
+    // Fixed-pattern tests pin the bit order: span element 0 maps to the least significant bit.
+    // Randomized tests check that Unpack followed by the matching Pack returns the original value.
+    [TestMethod]
+    public void Pack8Bit_Should_Pack_Correctly()
+    {
+        Span<bool> span = stackalloc bool[8] { true, true, false, false, true, true, false, false };
+        Assert.AreEqual(0b00110011, span.PackByte());
+    }
+
+    [TestMethod]
+    public void Pack8Bit_Should_Pack_Correctly_Randomize()
+    {
+        var value = new Random().NextByte();
+        Span<bool> unpacks = stackalloc bool[8];
+
+        value.Unpack(unpacks);
+
+        Assert.AreEqual(value, unpacks.PackByte());
+    }
+
+    [TestMethod]
+    public void Pack16Bit_Should_Pack_Correctly()
+    {
+        ReadOnlySpan<bool> span = stackalloc bool[16] {
+            false, false, true, false, true, false, true, true,
+            true, false, true, true, false, true, false, false,
+        };
+        Assert.AreEqual(0b00101101_11010100, span.PackInt16());
+    }
+
+    [TestMethod]
+    public void Pack16Bit_Should_Pack_Correctly_Randomize()
+    {
+        var value = new Random().NextInt16();
+        Span<bool> unpacks = stackalloc bool[16];
+
+        value.Unpack(unpacks);
+
+        Assert.AreEqual(value, unpacks.PackInt16());
+    }
+
+    [TestMethod]
+    public void Pack32Bit_Should_Pack_Correctly()
+    {
+        ReadOnlySpan<bool> span = stackalloc bool[] {
+            false, true, false, true, false, true, false, true,
+            true, false, true, false, true, false, true, false,
+            false, true, false, true, false, true, false, true,
+            true, false, true, false, true, false, true, false,
+        };
+        Assert.AreEqual(0b01010101_10101010_01010101_10101010, span.PackInt32());
+    }
+
+    [TestMethod]
+    public void Pack32Bit_Should_Pack_Correctly_Randomize()
+    {
+        var value = new Random().Next(int.MinValue, int.MaxValue);
+        Span<bool> unpacks = stackalloc bool[32];
+
+        value.Unpack(unpacks);
+
+        Assert.AreEqual(value, unpacks.PackInt32());
+    }
+
+    [TestMethod]
+    public void Pack64Bit_Should_Pack_Correctly()
+    {
+        ReadOnlySpan<bool> span = stackalloc bool[] {
+            true, false, true, false, false, true, false, true,
+            false, false, true, true, false, true, false, false,
+            true, true, true, false, true, false, false, true,
+            false, true, false, false, true, false, false, false,
+            false, true, true, false, true, false, true, true,
+            true, false, false, true, false, true, true, false,
+            false, true, true, false, true, false, true, true,
+            true, false, true, false, true, false, true, false,
+        };
+        Assert.AreEqual(0b01010101_11010110_01101001_11010110_00010010_10010111_00101100_10100101, span.PackInt64());
+    }
+
+    [TestMethod]
+    public void Pack64Bit_Should_Pack_Correctly_Randomize()
+    {
+        // Random.Next() is never negative, so composing two results leaves bits 31 and 63 clear; use raw bytes instead.
+        var buffer = new byte[sizeof(long)];
+        new Random().NextBytes(buffer);
+        long value = BitConverter.ToInt64(buffer, 0);
+        Span<bool> unpacks = stackalloc bool[64];
+
+        value.Unpack(unpacks);
+
+        Assert.AreEqual(value, unpacks.PackInt64());
+    }
+}
diff --git a/X10D/src/Core/IntrinsicExtensions.cs b/X10D/src/Core/IntrinsicExtensions.cs
new file mode 100644
index 0000000..f7f0313
--- /dev/null
+++ b/X10D/src/Core/IntrinsicExtensions.cs
@@ -0,0 +1,103 @@
+﻿#if NETCOREAPP3_0_OR_GREATER
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics.Arm;
+using System.Diagnostics.Contracts;
+using System.Runtime.CompilerServices;
+using System.Numerics;
+
+namespace X10D.Core;
+
+/// <summary>
+///     Extension methods for SIMD vectors, namely <see cref="Vector64{T}"/>, <see cref="Vector128{T}"/> and <see cref="Vector256{T}"/>.
+/// </summary>
+public static class IntrinsicExtensions
+{
+    /// <summary>
+    ///     Normalizes each byte of a <see cref="Vector64{T}"/> into a standard boolean value.
+    /// </summary>
+    /// <param name="vector">Vector of byte to correct.</param>
+    /// <returns>A vector of bytes holding 0 where the input byte was 0, and 1 where it was any other value.</returns>
+    /// <exception cref="PlatformNotSupportedException">AdvSimd is not supported by the current hardware.</exception>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector64<byte> CorrectBoolean(this Vector64<byte> vector)
+    {
+        if (AdvSimd.IsSupported)
+        {
+            // Not verified on ARM hardware (unavailable as of March 6th 2023).
+            // isZero is 0xFF for zero bytes; BitwiseSelect takes `left` (0) there and `right` (1) elsewhere.
+            var isZero = AdvSimd.CompareEqual(vector, Vector64<byte>.Zero);
+
+            return AdvSimd.BitwiseSelect(isZero, Vector64<byte>.Zero, Vector64.Create((byte)1));
+        }
+
+        if (Sse.IsSupported)
+        {
+            throw new PlatformNotSupportedException("Cannot correct boolean of Vector64<byte> on SSE intrinsic set.");
+        }
+
+        throw new PlatformNotSupportedException("Unknown Intrinsic platform.");
+    }
+
+    /// <summary>
+    ///     Normalizes each byte of a <see cref="Vector128{T}"/> into a standard boolean value.
+    /// </summary>
+    /// <param name="vector">Vector of byte to correct.</param>
+    /// <returns>A vector of bytes holding 0 where the input byte was 0, and 1 where it was any other value.</returns>
+    /// <exception cref="PlatformNotSupportedException">Neither SSE2 nor AdvSimd is supported by the current hardware.</exception>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector128<byte> CorrectBoolean(this Vector128<byte> vector)
+    {
+        if (Sse2.IsSupported)
+        {
+            // isZero is 0xFF for zero bytes; AndNot computes (~isZero & 1), leaving 1 in every non-zero byte.
+            var isZero = Sse2.CompareEqual(vector, Vector128<byte>.Zero);
+
+            return Sse2.AndNot(isZero, Vector128.Create((byte)1));
+        }
+        else if (AdvSimd.IsSupported)
+        {
+            // Not verified on ARM hardware (unavailable as of March 6th 2023).
+            // isZero is 0xFF for zero bytes; BitwiseSelect takes `left` (0) there and `right` (1) elsewhere.
+            var isZero = AdvSimd.CompareEqual(vector, Vector128<byte>.Zero);
+
+            return AdvSimd.BitwiseSelect(isZero, Vector128<byte>.Zero, Vector128.Create((byte)1));
+        }
+
+        throw new PlatformNotSupportedException("Unknown Intrinsic platform.");
+    }
+
+    /// <summary>
+    ///     Normalizes each byte of a <see cref="Vector256{T}"/> into a standard boolean value.
+    /// </summary>
+    /// <param name="vector">Vector of byte to correct.</param>
+    /// <returns>A vector of bytes holding 0 where the input byte was 0, and 1 where it was any other value.</returns>
+    /// <exception cref="PlatformNotSupportedException">AVX2 is not supported by the current hardware.</exception>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector256<byte> CorrectBoolean(this Vector256<byte> vector)
+    {
+        if (Avx2.IsSupported)
+        {
+            // isZero is 0xFF for zero bytes; AndNot computes (~isZero & 1), leaving 1 in every non-zero byte.
+            var isZero = Avx2.CompareEqual(vector, Vector256<byte>.Zero);
+
+            return Avx2.AndNot(isZero, Vector256.Create((byte)1));
+        }
+
+        if (AdvSimd.IsSupported)
+        {
+            throw new PlatformNotSupportedException("Cannot correct boolean of Vector256<byte> on ARM intrinsic set.");
+        }
+
+        throw new PlatformNotSupportedException("Unknown Intrinsic platform.");
+    }
+}
+#endif
diff --git a/X10D/src/Core/IntrinsicUtility.cs b/X10D/src/Core/IntrinsicUtility.cs
new file mode 100644
index 0000000..3524088
--- /dev/null
+++ b/X10D/src/Core/IntrinsicUtility.cs
@@ -0,0 +1,222 @@
+﻿#if NETCOREAPP3_0_OR_GREATER
+
+using System.Diagnostics.CodeAnalysis;
+using System.Diagnostics.Contracts;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+namespace X10D.Core;
+
+/// <summary>
+///     Provides utility methods for SIMD vector operations that are missing from common hardware instruction sets.
+/// </summary>
+public static class IntrinsicUtility
+{
+    /// <summary>
+    ///     <para>Multiplies packed 64-bit unsigned integer elements in lhs and rhs, keeping the low 64 bits of each product.</para>
+    ///     <para>Operation:</para>
+    ///     <code>
+    ///     dest[0] = lhs[0] * rhs[0];
+    ///     dest[1] = lhs[1] * rhs[1];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>A vector holding the truncated 64-bit product of each pair of elements.</returns>
+    /// <remarks>API available on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM NEON (untested) hardware.</remarks>
+    /// <exception cref="PlatformNotSupportedException">Neither SSE2 nor AdvSimd is supported by the current hardware.</exception>
+    [Pure]
+    [CLSCompliant(false)]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector128<ulong> Multiply(Vector128<ulong> lhs, Vector128<ulong> rhs)
+    {
+        if (Sse2.IsSupported)
+        {
+            // https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers
+            // Writing each operand as high * 2^32 + low, the product modulo 2^64 is
+            // lhsLow * rhsLow + ((lhsHigh * rhsLow + lhsLow * rhsHigh) << 32).
+            Vector128<ulong> lowProduct = Sse2.Multiply(lhs.AsUInt32(), rhs.AsUInt32());
+            Vector128<uint> lhsHigh = Sse2.ShiftRightLogical(lhs, 32).AsUInt32();
+            Vector128<uint> rhsHigh = Sse2.ShiftRightLogical(rhs, 32).AsUInt32();
+            Vector128<ulong> cross = Sse2.Add(Sse2.Multiply(lhsHigh, rhs.AsUInt32()), Sse2.Multiply(lhs.AsUInt32(), rhsHigh));
+
+            return Sse2.Add(Sse2.ShiftLeftLogical(cross, 32), lowProduct);
+        }
+        else if (AdvSimd.IsSupported)
+        {
+            // https://stackoverflow.com/questions/60236627/facing-problem-in-implementing-multiplication-of-64-bit-variables-using-arm-neon
+            // Not verified on ARM hardware (unavailable as of March 7th 2023).
+            Vector64<uint> lhsLow = AdvSimd.ExtractNarrowingLower(lhs);
+            Vector64<uint> rhsLow = AdvSimd.ExtractNarrowingLower(rhs);
+            // Each pair of 32-bit lanes holds the cross products rhsLow * lhsHigh and rhsHigh * lhsLow.
+            Vector128<uint> cross = AdvSimd.Multiply(rhs.AsUInt32(), AdvSimd.ReverseElement32(lhs).AsUInt32());
+            // Both cross products belong to the upper half, so sum each pair before shifting it into place.
+            Vector128<ulong> high = AdvSimd.ShiftLeftLogical(AdvSimd.AddPairwiseWidening(cross), 32);
+
+            return AdvSimd.MultiplyWideningLowerAndAdd(high, lhsLow, rhsLow);
+        }
+
+        throw new PlatformNotSupportedException("Unsupported SIMD platform.");
+    }
+
+    /// <summary>
+    ///     <para>Multiplies packed 64-bit unsigned integer elements in lhs and rhs, keeping the low 64 bits of each product.</para>
+    ///     <para>Operation:</para>
+    ///     <code>
+    ///     dest[0] = lhs[0] * rhs[0];
+    ///     dest[1] = lhs[1] * rhs[1];
+    ///     dest[2] = lhs[2] * rhs[2];
+    ///     dest[3] = lhs[3] * rhs[3];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>A vector holding the truncated 64-bit product of each pair of elements.</returns>
+    /// <remarks>API available on AVX2 hardware.</remarks>
+    /// <exception cref="PlatformNotSupportedException">AVX2 is not supported by the current hardware.</exception>
+    [Pure]
+    [CLSCompliant(false)]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector256<ulong> Multiply(Vector256<ulong> lhs, Vector256<ulong> rhs)
+    {
+        if (Avx2.IsSupported)
+        {
+            // https://stackoverflow.com/questions/17863411/sse-multiplication-of-2-64-bit-integers
+
+            // Writing each operand as high * 2^32 + low, the product modulo 2^64 is
+            // lhsLow * rhsLow + ((lhsHigh * rhsLow + lhsLow * rhsHigh) << 32).
+            Vector256<ulong> lowProduct = Avx2.Multiply(lhs.AsUInt32(), rhs.AsUInt32());
+            Vector256<uint> lhsHigh = Avx2.ShiftRightLogical(lhs, 32).AsUInt32();
+            Vector256<uint> rhsHigh = Avx2.ShiftRightLogical(rhs, 32).AsUInt32();
+            Vector256<ulong> cross = Avx2.Add(Avx2.Multiply(lhsHigh, rhs.AsUInt32()), Avx2.Multiply(lhs.AsUInt32(), rhsHigh));
+
+            return Avx2.Add(Avx2.ShiftLeftLogical(cross, 32), lowProduct);
+        }
+
+        throw new PlatformNotSupportedException("Unsupported SIMD platform.");
+    }
+
+    /// <summary>
+    ///     <para>Multiplies packed 64-bit signed integer elements in lhs and rhs, keeping the low 64 bits of each product.</para>
+    ///     <para>Operation:</para>
+    ///     <code>
+    ///     dest[0] = lhs[0] * rhs[0];
+    ///     dest[1] = lhs[1] * rhs[1];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>A vector holding the truncated 64-bit product of each pair of elements.</returns>
+    /// <remarks>API available on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM NEON (untested) hardware.</remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector128<long> Multiply(Vector128<long> lhs, Vector128<long> rhs)
+    {
+        return Multiply(lhs.AsUInt64(), rhs.AsUInt64()).AsInt64();
+    }
+
+    /// <summary>
+    ///     <para>Multiplies packed 64-bit signed integer elements in lhs and rhs, keeping the low 64 bits of each product.</para>
+    ///     <para>Operation:</para>
+    ///     <code>
+    ///     dest[0] = lhs[0] * rhs[0];
+    ///     dest[1] = lhs[1] * rhs[1];
+    ///     dest[2] = lhs[2] * rhs[2];
+    ///     dest[3] = lhs[3] * rhs[3];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>A vector holding the truncated 64-bit product of each pair of elements.</returns>
+    /// <remarks>API available on AVX2 hardware.</remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector256<long> Multiply(Vector256<long> lhs, Vector256<long> rhs)
+    {
+        return Multiply(lhs.AsUInt64(), rhs.AsUInt64()).AsInt64();
+    }
+
+    /// <summary>
+    ///     <para>Horizontally applies the OR operation on adjacent pairs of single-precision (32-bit) floating-point elements in lhs and rhs.</para>
+    ///     <para>Operation:</para>
+    ///     <code>
+    ///     dest[0] = lhs[0] | lhs[1];
+    ///     dest[1] = lhs[2] | lhs[3];
+    ///     dest[2] = rhs[0] | rhs[1];
+    ///     dest[3] = rhs[2] | rhs[3];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>A vector holding the bitwise OR of each adjacent pair of elements.</returns>
+    /// <remarks>API available on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardware.</remarks>
+    /// <exception cref="PlatformNotSupportedException">Neither SSE nor ARM64 AdvSimd is supported by the current hardware.</exception>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector128<float> HorizontalOr(Vector128<float> lhs, Vector128<float> rhs)
+    {
+        if (Sse.IsSupported)
+        {
+            // The first control gathers elements 0 and 2 of each input, the second gathers elements 1 and 3.
+            var even = Sse.Shuffle(lhs, rhs, 0b10_00_10_00);
+            var odd = Sse.Shuffle(lhs, rhs, 0b11_01_11_01);
+            return Sse.Or(even, odd);
+        }
+        else if (AdvSimd.Arm64.IsSupported)
+        {
+            // Not verified on ARM hardware (unavailable as of March 7th 2023).
+            var even = AdvSimd.Arm64.UnzipEven(lhs, rhs);
+            var odd = AdvSimd.Arm64.UnzipOdd(lhs, rhs);
+            return AdvSimd.Or(even, odd);
+        }
+
+        throw new PlatformNotSupportedException("Unsupported SIMD platform.");
+    }
+
+    /// <summary>
+    ///     <para>Horizontally applies the OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs.</para>
+    ///     <para>Operation:</para>
+    ///     <code>
+    ///     dest[0] = lhs[0] | lhs[1];
+    ///     dest[1] = lhs[2] | lhs[3];
+    ///     dest[2] = rhs[0] | rhs[1];
+    ///     dest[3] = rhs[2] | rhs[3];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>A vector holding the bitwise OR of each adjacent pair of elements.</returns>
+    /// <remarks>API available on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardware.</remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static Vector128<int> HorizontalOr(Vector128<int> lhs, Vector128<int> rhs)
+    {
+        return HorizontalOr(lhs.AsSingle(), rhs.AsSingle()).AsInt32();
+    }
+
+    /// <summary>
+    ///     <para>Horizontally applies the OR operation on adjacent pairs of 32-bit unsigned integer elements in lhs and rhs.</para>
+    ///     <para>Operation:</para>
+    ///     <code>
+    ///     dest[0] = lhs[0] | lhs[1];
+    ///     dest[1] = lhs[2] | lhs[3];
+    ///     dest[2] = rhs[0] | rhs[1];
+    ///     dest[3] = rhs[2] | rhs[3];
+    ///     </code>
+    /// </summary>
+    /// <param name="lhs">Left vector.</param>
+    /// <param name="rhs">Right vector.</param>
+    /// <returns>A vector holding the bitwise OR of each adjacent pair of elements.</returns>
+    /// <remarks>API available on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardware.</remarks>
+    [Pure]
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    [CLSCompliant(false)]
+    public static Vector128<uint> HorizontalOr(Vector128<uint> lhs, Vector128<uint> rhs)
+    {
+        return HorizontalOr(lhs.AsSingle(), rhs.AsSingle()).AsUInt32();
+    }
+}
+
+#endif