diff --git a/X10D/src/Core/IntrinsicUtility.cs b/X10D/src/Core/IntrinsicUtility.cs index f064124..9837933 100644 --- a/X10D/src/Core/IntrinsicUtility.cs +++ b/X10D/src/Core/IntrinsicUtility.cs @@ -17,8 +17,6 @@ public static class IntrinsicUtility // ANYTHING OPERATION OPERATION ON ANYTHING THAT ISN'T FLOAT IS NOT SSE COMPATIBLE, MUST BE SSE2 AND BEYOND VERSION // FOR API CONSISTENCY. - // TODO: Fallback? No idea if it is worth it since even CPU made from before 2000 support SSE and SSE2. - /// ///
Correcting of into 0 and 1 depend on their boolean truthiness.
///
Operation (raw):
@@ -36,8 +34,6 @@ public static class IntrinsicUtility ///
/// Vector of byte to correct. /// A of which remapped back to 0 and 1 based on boolean truthiness. - /// API avaliable on ARM NEON (untested) hardware. - /// Hardware doesn't suppot ARM NEON intrinsic set. [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector64 CorrectBoolean(Vector64 vector) @@ -51,13 +47,20 @@ public static class IntrinsicUtility return result; } - // No comparison, bitwise AND with 64-bit vector on SSE and beyond. - if (Sse2.IsSupported) + var output = GetUninitializedVector64(); + + for (int i = 0; i < Vector64.Count; i++) { - throw new PlatformNotSupportedException("Operation is not supported on SSE2 instruction set."); + ref var writeElement = ref Unsafe.Add(ref Unsafe.As, byte>(ref output), i); +#if NET7_0_OR_GREATER + writeElement = vector[i] == 0 ? (byte)0 : (byte)1; +#else + var element = Unsafe.Add(ref Unsafe.As, byte>(ref vector), i); + writeElement = element == 0 ? (byte)0 : (byte)1; +#endif } - throw new PlatformNotSupportedException("Unknown intrinsic instruction set."); + return output; } /// @@ -76,9 +79,7 @@ public static class IntrinsicUtility /// /// /// Vector of byte to correct. - /// A of which remapped back to 0 and 1 based on boolean truthiness. - /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM NEON (untested) hardwares. - /// Hardware doesn't support ARM NEON or SSE2 instruction set. + /// A of which remapped back to 0 and 1 based on boolean truthiness. [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector128 CorrectBoolean(Vector128 vector) @@ -99,7 +100,20 @@ public static class IntrinsicUtility return result; } - throw new PlatformNotSupportedException("Unknown intrinsic instruction set."); + var output = GetUninitializedVector128(); + + for (int i = 0; i < Vector128.Count; i++) + { + ref var writeElement = ref Unsafe.Add(ref Unsafe.As, byte>(ref output), i); +#if NET7_0_OR_GREATER + writeElement = vector[i] == 0 ? (byte)0 : (byte)1; +#else + var element = Unsafe.Add(ref Unsafe.As, byte>(ref vector), i); + writeElement = element == 0 ? (byte)0 : (byte)1; +#endif + } + + return output; } /// @@ -118,9 +132,7 @@ public static class IntrinsicUtility /// /// /// Vector of byte to correct. - /// A of which remapped back to 0 and 1 based on boolean truthiness. - /// API avaliable on AVX2 hardware. - /// Hardware doesn't support AVX2 instruction set. + /// A of which remapped back to 0 and 1 based on boolean truthiness. [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector256 CorrectBoolean(Vector256 vector) @@ -133,12 +145,20 @@ public static class IntrinsicUtility return result; } - if (AdvSimd.IsSupported) + var output = GetUninitializedVector256(); + + for (int i = 0; i < Vector256.Count; i++) { - throw new PlatformNotSupportedException("Operation is not supported on ARM NEON instruction set."); + ref var writeElement = ref Unsafe.Add(ref Unsafe.As, byte>(ref output), i); +#if NET7_0_OR_GREATER + writeElement = vector[i] == 0 ? (byte)0 : (byte)1; +#else + var element = Unsafe.Add(ref Unsafe.As, byte>(ref vector), i); + writeElement = element == 0 ? (byte)0 : (byte)1; +#endif } - throw new PlatformNotSupportedException("Unknown intrinsic instruction set."); + return output; } /// @@ -152,8 +172,6 @@ public static class IntrinsicUtility /// Left vector. /// Right vector. /// A of whose elements is 64-bit truncated product of lhs and rhs. - /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM NEON (untested) hardwares. - /// Hardware doesn't support SSE2 or ARM NEON instruction set. [Pure] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] @@ -186,7 +204,15 @@ public static class IntrinsicUtility return AdvSimd.MultiplyWideningLowerAndAdd(AdvSimd.ShiftLeftLogical(mul.AsUInt64(), 32), a, b); } - throw new PlatformNotSupportedException("Unsupported SIMD platform."); + var output = GetUninitializedVector128(); + + Unsafe.As, ulong>(ref output) = + Unsafe.As, ulong>(ref lhs) * Unsafe.As, ulong>(ref rhs); + + Unsafe.Add(ref Unsafe.As, ulong>(ref output), 1) = + Unsafe.Add(ref Unsafe.As, ulong>(ref lhs), 1) * Unsafe.Add(ref Unsafe.As, ulong>(ref rhs), 1); + + return output; } /// @@ -202,8 +228,6 @@ public static class IntrinsicUtility /// Left vector. /// Right vector. /// A of whose elements is 64-bit truncated product of lhs and rhs. - /// API avaliable on AVX2 hardware. - /// Hardware doesn't support AVX2 instruction set. [Pure] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] @@ -224,7 +248,15 @@ public static class IntrinsicUtility return Avx2.Add(high, ac); } - throw new PlatformNotSupportedException("Unsupported SIMD platform."); + var output = GetUninitializedVector256(); + + for (int i = 0; i < Vector256.Count; i++) + { + Unsafe.Add(ref Unsafe.As, ulong>(ref output), i) = + Unsafe.Add(ref Unsafe.As, ulong>(ref lhs), i) * Unsafe.Add(ref Unsafe.As, ulong>(ref rhs), i); + } + + return output; } /// @@ -238,8 +270,6 @@ public static class IntrinsicUtility /// Left vector. /// Right vector. /// A of whose elements is 64-bit truncated product of lhs and rhs. - /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM NEON (untested) hardwares. - /// Hardware doesn't support SSE2 or ARM NEON instruction set. [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector128 Multiply(Vector128 lhs, Vector128 rhs) @@ -260,8 +290,6 @@ public static class IntrinsicUtility /// Left vector. /// Right vector. /// A of whose elements is 64-bit truncated product of lhs and rhs. - /// API avaliable on AVX2 hardware. - /// Hardware doesn't support AVX2 instruction set. [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector256 Multiply(Vector256 lhs, Vector256 rhs) @@ -282,8 +310,6 @@ public static class IntrinsicUtility /// Left vector. /// Right vector. /// A of with all elements is result of OR operation on adjacent pairs of elements in lhs and rhs. - /// API avaliable on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares. - /// Hardware doesn't support ARM64 NEON or SSE instruction set. [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector128 HorizontalOr(Vector128 lhs, Vector128 rhs) @@ -304,7 +330,21 @@ public static class IntrinsicUtility return AdvSimd.Or(s1, s2); } - throw new PlatformNotSupportedException("Unsupported SIMD platform."); + Vector128 output = GetUninitializedVector128(); + + Unsafe.As, uint>(ref output) = + Unsafe.As, uint>(ref lhs) | Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 1); + + Unsafe.Add(ref Unsafe.As, uint>(ref output), 1) = + Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 2) | Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 3); + + Unsafe.Add(ref Unsafe.As, uint>(ref output), 2) = + Unsafe.As, uint>(ref rhs) | Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 1); + + Unsafe.Add(ref Unsafe.As, uint>(ref output), 3) = + Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 2) | Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 3); + + return output; } /// @@ -374,10 +414,47 @@ public static class IntrinsicUtility { return Sse2.Shuffle(vector.AsDouble(), vector.AsDouble(), 0b01).AsUInt64(); } - - // No idea how to implement this in ARM NEON (Reason: Unavailable hardware) - throw new PlatformNotSupportedException("Unsupported SIMD platform."); + Vector128 output = GetUninitializedVector128(); + + Unsafe.As, ulong>(ref output) = Unsafe.Add(ref Unsafe.As, ulong>(ref vector), 1); + Unsafe.Add(ref Unsafe.As, ulong>(ref output), 1) = Unsafe.As, ulong>(ref vector); + + return output; + } + + // Helper methods + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + private static Vector64 GetUninitializedVector64() where T : struct + { +#if NET6_0_OR_GREATER + Unsafe.SkipInit(out Vector64 output); + return output; +#else + return default; +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + private static Vector128 GetUninitializedVector128() where T : struct + { +#if NET6_0_OR_GREATER + Unsafe.SkipInit(out Vector128 output); + return output; +#else + return default; +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + private static Vector256 GetUninitializedVector256() where T : struct + { +#if NET6_0_OR_GREATER + Unsafe.SkipInit(out Vector256 output); + return output; +#else + return default; +#endif } } diff --git a/X10D/src/Core/SpanExtensions.cs b/X10D/src/Core/SpanExtensions.cs index 52da227..68bd05a 100644 --- a/X10D/src/Core/SpanExtensions.cs +++ b/X10D/src/Core/SpanExtensions.cs @@ -1,7 +1,6 @@ using System.Diagnostics.Contracts; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Numerics; #if NETCOREAPP3_0_OR_GREATER using X10D.Core; @@ -112,13 +111,13 @@ public static class SpanExtensions default: #if NET7_0_OR_GREATER throw new UnreachableException($"Enum with the size of {Unsafe.SizeOf()} bytes is unexpected."); -#else // NET7_0_OR_GREATER +#else throw new ArgumentException($"Enum with the size of {Unsafe.SizeOf()} bytes is unexpected."); -#endif // NET7_0_OR_GREATER +#endif } #pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type } -#else // NET6_0_OR_GREATER +#else foreach (var it in span) { if (EqualityComparer.Default.Equals(it, value)) @@ -128,7 +127,7 @@ public static class SpanExtensions } return false; -#endif // NET6_0_OR_GREATER +#endif } ///