diff --git a/X10D/src/Core/IntrinsicUtility.cs b/X10D/src/Core/IntrinsicUtility.cs
index f064124..9837933 100644
--- a/X10D/src/Core/IntrinsicUtility.cs
+++ b/X10D/src/Core/IntrinsicUtility.cs
@@ -17,8 +17,6 @@ public static class IntrinsicUtility
// ANYTHING OPERATION OPERATION ON ANYTHING THAT ISN'T FLOAT IS NOT SSE COMPATIBLE, MUST BE SSE2 AND BEYOND VERSION
// FOR API CONSISTENCY.
- // TODO: Fallback? No idea if it is worth it since even CPU made from before 2000 support SSE and SSE2.
-
///
///
Correcting of into 0 and 1 depend on their boolean truthiness.
///
Operation (raw):
@@ -36,8 +34,6 @@ public static class IntrinsicUtility
///
/// Vector of byte to correct.
/// A of which remapped back to 0 and 1 based on boolean truthiness.
- /// API avaliable on ARM NEON (untested) hardware.
- /// Hardware doesn't suppot ARM NEON intrinsic set.
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
     public static Vector64<byte> CorrectBoolean(Vector64<byte> vector)
@@ -51,13 +47,20 @@ public static class IntrinsicUtility
return result;
}
- // No comparison, bitwise AND with 64-bit vector on SSE and beyond.
- if (Sse2.IsSupported)
+        var output = GetUninitializedVector64<byte>();
+
+        for (int i = 0; i < Vector64<byte>.Count; i++)
         {
-            throw new PlatformNotSupportedException("Operation is not supported on SSE2 instruction set.");
+            ref var writeElement = ref Unsafe.Add(ref Unsafe.As<Vector64<byte>, byte>(ref output), i);
+#if NET7_0_OR_GREATER
+            writeElement = vector[i] == 0 ? (byte)0 : (byte)1;
+#else
+            var element = Unsafe.Add(ref Unsafe.As<Vector64<byte>, byte>(ref vector), i);
+            writeElement = element == 0 ? (byte)0 : (byte)1;
+#endif
         }
-        throw new PlatformNotSupportedException("Unknown intrinsic instruction set.");
+        return output;
}
///
@@ -76,9 +79,7 @@ public static class IntrinsicUtility
///
///
/// Vector of byte to correct.
- /// A of which remapped back to 0 and 1 based on boolean truthiness.
- /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM NEON (untested) hardwares.
- /// Hardware doesn't support ARM NEON or SSE2 instruction set.
+ /// A of which remapped back to 0 and 1 based on boolean truthiness.
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
     public static Vector128<byte> CorrectBoolean(Vector128<byte> vector)
@@ -99,7 +100,20 @@ public static class IntrinsicUtility
return result;
}
- throw new PlatformNotSupportedException("Unknown intrinsic instruction set.");
+        var output = GetUninitializedVector128<byte>();
+
+        for (int i = 0; i < Vector128<byte>.Count; i++)
+        {
+            ref var writeElement = ref Unsafe.Add(ref Unsafe.As<Vector128<byte>, byte>(ref output), i);
+#if NET7_0_OR_GREATER
+            writeElement = vector[i] == 0 ? (byte)0 : (byte)1;
+#else
+            var element = Unsafe.Add(ref Unsafe.As<Vector128<byte>, byte>(ref vector), i);
+            writeElement = element == 0 ? (byte)0 : (byte)1;
+#endif
+        }
+
+        return output;
}
///
@@ -118,9 +132,7 @@ public static class IntrinsicUtility
///
///
/// Vector of byte to correct.
- /// A of which remapped back to 0 and 1 based on boolean truthiness.
- /// API avaliable on AVX2 hardware.
- /// Hardware doesn't support AVX2 instruction set.
+ /// A of which remapped back to 0 and 1 based on boolean truthiness.
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
     public static Vector256<byte> CorrectBoolean(Vector256<byte> vector)
@@ -133,12 +145,20 @@ public static class IntrinsicUtility
return result;
}
-        if (AdvSimd.IsSupported)
+        var output = GetUninitializedVector256<byte>();
+
+        for (int i = 0; i < Vector256<byte>.Count; i++)
         {
-            throw new PlatformNotSupportedException("Operation is not supported on ARM NEON instruction set.");
+            ref var writeElement = ref Unsafe.Add(ref Unsafe.As<Vector256<byte>, byte>(ref output), i);
+#if NET7_0_OR_GREATER
+            writeElement = vector[i] == 0 ? (byte)0 : (byte)1;
+#else
+            var element = Unsafe.Add(ref Unsafe.As<Vector256<byte>, byte>(ref vector), i);
+            writeElement = element == 0 ? (byte)0 : (byte)1;
+#endif
         }
-        throw new PlatformNotSupportedException("Unknown intrinsic instruction set.");
+        return output;
}
///
@@ -152,8 +172,6 @@ public static class IntrinsicUtility
/// Left vector.
/// Right vector.
/// A of whose elements is 64-bit truncated product of lhs and rhs.
- /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM NEON (untested) hardwares.
- /// Hardware doesn't support SSE2 or ARM NEON instruction set.
[Pure]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
@@ -186,7 +204,15 @@ public static class IntrinsicUtility
return AdvSimd.MultiplyWideningLowerAndAdd(AdvSimd.ShiftLeftLogical(mul.AsUInt64(), 32), a, b);
}
- throw new PlatformNotSupportedException("Unsupported SIMD platform.");
+        var output = GetUninitializedVector128<ulong>();
+
+        Unsafe.As<Vector128<ulong>, ulong>(ref output) =
+            Unsafe.As<Vector128<ulong>, ulong>(ref lhs) * Unsafe.As<Vector128<ulong>, ulong>(ref rhs);
+
+        Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref output), 1) =
+            Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref lhs), 1) * Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref rhs), 1);
+
+        return output;
}
///
@@ -202,8 +228,6 @@ public static class IntrinsicUtility
/// Left vector.
/// Right vector.
/// A of whose elements is 64-bit truncated product of lhs and rhs.
- /// API avaliable on AVX2 hardware.
- /// Hardware doesn't support AVX2 instruction set.
[Pure]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
@@ -224,7 +248,15 @@ public static class IntrinsicUtility
return Avx2.Add(high, ac);
}
- throw new PlatformNotSupportedException("Unsupported SIMD platform.");
+        var output = GetUninitializedVector256<ulong>();
+
+        for (int i = 0; i < Vector256<ulong>.Count; i++)
+        {
+            Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref output), i) =
+                Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref lhs), i) * Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref rhs), i);
+        }
+
+        return output;
}
///
@@ -238,8 +270,6 @@ public static class IntrinsicUtility
/// Left vector.
/// Right vector.
/// A of whose elements is 64-bit truncated product of lhs and rhs.
- /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM NEON (untested) hardwares.
- /// Hardware doesn't support SSE2 or ARM NEON instruction set.
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static Vector128 Multiply(Vector128 lhs, Vector128 rhs)
@@ -260,8 +290,6 @@ public static class IntrinsicUtility
/// Left vector.
/// Right vector.
/// A of whose elements is 64-bit truncated product of lhs and rhs.
- /// API avaliable on AVX2 hardware.
- /// Hardware doesn't support AVX2 instruction set.
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static Vector256 Multiply(Vector256 lhs, Vector256 rhs)
@@ -282,8 +310,6 @@ public static class IntrinsicUtility
/// Left vector.
/// Right vector.
/// A of with all elements is result of OR operation on adjacent pairs of elements in lhs and rhs.
- /// API avaliable on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares.
- /// Hardware doesn't support ARM64 NEON or SSE instruction set.
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static Vector128 HorizontalOr(Vector128 lhs, Vector128 rhs)
@@ -304,7 +330,21 @@ public static class IntrinsicUtility
return AdvSimd.Or(s1, s2);
}
- throw new PlatformNotSupportedException("Unsupported SIMD platform.");
+        Vector128<uint> output = GetUninitializedVector128<uint>();
+
+        Unsafe.As<Vector128<uint>, uint>(ref output) =
+            Unsafe.As<Vector128<uint>, uint>(ref lhs) | Unsafe.Add(ref Unsafe.As<Vector128<uint>, uint>(ref lhs), 1);
+
+        Unsafe.Add(ref Unsafe.As<Vector128<uint>, uint>(ref output), 1) =
+            Unsafe.Add(ref Unsafe.As<Vector128<uint>, uint>(ref lhs), 2) | Unsafe.Add(ref Unsafe.As<Vector128<uint>, uint>(ref lhs), 3);
+
+        Unsafe.Add(ref Unsafe.As<Vector128<uint>, uint>(ref output), 2) =
+            Unsafe.As<Vector128<uint>, uint>(ref rhs) | Unsafe.Add(ref Unsafe.As<Vector128<uint>, uint>(ref rhs), 1);
+
+        Unsafe.Add(ref Unsafe.As<Vector128<uint>, uint>(ref output), 3) =
+            Unsafe.Add(ref Unsafe.As<Vector128<uint>, uint>(ref rhs), 2) | Unsafe.Add(ref Unsafe.As<Vector128<uint>, uint>(ref rhs), 3);
+
+        return output;
}
///
@@ -374,10 +414,47 @@ public static class IntrinsicUtility
{
return Sse2.Shuffle(vector.AsDouble(), vector.AsDouble(), 0b01).AsUInt64();
}
-
- // No idea how to implement this in ARM NEON (Reason: Unavailable hardware)
- throw new PlatformNotSupportedException("Unsupported SIMD platform.");
+        Vector128<ulong> output = GetUninitializedVector128<ulong>();
+
+        Unsafe.As<Vector128<ulong>, ulong>(ref output) = Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref vector), 1);
+        Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref output), 1) = Unsafe.As<Vector128<ulong>, ulong>(ref vector);
+
+        return output;
+    }
+
+    // Helper methods
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    private static Vector64<T> GetUninitializedVector64<T>() where T : struct
+    {
+#if NET6_0_OR_GREATER
+        Unsafe.SkipInit(out Vector64<T> output);
+        return output;
+#else
+        return default;
+#endif
+    }
+
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    private static Vector128<T> GetUninitializedVector128<T>() where T : struct
+    {
+#if NET6_0_OR_GREATER
+        Unsafe.SkipInit(out Vector128<T> output);
+        return output;
+#else
+        return default;
+#endif
+    }
+
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    private static Vector256<T> GetUninitializedVector256<T>() where T : struct
+    {
+#if NET6_0_OR_GREATER
+        Unsafe.SkipInit(out Vector256<T> output);
+        return output;
+#else
+        return default;
+#endif
     }
}
diff --git a/X10D/src/Core/SpanExtensions.cs b/X10D/src/Core/SpanExtensions.cs
index 52da227..68bd05a 100644
--- a/X10D/src/Core/SpanExtensions.cs
+++ b/X10D/src/Core/SpanExtensions.cs
@@ -1,7 +1,6 @@
using System.Diagnostics.Contracts;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
-using System.Numerics;
#if NETCOREAPP3_0_OR_GREATER
using X10D.Core;
@@ -112,13 +111,13 @@ public static class SpanExtensions
default:
#if NET7_0_OR_GREATER
                 throw new UnreachableException($"Enum with the size of {Unsafe.SizeOf<T>()} bytes is unexpected.");
-#else // NET7_0_OR_GREATER
+#else
                 throw new ArgumentException($"Enum with the size of {Unsafe.SizeOf<T>()} bytes is unexpected.");
-#endif // NET7_0_OR_GREATER
+#endif
}
#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
}
-#else // NET6_0_OR_GREATER
+#else
foreach (var it in span)
{
             if (EqualityComparer<T>.Default.Equals(it, value))
@@ -128,7 +127,7 @@ public static class SpanExtensions
}
return false;
-#endif // NET6_0_OR_GREATER
+#endif
}
///