mirror of
https://github.com/oliverbooth/X10D
synced 2024-11-10 02:45:41 +00:00
Fix source validator's code reports, remove 95% of AdvSimd implementation to prevent future consequences
This commit is contained in:
parent
ec8e60c6dc
commit
8b8aeb3f56
@ -26,7 +26,7 @@ public static class BoolListExtensions
|
|||||||
throw new ArgumentNullException(nameof(source));
|
throw new ArgumentNullException(nameof(source));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (source.Count > 8)
|
if (source.Count > 8)
|
||||||
{
|
{
|
||||||
throw new ArgumentException("Source cannot contain more than than 8 elements.", nameof(source));
|
throw new ArgumentException("Source cannot contain more than than 8 elements.", nameof(source));
|
||||||
|
@ -47,7 +47,7 @@ public static class ByteExtensions
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
FallbackImplementation(value, destination);
|
FallbackImplementation(value, destination);
|
||||||
|
|
||||||
#if NETCOREAPP3_0_OR_GREATER
|
#if NETCOREAPP3_0_OR_GREATER
|
||||||
|
@ -42,7 +42,7 @@ public static class Int32Extensions
|
|||||||
|
|
||||||
#if NETCOREAPP3_0_OR_GREATER
|
#if NETCOREAPP3_0_OR_GREATER
|
||||||
// TODO: AdvSimd support.
|
// TODO: AdvSimd support.
|
||||||
|
|
||||||
// https://stackoverflow.com/questions/24225786/fastest-way-to-unpack-32-bits-to-a-32-byte-simd-vector
|
// https://stackoverflow.com/questions/24225786/fastest-way-to-unpack-32-bits-to-a-32-byte-simd-vector
|
||||||
if (Avx2.IsSupported)
|
if (Avx2.IsSupported)
|
||||||
{
|
{
|
||||||
@ -64,15 +64,15 @@ public static class Int32Extensions
|
|||||||
fixed (bool* pDestination = destination)
|
fixed (bool* pDestination = destination)
|
||||||
{
|
{
|
||||||
var mask1 = Vector256.Create(
|
var mask1 = Vector256.Create(
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
|
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
|
||||||
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03
|
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03
|
||||||
).AsByte();
|
).AsByte();
|
||||||
var mask2 = Vector256.Create(
|
var mask2 = Vector256.Create(
|
||||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
|
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
|
||||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
|
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
|
||||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
|
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
|
||||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
|
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -81,7 +81,7 @@ public static class Int32Extensions
|
|||||||
var and = Avx2.AndNot(shuffle, mask2);
|
var and = Avx2.AndNot(shuffle, mask2);
|
||||||
var cmp = Avx2.CompareEqual(and, Vector256<byte>.Zero);
|
var cmp = Avx2.CompareEqual(and, Vector256<byte>.Zero);
|
||||||
var correctness = Avx2.And(cmp, Vector256.Create((byte)0x01));
|
var correctness = Avx2.And(cmp, Vector256.Create((byte)0x01));
|
||||||
|
|
||||||
Avx.Store((byte*)pDestination, correctness);
|
Avx.Store((byte*)pDestination, correctness);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -103,9 +103,9 @@ public static class Int32Extensions
|
|||||||
var and = Sse2.AndNot(shuffle, mask2);
|
var and = Sse2.AndNot(shuffle, mask2);
|
||||||
var cmp = Sse2.CompareEqual(and, Vector128<byte>.Zero);
|
var cmp = Sse2.CompareEqual(and, Vector128<byte>.Zero);
|
||||||
var correctness = Sse2.And(cmp, one);
|
var correctness = Sse2.And(cmp, one);
|
||||||
|
|
||||||
Sse2.Store((byte*)pDestination, correctness);
|
Sse2.Store((byte*)pDestination, correctness);
|
||||||
|
|
||||||
shuffle = Ssse3.Shuffle(vec, mask1Hi);
|
shuffle = Ssse3.Shuffle(vec, mask1Hi);
|
||||||
and = Sse2.AndNot(shuffle, mask2);
|
and = Sse2.AndNot(shuffle, mask2);
|
||||||
cmp = Sse2.CompareEqual(and, Vector128<byte>.Zero);
|
cmp = Sse2.CompareEqual(and, Vector128<byte>.Zero);
|
||||||
|
@ -5,7 +5,8 @@ using System.Runtime.Intrinsics;
|
|||||||
namespace X10D.Core;
|
namespace X10D.Core;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Extension methods for SIMD vectors, namely <see cref="Vector64{T}"/>, <see cref="Vector128{T}"/> and <see cref="Vector256{T}"/>.
|
/// Extension methods for SIMD vectors, namely <see cref="Vector64{T}"/>, <see cref="Vector128{T}"/> and
|
||||||
|
/// <see cref="Vector256{T}"/>.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public static class IntrinsicExtensions
|
public static class IntrinsicExtensions
|
||||||
{
|
{
|
||||||
|
@ -18,7 +18,9 @@ public static class IntrinsicUtility
|
|||||||
// FOR API CONSISTENCY.
|
// FOR API CONSISTENCY.
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// <br>Correcting <see cref="Vector64{T}"/> of <see langword="byte"/> into 0 and 1 depend on their boolean truthiness.</br>
|
/// <br>
|
||||||
|
/// Correcting <see cref="Vector64{T}"/> of <see langword="byte"/> into 0 and 1 depend on their boolean truthiness.
|
||||||
|
/// </br>
|
||||||
/// <br>Operation (raw):</br>
|
/// <br>Operation (raw):</br>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// for (int i = 0; i < 8; i++) {
|
/// for (int i = 0; i < 8; i++) {
|
||||||
@ -33,19 +35,15 @@ public static class IntrinsicUtility
|
|||||||
/// </code>
|
/// </code>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="vector">Vector of byte to correct.</param>
|
/// <param name="vector">Vector of byte to correct.</param>
|
||||||
/// <returns>A <see cref="Vector64{T}"/> of <see langword="byte"/> which remapped back to 0 and 1 based on boolean truthiness.</returns>
|
/// <returns>
|
||||||
|
/// A <see cref="Vector64{T}"/> of <see langword="byte"/> which remapped back to 0 and 1 based on boolean truthiness.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
public static Vector64<byte> CorrectBoolean(Vector64<byte> vector)
|
public static Vector64<byte> CorrectBoolean(Vector64<byte> vector)
|
||||||
{
|
{
|
||||||
if (AdvSimd.IsSupported)
|
// TODO: AdvSimd implementation.
|
||||||
{
|
// TODO: WasmSimd implementation. (?)
|
||||||
// Haven't tested since March 6th 2023 (Reason: Unavailable hardware).
|
|
||||||
var cmp = AdvSimd.CompareEqual(vector, Vector64<byte>.Zero);
|
|
||||||
var result = AdvSimd.BitwiseSelect(cmp, vector, Vector64<byte>.Zero);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
var output = GetUninitializedVector64<byte>();
|
var output = GetUninitializedVector64<byte>();
|
||||||
|
|
||||||
@ -64,7 +62,9 @@ public static class IntrinsicUtility
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// <br>Correcting <see cref="Vector128{T}"/> of <see langword="byte"/> into 0 and 1 depend on their boolean truthiness.</br>
|
/// <br>
|
||||||
|
/// Correcting <see cref="Vector128{T}"/> of <see langword="byte"/> into 0 and 1 depend on their boolean truthiness.
|
||||||
|
/// </br>
|
||||||
/// <br>Operation (raw):</br>
|
/// <br>Operation (raw):</br>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// for (int i = 0; i < 16; i++) {
|
/// for (int i = 0; i < 16; i++) {
|
||||||
@ -79,7 +79,9 @@ public static class IntrinsicUtility
|
|||||||
/// </code>
|
/// </code>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="vector">Vector of byte to correct.</param>
|
/// <param name="vector">Vector of byte to correct.</param>
|
||||||
/// <returns>A <see cref="Vector128{T}"/> of <see langword="byte"/> which remapped back to 0 and 1 based on boolean truthiness.</returns>
|
/// <returns>
|
||||||
|
/// A <see cref="Vector128{T}"/> of <see langword="byte"/> which remapped back to 0 and 1 based on boolean truthiness.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
public static Vector128<byte> CorrectBoolean(Vector128<byte> vector)
|
public static Vector128<byte> CorrectBoolean(Vector128<byte> vector)
|
||||||
@ -91,33 +93,25 @@ public static class IntrinsicUtility
|
|||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
if (AdvSimd.IsSupported)
|
|
||||||
{
|
|
||||||
// Haven't tested since March 6th 2023 (Reason: Unavailable hardware).
|
|
||||||
var cmp = AdvSimd.CompareEqual(vector, Vector128<byte>.Zero);
|
|
||||||
var result = AdvSimd.BitwiseSelect(cmp, vector, Vector128<byte>.Zero);
|
|
||||||
|
|
||||||
return result;
|
// TODO: AdvSimd implementation.
|
||||||
}
|
// TODO: WasmSimd implementation.
|
||||||
|
|
||||||
var output = GetUninitializedVector128<byte>();
|
var output = GetUninitializedVector128<byte>();
|
||||||
|
|
||||||
for (int i = 0; i < Vector128<byte>.Count; i++)
|
for (int i = 0; i < Vector128<byte>.Count; i++)
|
||||||
{
|
{
|
||||||
ref var writeElement = ref Unsafe.Add(ref Unsafe.As<Vector128<byte>, byte>(ref output), i);
|
Unsafe.Add(ref Unsafe.As<Vector128<byte>, byte>(ref output), i) =
|
||||||
#if NET7_0_OR_GREATER
|
Unsafe.Add(ref Unsafe.As<Vector128<byte>, byte>(ref vector), i) == 0 ? (byte)0 : (byte)1;
|
||||||
writeElement = vector[i] == 0 ? (byte)0 : (byte)1;
|
|
||||||
#else
|
|
||||||
var element = Unsafe.Add(ref Unsafe.As<Vector128<byte>, byte>(ref vector), i);
|
|
||||||
writeElement = element == 0 ? (byte)0 : (byte)1;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// <br>Correcting <see cref="Vector256{T}"/> of <see langword="byte"/> into 0 and 1 depend on their boolean truthiness.</br>
|
/// <br>
|
||||||
|
/// Correcting <see cref="Vector256{T}"/> of <see langword="byte"/> into 0 and 1 depend on their boolean truthiness.
|
||||||
|
/// </br>
|
||||||
/// <br>Operation (raw):</br>
|
/// <br>Operation (raw):</br>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// for (int i = 0; i < 16; i++) {
|
/// for (int i = 0; i < 16; i++) {
|
||||||
@ -132,7 +126,9 @@ public static class IntrinsicUtility
|
|||||||
/// </code>
|
/// </code>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="vector">Vector of byte to correct.</param>
|
/// <param name="vector">Vector of byte to correct.</param>
|
||||||
/// <returns>A <see cref="Vector256{T}"/> of <see langword="byte"/> which remapped back to 0 and 1 based on boolean truthiness.</returns>
|
/// <returns>
|
||||||
|
/// A <see cref="Vector256{T}"/> of <see langword="byte"/> which remapped back to 0 and 1 based on boolean truthiness.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
public static Vector256<byte> CorrectBoolean(Vector256<byte> vector)
|
public static Vector256<byte> CorrectBoolean(Vector256<byte> vector)
|
||||||
@ -149,20 +145,17 @@ public static class IntrinsicUtility
|
|||||||
|
|
||||||
for (int i = 0; i < Vector256<byte>.Count; i++)
|
for (int i = 0; i < Vector256<byte>.Count; i++)
|
||||||
{
|
{
|
||||||
ref var writeElement = ref Unsafe.Add(ref Unsafe.As<Vector256<byte>, byte>(ref output), i);
|
Unsafe.Add(ref Unsafe.As<Vector256<byte>, byte>(ref output), i) =
|
||||||
#if NET7_0_OR_GREATER
|
Unsafe.Add(ref Unsafe.As<Vector256<byte>, byte>(ref vector), i) == 0 ? (byte)0 : (byte)1;
|
||||||
writeElement = vector[i] == 0 ? (byte)0 : (byte)1;
|
|
||||||
#else
|
|
||||||
var element = Unsafe.Add(ref Unsafe.As<Vector256<byte>, byte>(ref vector), i);
|
|
||||||
writeElement = element == 0 ? (byte)0 : (byte)1;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// <br>Multiply packed 64-bit unsigned integer elements in a and b and truncate the results to 64-bit integer.</br>
|
/// <br>
|
||||||
|
/// Multiply packed 64-bit unsigned integer elements in a and b and truncate the results to 64-bit integer.
|
||||||
|
/// </br>
|
||||||
/// <br>Operation:</br>
|
/// <br>Operation:</br>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] * rhs[0];
|
/// dest[0] = lhs[0] * rhs[0];
|
||||||
@ -171,7 +164,9 @@ public static class IntrinsicUtility
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="lhs">Left vector.</param>
|
||||||
/// <param name="rhs">Right vector.</param>
|
/// <param name="rhs">Right vector.</param>
|
||||||
/// <returns>A <see cref="Vector128{T}"/> of <see langword="ulong"/> whose elements is 64-bit truncated product of lhs and rhs.</returns>
|
/// <returns>
|
||||||
|
/// A <see cref="Vector128{T}"/> of <see langword="ulong"/> whose elements is 64-bit truncated product of lhs and rhs.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[CLSCompliant(false)]
|
[CLSCompliant(false)]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
@ -191,32 +186,26 @@ public static class IntrinsicUtility
|
|||||||
|
|
||||||
return Sse2.Add(high, ac);
|
return Sse2.Add(high, ac);
|
||||||
}
|
}
|
||||||
if (AdvSimd.IsSupported)
|
|
||||||
{
|
|
||||||
// https://stackoverflow.com/questions/60236627/facing-problem-in-implementing-multiplication-of-64-bit-variables-using-arm-neon
|
|
||||||
|
|
||||||
// Hasn't been tested since March 7th 2023 (Reason: Unavailable hardware)
|
// TODO: AdvSimd implementation.
|
||||||
var a = AdvSimd.ExtractNarrowingLower(lhs);
|
// TODO: WasmSimd implementation.
|
||||||
var b = AdvSimd.ExtractNarrowingLower(rhs);
|
|
||||||
|
|
||||||
var mul = AdvSimd.Multiply(rhs.AsUInt32(), AdvSimd.ReverseElement32(lhs).AsUInt32());
|
|
||||||
|
|
||||||
return AdvSimd.MultiplyWideningLowerAndAdd(AdvSimd.ShiftLeftLogical(mul.AsUInt64(), 32), a, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
var output = GetUninitializedVector128<ulong>();
|
var output = GetUninitializedVector128<ulong>();
|
||||||
|
|
||||||
Unsafe.As<Vector128<ulong>, ulong>(ref output) =
|
Unsafe.As<Vector128<ulong>, ulong>(ref output) =
|
||||||
Unsafe.As<Vector128<ulong>, ulong>(ref lhs) * Unsafe.As<Vector128<ulong>, ulong>(ref rhs);
|
Unsafe.As<Vector128<ulong>, ulong>(ref lhs) * Unsafe.As<Vector128<ulong>, ulong>(ref rhs);
|
||||||
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref output), 1) =
|
Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref output), 1) =
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref lhs), 1) * Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref rhs), 1);
|
Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref lhs), 1) *
|
||||||
|
Unsafe.Add(ref Unsafe.As<Vector128<ulong>, ulong>(ref rhs), 1);
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// <br>Multiply packed 64-bit unsigned integer elements in a and b and truncate the results to 64-bit integer.</br>
|
/// <br>
|
||||||
|
/// Multiply packed 64-bit unsigned integer elements in a and b and truncate the results to 64-bit integer.
|
||||||
|
/// </br>
|
||||||
/// <br>Operation:</br>
|
/// <br>Operation:</br>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] * rhs[0];
|
/// dest[0] = lhs[0] * rhs[0];
|
||||||
@ -227,7 +216,9 @@ public static class IntrinsicUtility
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="lhs">Left vector.</param>
|
||||||
/// <param name="rhs">Right vector.</param>
|
/// <param name="rhs">Right vector.</param>
|
||||||
/// <returns>A <see cref="Vector256{T}"/> of <see langword="ulong"/> whose elements is 64-bit truncated product of lhs and rhs.</returns>
|
/// <returns>
|
||||||
|
/// A <see cref="Vector256{T}"/> of <see langword="ulong"/> whose elements is 64-bit truncated product of lhs and rhs.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[CLSCompliant(false)]
|
[CLSCompliant(false)]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
@ -253,14 +244,17 @@ public static class IntrinsicUtility
|
|||||||
for (int i = 0; i < Vector256<ulong>.Count; i++)
|
for (int i = 0; i < Vector256<ulong>.Count; i++)
|
||||||
{
|
{
|
||||||
Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref output), i) =
|
Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref output), i) =
|
||||||
Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref lhs), i) * Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref rhs), i);
|
Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref lhs), i) *
|
||||||
|
Unsafe.Add(ref Unsafe.As<Vector256<ulong>, ulong>(ref rhs), i);
|
||||||
}
|
}
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// <br>Multiply packed 64-bit signed integer elements in a and b and truncate the results to 64-bit integer.</br>
|
/// <br>
|
||||||
|
/// Multiply packed 64-bit signed integer elements in a and b and truncate the results to 64-bit integer.
|
||||||
|
/// </br>
|
||||||
/// <br>Operation:</br>
|
/// <br>Operation:</br>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] * rhs[0];
|
/// dest[0] = lhs[0] * rhs[0];
|
||||||
@ -269,7 +263,9 @@ public static class IntrinsicUtility
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="lhs">Left vector.</param>
|
||||||
/// <param name="rhs">Right vector.</param>
|
/// <param name="rhs">Right vector.</param>
|
||||||
/// <returns>A <see cref="Vector128{T}"/> of <see langword="long"/> whose elements is 64-bit truncated product of lhs and rhs.</returns>
|
/// <returns>
|
||||||
|
/// A <see cref="Vector128{T}"/> of <see langword="long"/> whose elements is 64-bit truncated product of lhs and rhs.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
public static Vector128<long> Multiply(Vector128<long> lhs, Vector128<long> rhs)
|
public static Vector128<long> Multiply(Vector128<long> lhs, Vector128<long> rhs)
|
||||||
@ -278,7 +274,9 @@ public static class IntrinsicUtility
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// <br>Multiply packed 64-bit signed integer elements in a and b and truncate the results to 64-bit integer.</br>
|
/// <br>
|
||||||
|
/// Multiply packed 64-bit signed integer elements in a and b and truncate the results to 64-bit integer.
|
||||||
|
/// </br>
|
||||||
/// <br>Operation:</br>
|
/// <br>Operation:</br>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] * rhs[0];
|
/// dest[0] = lhs[0] * rhs[0];
|
||||||
@ -289,7 +287,9 @@ public static class IntrinsicUtility
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="lhs">Left vector.</param>
|
||||||
/// <param name="rhs">Right vector.</param>
|
/// <param name="rhs">Right vector.</param>
|
||||||
/// <returns>A <see cref="Vector256{T}"/> of <see langword="ulong"/> whose elements is 64-bit truncated product of lhs and rhs.</returns>
|
/// <returns>
|
||||||
|
/// A <see cref="Vector256{T}"/> of <see langword="ulong"/> whose elements is 64-bit truncated product of lhs and rhs.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
public static Vector256<long> Multiply(Vector256<long> lhs, Vector256<long> rhs)
|
public static Vector256<long> Multiply(Vector256<long> lhs, Vector256<long> rhs)
|
||||||
@ -298,7 +298,10 @@ public static class IntrinsicUtility
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// <br>Horizontally apply OR operation on adjacent pairs of single-precision (32-bit) floating-point elements in lhs and rhs.</br>
|
/// <br>
|
||||||
|
/// Horizontally apply OR operation on adjacent pairs of single-precision (32-bit) floating-point elements in lhs and
|
||||||
|
/// rhs.
|
||||||
|
/// </br>
|
||||||
/// <br>Operation:</br>
|
/// <br>Operation:</br>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] | lhs[1];
|
/// dest[0] = lhs[0] | lhs[1];
|
||||||
@ -309,7 +312,10 @@ public static class IntrinsicUtility
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="lhs">Left vector.</param>
|
||||||
/// <param name="rhs">Right vector.</param>
|
/// <param name="rhs">Right vector.</param>
|
||||||
/// <returns>A <see cref="Vector128{T}"/> of <see langword="float"/> with all elements is result of OR operation on adjacent pairs of elements in lhs and rhs.</returns>
|
/// <returns>
|
||||||
|
/// A <see cref="Vector128{T}"/> of <see langword="float"/> with all elements is result of OR operation on adjacent pairs of
|
||||||
|
/// elements in lhs and rhs.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
public static Vector128<float> HorizontalOr(Vector128<float> lhs, Vector128<float> rhs)
|
public static Vector128<float> HorizontalOr(Vector128<float> lhs, Vector128<float> rhs)
|
||||||
@ -321,34 +327,35 @@ public static class IntrinsicUtility
|
|||||||
|
|
||||||
return Sse.Or(s1, s2);
|
return Sse.Or(s1, s2);
|
||||||
}
|
}
|
||||||
if (AdvSimd.Arm64.IsSupported)
|
|
||||||
{
|
|
||||||
// Hasn't been tested since March 7th 2023 (Reason: Unavailable hardware).
|
|
||||||
var s1 = AdvSimd.Arm64.UnzipEven(lhs, rhs);
|
|
||||||
var s2 = AdvSimd.Arm64.UnzipOdd(lhs, rhs);
|
|
||||||
|
|
||||||
return AdvSimd.Or(s1, s2);
|
// TODO: AdvSimd implementation.
|
||||||
}
|
// TODO: WasmSimd implementation. (?)
|
||||||
|
|
||||||
Vector128<float> output = GetUninitializedVector128<float>();
|
Vector128<float> output = GetUninitializedVector128<float>();
|
||||||
|
|
||||||
Unsafe.As<Vector128<float>, uint>(ref output) =
|
Unsafe.As<Vector128<float>, uint>(ref output) =
|
||||||
Unsafe.As<Vector128<float>, uint>(ref lhs) | Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref lhs), 1);
|
Unsafe.As<Vector128<float>, uint>(ref lhs) |
|
||||||
|
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref lhs), 1);
|
||||||
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref output), 1) =
|
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref output), 1) =
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref lhs), 2) | Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref lhs), 3);
|
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref lhs), 2) |
|
||||||
|
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref lhs), 3);
|
||||||
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref output), 2) =
|
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref output), 2) =
|
||||||
Unsafe.As<Vector128<float>, uint>(ref rhs) | Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref rhs), 1);
|
Unsafe.As<Vector128<float>, uint>(ref rhs) |
|
||||||
|
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref rhs), 1);
|
||||||
|
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref output), 3) =
|
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref output), 3) =
|
||||||
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref rhs), 2) | Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref rhs), 3);
|
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref rhs), 2) |
|
||||||
|
Unsafe.Add(ref Unsafe.As<Vector128<float>, uint>(ref rhs), 3);
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// <br>Horizontally apply OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs.</br>
|
/// <br>
|
||||||
|
/// Horizontally apply OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs.
|
||||||
|
/// </br>
|
||||||
/// <br>Operation:</br>
|
/// <br>Operation:</br>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] | lhs[1];
|
/// dest[0] = lhs[0] | lhs[1];
|
||||||
@ -359,9 +366,10 @@ public static class IntrinsicUtility
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="lhs">Left vector.</param>
|
||||||
/// <param name="rhs">Right vector.</param>
|
/// <param name="rhs">Right vector.</param>
|
||||||
/// <returns>A <see cref="Vector128{T}"/> of <see langword="int"/> with all elements is result of OR operation on adjacent pairs of elements in lhs and rhs.</returns>
|
/// <returns>
|
||||||
/// <remarks>API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares.</remarks>
|
/// A <see cref="Vector128{T}"/> of <see langword="int"/> with all elements is result of OR operation on adjacent pairs of
|
||||||
/// <exception cref="PlatformNotSupportedException">Hardware doesn't support ARM64 NEON or SSE instruction set.</exception>
|
/// elements in lhs and rhs.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
public static Vector128<int> HorizontalOr(Vector128<int> lhs, Vector128<int> rhs)
|
public static Vector128<int> HorizontalOr(Vector128<int> lhs, Vector128<int> rhs)
|
||||||
@ -370,7 +378,9 @@ public static class IntrinsicUtility
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// <br>Horizontally apply OR operation on adjacent pairs of 32-bit unsigned integer elements in lhs and rhs.</br>
|
/// <br>
|
||||||
|
/// Horizontally apply OR operation on adjacent pairs of 32-bit unsigned integer elements in lhs and rhs.
|
||||||
|
/// </br>
|
||||||
/// <br>Operation:</br>
|
/// <br>Operation:</br>
|
||||||
/// <code>
|
/// <code>
|
||||||
/// dest[0] = lhs[0] | lhs[1];
|
/// dest[0] = lhs[0] | lhs[1];
|
||||||
@ -381,9 +391,10 @@ public static class IntrinsicUtility
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhs">Left vector.</param>
|
/// <param name="lhs">Left vector.</param>
|
||||||
/// <param name="rhs">Right vector.</param>
|
/// <param name="rhs">Right vector.</param>
|
||||||
/// <returns>A <see cref="Vector128{T}"/> of <see langword="uint"/> with all elements is result of OR operation on adjacent pairs of elements in lhs and rhs.</returns>
|
/// <returns>
|
||||||
/// <remarks>API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares.</remarks>
|
/// A <see cref="Vector128{T}"/> of <see langword="uint"/> with all elements is result of OR operation on adjacent pairs of
|
||||||
/// <exception cref="PlatformNotSupportedException">Hardware doesn't support ARM64 NEON or SSE2 instruction set.</exception>
|
/// elements in lhs and rhs.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
[CLSCompliant(false)]
|
[CLSCompliant(false)]
|
||||||
@ -402,9 +413,10 @@ public static class IntrinsicUtility
|
|||||||
/// </code>
|
/// </code>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="vector">Input vector.</param>
|
/// <param name="vector">Input vector.</param>
|
||||||
/// <returns>A <see cref="Vector128{T}"/> of <see langword="ulong"/> with elements the same as input vector except their positions/indices are reversed.</returns>
|
/// <returns>
|
||||||
/// <remarks>API available on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 hardwares.</remarks>
|
/// A <see cref="Vector128{T}"/> of <see langword="ulong"/> with elements the same as input vector except their positions
|
||||||
/// <exception cref="PlatformNotSupportedException">Hardware doesn't support SSE2 instruction set.</exception>
|
/// (or indices) are reversed.
|
||||||
|
/// </returns>
|
||||||
[Pure]
|
[Pure]
|
||||||
[CLSCompliant(false)]
|
[CLSCompliant(false)]
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
|
@ -72,13 +72,14 @@ public static class SpanExtensions
|
|||||||
public static bool Contains<T>(this ReadOnlySpan<T> span, T value) where T : struct, Enum
|
public static bool Contains<T>(this ReadOnlySpan<T> span, T value) where T : struct, Enum
|
||||||
{
|
{
|
||||||
#if NET6_0_OR_GREATER
|
#if NET6_0_OR_GREATER
|
||||||
// Use MemoryMarshal.CreateSpan instead of using creating new Span instance from pointer will trim down a lot of instructions
|
// Use MemoryMarshal.CreateSpan instead of using creating new Span instance from pointer will trim down a lot of
|
||||||
// on Release mode.
|
// instructions on Release mode.
|
||||||
// https://sharplab.io/#v2:EYLgxg9gTgpgtADwGwBYA0AXEBDAzgWwB8ABABgAJiBGAOgCUBXAOwwEt8YaBJFmKCAA4BlPgDdWYGLgDcAWABQZSrUYt2nAMIR8A1gBs+IqOMkyFxAExVzFIQAtsUAQBlsweszYc588wGZyGCYGfHIAFSkMAFFg0JByVhZyAG8FcnTyAEE0cgAhHI0cgBE0BQBfBX9KC3INFLSMgG0AKVYMAHEgvgkACgwATwEYCAAzHojcaNiASmmAXQb0xoBZGAw7CAATLh09HtX1rZ2BPQB5ATYIJlwaTIBzO9hcXFZRGB49RMS78kJyA4221250u11uDyeLzeIPYrAAXthQfNFpQAtQkORmLhsCMYORgBAIHp/mtAVQADxhAB8PSEAmwTEpVPIuHpTByYXIomwegYMGm5AA7nY+HjOfEYiF6vIMrLyLARgkkkEQrhyABeeUwRUAVWuOM4mVwlJyiQwNIVJPw0H6y0cuAcehonQwdG1oqYkh6rIZsx8coyxAA7FabXaoA6eTQNLBETA6QyepaVfhcDkfUwaM4gnd1tNo1cMNhErgenrsbjbsawqaWBbtVyeXy/SiKjKMiiWm1OkxumA+oNhmMJlMQrMFu2lgCjrt9qSZycYVcbvdHlIoe8mJ8mN9fiTDkDFxdWMvwWvnq8YDD8PDESemMjJ6jlBisQb8YTidPNhYmbS2UyLJshyja8vyQoirA4TkBKsTSgG6TBuQvaCuQCaMmaNLlgaVYAAoQGafBJg2qzWlAtr2o6zprG6uKwJ6MDemyszpmyWY5nmBYsMW1xlvqlZGiaSrmsRircmBLZPm2ZRAA===
|
|
||||||
|
|
||||||
// Also use reference instead of MemoryMarshal.Cast to remove boundary check (or something, it just result in something like that).
|
// Also use reference instead of MemoryMarshal.Cast to remove boundary check (or something, it just result in something
|
||||||
|
// like that).
|
||||||
|
|
||||||
// TODO: Figure out some kind of way to directly pass the Span directly into Contains call, which make method smaller and more prone to inlining...
|
// TODO: Figure out some kind of way to directly pass the Span directly into Contains call, which make method smaller and
|
||||||
|
// more prone to inlining...
|
||||||
unsafe
|
unsafe
|
||||||
{
|
{
|
||||||
#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
|
#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
|
||||||
@ -176,6 +177,10 @@ public static class SpanExtensions
|
|||||||
|
|
||||||
return unchecked((byte)(IntegerPackingMagic * correct.AsUInt64().GetElement(0) >> 56));
|
return unchecked((byte)(IntegerPackingMagic * correct.AsUInt64().GetElement(0) >> 56));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Probably should remove this piece of code because it is untested, but I see no reason why it should fail
|
||||||
|
// unless vld1_u8 reverse positions of 8 bytes for some reason.
|
||||||
|
|
||||||
if (AdvSimd.IsSupported)
|
if (AdvSimd.IsSupported)
|
||||||
{
|
{
|
||||||
// Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware).
|
// Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware).
|
||||||
@ -240,12 +245,12 @@ public static class SpanExtensions
|
|||||||
goto default;
|
goto default;
|
||||||
}
|
}
|
||||||
|
|
||||||
fixed (bool* pSource = source)
|
// TODO: AdvSimd implementation.
|
||||||
{
|
// TODO: WasmSimd implementation.
|
||||||
// TODO: .NET 8.0 Wasm support.
|
|
||||||
// TODO: Implement a replacement for UInt64 vector multiplication (there are no instruction for this built-in).
|
|
||||||
|
|
||||||
if (Sse2.IsSupported)
|
if (Sse2.IsSupported)
|
||||||
|
{
|
||||||
|
fixed (bool* pSource = source)
|
||||||
{
|
{
|
||||||
var load = Sse2.LoadVector128((byte*)pSource);
|
var load = Sse2.LoadVector128((byte*)pSource);
|
||||||
var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64();
|
var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64();
|
||||||
@ -254,21 +259,9 @@ public static class SpanExtensions
|
|||||||
|
|
||||||
return (short)(shift.GetElement(0) | (shift.GetElement(1) << 8));
|
return (short)(shift.GetElement(0) | (shift.GetElement(1) << 8));
|
||||||
}
|
}
|
||||||
if (AdvSimd.IsSupported)
|
|
||||||
{
|
|
||||||
// Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware).
|
|
||||||
var load = AdvSimd.LoadVector128((byte*)pSource);
|
|
||||||
var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64();
|
|
||||||
var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct);
|
|
||||||
var shift = AdvSimd.ShiftRightLogical(multiply, 56);
|
|
||||||
|
|
||||||
return (short)(shift.GetElement(0) | (shift.GetElement(1) << 8));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
goto default;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
goto default;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -324,9 +317,6 @@ public static class SpanExtensions
|
|||||||
|
|
||||||
fixed (bool* pSource = source)
|
fixed (bool* pSource = source)
|
||||||
{
|
{
|
||||||
// TODO: .NET 8.0 Wasm support.
|
|
||||||
// TODO: Implement a replacement for UInt64 vector multiplication (there are no instruction for this built-in).
|
|
||||||
|
|
||||||
if (Avx2.IsSupported)
|
if (Avx2.IsSupported)
|
||||||
{
|
{
|
||||||
var load = Avx.LoadVector256((byte*)pSource);
|
var load = Avx.LoadVector256((byte*)pSource);
|
||||||
|
Loading…
Reference in New Issue
Block a user