mirror of
https://github.com/oliverbooth/X10D
synced 2024-11-23 00:38:47 +00:00
Optimize Rune.Repeat(int) when UTF8 sequence length is 1 or 2 and reformat some intrinsic code
This commit is contained in:
parent
e176f65e97
commit
b251f880ff
@ -1,15 +1,6 @@
|
||||
#if NETCOREAPP3_0_OR_GREATER
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
using System.Runtime.Intrinsics.Arm;
|
||||
using System.Diagnostics.Contracts;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Numerics;
|
||||
|
||||
namespace X10D.Core;
|
||||
|
||||
@ -18,86 +9,6 @@ namespace X10D.Core;
|
||||
/// </summary>
|
||||
public static class IntrinsicExtensions
{
    /// <summary>
    /// Corrects a <see cref="Vector64{T}"/> of <see langword="byte"/> into standard boolean values.
    /// </summary>
    /// <param name="vector">Vector of bytes to correct.</param>
    /// <returns>A <see cref="Vector64{T}"/> of bytes in which every element is either 0 or 1.</returns>
    /// <remarks>Elements equal to 0 are kept; every other element is set to 1.</remarks>
    /// <exception cref="PlatformNotSupportedException">ARM AdvSimd is not supported by the current hardware.</exception>
    [Pure]
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public static Vector64<byte> CorrectBoolean(this Vector64<byte> vector)
    {
        if (AdvSimd.IsSupported)
        {
            // Haven't tested since March 6th 2023 (Reason: Unavailable hardware).
            // isZero has every bit set in the lanes where the input is 0.
            var isZero = AdvSimd.CompareEqual(vector, Vector64<byte>.Zero);

            // Take 0 for the zero lanes and 1 for all others. Selecting between the input and zero
            // (the previous implementation) always produced an all-zero vector.
            return AdvSimd.BitwiseSelect(isZero, Vector64<byte>.Zero, Vector64.Create((byte)1));
        }

        if (Sse.IsSupported)
        {
            throw new PlatformNotSupportedException("Cannot correct boolean of Vector64<byte> on SSE intrinsic set.");
        }

        throw new PlatformNotSupportedException("Unknown Intrinsic platform.");
    }

    /// <summary>
    /// Corrects a <see cref="Vector128{T}"/> of <see langword="byte"/> into standard boolean values.
    /// </summary>
    /// <param name="vector">Vector of bytes to correct.</param>
    /// <returns>A <see cref="Vector128{T}"/> of bytes in which every element is either 0 or 1.</returns>
    /// <remarks>
    /// Every element ends up as either 0 (representing <see langword="false"/>) or 1 (representing
    /// <see langword="true"/>). Elements equal to 0 are kept; every other element is mapped to 1.
    /// </remarks>
    /// <exception cref="PlatformNotSupportedException">Neither SSE2 nor ARM AdvSimd is supported by the current hardware.</exception>
    [Pure]
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public static Vector128<byte> CorrectBoolean(this Vector128<byte> vector)
    {
        if (Sse2.IsSupported)
        {
            var isZero = Sse2.CompareEqual(vector, Vector128<byte>.Zero);

            // AndNot computes (~isZero) & 1: 0 for zero lanes, 1 for all others.
            return Sse2.AndNot(isZero, Vector128.Create((byte)1));
        }

        if (AdvSimd.IsSupported)
        {
            // Haven't tested since March 6th 2023 (Reason: Unavailable hardware).
            var isZero = AdvSimd.CompareEqual(vector, Vector128<byte>.Zero);

            // Take 0 for the zero lanes and 1 for all others. Selecting between the input and zero
            // (the previous implementation) always produced an all-zero vector.
            return AdvSimd.BitwiseSelect(isZero, Vector128<byte>.Zero, Vector128.Create((byte)1));
        }

        throw new PlatformNotSupportedException("Unknown Intrinsic platform.");
    }

    /// <summary>
    /// Corrects a <see cref="Vector256{T}"/> of <see langword="byte"/> into standard boolean values.
    /// </summary>
    /// <param name="vector">Vector of bytes to correct.</param>
    /// <returns>A <see cref="Vector256{T}"/> of bytes in which every element is either 0 or 1.</returns>
    /// <remarks>Elements equal to 0 are kept; every other element is set to 1.</remarks>
    /// <exception cref="PlatformNotSupportedException">AVX2 is not supported by the current hardware.</exception>
    [Pure]
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public static Vector256<byte> CorrectBoolean(this Vector256<byte> vector)
    {
        if (Avx2.IsSupported)
        {
            var isZero = Avx2.CompareEqual(vector, Vector256<byte>.Zero);

            // AndNot computes (~isZero) & 1: 0 for zero lanes, 1 for all others.
            return Avx2.AndNot(isZero, Vector256.Create((byte)1));
        }

        if (AdvSimd.IsSupported)
        {
            throw new PlatformNotSupportedException("Cannot correct boolean of Vector256<byte> on ARM intrinsic set.");
        }

        throw new PlatformNotSupportedException("Unknown Intrinsic platform.");
    }
}
|
||||
#endif
|
||||
|
@ -1,6 +1,5 @@
|
||||
#if NETCOREAPP3_0_OR_GREATER
|
||||
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Diagnostics.Contracts;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
@ -14,6 +13,126 @@ namespace X10D.Core;
|
||||
/// </summary>
|
||||
public static class IntrinsicUtility
|
||||
{
|
||||
// NOTE:
|
||||
// ANY OPERATION ON ANYTHING THAT ISN'T FLOAT IS NOT SSE COMPATIBLE; IT MUST TARGET SSE2 OR LATER
|
||||
// FOR API CONSISTENCY.
|
||||
|
||||
/// <summary>
/// <br>Corrects each element of a <see cref="Vector64{T}"/> of <see langword="byte"/> to 0 or 1, depending on its boolean truthiness.</br>
/// <br>Operation (raw):</br>
/// <code>
/// for (int i = 0; i &lt; 8; i++) {
///     dest[i] = ~(vector[i] == 0 ? 0xFF : 0x00) &amp; 1;
/// }
/// </code>
/// <br>Operation (simplified):</br>
/// <code>
/// for (int i = 0; i &lt; 8; i++) {
///     dest[i] = vector[i] == 0 ? 0 : 1;
/// }
/// </code>
/// </summary>
/// <param name="vector">Vector of bytes to correct.</param>
/// <returns>A vector whose elements are 0 where <paramref name="vector"/> is 0, and 1 everywhere else.</returns>
/// <exception cref="PlatformNotSupportedException">ARM AdvSimd is not supported by the current hardware.</exception>
/// <remarks>API available on ARM NEON (untested) hardware.</remarks>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static Vector64<byte> CorrectBoolean(Vector64<byte> vector)
{
    if (AdvSimd.IsSupported)
    {
        // Haven't tested since March 6th 2023 (Reason: Unavailable hardware).
        // isZero has every bit set in the lanes where the input is 0.
        var isZero = AdvSimd.CompareEqual(vector, Vector64<byte>.Zero);

        // Take 0 for the zero lanes and 1 for all others. Selecting between the input and zero
        // (the previous implementation) always produced an all-zero vector.
        return AdvSimd.BitwiseSelect(isZero, Vector64<byte>.Zero, Vector64.Create((byte)1));
    }

    if (Sse.IsSupported)
    {
        throw new PlatformNotSupportedException("Cannot correct boolean of Vector64<byte> on SSE intrinsic set.");
    }

    throw new PlatformNotSupportedException("Unknown Intrinsic platform.");
}
|
||||
|
||||
/// <summary>
/// <br>Corrects each element of a <see cref="Vector128{T}"/> of <see langword="byte"/> to 0 or 1, depending on its boolean truthiness.</br>
/// <br>Operation (raw):</br>
/// <code>
/// for (int i = 0; i &lt; 16; i++) {
///     dest[i] = ~(vector[i] == 0 ? 0xFF : 0x00) &amp; 1;
/// }
/// </code>
/// <br>Operation (simplified):</br>
/// <code>
/// for (int i = 0; i &lt; 16; i++) {
///     dest[i] = vector[i] == 0 ? 0 : 1;
/// }
/// </code>
/// </summary>
/// <param name="vector">Vector of bytes to correct.</param>
/// <returns>A vector whose elements are 0 where <paramref name="vector"/> is 0, and 1 everywhere else.</returns>
/// <exception cref="PlatformNotSupportedException">Neither SSE2 nor ARM AdvSimd is supported by the current hardware.</exception>
/// <remarks>API available on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM NEON (untested) hardware.</remarks>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static Vector128<byte> CorrectBoolean(Vector128<byte> vector)
{
    if (Sse2.IsSupported)
    {
        var isZero = Sse2.CompareEqual(vector, Vector128<byte>.Zero);

        // AndNot computes (~isZero) & 1: 0 for zero lanes, 1 for all others.
        return Sse2.AndNot(isZero, Vector128.Create((byte)1));
    }

    if (AdvSimd.IsSupported)
    {
        // Haven't tested since March 6th 2023 (Reason: Unavailable hardware).
        // isZero has every bit set in the lanes where the input is 0.
        var isZero = AdvSimd.CompareEqual(vector, Vector128<byte>.Zero);

        // Take 0 for the zero lanes and 1 for all others. Selecting between the input and zero
        // (the previous implementation) always produced an all-zero vector.
        return AdvSimd.BitwiseSelect(isZero, Vector128<byte>.Zero, Vector128.Create((byte)1));
    }

    throw new PlatformNotSupportedException("Unknown Intrinsic platform.");
}
|
||||
|
||||
/// <summary>
/// <br>Corrects each element of a <see cref="Vector256{T}"/> of <see langword="byte"/> to 0 or 1, depending on its boolean truthiness.</br>
/// <br>Operation (raw):</br>
/// <code>
/// for (int i = 0; i &lt; 32; i++) {
///     dest[i] = ~(vector[i] == 0 ? 0xFF : 0x00) &amp; 1;
/// }
/// </code>
/// <br>Operation (simplified):</br>
/// <code>
/// for (int i = 0; i &lt; 32; i++) {
///     dest[i] = vector[i] == 0 ? 0 : 1;
/// }
/// </code>
/// </summary>
/// <param name="vector">Vector of bytes to correct.</param>
/// <returns>A vector whose elements are 0 where <paramref name="vector"/> is 0, and 1 everywhere else.</returns>
/// <exception cref="PlatformNotSupportedException">AVX2 is not supported by the current hardware.</exception>
/// <remarks>API available on AVX2 hardware.</remarks>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static Vector256<byte> CorrectBoolean(Vector256<byte> vector)
{
    if (Avx2.IsSupported)
    {
        // isZero has every bit set in the lanes where the input is 0.
        var isZero = Avx2.CompareEqual(vector, Vector256<byte>.Zero);

        // AndNot computes (~isZero) & 1: 0 for zero lanes, 1 for all others.
        return Avx2.AndNot(isZero, Vector256.Create((byte)1));
    }

    if (AdvSimd.IsSupported)
    {
        throw new PlatformNotSupportedException("Cannot correct boolean of Vector256<byte> on ARM intrinsic set.");
    }

    throw new PlatformNotSupportedException("Unknown Intrinsic platform.");
}
|
||||
|
||||
/// <summary>
|
||||
/// <br>Multiply packed 64-bit unsigned integer elements in a and b and truncate the results to 64-bit integer.</br>
|
||||
/// <br>Operation:</br>
|
||||
@ -45,7 +164,7 @@ public static class IntrinsicUtility
|
||||
|
||||
return Sse2.Add(high, ac);
|
||||
}
|
||||
else if (AdvSimd.IsSupported)
|
||||
if (AdvSimd.IsSupported)
|
||||
{
|
||||
// https://stackoverflow.com/questions/60236627/facing-problem-in-implementing-multiplication-of-64-bit-variables-using-arm-neon
|
||||
|
||||
@ -99,8 +218,8 @@ public static class IntrinsicUtility
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// <para>Multiply packed 64-bit signed integer elements in a and b and truncate the results to 64-bit integer.</para>
|
||||
/// <para>Operation:</para>
|
||||
/// <br>Multiply packed 64-bit signed integer elements in a and b and truncate the results to 64-bit integer.</br>
|
||||
/// <br>Operation:</br>
|
||||
/// <code>
|
||||
/// dest[0] = lhs[0] * rhs[0];
|
||||
/// dest[1] = lhs[1] * rhs[1];
|
||||
@ -139,8 +258,8 @@ public static class IntrinsicUtility
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// <para>Horizontally apply OR operation on adjacent pairs of single-precision (32-bit) floating-point elements in lhs and rhs.</para>
|
||||
/// <para>Operation:</para>
|
||||
/// <br>Horizontally apply OR operation on adjacent pairs of single-precision (32-bit) floating-point elements in lhs and rhs.</br>
|
||||
/// <br>Operation:</br>
|
||||
/// <code>
|
||||
/// dest[0] = lhs[0] | lhs[1];
|
||||
/// dest[1] = lhs[2] | lhs[3];
|
||||
@ -158,12 +277,12 @@ public static class IntrinsicUtility
|
||||
{
|
||||
if (Sse.IsSupported)
|
||||
{
|
||||
var s1 = Sse.Shuffle(lhs, rhs, 0b10_00_10_00);
|
||||
var s2 = Sse.Shuffle(lhs, rhs, 0b11_01_11_01);
|
||||
var s1 = Sse.Shuffle(lhs, rhs, 0b10_00_10_00); // s1 = { lhs[0] ; lhs[2] ; rhs[0] ; rhs[2] }
|
||||
var s2 = Sse.Shuffle(lhs, rhs, 0b11_01_11_01); // s2 = { lhs[1] ; lhs[3] ; rhs[1] ; rhs[3] }
|
||||
|
||||
return Sse.Or(s1, s2);
|
||||
}
|
||||
else if (AdvSimd.Arm64.IsSupported)
|
||||
if (AdvSimd.Arm64.IsSupported)
|
||||
{
|
||||
// Hasn't been tested since March 7th 2023 (Reason: Unavailable hardware).
|
||||
var s1 = AdvSimd.Arm64.UnzipEven(lhs, rhs);
|
||||
@ -176,8 +295,8 @@ public static class IntrinsicUtility
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// <para>Horizontally apply OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs.</para>
|
||||
/// <para>Operation:</para>
|
||||
/// <br>Horizontally apply OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs.</br>
|
||||
/// <br>Operation:</br>
|
||||
/// <code>
|
||||
/// dest[0] = lhs[0] | lhs[1];
|
||||
/// dest[1] = lhs[2] | lhs[3];
|
||||
@ -188,7 +307,7 @@ public static class IntrinsicUtility
|
||||
/// <param name="lhs">Left vector.</param>
|
||||
/// <param name="rhs">Right vector.</param>
|
||||
/// <returns></returns>
|
||||
/// <remarks>API avaliable on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares.</remarks>
|
||||
/// <remarks>API available on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardware.</remarks>
|
||||
[Pure]
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||
public static Vector128<int> HorizontalOr(Vector128<int> lhs, Vector128<int> rhs)
|
||||
@ -197,8 +316,8 @@ public static class IntrinsicUtility
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// <para>Horizontally apply OR operation on adjacent pairs of 32-bit unsigned integer elements in lhs and rhs.</para>
|
||||
/// <para>Operation:</para>
|
||||
/// <br>Horizontally apply OR operation on adjacent pairs of 32-bit unsigned integer elements in lhs and rhs.</br>
|
||||
/// <br>Operation:</br>
|
||||
/// <code>
|
||||
/// dest[0] = lhs[0] | lhs[1];
|
||||
/// dest[1] = lhs[2] | lhs[3];
|
||||
@ -209,7 +328,7 @@ public static class IntrinsicUtility
|
||||
/// <param name="lhs">Left vector.</param>
|
||||
/// <param name="rhs">Right vector.</param>
|
||||
/// <returns></returns>
|
||||
/// <remarks>API avaliable on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares.</remarks>
|
||||
/// <remarks>API available on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardware.</remarks>
|
||||
[Pure]
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||
[CLSCompliant(false)]
|
||||
@ -217,6 +336,33 @@ public static class IntrinsicUtility
|
||||
{
|
||||
return HorizontalOr(lhs.AsSingle(), rhs.AsSingle()).AsUInt32();
|
||||
}
|
||||
|
||||
/// <summary>
/// <br>Reverses the position of the two 64-bit unsigned integers in a vector.</br>
/// <br>Operation:</br>
/// <code>
/// ulong tmp = vector[0];
/// vector[0] = vector[1];
/// vector[1] = tmp;
/// </code>
/// </summary>
/// <param name="vector">Input vector.</param>
/// <returns>A vector containing the two elements of <paramref name="vector"/> in swapped order.</returns>
/// <remarks>
/// Uses the SSE2 shuffle instruction where SSE2 is supported. On every other platform the two halves are
/// swapped through the platform-independent <see cref="Vector128"/> helpers instead of throwing.
/// </remarks>
[Pure]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static Vector128<ulong> ReverseElements(Vector128<ulong> vector)
{
    if (Sse2.IsSupported)
    {
        // Control 0b01 selects element 1 of the first operand for the low lane and
        // element 0 of the second operand for the high lane.
        var asDouble = vector.AsDouble();
        return Sse2.Shuffle(asDouble, asDouble, 0b01).AsUInt64();
    }

    // Portable fallback (covers ARM NEON and any other platform): rebuild the vector with
    // its upper half as the new lower half and vice versa.
    return Vector128.Create(vector.GetUpper(), vector.GetLower());
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,6 +1,7 @@
|
||||
using System.Diagnostics.Contracts;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Numerics;
|
||||
|
||||
#if NETCOREAPP3_0_OR_GREATER
|
||||
using X10D.Core;
|
||||
@ -9,6 +10,10 @@ using System.Runtime.Intrinsics.X86;
|
||||
using System.Runtime.Intrinsics.Arm;
|
||||
#endif
|
||||
|
||||
#if NET7_0_OR_GREATER
|
||||
using System.Diagnostics;
|
||||
#endif
|
||||
|
||||
namespace X10D.Core;
|
||||
|
||||
/// <summary>
|
||||
@ -18,6 +23,20 @@ public static class SpanExtensions
|
||||
{
|
||||
#if NETCOREAPP3_0_OR_GREATER
|
||||
// Multiplier used by the packing routines in this class: a 64-bit lane is multiplied by this
// constant and the product is shifted right by 56 to obtain one packed byte.
private const ulong IntegerPackingMagic = 0x0102040810204080;

// IntegerPackingMagic broadcast to the single 64-bit lane of a Vector64.
private static Vector64<ulong> IntegerPackingMagicV64 => Vector64.Create(IntegerPackingMagic);

// IntegerPackingMagic broadcast to both 64-bit lanes of a Vector128.
private static Vector128<ulong> IntegerPackingMagicV128 => Vector128.Create(IntegerPackingMagic);

// IntegerPackingMagic broadcast to all four 64-bit lanes of a Vector256.
private static Vector256<ulong> IntegerPackingMagicV256 => Vector256.Create(IntegerPackingMagic);
|
||||
#endif
|
||||
|
||||
/// <summary>
|
||||
@ -153,14 +172,18 @@ public static class SpanExtensions
|
||||
|
||||
if (Sse2.IsSupported)
|
||||
{
|
||||
var scalar = Sse2.LoadScalarVector128((ulong*)pSource).AsByte().CorrectBoolean().AsUInt64();
|
||||
return unchecked((byte)(IntegerPackingMagic * scalar.GetElement(0) >> 56));
|
||||
var load = Sse2.LoadScalarVector128((ulong*)pSource).AsByte();
|
||||
var correct = IntrinsicUtility.CorrectBoolean(load);
|
||||
|
||||
return unchecked((byte)(IntegerPackingMagic * correct.AsUInt64().GetElement(0) >> 56));
|
||||
}
|
||||
else if (AdvSimd.IsSupported)
|
||||
if (AdvSimd.IsSupported)
|
||||
{
|
||||
// Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware).
|
||||
var scalar = AdvSimd.LoadVector64((byte*)pSource).CorrectBoolean().AsUInt64();
|
||||
return unchecked((byte)(IntegerPackingMagic * scalar.GetElement(0) >> 56));
|
||||
var load = AdvSimd.LoadVector64((byte*)pSource);
|
||||
var correct = IntrinsicUtility.CorrectBoolean(load);
|
||||
|
||||
return unchecked((byte)(IntegerPackingMagic * correct.AsUInt64().GetElement(0) >> 56));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -225,18 +248,22 @@ public static class SpanExtensions
|
||||
|
||||
if (Sse2.IsSupported)
|
||||
{
|
||||
var vector = Sse2.LoadVector128((byte*)pSource).CorrectBoolean().AsUInt64();
|
||||
var calc = Sse2.ShiftRightLogical(IntrinsicUtility.Multiply(Vector128.Create(IntegerPackingMagic), vector), 56);
|
||||
var load = Sse2.LoadVector128((byte*)pSource);
|
||||
var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64();
|
||||
var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct);
|
||||
var shift = Sse2.ShiftRightLogical(multiply, 56);
|
||||
|
||||
return (short)(calc.GetElement(0) | (calc.GetElement(1) << 8));
|
||||
return (short)(shift.GetElement(0) | (shift.GetElement(1) << 8));
|
||||
}
|
||||
else if (AdvSimd.IsSupported)
|
||||
if (AdvSimd.IsSupported)
|
||||
{
|
||||
// Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware).
|
||||
var vector = AdvSimd.LoadVector128((byte*)pSource).CorrectBoolean().AsUInt64();
|
||||
var calc = AdvSimd.ShiftRightLogical(IntrinsicUtility.Multiply(Vector128.Create(IntegerPackingMagic), vector), 56);
|
||||
var load = AdvSimd.LoadVector128((byte*)pSource);
|
||||
var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64();
|
||||
var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct);
|
||||
var shift = AdvSimd.ShiftRightLogical(multiply, 56);
|
||||
|
||||
return (short)(calc.GetElement(0) | (calc.GetElement(1) << 8));
|
||||
return (short)(shift.GetElement(0) | (shift.GetElement(1) << 8));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -303,10 +330,12 @@ public static class SpanExtensions
|
||||
|
||||
if (Avx2.IsSupported)
|
||||
{
|
||||
var vector = Avx.LoadVector256((byte*)pSource).CorrectBoolean().AsUInt64();
|
||||
var load = Avx.LoadVector256((byte*)pSource);
|
||||
var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64();
|
||||
|
||||
var calc = Avx2.ShiftRightLogical(IntrinsicUtility.Multiply(Vector256.Create(IntegerPackingMagic), vector), 56);
|
||||
var shift = Avx2.ShiftLeftLogicalVariable(calc, Vector256.Create(0UL, 8, 16, 24));
|
||||
var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV256, correct);
|
||||
var shift = Avx2.ShiftRightLogical(multiply, 56);
|
||||
shift = Avx2.ShiftLeftLogicalVariable(shift, Vector256.Create(0UL, 8, 16, 24));
|
||||
|
||||
var p1 = Avx2.Permute4x64(shift, 0b10_11_00_01);
|
||||
var or1 = Avx2.Or(shift, p1);
|
||||
@ -317,29 +346,33 @@ public static class SpanExtensions
|
||||
}
|
||||
if (Sse2.IsSupported)
|
||||
{
|
||||
var vector1 = Sse2.LoadVector128((byte*)pSource).CorrectBoolean().AsUInt64();
|
||||
var vector2 = Sse2.LoadVector128((byte*)(pSource + 16)).CorrectBoolean().AsUInt64();
|
||||
var load = Sse2.LoadVector128((byte*)pSource);
|
||||
var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64();
|
||||
|
||||
var magic = Vector128.Create(IntegerPackingMagic);
|
||||
var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct);
|
||||
var shift1 = Sse2.ShiftRightLogical(multiply, 56);
|
||||
shift1 = Sse2.ShiftLeftLogical(shift1, Vector128.Create(0UL, 8UL));
|
||||
|
||||
var calc1 = Sse2.ShiftRightLogical(IntrinsicUtility.Multiply(magic, vector1), 56);
|
||||
var calc2 = Sse2.ShiftRightLogical(IntrinsicUtility.Multiply(magic, vector2), 56);
|
||||
load = Sse2.LoadVector128((byte*)(pSource + 16));
|
||||
correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64();
|
||||
|
||||
var shift1 = Sse2.ShiftLeftLogical(calc1, Vector128.Create(0UL, 8UL));
|
||||
var shift2 = Sse2.ShiftLeftLogical(calc2, Vector128.Create(16UL, 24UL));
|
||||
multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct);
|
||||
var shift2 = Sse2.ShiftRightLogical(multiply, 56);
|
||||
shift2 = Sse2.ShiftLeftLogical(shift2, Vector128.Create(16UL, 24UL));
|
||||
|
||||
return (int)(shift1.GetElement(0) | shift1.GetElement(1) | shift2.GetElement(0) | shift2.GetElement(1));
|
||||
var or1 = Sse2.Or(shift1, shift2);
|
||||
var or2 = Sse2.Or(or1, IntrinsicUtility.ReverseElements(or1));
|
||||
|
||||
return (int)or2.GetElement(0);
|
||||
}
|
||||
else if (AdvSimd.IsSupported)
|
||||
if (AdvSimd.IsSupported)
|
||||
{
|
||||
// Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware).
|
||||
var vector1 = AdvSimd.LoadVector128((byte*)pSource).CorrectBoolean().AsUInt64();
|
||||
var vector2 = AdvSimd.LoadVector128((byte*)(pSource + 16)).CorrectBoolean().AsUInt64();
|
||||
var vector1 = IntrinsicUtility.CorrectBoolean(AdvSimd.LoadVector128((byte*)pSource)).AsUInt64();
|
||||
var vector2 = IntrinsicUtility.CorrectBoolean(AdvSimd.LoadVector128((byte*)(pSource + 16))).AsUInt64();
|
||||
|
||||
var magic = Vector128.Create(IntegerPackingMagic);
|
||||
|
||||
var calc1 = AdvSimd.ShiftRightLogical(IntrinsicUtility.Multiply(magic, vector1), 56);
|
||||
var calc2 = AdvSimd.ShiftRightLogical(IntrinsicUtility.Multiply(magic, vector2), 56);
|
||||
var calc1 = AdvSimd.ShiftRightLogical(IntrinsicUtility.Multiply(IntegerPackingMagicV128, vector1), 56);
|
||||
var calc2 = AdvSimd.ShiftRightLogical(IntrinsicUtility.Multiply(IntegerPackingMagicV128, vector2), 56);
|
||||
|
||||
var shift1 = AdvSimd.ShiftLogical(calc1, Vector128.Create(0, 8));
|
||||
var shift2 = AdvSimd.ShiftLogical(calc2, Vector128.Create(16, 24));
|
||||
|
@ -158,7 +158,6 @@ public static class ListOfByteExtensions
|
||||
throw new ArgumentNullException(nameof(source));
|
||||
}
|
||||
#endif
|
||||
|
||||
return BitConverter.ToInt64(source.ToArray(), startIndex);
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,7 @@ namespace X10D.Math;
|
||||
public static class ByteExtensions
|
||||
{
|
||||
/// <summary>
|
||||
/// Computes the digital root of this 16-bit integer.
|
||||
/// Computes the digital root of this 8-bit integer.
|
||||
/// </summary>
|
||||
/// <param name="value">The value whose digital root to compute.</param>
|
||||
/// <returns>The digital root of <paramref name="value" />.</returns>
|
||||
|
@ -1,6 +1,8 @@
|
||||
#if NETCOREAPP3_0_OR_GREATER
|
||||
using System;
|
||||
using System.Diagnostics.Contracts;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
|
||||
namespace X10D.Text;
|
||||
@ -44,11 +46,34 @@ public static class RuneExtensions
|
||||
return value.ToString();
|
||||
}
|
||||
|
||||
// Helpful documentation: https://en.wikipedia.org/wiki/UTF-8
|
||||
switch (value.Utf8SequenceLength)
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
Unsafe.SkipInit(out byte bytes);
|
||||
value.EncodeToUtf8(MemoryMarshal.CreateSpan(ref bytes, 1));
|
||||
|
||||
return new string((char)value.Value, count);
|
||||
}
|
||||
|
||||
case 2:
|
||||
{
|
||||
Span<byte> bytes = stackalloc byte[2];
|
||||
value.EncodeToUtf8(bytes);
|
||||
|
||||
return new string(Encoding.UTF8.GetString(bytes)[0], count);
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
int utf8SequenceLength = value.Utf8SequenceLength;
|
||||
Span<byte> utf8 = stackalloc byte[utf8SequenceLength];
|
||||
value.EncodeToUtf8(utf8);
|
||||
|
||||
Span<byte> buffer = stackalloc byte[utf8.Length * count];
|
||||
// Limit to maximum 1024 bytes stack allocation (Rune.Utf8SequenceLength return value in range of [1; 4])
|
||||
Span<byte> buffer = count <= 256 ? stackalloc byte[utf8.Length * count] : new byte[utf8.Length * count];
|
||||
|
||||
for (var index = 0; index < count; index++)
|
||||
{
|
||||
utf8.CopyTo(buffer.Slice(index * utf8.Length, utf8.Length));
|
||||
@ -57,4 +82,6 @@ public static class RuneExtensions
|
||||
return Encoding.UTF8.GetString(buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user