From b251f880ffcd6340045de717dfcff2cdef6efd93 Mon Sep 17 00:00:00 2001 From: RealityProgrammer Date: Wed, 8 Mar 2023 09:46:20 +0700 Subject: [PATCH] Optimize Rune.Repeat(int) when UTF8 sequence length is 1 or 2 and reformat some intrinsic code --- X10D/src/Core/IntrinsicExtensions.cs | 93 +------------- X10D/src/Core/IntrinsicUtility.cs | 176 ++++++++++++++++++++++++--- X10D/src/Core/SpanExtensions.cs | 93 +++++++++----- X10D/src/IO/ListOfByteExtensions.cs | 1 - X10D/src/Math/ByteExtensions.cs | 2 +- X10D/src/Text/RuneExtensions.cs | 45 +++++-- 6 files changed, 263 insertions(+), 147 deletions(-) diff --git a/X10D/src/Core/IntrinsicExtensions.cs b/X10D/src/Core/IntrinsicExtensions.cs index f7f0313..9e78dd1 100644 --- a/X10D/src/Core/IntrinsicExtensions.cs +++ b/X10D/src/Core/IntrinsicExtensions.cs @@ -1,15 +1,6 @@ #if NETCOREAPP3_0_OR_GREATER -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; + using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using System.Runtime.Intrinsics.Arm; -using System.Diagnostics.Contracts; -using System.Runtime.CompilerServices; -using System.Numerics; namespace X10D.Core; @@ -18,86 +9,6 @@ namespace X10D.Core; /// public static class IntrinsicExtensions { - /// - /// Correcting of into standard boolean values. - /// - /// Vector of byte to correct. - /// Corrected boolean in form of of bytes. - /// This method will ensure that every value can only be 0 or 1. Values of 0 will be kept, and others will be set to 1. - [Pure] - [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - public static Vector64 CorrectBoolean(this Vector64 vector) - { - if (AdvSimd.IsSupported) - { - // Haven't tested since March 6th 2023 (Reason: Unavailable hardware). 
- var cmp = AdvSimd.CompareEqual(vector, Vector64.Zero); - var result = AdvSimd.BitwiseSelect(cmp, vector, Vector64.Zero); - - return result; - } - - if (Sse.IsSupported) - { - throw new PlatformNotSupportedException("Cannot correct boolean of Vector64 on SSE intrinsic set."); - } - - throw new PlatformNotSupportedException("Unknown Intrinsic platform."); - } - - /// - /// Correcting of into standard boolean values. - /// - /// Vector of byte to correct. - /// Corrected boolean in form of of bytes. - /// This method will ensure that every values can only be either 0 to represent and 1 to represent . Values of 0 will be kept, and others will be mapped back to 1. - [Pure] - [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - public static Vector128 CorrectBoolean(this Vector128 vector) - { - if (Sse2.IsSupported) - { - var cmp = Sse2.CompareEqual(vector, Vector128.Zero); - var result = Sse2.AndNot(cmp, Vector128.Create((byte)1)); - - return result; - } - else if (AdvSimd.IsSupported) - { - // Haven't tested since March 6th 2023 (Reason: Unavailable hardware). - var cmp = AdvSimd.CompareEqual(vector, Vector128.Zero); - var result = AdvSimd.BitwiseSelect(cmp, vector, Vector128.Zero); - - return result; - } - - throw new PlatformNotSupportedException("Unknown Intrinsic platform."); - } - - /// - /// Correcting of into standard boolean values. - /// - /// Vector of byte to correct. - /// Corrected boolean in form of of bytes. - /// This method will ensure that every value can only be 0 or 1. Values of 0 will be kept, and others will be set to 1. 
- [Pure] - [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - public static Vector256 CorrectBoolean(this Vector256 vector) - { - if (Avx2.IsSupported) - { - var cmp = Avx2.CompareEqual(vector, Vector256.Zero); - var result = Avx2.AndNot(cmp, Vector256.Create((byte)1)); - - return result; - } - - if (AdvSimd.IsSupported) - { - throw new PlatformNotSupportedException("Cannot correct boolean of Vector256 on ARM intrinsic set."); - } - - throw new PlatformNotSupportedException("Unknown Intrinsic platform."); - } + // Got nothing for now. } #endif diff --git a/X10D/src/Core/IntrinsicUtility.cs b/X10D/src/Core/IntrinsicUtility.cs index 3524088..6776bbc 100644 --- a/X10D/src/Core/IntrinsicUtility.cs +++ b/X10D/src/Core/IntrinsicUtility.cs @@ -1,6 +1,5 @@ #if NETCOREAPP3_0_OR_GREATER -using System.Diagnostics.CodeAnalysis; using System.Diagnostics.Contracts; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; @@ -14,6 +13,126 @@ namespace X10D.Core; /// public static class IntrinsicUtility { + // NOTE: + // ANYTHING OPERATION OPERATION ON ANYTHING THAT ISN'T FLOAT IS NOT SSE COMPATIBLE, MUST BE SSE2 AND BEYOND VERSION + // FOR API CONSISTENCY. + + /// + ///
Corrects every byte of the 64-bit vector to 0 or 1, depending on its boolean truthiness.
+ ///
Operation (raw):
+ /// + /// for (int i = 0; i < 8; i++) { + /// dest[i] = ~(vector[i] == 0 ? 0xFF : 0x00) & 1; + /// } + /// + ///
Operation (simplified):
+ /// + /// for (int i = 0; i < 8; i++) { + /// dest[i] = vector[i] == 0 ? 0 : 1; + /// } + /// + ///
+ /// Vector of byte to correct. + /// + /// API avaliable on ARM NEON (untested) hardware. + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public static Vector64 CorrectBoolean(Vector64 vector) + { + if (AdvSimd.IsSupported) + { + // Haven't tested since March 6th 2023 (Reason: Unavailable hardware). + var cmp = AdvSimd.CompareEqual(vector, Vector64.Zero); + var result = AdvSimd.BitwiseSelect(cmp, vector, Vector64.Zero); + + return result; + } + if (Sse.IsSupported) + { + throw new PlatformNotSupportedException("Cannot correct boolean of Vector64 on SSE intrinsic set."); + } + + throw new PlatformNotSupportedException("Unknown Intrinsic platform."); + } + + /// + ///
Corrects every byte of the 128-bit vector to 0 or 1, depending on its boolean truthiness.
+ ///
Operation (raw):
+ /// + /// for (int i = 0; i < 16; i++) { + /// dest[i] = ~(vector[i] == 0 ? 0xFF : 0x00) & 1; + /// } + /// + ///
Operation (simplified):
+ /// + /// for (int i = 0; i < 16; i++) { + /// dest[i] = vector[i] == 0 ? 0 : 1; + /// } + /// + ///
+ /// Vector of byte to correct. + /// + /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM NEON (untested) hardwares. + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public static Vector128 CorrectBoolean(Vector128 vector) + { + if (Sse2.IsSupported) + { + var cmp = Sse2.CompareEqual(vector, Vector128.Zero); + var result = Sse2.AndNot(cmp, Vector128.Create((byte)1)); + + return result; + } + if (AdvSimd.IsSupported) + { + // Haven't tested since March 6th 2023 (Reason: Unavailable hardware). + var cmp = AdvSimd.CompareEqual(vector, Vector128.Zero); + var result = AdvSimd.BitwiseSelect(cmp, vector, Vector128.Zero); + + return result; + } + + throw new PlatformNotSupportedException("Unknown Intrinsic platform."); + } + + /// + ///
Corrects every byte of the 256-bit vector to 0 or 1, depending on its boolean truthiness.
+ ///
Operation (raw):
+ /// + /// for (int i = 0; i < 32; i++) { + /// dest[i] = ~(vector[i] == 0 ? 0xFF : 0x00) & 1; + /// } + /// + ///
Operation (simplified):
+ /// + /// for (int i = 0; i < 32; i++) { + /// dest[i] = vector[i] == 0 ? 0 : 1; + /// } + /// + ///
+ /// Vector of byte to correct. + /// + /// API avaliable on AVX2 hardware. + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public static Vector256 CorrectBoolean(Vector256 vector) + { + if (Avx2.IsSupported) + { + var cmp = Avx2.CompareEqual(vector, Vector256.Zero); + var result = Avx2.AndNot(cmp, Vector256.Create((byte)1)); + + return result; + } + if (AdvSimd.IsSupported) + { + throw new PlatformNotSupportedException("Cannot correct boolean of Vector256 on ARM intrinsic set."); + } + + throw new PlatformNotSupportedException("Unknown Intrinsic platform."); + } + /// ///
Multiply packed 64-bit unsigned integer elements in a and b and truncate the results to 64-bit integer.
///
Operation:
@@ -45,7 +164,7 @@ public static class IntrinsicUtility return Sse2.Add(high, ac); } - else if (AdvSimd.IsSupported) + if (AdvSimd.IsSupported) { // https://stackoverflow.com/questions/60236627/facing-problem-in-implementing-multiplication-of-64-bit-variables-using-arm-neon @@ -99,8 +218,8 @@ public static class IntrinsicUtility } /// - /// Multiply packed 64-bit signed integer elements in a and b and truncate the results to 64-bit integer. - /// Operation: + ///
Multiply packed 64-bit signed integer elements in a and b and truncate the results to 64-bit integer.
+ ///
Operation:
/// /// dest[0] = lhs[0] * rhs[0]; /// dest[1] = lhs[1] * rhs[1]; @@ -139,8 +258,8 @@ public static class IntrinsicUtility } /// - /// Horizontally apply OR operation on adjacent pairs of single-precision (32-bit) floating-point elements in lhs and rhs. - /// Operation: + ///
Horizontally apply OR operation on adjacent pairs of single-precision (32-bit) floating-point elements in lhs and rhs.
+ ///
Operation:
/// /// dest[0] = lhs[0] | lhs[1]; /// dest[1] = lhs[2] | lhs[3]; @@ -158,12 +277,12 @@ public static class IntrinsicUtility { if (Sse.IsSupported) { - var s1 = Sse.Shuffle(lhs, rhs, 0b10_00_10_00); - var s2 = Sse.Shuffle(lhs, rhs, 0b11_01_11_01); + var s1 = Sse.Shuffle(lhs, rhs, 0b10_00_10_00); // s1 = { lhs[0] ; lhs[2] ; rhs[0] ; rhs[2] } + var s2 = Sse.Shuffle(lhs, rhs, 0b11_01_11_01); // s2 = { lhs[1] ; lhs[3] ; rhs[1] ; rhs[3] } return Sse.Or(s1, s2); } - else if (AdvSimd.Arm64.IsSupported) + if (AdvSimd.Arm64.IsSupported) { // Hasn't been tested since March 7th 2023 (Reason: Unavailable hardware). var s1 = AdvSimd.Arm64.UnzipEven(lhs, rhs); @@ -176,8 +295,8 @@ public static class IntrinsicUtility } /// - /// Horizontally apply OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs. - /// Operation: + ///
Horizontally apply OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs.
+ ///
Operation:
/// /// dest[0] = lhs[0] | lhs[1]; /// dest[1] = lhs[2] | lhs[3]; @@ -188,7 +307,7 @@ public static class IntrinsicUtility /// Left vector. /// Right vector. /// - /// API avaliable on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares. + /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares. [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector128 HorizontalOr(Vector128 lhs, Vector128 rhs) @@ -197,8 +316,8 @@ public static class IntrinsicUtility } /// - /// Horizontally apply OR operation on adjacent pairs of 32-bit unsigned integer elements in lhs and rhs. - /// Operation: + ///
Horizontally apply OR operation on adjacent pairs of 32-bit unsigned integer elements in lhs and rhs.
+ ///
Operation:
/// /// dest[0] = lhs[0] | lhs[1]; /// dest[1] = lhs[2] | lhs[3]; @@ -209,7 +328,7 @@ public static class IntrinsicUtility /// Left vector. /// Right vector. /// - /// API avaliable on SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares. + /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares. [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] [CLSCompliant(false)] @@ -217,6 +336,33 @@ public static class IntrinsicUtility { return HorizontalOr(lhs.AsSingle(), rhs.AsSingle()).AsUInt32(); } + + /// + ///
Reverses the positions of the two 64-bit unsigned integers in the vector.
+ ///
Operation:
+ /// + /// ulong tmp = vector[0]; + /// vector[0] = vector[1]; + /// vector[1] = tmp; + /// + ///
+ /// Input vector. + /// + /// API available on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 hardwares. + [Pure] + [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public static Vector128 ReverseElements(Vector128 vector) + { + if (Sse2.IsSupported) + { + return Sse2.Shuffle(vector.AsDouble(), vector.AsDouble(), 0b01).AsUInt64(); + } + + // No idea how to implement this in ARM NEON (Reason: Unavailable hardware) + + throw new PlatformNotSupportedException("Unsupported SIMD platform."); + } } #endif diff --git a/X10D/src/Core/SpanExtensions.cs b/X10D/src/Core/SpanExtensions.cs index b22d63d..a12e4bb 100644 --- a/X10D/src/Core/SpanExtensions.cs +++ b/X10D/src/Core/SpanExtensions.cs @@ -1,6 +1,7 @@ using System.Diagnostics.Contracts; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Numerics; #if NETCOREAPP3_0_OR_GREATER using X10D.Core; @@ -9,6 +10,10 @@ using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.Arm; #endif +#if NET7_0_OR_GREATER +using System.Diagnostics; +#endif + namespace X10D.Core; /// @@ -18,6 +23,20 @@ public static class SpanExtensions { #if NETCOREAPP3_0_OR_GREATER private const ulong IntegerPackingMagic = 0x0102040810204080; + private static Vector64 IntegerPackingMagicV64 + { + get => Vector64.Create(IntegerPackingMagic); + } + + private static Vector128 IntegerPackingMagicV128 + { + get => Vector128.Create(IntegerPackingMagic); + } + + private static Vector256 IntegerPackingMagicV256 + { + get => Vector256.Create(IntegerPackingMagic); + } #endif /// @@ -153,14 +172,18 @@ public static class SpanExtensions if (Sse2.IsSupported) { - var scalar = Sse2.LoadScalarVector128((ulong*)pSource).AsByte().CorrectBoolean().AsUInt64(); - return unchecked((byte)(IntegerPackingMagic * scalar.GetElement(0) >> 56)); + var load = Sse2.LoadScalarVector128((ulong*)pSource).AsByte(); + var correct = IntrinsicUtility.CorrectBoolean(load); + 
+ return unchecked((byte)(IntegerPackingMagic * correct.AsUInt64().GetElement(0) >> 56)); } - else if (AdvSimd.IsSupported) + if (AdvSimd.IsSupported) { // Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware). - var scalar = AdvSimd.LoadVector64((byte*)pSource).CorrectBoolean().AsUInt64(); - return unchecked((byte)(IntegerPackingMagic * scalar.GetElement(0) >> 56)); + var load = AdvSimd.LoadVector64((byte*)pSource); + var correct = IntrinsicUtility.CorrectBoolean(load); + + return unchecked((byte)(IntegerPackingMagic * correct.AsUInt64().GetElement(0) >> 56)); } else { @@ -225,18 +248,22 @@ public static class SpanExtensions if (Sse2.IsSupported) { - var vector = Sse2.LoadVector128((byte*)pSource).CorrectBoolean().AsUInt64(); - var calc = Sse2.ShiftRightLogical(IntrinsicUtility.Multiply(Vector128.Create(IntegerPackingMagic), vector), 56); + var load = Sse2.LoadVector128((byte*)pSource); + var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64(); + var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct); + var shift = Sse2.ShiftRightLogical(multiply, 56); - return (short)(calc.GetElement(0) | (calc.GetElement(1) << 8)); + return (short)(shift.GetElement(0) | (shift.GetElement(1) << 8)); } - else if (AdvSimd.IsSupported) + if (AdvSimd.IsSupported) { // Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware). 
- var vector = AdvSimd.LoadVector128((byte*)pSource).CorrectBoolean().AsUInt64(); - var calc = AdvSimd.ShiftRightLogical(IntrinsicUtility.Multiply(Vector128.Create(IntegerPackingMagic), vector), 56); + var load = AdvSimd.LoadVector128((byte*)pSource); + var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64(); + var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct); + var shift = AdvSimd.ShiftRightLogical(multiply, 56); - return (short)(calc.GetElement(0) | (calc.GetElement(1) << 8)); + return (short)(shift.GetElement(0) | (shift.GetElement(1) << 8)); } else { @@ -303,10 +330,12 @@ public static class SpanExtensions if (Avx2.IsSupported) { - var vector = Avx.LoadVector256((byte*)pSource).CorrectBoolean().AsUInt64(); + var load = Avx.LoadVector256((byte*)pSource); + var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64(); - var calc = Avx2.ShiftRightLogical(IntrinsicUtility.Multiply(Vector256.Create(IntegerPackingMagic), vector), 56); - var shift = Avx2.ShiftLeftLogicalVariable(calc, Vector256.Create(0UL, 8, 16, 24)); + var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV256, correct); + var shift = Avx2.ShiftRightLogical(multiply, 56); + shift = Avx2.ShiftLeftLogicalVariable(shift, Vector256.Create(0UL, 8, 16, 24)); var p1 = Avx2.Permute4x64(shift, 0b10_11_00_01); var or1 = Avx2.Or(shift, p1); @@ -317,29 +346,33 @@ public static class SpanExtensions } if (Sse2.IsSupported) { - var vector1 = Sse2.LoadVector128((byte*)pSource).CorrectBoolean().AsUInt64(); - var vector2 = Sse2.LoadVector128((byte*)(pSource + 16)).CorrectBoolean().AsUInt64(); + var load = Sse2.LoadVector128((byte*)pSource); + var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64(); - var magic = Vector128.Create(IntegerPackingMagic); + var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct); + var shift1 = Sse2.ShiftRightLogical(multiply, 56); + shift1 = Sse2.ShiftLeftLogical(shift1, Vector128.Create(0UL, 8UL)); - var calc1 = 
Sse2.ShiftRightLogical(IntrinsicUtility.Multiply(magic, vector1), 56); - var calc2 = Sse2.ShiftRightLogical(IntrinsicUtility.Multiply(magic, vector2), 56); + load = Sse2.LoadVector128((byte*)(pSource + 16)); + correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64(); - var shift1 = Sse2.ShiftLeftLogical(calc1, Vector128.Create(0UL, 8UL)); - var shift2 = Sse2.ShiftLeftLogical(calc2, Vector128.Create(16UL, 24UL)); + multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct); + var shift2 = Sse2.ShiftRightLogical(multiply, 56); + shift2 = Sse2.ShiftLeftLogical(shift2, Vector128.Create(16UL, 24UL)); - return (int)(shift1.GetElement(0) | shift1.GetElement(1) | shift2.GetElement(0) | shift2.GetElement(1)); + var or1 = Sse2.Or(shift1, shift2); + var or2 = Sse2.Or(or1, IntrinsicUtility.ReverseElements(or1)); + + return (int)or2.GetElement(0); } - else if (AdvSimd.IsSupported) + if (AdvSimd.IsSupported) { // Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware). - var vector1 = AdvSimd.LoadVector128((byte*)pSource).CorrectBoolean().AsUInt64(); - var vector2 = AdvSimd.LoadVector128((byte*)(pSource + 16)).CorrectBoolean().AsUInt64(); + var vector1 = IntrinsicUtility.CorrectBoolean(AdvSimd.LoadVector128((byte*)pSource)).AsUInt64(); + var vector2 = IntrinsicUtility.CorrectBoolean(AdvSimd.LoadVector128((byte*)(pSource + 16))).AsUInt64(); - var magic = Vector128.Create(IntegerPackingMagic); - - var calc1 = AdvSimd.ShiftRightLogical(IntrinsicUtility.Multiply(magic, vector1), 56); - var calc2 = AdvSimd.ShiftRightLogical(IntrinsicUtility.Multiply(magic, vector2), 56); + var calc1 = AdvSimd.ShiftRightLogical(IntrinsicUtility.Multiply(IntegerPackingMagicV128, vector1), 56); + var calc2 = AdvSimd.ShiftRightLogical(IntrinsicUtility.Multiply(IntegerPackingMagicV128, vector2), 56); var shift1 = AdvSimd.ShiftLogical(calc1, Vector128.Create(0, 8)); var shift2 = AdvSimd.ShiftLogical(calc2, Vector128.Create(16, 24)); diff --git 
a/X10D/src/IO/ListOfByteExtensions.cs b/X10D/src/IO/ListOfByteExtensions.cs index d9fd480..d48e8e5 100644 --- a/X10D/src/IO/ListOfByteExtensions.cs +++ b/X10D/src/IO/ListOfByteExtensions.cs @@ -158,7 +158,6 @@ public static class ListOfByteExtensions throw new ArgumentNullException(nameof(source)); } #endif - return BitConverter.ToInt64(source.ToArray(), startIndex); } diff --git a/X10D/src/Math/ByteExtensions.cs b/X10D/src/Math/ByteExtensions.cs index 310c4e4..0ec06f4 100644 --- a/X10D/src/Math/ByteExtensions.cs +++ b/X10D/src/Math/ByteExtensions.cs @@ -9,7 +9,7 @@ namespace X10D.Math; public static class ByteExtensions { /// - /// Computes the digital root of this 16-bit integer. + /// Computes the digital root of this 8-bit integer. /// /// The value whose digital root to compute. /// The digital root of . diff --git a/X10D/src/Text/RuneExtensions.cs b/X10D/src/Text/RuneExtensions.cs index 0249fa3..7bb1684 100644 --- a/X10D/src/Text/RuneExtensions.cs +++ b/X10D/src/Text/RuneExtensions.cs @@ -1,6 +1,8 @@ #if NETCOREAPP3_0_OR_GREATER +using System; using System.Diagnostics.Contracts; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Text; namespace X10D.Text; @@ -44,17 +46,42 @@ public static class RuneExtensions return value.ToString(); } - int utf8SequenceLength = value.Utf8SequenceLength; - Span utf8 = stackalloc byte[utf8SequenceLength]; - value.EncodeToUtf8(utf8); - - Span buffer = stackalloc byte[utf8.Length * count]; - for (var index = 0; index < count; index++) + // Helpful documentation: https://en.wikipedia.org/wiki/UTF-8 + switch (value.Utf8SequenceLength) { - utf8.CopyTo(buffer.Slice(index * utf8.Length, utf8.Length)); - } + case 1: + { + Unsafe.SkipInit(out byte bytes); + value.EncodeToUtf8(MemoryMarshal.CreateSpan(ref bytes, 1)); - return Encoding.UTF8.GetString(buffer); + return new string((char)value.Value, count); + } + + case 2: + { + Span bytes = stackalloc byte[2]; + value.EncodeToUtf8(bytes); + + return 
new string(Encoding.UTF8.GetString(bytes)[0], count); + } + + default: + { + int utf8SequenceLength = value.Utf8SequenceLength; + Span utf8 = stackalloc byte[utf8SequenceLength]; + value.EncodeToUtf8(utf8); + + // Limit to maximum 1024 bytes stack allocation (Rune.Utf8SequenceLength return value in range of [1; 4]) + Span buffer = count <= 256 ? stackalloc byte[utf8.Length * count] : new byte[utf8.Length * count]; + + for (var index = 0; index < count; index++) + { + utf8.CopyTo(buffer.Slice(index * utf8.Length, utf8.Length)); + } + + return Encoding.UTF8.GetString(buffer); + } + } } } #endif