From cbbfa6d947a70d62bfd039688de87fddfd87454f Mon Sep 17 00:00:00 2001 From: Oliver Booth Date: Mon, 3 Apr 2023 00:42:29 +0100 Subject: [PATCH] fix: fix SSE2 implementation of PackInt32 Credit due to @RealityProgrammer, thank you. --- X10D/src/Core/SpanExtensions.cs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/X10D/src/Core/SpanExtensions.cs b/X10D/src/Core/SpanExtensions.cs index 505f294..991b510 100644 --- a/X10D/src/Core/SpanExtensions.cs +++ b/X10D/src/Core/SpanExtensions.cs @@ -535,19 +535,18 @@ public static class SpanExtensions Vector128 multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct); Vector128 shift1 = Sse2.ShiftRightLogical(multiply, 56); - shift1 = Sse2.ShiftLeftLogical(shift1, Vector128.Create(0UL, 8UL)); load = Sse2.LoadVector128((byte*)(pSource + 16)); correct = load.CorrectBoolean().AsUInt64(); multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct); Vector128 shift2 = Sse2.ShiftRightLogical(multiply, 56); - shift2 = Sse2.ShiftLeftLogical(shift2, Vector128.Create(16UL, 24UL)); - Vector128 or1 = Sse2.Or(shift1, shift2); - Vector128 or2 = Sse2.Or(or1, or1.ReverseElements()); - - return (int)or2.GetElement(0); + ulong shift1Element0 = shift1.GetElement(0); + ulong shift1Element1 = (shift1.GetElement(1) << 8); + ulong shift2Element0 = (shift2.GetElement(0) << 16); + ulong shift2Element1 = (shift2.GetElement(1) << 24); + return (int)(shift1Element0 | shift1Element1 | shift2Element0 | shift2Element1); } } }