1
0
mirror of https://github.com/oliverbooth/X10D synced 2024-11-09 23:25:43 +00:00

fix: fix SSE2 implementation of PackInt32

Credit due to @RealityProgrammer, thank you.
This commit is contained in:
Oliver Booth 2023-04-03 00:42:29 +01:00
parent 92855ddbab
commit cbbfa6d947
No known key found for this signature in database
GPG Key ID: 20BEB9DC87961025

View File

@@ -535,19 +535,18 @@ public static class SpanExtensions
Vector128<ulong> multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct); Vector128<ulong> multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct);
Vector128<ulong> shift1 = Sse2.ShiftRightLogical(multiply, 56); Vector128<ulong> shift1 = Sse2.ShiftRightLogical(multiply, 56);
shift1 = Sse2.ShiftLeftLogical(shift1, Vector128.Create(0UL, 8UL));
load = Sse2.LoadVector128((byte*)(pSource + 16)); load = Sse2.LoadVector128((byte*)(pSource + 16));
correct = load.CorrectBoolean().AsUInt64(); correct = load.CorrectBoolean().AsUInt64();
multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct); multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct);
Vector128<ulong> shift2 = Sse2.ShiftRightLogical(multiply, 56); Vector128<ulong> shift2 = Sse2.ShiftRightLogical(multiply, 56);
shift2 = Sse2.ShiftLeftLogical(shift2, Vector128.Create(16UL, 24UL));
Vector128<ulong> or1 = Sse2.Or(shift1, shift2); ulong shift1Element0 = shift1.GetElement(0);
Vector128<ulong> or2 = Sse2.Or(or1, or1.ReverseElements()); ulong shift1Element1 = (shift1.GetElement(1) << 8);
ulong shift2Element0 = (shift2.GetElement(0) << 16);
return (int)or2.GetElement(0); ulong shift2Element1 = (shift2.GetElement(1) << 24);
return (int)(shift1Element0 | shift1Element1 | shift2Element0 | shift2Element1);
} }
} }
} }