From 54ed9096bd4add5cf2ca320123f551f60c06a57f Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 22 Sep 2018 17:26:18 -0300 Subject: [PATCH] Add FMAXP and FMINP (Vector) instructions on the CPU (#412) * Add FMAXP and FMINP (Vector) instructions on the CPU * Address PR feedback --- ChocolArm64/AOpCodeTable.cs | 2 + .../Instruction/AInstEmitSimdArithmetic.cs | 303 +++++++++--------- .../Instruction/AInstEmitSimdHelper.cs | 36 +++ 3 files changed, 183 insertions(+), 158 deletions(-) diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index fe3dce41..6404e14f 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -278,10 +278,12 @@ namespace ChocolArm64 SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", AInstEmit.Fmax_V, typeof(AOpCodeSimdReg)); SetA64("000111100x1xxxxx011010xxxxxxxxxx", AInstEmit.Fmaxnm_S, typeof(AOpCodeSimdReg)); SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", AInstEmit.Fmaxnm_V, typeof(AOpCodeSimdReg)); + SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", AInstEmit.Fmaxp_V, typeof(AOpCodeSimdReg)); SetA64("000111100x1xxxxx010110xxxxxxxxxx", AInstEmit.Fmin_S, typeof(AOpCodeSimdReg)); SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", AInstEmit.Fmin_V, typeof(AOpCodeSimdReg)); SetA64("000111100x1xxxxx011110xxxxxxxxxx", AInstEmit.Fminnm_S, typeof(AOpCodeSimdReg)); SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", AInstEmit.Fminnm_V, typeof(AOpCodeSimdReg)); + SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", AInstEmit.Fminp_V, typeof(AOpCodeSimdReg)); SetA64("010111111<0011100<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmla_V, typeof(AOpCodeSimdReg)); SetA64("0x0011111<> 4; - int Pairs = Words >> Op.Size; - - for (int Index = 0; Index < Pairs; Index++) - { - int Idx = Index << 1; - - EmitVectorExtract(Context, Op.Rn, Idx, Op.Size, Signed); - EmitVectorExtract(Context, Op.Rn, Idx + 1, Op.Size, Signed); - - Context.Emit(OpCodes.Add); - - if (Accumulate) - { - EmitVectorExtract(Context, Op.Rd, Index, Op.Size + 1, Signed); - - Context.Emit(OpCodes.Add); - } - - EmitVectorInsertTmp(Context, Index, Op.Size + 1); - } - - Context.EmitLdvectmp(); - Context.EmitStvec(Op.Rd); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - - private static void EmitDoublingMultiplyHighHalf(AILEmitterCtx Context, bool Round) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - int ESize = 8 << Op.Size; - - Context.Emit(OpCodes.Mul); - - if (!Round) - { - Context.EmitAsr(ESize - 1); - } - else - { - long RoundConst = 1L << (ESize - 1); - - AILLabel LblTrue = new AILLabel(); - - Context.EmitLsl(1); - - Context.EmitLdc_I8(RoundConst); - - Context.Emit(OpCodes.Add); - - Context.EmitAsr(ESize); - - Context.Emit(OpCodes.Dup); - Context.EmitLdc_I8((long)int.MinValue); - Context.Emit(OpCodes.Bne_Un_S, LblTrue); - - Context.Emit(OpCodes.Neg); - - Context.MarkLabel(LblTrue); - } - } - - private static void EmitHighNarrow(AILEmitterCtx Context, Action Emit, bool Round) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - int Elems = 8 >> Op.Size; - - int ESize = 8 << Op.Size; - - int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; - - long RoundConst = 1L << (ESize - 1); - - if (Part != 0) - { - Context.EmitLdvec(Op.Rd); - Context.EmitStvectmp(); - } - - for (int Index = 0; Index < Elems; Index++) - { - EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1); - EmitVectorExtractZx(Context, Op.Rm, Index, Op.Size + 1); - - Emit(); - - if (Round) - { - Context.EmitLdc_I8(RoundConst); - - Context.Emit(OpCodes.Add); - } - - Context.EmitLsr(ESize); - - EmitVectorInsertTmp(Context, Part + Index, Op.Size); - } - - Context.EmitLdvectmp(); - Context.EmitStvec(Op.Rd); - - if (Part == 0) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - public static void Fabd_S(AILEmitterCtx Context) { EmitScalarBinaryOpF(Context, () => @@ -341,34 +211,7 @@ namespace ChocolArm64.Instruction public static void Faddp_V(AILEmitterCtx Context) { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - int SizeF = Op.Size & 1; - - int Bytes = Op.GetBitsCount() >> 3; - - int Elems = Bytes >> SizeF + 2; - int Half = Elems >> 1; - - for (int Index = 0; Index < Elems; Index++) - { - int Elem = (Index & (Half - 1)) << 1; - - EmitVectorExtractF(Context, Index < Half ? Op.Rn : Op.Rm, Elem + 0, SizeF); - EmitVectorExtractF(Context, Index < Half ? Op.Rn : Op.Rm, Elem + 1, SizeF); - - Context.Emit(OpCodes.Add); - - EmitVectorInsertTmpF(Context, Index, SizeF); - } - - Context.EmitLdvectmp(); - Context.EmitStvec(Op.Rd); - - if (Op.RegisterSize == ARegisterSize.SIMD64) - { - EmitVectorZeroUpper(Context, Op.Rd); - } + EmitVectorPairwiseOpF(Context, () => Context.Emit(OpCodes.Add)); } public static void Fdiv_S(AILEmitterCtx Context) @@ -436,6 +279,11 @@ namespace ChocolArm64.Instruction }); } + public static void Fmaxp_V(AILEmitterCtx Context) + { + EmitVectorPairwiseOpF(Context, () => EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Max))); + } + public static void Fmin_S(AILEmitterCtx Context) { EmitScalarBinaryOpF(Context, () => @@ -468,6 +316,11 @@ namespace ChocolArm64.Instruction }); } + public static void Fminp_V(AILEmitterCtx Context) + { + EmitVectorPairwiseOpF(Context, () => EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Min))); + } + public static void Fmla_Se(AILEmitterCtx Context) { EmitScalarTernaryOpByElemF(Context, () => @@ -1278,6 +1131,7 @@ namespace ChocolArm64.Instruction EmitVectorTernaryOpZx(Context, () => { Context.Emit(OpCodes.Sub); + EmitAbs(Context); Context.Emit(OpCodes.Add); @@ -1289,6 +1143,7 @@ namespace ChocolArm64.Instruction EmitVectorWidenRnRmTernaryOpZx(Context, () => { Context.Emit(OpCodes.Sub); + EmitAbs(Context); Context.Emit(OpCodes.Add); @@ -1300,6 +1155,7 @@ namespace ChocolArm64.Instruction EmitVectorBinaryOpZx(Context, () => { Context.Emit(OpCodes.Sub); + EmitAbs(Context); }); } @@ -1309,6 +1165,7 @@ namespace ChocolArm64.Instruction EmitVectorWidenRnRmBinaryOpZx(Context, () => { Context.Emit(OpCodes.Sub); + EmitAbs(Context); }); } @@ -1496,5 +1353,135 @@ namespace ChocolArm64.Instruction { EmitVectorWidenRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); } + + private static void EmitAbs(AILEmitterCtx Context) + { + AILLabel LblTrue = new AILLabel(); + + Context.Emit(OpCodes.Dup); + Context.Emit(OpCodes.Ldc_I4_0); + Context.Emit(OpCodes.Bge_S, LblTrue); + + Context.Emit(OpCodes.Neg); + + Context.MarkLabel(LblTrue); + } + + private static void EmitAddLongPairwise(AILEmitterCtx Context, bool Signed, bool Accumulate) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; + + for (int Index = 0; Index < Pairs; Index++) + { + int Idx = Index << 1; + + EmitVectorExtract(Context, Op.Rn, Idx, Op.Size, Signed); + EmitVectorExtract(Context, Op.Rn, Idx + 1, Op.Size, Signed); + + Context.Emit(OpCodes.Add); + + if (Accumulate) + { + EmitVectorExtract(Context, Op.Rd, Index, Op.Size + 1, Signed); + + Context.Emit(OpCodes.Add); + } + + EmitVectorInsertTmp(Context, Index, Op.Size + 1); + } + + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + + private static void EmitDoublingMultiplyHighHalf(AILEmitterCtx Context, bool Round) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + int ESize = 8 << Op.Size; + + Context.Emit(OpCodes.Mul); + + if (!Round) + { + Context.EmitAsr(ESize - 1); + } + else + { + long RoundConst = 1L << (ESize - 1); + + AILLabel LblTrue = new AILLabel(); + + Context.EmitLsl(1); + + Context.EmitLdc_I8(RoundConst); + + Context.Emit(OpCodes.Add); + + Context.EmitAsr(ESize); + + Context.Emit(OpCodes.Dup); + Context.EmitLdc_I8((long)int.MinValue); + Context.Emit(OpCodes.Bne_Un_S, LblTrue); + + Context.Emit(OpCodes.Neg); + + Context.MarkLabel(LblTrue); + } + } + + private static void EmitHighNarrow(AILEmitterCtx Context, Action Emit, bool Round) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + int Elems = 8 >> Op.Size; + + int ESize = 8 << Op.Size; + + int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; + + long RoundConst = 1L << (ESize - 1); + + if (Part != 0) + { + Context.EmitLdvec(Op.Rd); + Context.EmitStvectmp(); + } + + for (int Index = 0; Index < Elems; Index++) + { + EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1); + EmitVectorExtractZx(Context, Op.Rm, Index, Op.Size + 1); + + Emit(); + + if (Round) + { + Context.EmitLdc_I8(RoundConst); + + Context.Emit(OpCodes.Add); + } + + Context.EmitLsr(ESize); + + EmitVectorInsertTmp(Context, Part + Index, Op.Size); + } + + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + + if (Part == 0) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } } } diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 75a5a0d0..171de43b 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -813,6 +813,42 @@ namespace ChocolArm64.Instruction } } + public static void EmitVectorPairwiseOpF(AILEmitterCtx Context, Action Emit) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + int SizeF = Op.Size & 1; + + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> SizeF + 2; + + for (int Index = 0; Index < Pairs; Index++) + { + int Idx = Index << 1; + + EmitVectorExtractF(Context, Op.Rn, Idx, SizeF); + EmitVectorExtractF(Context, Op.Rn, Idx + 1, SizeF); + + Emit(); + + EmitVectorExtractF(Context, Op.Rm, Idx, SizeF); + EmitVectorExtractF(Context, Op.Rm, Idx + 1, SizeF); + + Emit(); + + EmitVectorInsertTmpF(Context, Pairs + Index, SizeF); + EmitVectorInsertTmpF(Context, Index, SizeF); + } + + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + [Flags] public enum SaturatingFlags {