mirror of
https://github.com/ryujinx-mirror/ryujinx.git
synced 2024-12-22 06:15:40 +00:00
Implement a new JIT for Arm devices (#6057)
* Implement a new JIT for Arm devices * Auto-format * Make a lot of Assembler members read-only * More read-only * Fix more warnings * ObjectDisposedException.ThrowIf * New JIT cache for platforms that enforce W^X, currently unused * Remove unused using * Fix assert * Pass memory manager type around * Safe memory manager mode support + other improvements * Actual safe memory manager mode masking support * PR feedback
This commit is contained in:
parent
331c07807f
commit
427b7d06b5
@ -9,7 +9,7 @@ namespace ARMeilleure.Common
|
||||
/// Represents a table of guest address to a value.
|
||||
/// </summary>
|
||||
/// <typeparam name="TEntry">Type of the value</typeparam>
|
||||
unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged
|
||||
public unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged
|
||||
{
|
||||
/// <summary>
|
||||
/// Represents a level in an <see cref="AddressTable{TEntry}"/>.
|
||||
|
@ -8,6 +8,7 @@ namespace ARMeilleure.Memory
|
||||
|
||||
void Commit(ulong offset, ulong size);
|
||||
|
||||
void MapAsRw(ulong offset, ulong size);
|
||||
void MapAsRx(ulong offset, ulong size);
|
||||
void MapAsRwx(ulong offset, ulong size);
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ using System;
|
||||
|
||||
namespace ARMeilleure.Memory
|
||||
{
|
||||
class ReservedRegion
|
||||
public class ReservedRegion
|
||||
{
|
||||
public const int DefaultGranularity = 65536; // Mapping granularity in Windows.
|
||||
|
||||
|
@ -5,7 +5,7 @@ using System.Runtime.Versioning;
|
||||
namespace ARMeilleure.Native
|
||||
{
|
||||
[SupportedOSPlatform("macos")]
|
||||
internal static partial class JitSupportDarwin
|
||||
static partial class JitSupportDarwin
|
||||
{
|
||||
[LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")]
|
||||
public static partial void Copy(IntPtr dst, IntPtr src, ulong n);
|
||||
|
@ -8,7 +8,7 @@ namespace ARMeilleure.Translation
|
||||
/// </summary>
|
||||
/// <typeparam name="TK">Key</typeparam>
|
||||
/// <typeparam name="TV">Value</typeparam>
|
||||
class IntervalTree<TK, TV> where TK : IComparable<TK>
|
||||
public class IntervalTree<TK, TV> where TK : IComparable<TK>
|
||||
{
|
||||
private const int ArrayGrowthSize = 32;
|
||||
|
||||
|
@ -73,7 +73,7 @@ namespace ARMeilleure.Translation
|
||||
CountTable = new EntryTable<uint>();
|
||||
Functions = new TranslatorCache<TranslatedFunction>();
|
||||
FunctionTable = new AddressTable<ulong>(for64Bits ? _levels64Bit : _levels32Bit);
|
||||
Stubs = new TranslatorStubs(this);
|
||||
Stubs = new TranslatorStubs(FunctionTable);
|
||||
|
||||
FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
using ARMeilleure.Common;
|
||||
using ARMeilleure.Instructions;
|
||||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.State;
|
||||
@ -14,11 +15,11 @@ namespace ARMeilleure.Translation
|
||||
/// </summary>
|
||||
class TranslatorStubs : IDisposable
|
||||
{
|
||||
private static readonly Lazy<IntPtr> _slowDispatchStub = new(GenerateSlowDispatchStub, isThreadSafe: true);
|
||||
private readonly Lazy<IntPtr> _slowDispatchStub;
|
||||
|
||||
private bool _disposed;
|
||||
|
||||
private readonly Translator _translator;
|
||||
private readonly AddressTable<ulong> _functionTable;
|
||||
private readonly Lazy<IntPtr> _dispatchStub;
|
||||
private readonly Lazy<DispatcherFunction> _dispatchLoop;
|
||||
private readonly Lazy<WrapperFunction> _contextWrapper;
|
||||
@ -83,13 +84,14 @@ namespace ARMeilleure.Translation
|
||||
/// Initializes a new instance of the <see cref="TranslatorStubs"/> class with the specified
|
||||
/// <see cref="Translator"/> instance.
|
||||
/// </summary>
|
||||
/// <param name="translator"><see cref="Translator"/> instance to use</param>
|
||||
/// <param name="functionTable">Function table used to store pointers to the functions that the guest code will call</param>
|
||||
/// <exception cref="ArgumentNullException"><paramref name="translator"/> is null</exception>
|
||||
public TranslatorStubs(Translator translator)
|
||||
public TranslatorStubs(AddressTable<ulong> functionTable)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(translator);
|
||||
ArgumentNullException.ThrowIfNull(functionTable);
|
||||
|
||||
_translator = translator;
|
||||
_functionTable = functionTable;
|
||||
_slowDispatchStub = new(GenerateSlowDispatchStub, isThreadSafe: true);
|
||||
_dispatchStub = new(GenerateDispatchStub, isThreadSafe: true);
|
||||
_dispatchLoop = new(GenerateDispatchLoop, isThreadSafe: true);
|
||||
_contextWrapper = new(GenerateContextWrapper, isThreadSafe: true);
|
||||
@ -151,15 +153,15 @@ namespace ARMeilleure.Translation
|
||||
context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset())));
|
||||
|
||||
// Check if guest address is within range of the AddressTable.
|
||||
Operand masked = context.BitwiseAnd(guestAddress, Const(~_translator.FunctionTable.Mask));
|
||||
Operand masked = context.BitwiseAnd(guestAddress, Const(~_functionTable.Mask));
|
||||
context.BranchIfTrue(lblFallback, masked);
|
||||
|
||||
Operand index = default;
|
||||
Operand page = Const((long)_translator.FunctionTable.Base);
|
||||
Operand page = Const((long)_functionTable.Base);
|
||||
|
||||
for (int i = 0; i < _translator.FunctionTable.Levels.Length; i++)
|
||||
for (int i = 0; i < _functionTable.Levels.Length; i++)
|
||||
{
|
||||
ref var level = ref _translator.FunctionTable.Levels[i];
|
||||
ref var level = ref _functionTable.Levels[i];
|
||||
|
||||
// level.Mask is not used directly because it is more often bigger than 32-bits, so it will not
|
||||
// be encoded as an immediate on x86's bitwise and operation.
|
||||
@ -167,7 +169,7 @@ namespace ARMeilleure.Translation
|
||||
|
||||
index = context.BitwiseAnd(context.ShiftRightUI(guestAddress, Const(level.Index)), mask);
|
||||
|
||||
if (i < _translator.FunctionTable.Levels.Length - 1)
|
||||
if (i < _functionTable.Levels.Length - 1)
|
||||
{
|
||||
page = context.Load(OperandType.I64, context.Add(page, context.ShiftLeft(index, Const(3))));
|
||||
context.BranchIfFalse(lblFallback, page);
|
||||
@ -196,7 +198,7 @@ namespace ARMeilleure.Translation
|
||||
/// Generates a <see cref="SlowDispatchStub"/>.
|
||||
/// </summary>
|
||||
/// <returns>Generated <see cref="SlowDispatchStub"/></returns>
|
||||
private static IntPtr GenerateSlowDispatchStub()
|
||||
private IntPtr GenerateSlowDispatchStub()
|
||||
{
|
||||
var context = new EmitterContext();
|
||||
|
||||
@ -205,8 +207,7 @@ namespace ARMeilleure.Translation
|
||||
Operand guestAddress = context.Load(OperandType.I64,
|
||||
context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset())));
|
||||
|
||||
MethodInfo getFuncAddress = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress));
|
||||
Operand hostAddress = context.Call(getFuncAddress, guestAddress);
|
||||
Operand hostAddress = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)), guestAddress);
|
||||
context.Tailcall(hostAddress, nativeContext);
|
||||
|
||||
var cfg = context.GetControlFlowGraph();
|
||||
|
@ -40,5 +40,9 @@ namespace Ryujinx.Cpu.AppleHv
|
||||
public void PrepareCodeRange(ulong address, ulong size)
|
||||
{
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
}
|
||||
}
|
||||
}
|
||||
|
17
src/Ryujinx.Cpu/DummyDiskCacheLoadState.cs
Normal file
17
src/Ryujinx.Cpu/DummyDiskCacheLoadState.cs
Normal file
@ -0,0 +1,17 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu
|
||||
{
|
||||
/// <summary>
/// <see cref="IDiskCacheLoadState"/> implementation that does nothing:
/// this class never raises <see cref="StateChanged"/> and <see cref="Cancel"/> is a no-op.
/// </summary>
public class DummyDiskCacheLoadState : IDiskCacheLoadState
{
    // The event only exists to satisfy the interface, so the "never used" warning is expected here.
#pragma warning disable CS0067 // The event is never used
    /// <inheritdoc/>
    public event Action<LoadState, int, int> StateChanged;
#pragma warning restore CS0067

    /// <inheritdoc/>
    public void Cancel()
    {
        // Intentionally empty.
    }
}
|
||||
}
|
@ -1,9 +1,11 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu
|
||||
{
|
||||
/// <summary>
|
||||
/// CPU context interface.
|
||||
/// </summary>
|
||||
public interface ICpuContext
|
||||
public interface ICpuContext : IDisposable
|
||||
{
|
||||
/// <summary>
|
||||
/// Creates a new execution context that will store thread CPU register state when executing guest code.
|
||||
|
@ -13,7 +13,7 @@ namespace Ryujinx.Cpu.Jit
|
||||
public JitCpuContext(ITickSource tickSource, IMemoryManager memory, bool for64Bit)
|
||||
{
|
||||
_tickSource = tickSource;
|
||||
_translator = new Translator(new JitMemoryAllocator(), memory, for64Bit);
|
||||
_translator = new Translator(new JitMemoryAllocator(forJit: true), memory, for64Bit);
|
||||
|
||||
if (memory.Type.IsHostMapped())
|
||||
{
|
||||
@ -57,5 +57,9 @@ namespace Ryujinx.Cpu.Jit
|
||||
{
|
||||
_translator.PrepareCodeRange(address, size);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,14 @@ namespace Ryujinx.Cpu.Jit
|
||||
{
|
||||
public class JitMemoryAllocator : IJitMemoryAllocator
|
||||
{
|
||||
private readonly MemoryAllocationFlags _jitFlag;
|
||||
|
||||
public JitMemoryAllocator(bool forJit = false)
|
||||
{
|
||||
_jitFlag = forJit ? MemoryAllocationFlags.Jit : MemoryAllocationFlags.None;
|
||||
}
|
||||
|
||||
public IJitMemoryBlock Allocate(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.None);
|
||||
public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.Jit);
|
||||
public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve | _jitFlag);
|
||||
}
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ namespace Ryujinx.Cpu.Jit
|
||||
}
|
||||
|
||||
public void Commit(ulong offset, ulong size) => _impl.Commit(offset, size);
|
||||
public void MapAsRw(ulong offset, ulong size) => _impl.Reprotect(offset, size, MemoryPermission.ReadAndWrite);
|
||||
public void MapAsRx(ulong offset, ulong size) => _impl.Reprotect(offset, size, MemoryPermission.ReadAndExecute);
|
||||
public void MapAsRwx(ulong offset, ulong size) => _impl.Reprotect(offset, size, MemoryPermission.ReadWriteExecute);
|
||||
|
||||
|
32
src/Ryujinx.Cpu/LightningJit/AarchCompiler.cs
Normal file
32
src/Ryujinx.Cpu/LightningJit/AarchCompiler.cs
Normal file
@ -0,0 +1,32 @@
|
||||
using ARMeilleure.Common;
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Cpu.LightningJit.Arm32;
|
||||
using Ryujinx.Cpu.LightningJit.Arm64;
|
||||
using Ryujinx.Cpu.LightningJit.State;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit
|
||||
{
|
||||
/// <summary>
/// Entry point that forwards a compilation request to the compiler matching the guest execution mode.
/// </summary>
class AarchCompiler
{
    /// <summary>
    /// Compiles the guest function at <paramref name="address"/>.
    /// </summary>
    /// <param name="cpuPreset">CPU preset forwarded to the selected compiler</param>
    /// <param name="memoryManager">Memory manager forwarded to the selected compiler</param>
    /// <param name="address">Guest address of the function to compile</param>
    /// <param name="funcTable">Function address table forwarded to the selected compiler</param>
    /// <param name="dispatchStubPtr">Dispatch stub pointer forwarded to the selected compiler</param>
    /// <param name="executionMode">Guest execution mode; selects between the A64 and A32 compilers</param>
    /// <param name="targetArch">Host architecture forwarded to the selected compiler</param>
    /// <returns>The function produced by the selected compiler</returns>
    public static CompiledFunction Compile(
        CpuPreset cpuPreset,
        IMemoryManager memoryManager,
        ulong address,
        AddressTable<ulong> funcTable,
        IntPtr dispatchStubPtr,
        ExecutionMode executionMode,
        Architecture targetArch)
    {
        if (executionMode != ExecutionMode.Aarch64)
        {
            // Every non-Aarch64 mode goes to the 32-bit compiler; only the Thumb flag differs.
            bool isThumb = executionMode == ExecutionMode.Aarch32Thumb;

            return A32Compiler.Compile(cpuPreset, memoryManager, address, funcTable, dispatchStubPtr, isThumb, targetArch);
        }

        return A64Compiler.Compile(cpuPreset, memoryManager, address, funcTable, dispatchStubPtr, targetArch);
    }
}
|
||||
}
|
18
src/Ryujinx.Cpu/LightningJit/AddressForm.cs
Normal file
18
src/Ryujinx.Cpu/LightningJit/AddressForm.cs
Normal file
@ -0,0 +1,18 @@
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit
|
||||
{
|
||||
/// <summary>
/// Address operand forms. The enum is backed by a <see cref="byte"/> and the
/// numeric values below are the same ones the implicit numbering produces.
/// </summary>
enum AddressForm : byte
{
    None = 0,
    OffsetReg = 1,
    PostIndexed = 2,
    PreIndexed = 3,
    SignedScaled = 4,
    UnsignedScaled = 5,
    BaseRegister = 6,
    BasePlusOffset = 7,
    Literal = 8,
    StructNoOffset = 9,
    StructPostIndexedReg = 10,
}
|
||||
}
|
30
src/Ryujinx.Cpu/LightningJit/Arm32/A32Compiler.cs
Normal file
30
src/Ryujinx.Cpu/LightningJit/Arm32/A32Compiler.cs
Normal file
@ -0,0 +1,30 @@
|
||||
using ARMeilleure.Common;
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Compiler front-end for 32-bit Arm guest code.
/// </summary>
static class A32Compiler
{
    /// <summary>
    /// Compiles the 32-bit guest function at <paramref name="address"/> for the given host architecture.
    /// </summary>
    /// <param name="cpuPreset">CPU preset forwarded to the target compiler</param>
    /// <param name="memoryManager">Memory manager forwarded to the target compiler</param>
    /// <param name="address">Guest address of the function to compile</param>
    /// <param name="funcTable">Function address table forwarded to the target compiler</param>
    /// <param name="dispatchStubPtr">Dispatch stub pointer forwarded to the target compiler</param>
    /// <param name="isThumb">Thumb flag forwarded to the target compiler</param>
    /// <param name="targetArch">Host architecture; only <see cref="Architecture.Arm64"/> is accepted</param>
    /// <returns>The function produced by the Arm64 target compiler</returns>
    /// <exception cref="PlatformNotSupportedException"><paramref name="targetArch"/> is not Arm64</exception>
    public static CompiledFunction Compile(
        CpuPreset cpuPreset,
        IMemoryManager memoryManager,
        ulong address,
        AddressTable<ulong> funcTable,
        IntPtr dispatchStubPtr,
        bool isThumb,
        Architecture targetArch)
    {
        if (targetArch != Architecture.Arm64)
        {
            throw new PlatformNotSupportedException();
        }

        return Compiler.Compile(cpuPreset, memoryManager, address, funcTable, dispatchStubPtr, isThumb);
    }
}
|
||||
}
|
101
src/Ryujinx.Cpu/LightningJit/Arm32/Block.cs
Normal file
101
src/Ryujinx.Cpu/LightningJit/Arm32/Block.cs
Normal file
@ -0,0 +1,101 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// A contiguous run of decoded guest instructions covering [<see cref="Address"/>, <see cref="EndAddress"/>).
/// </summary>
class Block
{
    public readonly ulong Address;
    public readonly ulong EndAddress;
    public readonly List<InstInfo> Instructions;
    public readonly bool EndsWithBranch;
    public readonly bool HasHostCall;
    public readonly bool IsTruncated;
    public readonly bool IsLoopEnd;
    public readonly bool IsThumb;

    /// <summary>
    /// Creates a block from already decoded instructions.
    /// </summary>
    /// <param name="address">Address of the first instruction</param>
    /// <param name="endAddress">Address right after the last instruction</param>
    /// <param name="instructions">Decoded instructions of the block</param>
    /// <param name="endsWithBranch">Stored in <see cref="EndsWithBranch"/></param>
    /// <param name="hasHostCall">Stored in <see cref="HasHostCall"/></param>
    /// <param name="isTruncated">Stored in <see cref="IsTruncated"/></param>
    /// <param name="isLoopEnd">Stored in <see cref="IsLoopEnd"/></param>
    /// <param name="isThumb">Stored in <see cref="IsThumb"/></param>
    public Block(
        ulong address,
        ulong endAddress,
        List<InstInfo> instructions,
        bool endsWithBranch,
        bool hasHostCall,
        bool isTruncated,
        bool isLoopEnd,
        bool isThumb)
    {
        // Outside Thumb every instruction is 4 bytes, so the byte range must match the instruction count.
        Debug.Assert(isThumb || (int)((endAddress - address) / 4) == instructions.Count);

        IsThumb = isThumb;
        IsLoopEnd = isLoopEnd;
        IsTruncated = isTruncated;
        HasHostCall = hasHostCall;
        EndsWithBranch = endsWithBranch;
        Instructions = instructions;
        EndAddress = endAddress;
        Address = address;
    }

    /// <summary>
    /// Splits this block in two at <paramref name="address"/>.
    /// </summary>
    /// <param name="address">Address where the second block should start</param>
    /// <returns>
    /// The (left, right) pair of blocks, or (null, null) when no instruction of this block starts at
    /// <paramref name="address"/>
    /// </returns>
    public (Block, Block) SplitAtAddress(ulong address)
    {
        int firstRightIndex = FindSplitIndex(address);

        if (firstRightIndex < 0)
        {
            return (null, null);
        }

        int rightCount = Instructions.Count - firstRightIndex;

        // Technically those are valid, but we don't want to create empty blocks.
        Debug.Assert(firstRightIndex != 0);
        Debug.Assert(rightCount != 0);

        // The left half falls through into the right half, so it carries none of the end-of-block flags.
        Block left = new Block(
            address: Address,
            endAddress: address,
            instructions: Instructions.GetRange(0, firstRightIndex),
            endsWithBranch: false,
            hasHostCall: HasHostCall,
            isTruncated: false,
            isLoopEnd: false,
            isThumb: IsThumb);

        // The right half keeps the end-of-block flags of this block.
        Block right = new Block(
            address: address,
            endAddress: EndAddress,
            instructions: Instructions.GetRange(firstRightIndex, rightCount),
            endsWithBranch: EndsWithBranch,
            hasHostCall: HasHostCall,
            isTruncated: IsTruncated,
            isLoopEnd: IsLoopEnd,
            isThumb: IsThumb);

        return (left, right);
    }

    /// <summary>
    /// Finds the index of the instruction that starts at <paramref name="address"/>.
    /// </summary>
    /// <param name="address">Address to look for</param>
    /// <returns>
    /// The instruction index. In Thumb mode, -1 is returned when no instruction starts at that address
    /// </returns>
    private int FindSplitIndex(ulong address)
    {
        if (!IsThumb)
        {
            // Fixed 4-byte instructions, the index can be computed directly.
            return (int)((address - Address) / 4);
        }

        // Thumb instructions are 2 or 4 bytes long, so walk the list accumulating sizes.
        ulong instructionAddress = Address;

        for (int i = 0; i < Instructions.Count; i++)
        {
            if (instructionAddress == address)
            {
                return i;
            }

            bool isNarrow = Instructions[i].Flags.HasFlag(InstFlags.Thumb16);

            instructionAddress += isNarrow ? 2UL : 4UL;
        }

        return -1;
    }
}
|
||||
}
|
15
src/Ryujinx.Cpu/LightningJit/Arm32/BranchType.cs
Normal file
15
src/Ryujinx.Cpu/LightningJit/Arm32/BranchType.cs
Normal file
@ -0,0 +1,15 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Kind of a pending branch recorded during code generation.
/// The numeric values are the same ones the implicit numbering produces.
/// </summary>
enum BranchType
{
    /// <summary>Branch to an address known at translation time.</summary>
    Branch = 0,

    /// <summary>Call to an address known at translation time.</summary>
    Call = 1,

    /// <summary>Branch whose target is held in a register.</summary>
    IndirectBranch = 2,

    /// <summary>Table branch, byte variant.</summary>
    TableBranchByte = 3,

    /// <summary>Table branch, halfword variant.</summary>
    TableBranchHalfword = 4,

    /// <summary>Call whose target is held in a register.</summary>
    IndirectCall = 5,

    /// <summary>Synchronization point.</summary>
    SyncPoint = 6,

    /// <summary>Software interrupt (recorded for BKPT, SVC and UDF).</summary>
    SoftwareInterrupt = 7,

    /// <summary>CNTPCT read (recorded for MRRC).</summary>
    ReadCntpct = 8,
}
|
||||
}
|
198
src/Ryujinx.Cpu/LightningJit/Arm32/CodeGenContext.cs
Normal file
198
src/Ryujinx.Cpu/LightningJit/Arm32/CodeGenContext.cs
Normal file
@ -0,0 +1,198 @@
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Mutable state shared by the instruction emitters while a 32-bit guest function is being translated:
/// output writer, assembler, register allocator, current PC, IT block state and the list of pending branches.
/// </summary>
class CodeGenContext
{
    public CodeWriter CodeWriter { get; }
    public Assembler Arm64Assembler { get; }
    public RegisterAllocator RegisterAllocator { get; }

    public MemoryManagerType MemoryManagerType { get; }

    public bool IsThumb { get; }
    public uint Pc { get; private set; }
    public bool InITBlock { get; private set; }

    private readonly ArmCondition[] _itConditions;
    private readonly List<PendingBranch> _pendingBranches;

    private uint _instructionAddress;
    private InstInfo _nextInstruction;
    private bool _skipNextInstruction;
    private int _itCount;
    private bool _nzcvModified;

    /// <summary>
    /// Creates a new code generation context.
    /// </summary>
    /// <param name="codeWriter">Writer receiving the generated code</param>
    /// <param name="arm64Assembler">Assembler exposed through <see cref="Arm64Assembler"/></param>
    /// <param name="registerAllocator">Register allocator exposed through <see cref="RegisterAllocator"/></param>
    /// <param name="mmType">Memory manager type exposed through <see cref="MemoryManagerType"/></param>
    /// <param name="isThumb">True when the guest code is Thumb code</param>
    public CodeGenContext(CodeWriter codeWriter, Assembler arm64Assembler, RegisterAllocator registerAllocator, MemoryManagerType mmType, bool isThumb)
    {
        IsThumb = isThumb;
        MemoryManagerType = mmType;
        RegisterAllocator = registerAllocator;
        Arm64Assembler = arm64Assembler;
        CodeWriter = codeWriter;

        _pendingBranches = new List<PendingBranch>();
        _itConditions = new ArmCondition[4];
    }

    /// <summary>
    /// Records the address of the instruction being translated and updates <see cref="Pc"/> accordingly.
    /// </summary>
    /// <param name="address">Address of the current instruction</param>
    public void SetPc(uint address)
    {
        _instructionAddress = address;

        // Due to historical reasons, the PC value is always 2 instructions ahead on 32-bit Arm CPUs.
        uint pcBias = IsThumb ? 4u : 8u;

        Pc = address + pcBias;
    }

    /// <summary>Stores the instruction returned by <see cref="PeekNextInstruction"/>.</summary>
    public void SetNextInstruction(InstInfo info) => _nextInstruction = info;

    /// <summary>Returns the instruction last stored with <see cref="SetNextInstruction"/>.</summary>
    public InstInfo PeekNextInstruction() => _nextInstruction;

    /// <summary>Raises the "skip next instruction" flag.</summary>
    public void SetSkipNextInstruction() => _skipNextInstruction = true;

    /// <summary>
    /// Returns the "skip next instruction" flag and clears it.
    /// </summary>
    public bool ConsumeSkipNextInstruction()
    {
        bool wasSet = _skipNextInstruction;

        _skipNextInstruction = false;

        return wasSet;
    }

    /// <summary>
    /// Records a direct branch to <see cref="Pc"/> plus <paramref name="offset"/>.
    /// </summary>
    public void AddPendingBranch(InstName name, int offset)
    {
        Enqueue(BranchType.Branch, Pc + (uint)offset, 0u, name);
    }

    /// <summary>
    /// Records a direct call. Requests one temporary GPR and marks LR as used.
    /// </summary>
    public void AddPendingCall(uint targetAddress, uint nextAddress)
    {
        Enqueue(BranchType.Call, targetAddress, nextAddress, InstName.BlI);

        RegisterAllocator.EnsureTempGprRegisters(1);
        RegisterAllocator.MarkGprAsUsed(RegisterUtils.LrRegister);
    }

    /// <summary>
    /// Records a branch through <paramref name="targetRegister"/> and marks that register as used.
    /// </summary>
    public void AddPendingIndirectBranch(InstName name, uint targetRegister)
    {
        Enqueue(BranchType.IndirectBranch, targetRegister, 0u, name);

        RegisterAllocator.MarkGprAsUsed((int)targetRegister);
    }

    /// <summary>
    /// Records a table branch (byte or halfword variant). Requests two temporary GPRs and
    /// marks both operand registers as used.
    /// </summary>
    public void AddPendingTableBranch(uint rn, uint rm, bool halfword)
    {
        BranchType tableType = halfword ? BranchType.TableBranchHalfword : BranchType.TableBranchByte;

        Enqueue(tableType, rn, rm, InstName.Tbb);

        RegisterAllocator.EnsureTempGprRegisters(2);
        RegisterAllocator.MarkGprAsUsed((int)rn);
        RegisterAllocator.MarkGprAsUsed((int)rm);
    }

    /// <summary>
    /// Records a call through <paramref name="targetRegister"/>. One temporary GPR is requested
    /// only when the target register is LR; the target register and LR are marked as used.
    /// </summary>
    public void AddPendingIndirectCall(uint targetRegister, uint nextAddress)
    {
        Enqueue(BranchType.IndirectCall, targetRegister, nextAddress, InstName.BlxR);

        int tempCount = targetRegister == RegisterUtils.LrRegister ? 1 : 0;

        RegisterAllocator.EnsureTempGprRegisters(tempCount);
        RegisterAllocator.MarkGprAsUsed((int)targetRegister);
        RegisterAllocator.MarkGprAsUsed(RegisterUtils.LrRegister);
    }

    /// <summary>Records a synchronization point and requests one temporary GPR.</summary>
    public void AddPendingSyncPoint()
    {
        Enqueue(BranchType.SyncPoint, 0, 0, default);

        RegisterAllocator.EnsureTempGprRegisters(1);
    }

    /// <summary>Records a BKPT software interrupt at the current instruction address.</summary>
    public void AddPendingBkpt(uint imm) => EnqueueSoftwareInterrupt(InstName.Bkpt, imm);

    /// <summary>Records a SVC software interrupt at the current instruction address.</summary>
    public void AddPendingSvc(uint imm) => EnqueueSoftwareInterrupt(InstName.Svc, imm);

    /// <summary>Records a UDF software interrupt at the current instruction address.</summary>
    public void AddPendingUdf(uint imm) => EnqueueSoftwareInterrupt(InstName.Udf, imm);

    /// <summary>Records a CNTPCT read into the given register pair and requests one temporary GPR.</summary>
    public void AddPendingReadCntpct(uint rt, uint rt2)
    {
        Enqueue(BranchType.ReadCntpct, rt, rt2, InstName.Mrrc);

        RegisterAllocator.EnsureTempGprRegisters(1);
    }

    /// <summary>Returns every pending branch recorded so far, in insertion order.</summary>
    public IEnumerable<PendingBranch> GetPendingBranches() => _pendingBranches;

    /// <summary>
    /// Starts an IT block with the given conditions.
    /// </summary>
    /// <param name="conditions">Conditions of the block; the backing storage has room for 4 entries</param>
    public void SetItBlockStart(ReadOnlySpan<ArmCondition> conditions)
    {
        int count = conditions.Length;

        _itCount = count;

        for (int slot = 0; slot < count; slot++)
        {
            _itConditions[slot] = conditions[slot];
        }

        InITBlock = true;
    }

    /// <summary>
    /// Takes the next stored IT condition, if any remains.
    /// </summary>
    /// <param name="condition">The consumed condition, or <see cref="ArmCondition.Al"/> when none remains</param>
    /// <returns>True when a condition was consumed</returns>
    public bool ConsumeItCondition(out ArmCondition condition)
    {
        if (_itCount == 0)
        {
            condition = ArmCondition.Al;

            return false;
        }

        // Conditions are taken from the highest stored index downwards.
        _itCount--;
        condition = _itConditions[_itCount];

        return true;
    }

    /// <summary>Clears <see cref="InITBlock"/> once every IT condition has been consumed.</summary>
    public void UpdateItState()
    {
        if (_itCount != 0)
        {
            return;
        }

        InITBlock = false;
    }

    /// <summary>Raises the "NZCV modified" flag.</summary>
    public void SetNzcvModified() => _nzcvModified = true;

    /// <summary>
    /// Returns the "NZCV modified" flag and clears it.
    /// </summary>
    public bool ConsumeNzcvModified()
    {
        bool wasModified = _nzcvModified;

        _nzcvModified = false;

        return wasModified;
    }

    /// <summary>Appends a pending branch tagged with the current position of the code writer.</summary>
    private void Enqueue(BranchType type, uint first, uint second, InstName name)
    {
        _pendingBranches.Add(new PendingBranch(type, first, second, name, CodeWriter.InstructionPointer));
    }

    /// <summary>Appends a software interrupt entry and requests one temporary GPR.</summary>
    private void EnqueueSoftwareInterrupt(InstName name, uint imm)
    {
        Enqueue(BranchType.SoftwareInterrupt, imm, _instructionAddress, name);

        RegisterAllocator.EnsureTempGprRegisters(1);
    }
}
|
||||
}
|
546
src/Ryujinx.Cpu/LightningJit/Arm32/Decoder.cs
Normal file
546
src/Ryujinx.Cpu/LightningJit/Arm32/Decoder.cs
Normal file
@ -0,0 +1,546 @@
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
static class Decoder<T> where T : IInstEmit
|
||||
{
|
||||
/// <summary>
/// Decodes consecutive blocks starting at <paramref name="address"/>, then splits them at the
/// collected branch targets.
/// </summary>
/// <param name="cpuPreset">CPU preset used for instruction table lookups</param>
/// <param name="memoryManager">Memory manager the guest code is read from</param>
/// <param name="address">Guest address where decoding starts</param>
/// <param name="isThumb">True to decode Thumb code</param>
/// <returns>The decoded blocks wrapped in a <see cref="MultiBlock"/></returns>
public static MultiBlock DecodeMulti(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address, bool isThumb)
{
    List<Block> decodedBlocks = new List<Block>();
    List<ulong> knownTargets = new List<ulong>();

    ulong nextAddress = address;

    for (; ; )
    {
        Block current = Decode(cpuPreset, memoryManager, nextAddress, isThumb);
        bool truncated = current.IsTruncated;

        // The branch target is only looked up for blocks that were not truncated.
        if (!truncated && TryGetBranchTarget(current, out ulong targetAddress))
        {
            knownTargets.Add(targetAddress);
        }

        decodedBlocks.Add(current);

        if (truncated)
        {
            break;
        }

        if (!HasNextBlock(current, current.EndAddress - 4UL, knownTargets))
        {
            break;
        }

        nextAddress = current.EndAddress;
    }

    // SplitBlocks expects the targets in ascending order.
    knownTargets.Sort();
    SplitBlocks(decodedBlocks, knownTargets);

    return new MultiBlock(decodedBlocks);
}
|
||||
|
||||
/// <summary>
/// Tries to compute the target address of the branch that ends <paramref name="block"/>.
/// </summary>
/// <param name="block">Block whose last instruction is inspected; must contain at least one instruction</param>
/// <param name="targetAddress">Branch target address on success, zero otherwise</param>
/// <returns>True when the last instruction is a branch with a target known at decode time</returns>
private static bool TryGetBranchTarget(Block block, out ulong targetAddress)
{
    InstInfo lastInst = block.Instructions[^1];

    // The PC read by an instruction is its own address plus 8 in Arm mode and plus 4 in Thumb mode,
    // regardless of the instruction width. The end address is already one instruction past the last one:
    //  - Arm (4 bytes):        EndAddress = addr + 4, so PC = EndAddress + 4.
    //  - Thumb 16-bit (2 b):   EndAddress = addr + 2, so PC = EndAddress + 2.
    //  - Thumb 32-bit (4 b):   EndAddress = addr + 4, so PC = EndAddress.
    // Adding 2 unconditionally for Thumb would put the target of 32-bit Thumb branches 2 bytes too high.
    ulong pcBias;

    if (!block.IsThumb)
    {
        pcBias = 4UL;
    }
    else if (lastInst.Flags.HasFlag(InstFlags.Thumb16))
    {
        pcBias = 2UL;
    }
    else
    {
        pcBias = 0UL;
    }

    ulong pc = block.EndAddress + pcBias;

    return TryGetBranchTarget(lastInst.Name, lastInst.Flags, pc, lastInst.Encoding, block.IsThumb, out targetAddress);
}
|
||||
|
||||
/// <summary>
/// Tries to compute the target of a B or CBNZ instruction from its encoding.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags; <see cref="InstFlags.Thumb16"/> selects the 16-bit Thumb encodings</param>
/// <param name="pc">PC value the branch offset is relative to</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="isThumb">True when the instruction is a Thumb instruction</param>
/// <param name="targetAddress">Branch target address on success, zero otherwise</param>
/// <returns>True when <paramref name="name"/> is B or CBNZ</returns>
private static bool TryGetBranchTarget(InstName name, InstFlags flags, ulong pc, uint encoding, bool isThumb, out ulong targetAddress)
{
    int branchOffset;

    if (name == InstName.B)
    {
        if (!isThumb)
        {
            branchOffset = ImmUtils.ExtractSImm24Times4(encoding);
        }
        else if (flags.HasFlag(InstFlags.Thumb16))
        {
            // Bit 29 selects between the 11-bit immediate form and the conditional 8-bit immediate form.
            bool usesImm11Form = (encoding & (1u << 29)) != 0;

            if (usesImm11Form)
            {
                InstImm11b16w11 wideImm = new InstImm11b16w11(encoding);

                branchOffset = ImmUtils.ExtractT16SImm11Times2(wideImm.Imm11);
            }
            else
            {
                InstCondb24w4Imm8b16w8 condImm = new InstCondb24w4Imm8b16w8(encoding);

                branchOffset = ImmUtils.ExtractT16SImm8Times2(condImm.Imm8);
            }
        }
        else
        {
            // Bit 12 selects between the 24-bit immediate form and the conditional 20-bit immediate form.
            bool usesImm24Form = (encoding & (1u << 12)) != 0;

            if (usesImm24Form)
            {
                InstSb26w1Imm10b16w10J1b13w1J2b11w1Imm11b0w11 longForm = new InstSb26w1Imm10b16w10J1b13w1J2b11w1Imm11b0w11(encoding);

                branchOffset = ImmUtils.CombineSImm24Times2(longForm.Imm11, longForm.Imm10, longForm.J1, longForm.J2, longForm.S);
            }
            else
            {
                InstSb26w1Condb22w4Imm6b16w6J1b13w1J2b11w1Imm11b0w11 condForm = new InstSb26w1Condb22w4Imm6b16w6J1b13w1J2b11w1Imm11b0w11(encoding);

                branchOffset = ImmUtils.CombineSImm20Times2(condForm.Imm11, condForm.Imm6, condForm.J1, condForm.J2, condForm.S);
            }
        }
    }
    else if (name == InstName.Cbnz)
    {
        branchOffset = ImmUtils.ExtractT16UImm5Times2(encoding);
    }
    else
    {
        targetAddress = 0;

        return false;
    }

    // The offset is signed; the cast to ulong relies on wrap-around addition for negative offsets.
    targetAddress = pc + (ulong)branchOffset;
    Debug.Assert((targetAddress & 1) == 0);

    return true;
}
|
||||
|
||||
/// <summary>
/// Splits the blocks so that every branch target that falls inside a block becomes the start of a block.
/// </summary>
/// <param name="blocks">Blocks to split; modified in place</param>
/// <param name="branchTargets">Branch target addresses; the caller passes them sorted in ascending order</param>
private static void SplitBlocks(List<Block> blocks, List<ulong> branchTargets)
{
    int targetIndex = 0;

    while (targetIndex < branchTargets.Count)
    {
        for (int blockIndex = 0; blockIndex < blocks.Count && targetIndex < branchTargets.Count; blockIndex++)
        {
            Block current = blocks[blockIndex];
            ulong target = branchTargets[targetIndex];

            // Keep consuming targets for as long as they land inside the current block.
            while (target >= current.Address && target < current.EndAddress)
            {
                if (current.Address != target)
                {
                    (Block left, Block right) = current.SplitAtAddress(target);

                    if (left is null || right is null)
                    {
                        // Split can only fail in thumb mode, where the instruction size is not fixed.
                        Debug.Assert(current.IsThumb);
                    }
                    else
                    {
                        // Replace the block with its two halves; the right half is visited
                        // by the next iteration of the enclosing loop.
                        blocks.Insert(blockIndex, left);
                        blocks[blockIndex + 1] = right;

                        current = left;
                    }
                }

                // Move past this target and any duplicates of it.
                do
                {
                    targetIndex++;
                }
                while (targetIndex < branchTargets.Count && branchTargets[targetIndex] == target);

                if (targetIndex >= branchTargets.Count)
                {
                    break;
                }

                target = branchTargets[targetIndex];
            }
        }

        // The current target was not matched during this pass over the blocks; skip it.
        Debug.Assert(targetIndex < int.MaxValue);
        targetIndex++;
    }
}
|
||||
|
||||
/// <summary>
/// Decides whether decoding should continue with the block that follows <paramref name="block"/>.
/// </summary>
/// <param name="block">Block that was just decoded; must contain at least one instruction</param>
/// <param name="pc">Reference address; the caller passes the block end address minus 4</param>
/// <param name="branchTargets">Branch targets collected so far</param>
/// <returns>True when the following block should also be decoded</returns>
private static bool HasNextBlock(in Block block, ulong pc, List<ulong> branchTargets)
{
    InstInfo lastInst = block.Instructions[^1];
    InstFlags lastInstFlags = lastInst.Flags;

    if (lastInstFlags.HasFlag(InstFlags.Cond))
    {
        // Thumb has separate encodings for conditional and unconditional branch instructions.
        bool isConditional = block.IsThumb || (ArmCondition)(lastInst.Encoding >> 28) < ArmCondition.Al;

        if (isConditional)
        {
            return true;
        }
    }

    InstName lastInstName = lastInst.Name;

    if (lastInstName == InstName.B)
    {
        if (branchTargets.Contains(pc + 4UL))
        {
            return true;
        }

        // Also continue when the branch lands at or shortly after (less than 0x1000 bytes) the reference address.
        return TryGetBranchTarget(block, out ulong targetAddress) && targetAddress >= pc && targetAddress < pc + 0x1000;
    }

    if (lastInstName is InstName.Bx or InstName.Bxj)
    {
        return branchTargets.Contains(pc + 4UL);
    }

    if (lastInstName is InstName.Cbnz or InstName.BlI or InstName.BlxR)
    {
        return true;
    }

    if (WritesToPC(lastInst.Encoding, lastInstName, lastInstFlags, block.IsThumb))
    {
        return branchTargets.Contains(pc + 4UL);
    }

    return !block.EndsWithBranch;
}
|
||||
|
||||
/// <summary>
/// Decodes instructions starting at <paramref name="address"/> until a control flow instruction
/// or an unmapped address is found.
/// </summary>
/// <param name="cpuPreset">CPU preset used for instruction table lookups</param>
/// <param name="memoryManager">Memory manager the guest code is read from</param>
/// <param name="address">Guest address of the first instruction</param>
/// <param name="isThumb">True to decode Thumb code</param>
/// <returns>The decoded block; it is marked as truncated when an unmapped address stopped the decoding</returns>
private static Block Decode(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address, bool isThumb)
{
    ulong blockStart = address;

    List<InstInfo> decoded = new List<InstInfo>();

    uint encoding;
    InstMeta meta;
    InstFlags extraFlags = InstFlags.None;
    bool hasHostCall = false;
    bool isTruncated = false;

    while (true)
    {
        if (!memoryManager.IsMapped(address))
        {
            encoding = 0;
            meta = default;
            isTruncated = true;

            break;
        }

        if (!isThumb)
        {
            encoding = memoryManager.Read<uint>(address);
            address += 4UL;

            meta = InstTableA32<T>.GetMeta(encoding, cpuPreset.Version, cpuPreset.Features);
        }
        else
        {
            // The first halfword goes in the upper 16 bits; it is tried as a 16-bit instruction first.
            encoding = (uint)memoryManager.Read<ushort>(address) << 16;
            address += 2UL;

            extraFlags = InstFlags.Thumb16;

            if (!InstTableT16<T>.TryGetMeta(encoding, cpuPreset.Version, cpuPreset.Features, out meta))
            {
                // NOTE(review): the second halfword is read without an IsMapped check; it may live on
                // a different page than the first halfword. Confirm how Read behaves in that case.
                encoding |= memoryManager.Read<ushort>(address);

                if (InstTableT32<T>.TryGetMeta(encoding, cpuPreset.Version, cpuPreset.Features, out meta))
                {
                    address += 2UL;
                    extraFlags = InstFlags.None;
                }
            }
        }

        if (meta.Name.IsSystemOrCall() && !hasHostCall)
        {
            hasHostCall = meta.Name.IsCall() || InstEmitSystem.NeedsCall(meta.Name);
        }

        InstFlags instFlags = meta.Flags | extraFlags;

        decoded.Add(new InstInfo(encoding, meta.Name, meta.EmitFunc, instFlags));

        if (IsControlFlow(encoding, meta.Name, instFlags, isThumb))
        {
            break;
        }
    }

    // A block ending with a backwards branch is flagged as a loop end and as having a host call.
    bool isLoopEnd = !isTruncated && IsBackwardsBranch(meta.Name, encoding);

    if (isLoopEnd)
    {
        hasHostCall = true;
    }

    return new Block(
        address: blockStart,
        endAddress: address,
        instructions: decoded,
        endsWithBranch: !isTruncated,
        hasHostCall: hasHostCall,
        isTruncated: isTruncated,
        isLoopEnd: isLoopEnd,
        isThumb: isThumb);
}
|
||||
|
||||
/// <summary>
/// Checks whether an instruction ends a block: it is either one of the branch instructions
/// listed below, or it writes to the PC register.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags</param>
/// <param name="isThumb">True when the instruction is a Thumb instruction</param>
/// <returns>True when the instruction is a control flow instruction</returns>
private static bool IsControlFlow(uint encoding, InstName name, InstFlags flags, bool isThumb)
{
    bool isBranchInstruction = name is
        InstName.B or
        InstName.BlI or
        InstName.BlxR or
        InstName.Bx or
        InstName.Bxj or
        InstName.Cbnz or
        InstName.Tbb;

    if (isBranchInstruction)
    {
        return true;
    }

    return WritesToPC(encoding, name, flags, isThumb);
}
|
||||
|
||||
/// <summary>
/// Checks whether the PC register is part of the register write mask of an instruction.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags</param>
/// <param name="isThumb">True when the instruction is a Thumb instruction</param>
/// <returns>True when the instruction writes to PC</returns>
public static bool WritesToPC(uint encoding, InstName name, InstFlags flags, bool isThumb)
{
    uint writtenRegisters = GetRegisterWriteMask(encoding, name, flags, isThumb);
    uint pcBit = 1u << RegisterUtils.PcRegister;

    return (writtenRegisters & pcBit) != 0;
}
|
||||
|
||||
/// <summary>
/// Builds a bit mask of the general purpose registers written by an instruction.
/// Bit N of the result is set when register N is written.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="name">Decoded instruction name</param>
/// <param name="flags">Instruction flags, describing which register fields the encoding has</param>
/// <param name="isThumb">True if the instruction is a Thumb instruction, false for A32</param>
/// <returns>Mask of written registers</returns>
private static uint GetRegisterWriteMask(uint encoding, InstName name, InstFlags flags, bool isThumb)
{
    uint written = 0;

    if (!isThumb)
    {
        // A32 encodings.
        if (flags.HasFlag(InstFlags.Rd))
        {
            written |= 1u << RegisterUtils.ExtractRd(flags, encoding);
        }

        if (flags.HasFlag(InstFlags.RdHi))
        {
            written |= 1u << RegisterUtils.ExtractRdHi(encoding);
        }

        if (IsRegisterWrite(flags, InstFlags.Rt) && IsRtWrite(name, encoding) && !IsR15RtEncodingSpecial(name, encoding))
        {
            written |= 1u << RegisterUtils.ExtractRt(encoding);
        }

        if (IsRegisterWrite(flags, InstFlags.Rt2) && IsRtWrite(name, encoding))
        {
            written |= 1u << RegisterUtils.ExtractRt2(encoding);
        }

        if (IsRegisterWrite(flags, InstFlags.Rlist))
        {
            // The low 16 bits of the encoding are the register list.
            written |= (ushort)encoding;
        }

        if (flags.HasFlag(InstFlags.WBack) && HasWriteBack(name, encoding))
        {
            written |= 1u << RegisterUtils.ExtractRn(encoding);
        }

        return written;
    }

    if (!flags.HasFlag(InstFlags.Thumb16))
    {
        // 32-bit Thumb encodings.
        if (flags.HasFlag(InstFlags.Rd))
        {
            written |= 1u << RegisterUtils.ExtractRdT32(flags, encoding);
        }

        if (flags.HasFlag(InstFlags.RdLo))
        {
            written |= 1u << RegisterUtils.ExtractRdLoT32(encoding);
        }

        if (flags.HasFlag(InstFlags.RdHi))
        {
            written |= 1u << RegisterUtils.ExtractRdHiT32(encoding);
        }

        if (IsRegisterWrite(flags, InstFlags.Rt) && IsRtWrite(name, encoding) && !IsR15RtEncodingSpecial(name, encoding))
        {
            written |= 1u << RegisterUtils.ExtractRtT32(encoding);
        }

        if (IsRegisterWrite(flags, InstFlags.Rt2) && IsRtWrite(name, encoding))
        {
            written |= 1u << RegisterUtils.ExtractRt2T32(encoding);
        }

        if (IsRegisterWrite(flags, InstFlags.Rlist))
        {
            // The low 16 bits of the encoding are the register list.
            written |= (ushort)encoding;
        }

        if (flags.HasFlag(InstFlags.WBack) && HasWriteBackT32(name, encoding))
        {
            written |= 1u << RegisterUtils.ExtractRn(encoding); // This is at the same bit position as A32.
        }

        return written;
    }

    // 16-bit Thumb encodings.
    if (flags.HasFlag(InstFlags.Rdn))
    {
        written |= 1u << RegisterUtils.ExtractRdn(flags, encoding);
    }

    if (flags.HasFlag(InstFlags.Rd))
    {
        written |= 1u << RegisterUtils.ExtractRdT16(flags, encoding);
    }

    Debug.Assert(!flags.HasFlag(InstFlags.RdHi));

    if (IsRegisterWrite(flags, InstFlags.Rt))
    {
        written |= 1u << RegisterUtils.ExtractRtT16(flags, encoding);
    }

    Debug.Assert(!flags.HasFlag(InstFlags.Rt2));

    if (IsRegisterWrite(flags, InstFlags.Rlist))
    {
        // The 8-bit register list is read from bits 16-23 of the encoding.
        written |= (byte)(encoding >> 16);

        // Bit 24 of the encoding selects one extra register that is not part of the 8-bit list.
        // NOTE(review): the T16 table registers Push with RlistRead, for which IsRegisterWrite
        // returns false, so the Push case looks unreachable through that table entry - confirm.
        if (name == InstName.Push)
        {
            written |= (encoding >> 10) & 0x4000; // LR
        }
        else if (name == InstName.Pop)
        {
            written |= (encoding >> 9) & 0x8000; // PC
        }
    }

    Debug.Assert(!flags.HasFlag(InstFlags.WBack));

    return written;
}
|
||||
|
||||
/// <summary>
/// Checks whether an instruction with an Rt field actually writes to that register.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if Rt is written, false if it is only read</returns>
private static bool IsRtWrite(InstName name, uint encoding)
{
    // Some instructions can move GPR to FP/SIMD or FP/SIMD to GPR depending on the encoding.
    // For those, bit 20 of the encoding decides whether the GPR is the destination.
    bool directionDependsOnEncoding = name is InstName.VmovD
        or InstName.VmovH
        or InstName.VmovS
        or InstName.VmovSs;

    if (!directionDependsOnEncoding)
    {
        return true;
    }

    return (encoding & (1u << 20)) != 0;
}
|
||||
|
||||
/// <summary>
/// Checks whether an A32 memory instruction writes the updated address back to the base register.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if the base register is written back</returns>
private static bool HasWriteBack(InstName name, uint encoding)
{
    const uint WBit = 1u << 21;
    const uint PBit = 1u << 24;

    bool wSet = (encoding & WBit) != 0;

    if (IsLoadStoreMultiple(name))
    {
        // Load/store multiple only looks at bit 21.
        return wSet;
    }

    if (IsVLDnVSTn(name))
    {
        // For the VLDn/VSTn family, a low nibble equal to the PC register number means no write back.
        return (encoding & 0xf) != RegisterUtils.PcRegister;
    }

    bool pSet = (encoding & PBit) != 0;

    // Write back happens unless bit 24 is set while bit 21 is clear.
    return wSet || !pSet;
}
|
||||
|
||||
/// <summary>
/// Checks whether a 32-bit Thumb memory instruction writes the updated address back to the base register.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if the base register is written back</returns>
private static bool HasWriteBackT32(InstName name, uint encoding)
{
    // Load/store multiple uses bit 21, every other non-VLDn/VSTn instruction uses bit 8.
    uint writeBackBit = IsLoadStoreMultiple(name) ? 1u << 21 : 1u << 8;

    if (!IsLoadStoreMultiple(name) && IsVLDnVSTn(name))
    {
        // For the VLDn/VSTn family, a low nibble equal to the PC register number means no write back.
        return (encoding & 0xf) != RegisterUtils.PcRegister;
    }

    return (encoding & writeBackBit) != 0;
}
|
||||
|
||||
/// <summary>
/// Checks whether the instruction belongs to the load/store multiple group
/// (LDM/STM variants, FLDMX/FSTMX and VLDM/VSTM).
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <returns>True if the instruction is a load/store multiple</returns>
private static bool IsLoadStoreMultiple(InstName name)
{
    return name switch
    {
        InstName.Ldm or
        InstName.Ldmda or
        InstName.Ldmdb or
        InstName.LdmE or
        InstName.Ldmib or
        InstName.LdmU or
        InstName.Stm or
        InstName.Stmda or
        InstName.Stmdb or
        InstName.Stmib or
        InstName.StmU or
        InstName.Fldmx or
        InstName.Fstmx or
        InstName.Vldm or
        InstName.Vstm => true,
        _ => false,
    };
}
|
||||
|
||||
/// <summary>
/// Checks whether the instruction is one of the VLD1-VLD4 or VST1-VST4 variants.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <returns>True if the instruction is a VLDn or VSTn instruction</returns>
private static bool IsVLDnVSTn(InstName name)
{
    return name switch
    {
        InstName.Vld11 or InstName.Vld1A or InstName.Vld1M or
        InstName.Vld21 or InstName.Vld2A or InstName.Vld2M or
        InstName.Vld31 or InstName.Vld3A or InstName.Vld3M or
        InstName.Vld41 or InstName.Vld4A or InstName.Vld4M or
        InstName.Vst11 or InstName.Vst1M or
        InstName.Vst21 or InstName.Vst2M or
        InstName.Vst31 or InstName.Vst3M or
        InstName.Vst41 or InstName.Vst4M => true,
        _ => false,
    };
}
|
||||
|
||||
/// <summary>
/// Checks whether the Rt field of the instruction should be excluded from the register write mask.
/// This is only ever true for VMRS, when bits 16-19 of the encoding are equal to 1.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if Rt must not be treated as a regular register write</returns>
private static bool IsR15RtEncodingSpecial(InstName name, uint encoding)
{
    uint field = (encoding >> 16) & 0xf;

    return name == InstName.Vmrs && field == 1;
}
|
||||
|
||||
/// <summary>
/// Checks whether a register field is present on the instruction and is written, not just read.
/// </summary>
/// <param name="flags">Instruction flags</param>
/// <param name="testFlag">Flag of the register field to test</param>
/// <returns>True if <paramref name="testFlag"/> is set and <see cref="InstFlags.ReadRd"/> is not</returns>
private static bool IsRegisterWrite(InstFlags flags, InstFlags testFlag)
{
    if (flags.HasFlag(InstFlags.ReadRd))
    {
        // The register is only read by the instruction.
        return false;
    }

    return flags.HasFlag(testFlag);
}
|
||||
|
||||
/// <summary>
/// Checks whether the instruction is a B instruction with a negative branch offset.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True for a B instruction whose 24-bit immediate is negative, false otherwise</returns>
private static bool IsBackwardsBranch(InstName name, uint encoding)
{
    // NOTE(review): the offset is always decoded with the 24-bit immediate layout (low 24 bits
    // of the encoding), and no Thumb flag is passed in - confirm this is intended for Thumb B.
    return name == InstName.B && ImmUtils.ExtractSImm24Times4(encoding) < 0;
}
|
||||
}
|
||||
}
|
1231
src/Ryujinx.Cpu/LightningJit/Arm32/IInstEmit.cs
Normal file
1231
src/Ryujinx.Cpu/LightningJit/Arm32/IInstEmit.cs
Normal file
File diff suppressed because it is too large
Load Diff
137
src/Ryujinx.Cpu/LightningJit/Arm32/ImmUtils.cs
Normal file
137
src/Ryujinx.Cpu/LightningJit/Arm32/ImmUtils.cs
Normal file
@ -0,0 +1,137 @@
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Helpers to expand, combine and extract immediate values from Arm32 instruction fields.
/// </summary>
static class ImmUtils
{
    /// <summary>
    /// Expands a 12-bit modified immediate: the low 8 bits rotated right by twice the value of bits 8 and above.
    /// </summary>
    /// <param name="imm">Immediate field (8-bit value on the low bits, rotation above)</param>
    /// <returns>Expanded 32-bit value</returns>
    public static uint ExpandImm(uint imm)
    {
        return BitOperations.RotateRight(imm & 0xFFu, (int)(imm >> 8) * 2);
    }

    /// <summary>
    /// Checks whether <see cref="ExpandImm(uint)"/> uses a non-zero rotation field for the given immediate.
    /// </summary>
    /// <param name="imm">Immediate field</param>
    /// <returns>True if the rotation field is not zero</returns>
    public static bool ExpandedImmRotated(uint imm)
    {
        return (imm >> 8) != 0;
    }

    /// <summary>
    /// Expands a Thumb modified immediate made of the imm8, imm3 and i fields.
    /// </summary>
    /// <param name="imm8">Bits 0-7 of the 12-bit immediate</param>
    /// <param name="imm3">Bits 8-10 of the 12-bit immediate</param>
    /// <param name="i">Bit 11 of the 12-bit immediate</param>
    /// <returns>Expanded 32-bit value</returns>
    public static uint ExpandImm(uint imm8, uint imm3, uint i)
    {
        uint imm = CombineImmU12(imm8, imm3, i);
        uint low = imm & 0xFFu;

        if (imm >> 10 == 0)
        {
            // Top two bits clear: the byte is replicated according to bits 8-9, no rotation.
            return ((imm >> 8) & 3) switch
            {
                0 => low,
                1 => low * 0x00010001u,
                2 => low * 0x01000100u,
                3 => low * 0x01010101u,
                _ => 0,
            };
        }

        // Otherwise bit 7 is forced to 1 and the value is rotated right by bits 7-11.
        return BitOperations.RotateRight(0x80u | low, (int)(imm >> 7));
    }

    /// <summary>
    /// Checks whether <see cref="ExpandImm(uint, uint, uint)"/> takes the rotated path for the given fields.
    /// </summary>
    /// <param name="imm8">Bits 0-7 of the 12-bit immediate</param>
    /// <param name="imm3">Bits 8-10 of the 12-bit immediate</param>
    /// <param name="i">Bit 11 of the 12-bit immediate</param>
    /// <returns>True if the immediate is expanded with a rotation</returns>
    public static bool ExpandedImmRotated(uint imm8, uint imm3, uint i)
    {
        uint imm = CombineImmU12(imm8, imm3, i);

        // Must use the same condition as ExpandImm: the value is only rotated when
        // either of the top two bits (10-11) is set.
        return (imm >> 10) != 0;
    }

    /// <summary>Combines a 2-bit low field and a 3-bit high field into a 5-bit immediate.</summary>
    public static uint CombineImmU5(uint imm2, uint imm3)
    {
        return imm2 | (imm3 << 2);
    }

    /// <summary>Combines a 1-bit low field and a 4-bit high field into a 5-bit immediate.</summary>
    public static uint CombineImmU5IImm4(uint i, uint imm4)
    {
        return i | (imm4 << 1);
    }

    /// <summary>Combines two 4-bit fields into an 8-bit immediate.</summary>
    public static uint CombineImmU8(uint imm4l, uint imm4h)
    {
        return imm4l | (imm4h << 4);
    }

    /// <summary>Combines the imm4 (bits 0-3), imm3 (bits 4-6) and i (bit 7) fields into an 8-bit immediate.</summary>
    public static uint CombineImmU8(uint imm4, uint imm3, uint i)
    {
        return imm4 | (imm3 << 4) | (i << 7);
    }

    /// <summary>Combines the imm8 (bits 0-7), imm3 (bits 8-10) and i (bit 11) fields into a 12-bit immediate.</summary>
    public static uint CombineImmU12(uint imm8, uint imm3, uint i)
    {
        return imm8 | (imm3 << 8) | (i << 11);
    }

    /// <summary>Combines a 12-bit low field and a 4-bit high field into a 16-bit immediate.</summary>
    public static uint CombineImmU16(uint imm12, uint imm4)
    {
        return imm12 | (imm4 << 12);
    }

    /// <summary>Combines the imm8, imm3, i and imm4 fields (from low to high bits) into a 16-bit immediate.</summary>
    public static uint CombineImmU16(uint imm8, uint imm3, uint i, uint imm4)
    {
        return imm8 | (imm3 << 8) | (i << 11) | (imm4 << 12);
    }

    /// <summary>
    /// Builds the signed 20-bit immediate s:j2:j1:imm6:imm11 and returns it sign extended and multiplied by 2.
    /// </summary>
    /// <returns>Signed offset, in bytes</returns>
    public static int CombineSImm20Times2(uint imm11, uint imm6, uint j1, uint j2, uint s)
    {
        int imm32 = (int)(imm11 | (imm6 << 11) | (j1 << 17) | (j2 << 18) | (s << 19));

        // Move the sign bit (bit 19) to bit 31, then shift back leaving one extra zero bit (times 2).
        return (imm32 << 12) >> 11;
    }

    /// <summary>
    /// Builds the signed 24-bit immediate s:i1:i2:imm10:imm11, where i1 = NOT(j1 XOR s) and i2 = NOT(j2 XOR s),
    /// and returns it sign extended and multiplied by 2.
    /// </summary>
    /// <returns>Signed offset, in bytes</returns>
    public static int CombineSImm24Times2(uint imm11, uint imm10, uint j1, uint j2, uint s)
    {
        uint i1 = j1 ^ s ^ 1;
        uint i2 = j2 ^ s ^ 1;

        int imm32 = (int)(imm11 | (imm10 << 11) | (i2 << 21) | (i1 << 22) | (s << 23));

        return (imm32 << 8) >> 7;
    }

    /// <summary>
    /// Builds the signed 23-bit immediate s:i1:i2:imm10H:imm10L, where i1 = NOT(j1 XOR s) and i2 = NOT(j2 XOR s),
    /// and returns it sign extended and multiplied by 4.
    /// </summary>
    /// <returns>Signed offset, in bytes</returns>
    public static int CombineSImm24Times4(uint imm10L, uint imm10H, uint j1, uint j2, uint s)
    {
        uint i1 = j1 ^ s ^ 1;
        uint i2 = j2 ^ s ^ 1;

        int imm32 = (int)(imm10L | (imm10H << 10) | (i2 << 20) | (i1 << 21) | (s << 22));

        return (imm32 << 9) >> 7;
    }

    /// <summary>Combines a register list with the m bit, placed at bit 14.</summary>
    public static uint CombineRegisterList(uint registerList, uint m)
    {
        return registerList | (m << 14);
    }

    /// <summary>Combines a register list with the m bit, placed at bit 14, and the p bit, placed at bit 15.</summary>
    public static uint CombineRegisterList(uint registerList, uint m, uint p)
    {
        return registerList | (m << 14) | (p << 15);
    }

    /// <summary>Sign extends the low 24 bits of the value and multiplies the result by 4.</summary>
    public static int ExtractSImm24Times4(uint encoding)
    {
        return (int)(encoding << 8) >> 6;
    }

    /// <summary>Extracts the 5-bit unsigned field at bits 19-23 of the value, multiplied by 2.</summary>
    public static int ExtractT16UImm5Times2(uint encoding)
    {
        return (int)(encoding >> 18) & 0x3e;
    }

    /// <summary>Sign extends the low 8 bits of the value and multiplies the result by 2.</summary>
    public static int ExtractT16SImm8Times2(uint encoding)
    {
        return (int)(encoding << 24) >> 23;
    }

    /// <summary>Sign extends the low 11 bits of the value and multiplies the result by 2.</summary>
    public static int ExtractT16SImm11Times2(uint encoding)
    {
        return (int)(encoding << 21) >> 20;
    }
}
|
||||
}
|
2927
src/Ryujinx.Cpu/LightningJit/Arm32/InstDecoders.cs
Normal file
2927
src/Ryujinx.Cpu/LightningJit/Arm32/InstDecoders.cs
Normal file
File diff suppressed because it is too large
Load Diff
63
src/Ryujinx.Cpu/LightningJit/Arm32/InstFlags.cs
Normal file
63
src/Ryujinx.Cpu/LightningJit/Arm32/InstFlags.cs
Normal file
@ -0,0 +1,63 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Flags describing the register fields present on an instruction encoding, and how they are used.
/// The first group holds the individual bits, the remaining members are combinations of them.
/// </summary>
[Flags]
enum InstFlags
{
    None = 0,

    // Individual bits.
    Cond = 1 << 0,
    Rd = 1 << 1,
    RdLo = 1 << 2,
    RdHi = 1 << 3,
    Rdn = 1 << 4,
    Dn = 1 << 5,
    Rt = 1 << 6,
    Rt2 = 1 << 7,
    Rlist = 1 << 8,
    Rd16 = 1 << 9,
    ReadRd = 1 << 10,
    WBack = 1 << 11,
    Thumb16 = 1 << 12,

    // Combinations without condition.
    RdnDn = Rdn | Dn,
    RdRd16 = Rd | Rd16,
    RtRt2 = Rt | Rt2,
    RdLoRdHi = RdLo | RdHi,
    RdLoHi = Rd | RdHi,
    RdRtRead = Rd | RtRead,
    RdRtReadRd16 = RdRtRead | Rd16,
    RdRt2Read = Rd | Rt2Read,
    RdRt2ReadRd16 = RdRt2Read | Rd16,
    RtRd16 = Rt | Rd16,
    RtWBack = Rt | WBack,
    Rt2WBack = Rt2 | RtWBack,
    RtRead = Rt | ReadRd,
    RtReadRd16 = RtRead | Rd16,
    Rt2Read = Rt2 | RtRead,
    RtReadWBack = RtRead | WBack,
    Rt2ReadWBack = Rt2 | RtReadWBack,
    RlistWBack = Rlist | WBack,
    RlistRead = Rlist | ReadRd,
    RlistReadWBack = RlistRead | WBack,

    // Same combinations, with the condition bit added.
    CondRd = Cond | Rd,
    CondRdLoHi = Cond | RdLoHi,
    CondRt = Cond | Rt,
    CondRt2 = Cond | RtRt2,
    CondRd16 = Cond | RdRd16,
    CondWBack = Cond | WBack,
    CondRdRtRead = Cond | RdRtRead,
    CondRdRt2Read = Cond | RdRt2Read,
    CondRtWBack = Cond | RtWBack,
    CondRt2WBack = Cond | Rt2WBack,
    CondRtRead = Cond | RtRead,
    CondRt2Read = Cond | Rt2Read,
    CondRtReadWBack = Cond | RtReadWBack,
    CondRt2ReadWBack = Cond | Rt2ReadWBack,
    CondRlist = Cond | Rlist,
    CondRlistWBack = Cond | RlistWBack,
    CondRlistRead = Cond | RlistRead,
    CondRlistReadWBack = Cond | RlistReadWBack,
}
|
||||
}
|
20
src/Ryujinx.Cpu/LightningJit/Arm32/InstInfo.cs
Normal file
20
src/Ryujinx.Cpu/LightningJit/Arm32/InstInfo.cs
Normal file
@ -0,0 +1,20 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Immutable record of a decoded instruction: its encoding, name, emit function and flags.
/// </summary>
readonly struct InstInfo
{
    /// <summary>Instruction encoding.</summary>
    public readonly uint Encoding;

    /// <summary>Instruction name.</summary>
    public readonly InstName Name;

    /// <summary>Function stored for the instruction, taking the code generation context and the encoding.</summary>
    public readonly Action<CodeGenContext, uint> EmitFunc;

    /// <summary>Instruction flags.</summary>
    public readonly InstFlags Flags;

    /// <summary>
    /// Creates a new instruction record from the given values.
    /// </summary>
    /// <param name="encoding">Instruction encoding</param>
    /// <param name="name">Instruction name</param>
    /// <param name="emitFunc">Emit function for the instruction</param>
    /// <param name="flags">Instruction flags</param>
    public InstInfo(uint encoding, InstName name, Action<CodeGenContext, uint> emitFunc, InstFlags flags)
    {
        (Encoding, Name, EmitFunc, Flags) = (encoding, name, emitFunc, flags);
    }
}
|
||||
}
|
79
src/Ryujinx.Cpu/LightningJit/Arm32/InstInfoForTable.cs
Normal file
79
src/Ryujinx.Cpu/LightningJit/Arm32/InstInfoForTable.cs
Normal file
@ -0,0 +1,79 @@
|
||||
using Ryujinx.Cpu.LightningJit.Table;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Instruction table entry: an encoding pattern with its mask, optional constraints,
/// and the metadata of the instruction it matches.
/// </summary>
readonly struct InstInfoForTable : IInstInfo
{
    /// <summary>Encoding bits of the pattern.</summary>
    public uint Encoding { get; }

    /// <summary>Mask selecting the bits of <see cref="Encoding"/> that are part of the pattern.</summary>
    public uint EncodingMask { get; }

    /// <summary>Encodings tested by <see cref="IsConstrained"/>, or null if there are none.</summary>
    public InstEncoding[] Constraints { get; }

    /// <summary>Metadata of the instruction.</summary>
    public InstMeta Meta { get; }

    /// <summary>ISA version stored on <see cref="Meta"/>.</summary>
    public IsaVersion Version => Meta.Version;

    /// <summary>ISA feature stored on <see cref="Meta"/>.</summary>
    public IsaFeature Feature => Meta.Feature;

    /// <summary>
    /// Creates a table entry with constraints and an ISA feature.
    /// </summary>
    public InstInfoForTable(
        uint encoding,
        uint encodingMask,
        InstEncoding[] constraints,
        InstName name,
        Action<CodeGenContext, uint> emitFunc,
        IsaVersion isaVersion,
        IsaFeature isaFeature,
        InstFlags flags)
    {
        Meta = new InstMeta(name, emitFunc, isaVersion, isaFeature, flags);
        Constraints = constraints;
        EncodingMask = encodingMask;
        Encoding = encoding;
    }

    /// <summary>
    /// Creates a table entry with constraints, using <see cref="IsaFeature.None"/> as feature.
    /// </summary>
    public InstInfoForTable(
        uint encoding,
        uint encodingMask,
        InstEncoding[] constraints,
        InstName name,
        Action<CodeGenContext, uint> emitFunc,
        IsaVersion isaVersion,
        InstFlags flags)
        : this(encoding, encodingMask, constraints, name, emitFunc, isaVersion, IsaFeature.None, flags)
    {
    }

    /// <summary>
    /// Creates a table entry without constraints, with an ISA feature.
    /// </summary>
    public InstInfoForTable(
        uint encoding,
        uint encodingMask,
        InstName name,
        Action<CodeGenContext, uint> emitFunc,
        IsaVersion isaVersion,
        IsaFeature isaFeature,
        InstFlags flags)
        : this(encoding, encodingMask, null, name, emitFunc, isaVersion, isaFeature, flags)
    {
    }

    /// <summary>
    /// Creates a table entry without constraints, using <see cref="IsaFeature.None"/> as feature.
    /// </summary>
    public InstInfoForTable(
        uint encoding,
        uint encodingMask,
        InstName name,
        Action<CodeGenContext, uint> emitFunc,
        IsaVersion isaVersion,
        InstFlags flags)
        : this(encoding, encodingMask, null, name, emitFunc, isaVersion, IsaFeature.None, flags)
    {
    }

    /// <summary>
    /// Checks whether the given encoding matches any of the entry's constraints.
    /// </summary>
    /// <param name="encoding">Instruction encoding to test</param>
    /// <returns>True if at least one constraint matches, false if none match or there are no constraints</returns>
    public bool IsConstrained(uint encoding)
    {
        InstEncoding[] constraints = Constraints;

        if (constraints == null)
        {
            return false;
        }

        for (int index = 0; index < constraints.Length; index++)
        {
            InstEncoding candidate = constraints[index];

            if ((encoding & candidate.EncodingMask) == candidate.Encoding)
            {
                return true;
            }
        }

        return false;
    }
}
|
||||
}
|
22
src/Ryujinx.Cpu/LightningJit/Arm32/InstMeta.cs
Normal file
22
src/Ryujinx.Cpu/LightningJit/Arm32/InstMeta.cs
Normal file
@ -0,0 +1,22 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Immutable instruction metadata: name, emit function, ISA version, ISA feature and flags.
/// </summary>
readonly struct InstMeta
{
    /// <summary>Instruction name.</summary>
    public readonly InstName Name;

    /// <summary>Function stored for the instruction, taking the code generation context and the encoding.</summary>
    public readonly Action<CodeGenContext, uint> EmitFunc;

    /// <summary>ISA version associated with the instruction.</summary>
    public readonly IsaVersion Version;

    /// <summary>ISA feature associated with the instruction.</summary>
    public readonly IsaFeature Feature;

    /// <summary>Instruction flags.</summary>
    public readonly InstFlags Flags;

    /// <summary>
    /// Creates new instruction metadata from the given values.
    /// </summary>
    /// <param name="name">Instruction name</param>
    /// <param name="emitFunc">Emit function for the instruction</param>
    /// <param name="isaVersion">ISA version</param>
    /// <param name="isaFeature">ISA feature</param>
    /// <param name="flags">Instruction flags</param>
    public InstMeta(InstName name, Action<CodeGenContext, uint> emitFunc, IsaVersion isaVersion, IsaFeature isaFeature, InstFlags flags)
    {
        (Name, EmitFunc) = (name, emitFunc);
        (Version, Feature) = (isaVersion, isaFeature);
        Flags = flags;
    }
}
|
||||
}
|
562
src/Ryujinx.Cpu/LightningJit/Arm32/InstName.cs
Normal file
562
src/Ryujinx.Cpu/LightningJit/Arm32/InstName.cs
Normal file
@ -0,0 +1,562 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Names of all the Arm32 instructions present on the instruction tables.
/// Members have implicit values, so the declaration order must be preserved.
/// </summary>
enum InstName
{
    // A
    AdcI, AdcR, AdcRr,
    AddI, AddR, AddRr, AddSpI, AddSpR,
    Adr,
    Aesd, Aese, Aesimc, Aesmc,
    AndI, AndR, AndRr,

    // B
    B,
    Bfc, Bfi,
    BicI, BicR, BicRr,
    Bkpt,
    BlxR, BlI, Bx, Bxj,

    // C
    Cbnz,
    Clrbhb, Clrex, Clz,
    CmnI, CmnR, CmnRr,
    CmpI, CmpR, CmpRr,
    Cps,
    Crc32, Crc32c,
    Csdb,

    // D
    Dbg,
    Dcps1, Dcps2, Dcps3,
    Dmb, Dsb,

    // E
    EorI, EorR, EorRr,
    Eret, Esb,

    // F
    Fldmx, Fstmx,

    // H
    Hlt, Hvc,

    // I
    Isb, It,

    // L
    Lda, Ldab, Ldaex, Ldaexb, Ldaexd, Ldaexh, Ldah,
    LdcI, LdcL,
    Ldm, Ldmda, Ldmdb, Ldmib, LdmE, LdmU,
    Ldrbt, LdrbI, LdrbL, LdrbR,
    LdrdI, LdrdL, LdrdR,
    Ldrex, Ldrexb, Ldrexd, Ldrexh,
    Ldrht, LdrhI, LdrhL, LdrhR,
    Ldrsbt, LdrsbI, LdrsbL, LdrsbR,
    Ldrsht, LdrshI, LdrshL, LdrshR,
    Ldrt, LdrI, LdrL, LdrR,

    // M
    Mcr, Mcrr,
    Mla, Mls,
    Movt, MovI, MovR, MovRr,
    Mrc, Mrrc,
    Mrs, MrsBr, MsrBr, MsrI, MsrR,
    Mul,
    MvnI, MvnR, MvnRr,

    // N
    Nop,

    // O
    OrnI, OrnR,
    OrrI, OrrR, OrrRr,

    // P
    Pkh,
    PldI, PldL, PldR,
    PliI, PliR,
    Pop, Pssbb, Push,

    // Q
    Qadd, Qadd16, Qadd8, Qasx, Qdadd, Qdsub, Qsax, Qsub, Qsub16, Qsub8,

    // R
    Rbit,
    Rev, Rev16, Revsh,
    Rfe,
    RsbI, RsbR, RsbRr,
    RscI, RscR, RscRr,

    // S
    Sadd16, Sadd8, Sasx,
    Sb,
    SbcI, SbcR, SbcRr,
    Sbfx, Sdiv, Sel, Setend, Setpan, Sev, Sevl,
    Sha1c, Sha1h, Sha1m, Sha1p, Sha1su0, Sha1su1,
    Sha256h, Sha256h2, Sha256su0, Sha256su1,
    Shadd16, Shadd8, Shasx, Shsax, Shsub16, Shsub8,
    Smc,
    Smlabb, Smlad, Smlal, Smlalbb, Smlald, Smlawb, Smlsd, Smlsld,
    Smmla, Smmls, Smmul,
    Smuad, Smulbb, Smull, Smulwb, Smusd,
    Srs,
    Ssat, Ssat16, Ssax, Ssbb, Ssub16, Ssub8,
    Stc,
    Stl, Stlb, Stlex, Stlexb, Stlexd, Stlexh, Stlh,
    Stm, Stmda, Stmdb, Stmib, StmU,
    Strbt, StrbI, StrbR,
    StrdI, StrdR,
    Strex, Strexb, Strexd, Strexh,
    Strht, StrhI, StrhR,
    Strt, StrI, StrR,
    SubI, SubR, SubRr, SubSpI, SubSpR,
    Svc,
    Sxtab, Sxtab16, Sxtah, Sxtb, Sxtb16, Sxth,

    // T
    Tbb,
    TeqI, TeqR, TeqRr,
    Tsb,
    TstI, TstR, TstRr,

    // U
    Uadd16, Uadd8, Uasx,
    Ubfx, Udf, Udiv,
    Uhadd16, Uhadd8, Uhasx, Uhsax, Uhsub16, Uhsub8,
    Umaal, Umlal, Umull,
    Uqadd16, Uqadd8, Uqasx, Uqsax, Uqsub16, Uqsub8,
    Usad8, Usada8, Usat, Usat16, Usax, Usub16, Usub8,
    Uxtab, Uxtab16, Uxtah, Uxtb, Uxtb16, Uxth,

    // V (FP/SIMD)
    Vaba, Vabal, VabdlI, VabdF, VabdI, Vabs,
    Vacge, Vacgt,
    Vaddhn, Vaddl, Vaddw, VaddF, VaddI,
    VandR,
    VbicI, VbicR, Vbif, Vbit, Vbsl,
    Vcadd,
    VceqI, VceqR, VcgeI, VcgeR, VcgtI, VcgtR, VcleI, Vcls, VcltI, Vclz,
    Vcmla, VcmlaS, Vcmp, Vcmpe, Vcnt,
    VcvtaAsimd, VcvtaVfp, Vcvtb, VcvtbBfs,
    VcvtmAsimd, VcvtmVfp, VcvtnAsimd, VcvtnVfp, VcvtpAsimd, VcvtpVfp,
    VcvtrIv, Vcvtt, VcvttBfs,
    VcvtBfs, VcvtDs, VcvtHs, VcvtIs, VcvtIv, VcvtVi, VcvtXs, VcvtXv,
    Vdiv, Vdot, VdotS, VdupR, VdupS,
    Veor, Vext,
    Vfma, Vfmal, VfmalS, VfmaBf, VfmaBfs,
    Vfms, Vfmsl, VfmslS,
    Vfnma, Vfnms,
    Vhadd, Vhsub,
    Vins, Vjcvt,
    Vld11, Vld1A, Vld1M,
    Vld21, Vld2A, Vld2M,
    Vld31, Vld3A, Vld3M,
    Vld41, Vld4A, Vld4M,
    Vldm, VldrI, VldrL,
    Vmaxnm, VmaxF, VmaxI,
    Vminnm, VminF, VminI,
    VmlalI, VmlalS, VmlaF, VmlaI, VmlaS,
    VmlslI, VmlslS, VmlsF, VmlsI, VmlsS,
    Vmmla,
    Vmovl, Vmovn, Vmovx,
    VmovD, VmovH, VmovI, VmovR, VmovRs, VmovS, VmovSr, VmovSs,
    Vmrs, Vmsr,
    VmullI, VmullS, VmulF, VmulI, VmulS,
    VmvnI, VmvnR,
    Vneg, Vnmla, Vnmls, Vnmul,
    VornR, VorrI, VorrR,
    Vpadal, Vpaddl, VpaddF, VpaddI,
    VpmaxF, VpmaxI, VpminF, VpminI,
    Vqabs, Vqadd,
    Vqdmlal, Vqdmlsl, Vqdmulh, Vqdmull,
    Vqmovn, Vqneg,
    Vqrdmlah, Vqrdmlsh, Vqrdmulh,
    Vqrshl, Vqrshrn,
    VqshlI, VqshlR, Vqshrn,
    Vqsub,
    Vraddhn, Vrecpe, Vrecps,
    Vrev16, Vrev32, Vrev64,
    Vrhadd,
    VrintaAsimd, VrintaVfp,
    VrintmAsimd, VrintmVfp,
    VrintnAsimd, VrintnVfp,
    VrintpAsimd, VrintpVfp,
    VrintrVfp,
    VrintxAsimd, VrintxVfp,
    VrintzAsimd, VrintzVfp,
    Vrshl, Vrshr, Vrshrn,
    Vrsqrte, Vrsqrts,
    Vrsra, Vrsubhn,
    Vsdot, VsdotS,
    Vsel,
    Vshll, VshlI, VshlR, Vshr, Vshrn,
    Vsli, Vsmmla, Vsqrt, Vsra, Vsri,
    Vst11, Vst1M,
    Vst21, Vst2M,
    Vst31, Vst3M,
    Vst41, Vst4M,
    Vstm, Vstr,
    Vsubhn, Vsubl, Vsubw, VsubF, VsubI,
    VsudotS,
    Vswp, Vtbl, Vtrn, Vtst,
    Vudot, VudotS, Vummla,
    Vusdot, VusdotS, Vusmmla,
    Vuzp, Vzip,

    // W, Y
    Wfe, Wfi,
    Yield,
}
|
||||
|
||||
/// <summary>
/// Classification helpers for <see cref="InstName"/>.
/// </summary>
static class InstNameExtensions
{
    /// <summary>
    /// Checks whether the instruction is BL (immediate) or BLX (register).
    /// </summary>
    /// <param name="name">Instruction name</param>
    /// <returns>True for <see cref="InstName.BlI"/> and <see cref="InstName.BlxR"/></returns>
    public static bool IsCall(this InstName name)
    {
        return name is InstName.BlI or InstName.BlxR;
    }

    /// <summary>
    /// Checks whether the instruction is one of the coprocessor register transfers (MCR, MCRR, MRC, MRRC),
    /// one of the MRS/MSR variants, or SVC.
    /// </summary>
    /// <param name="name">Instruction name</param>
    /// <returns>True if the instruction is in the list above</returns>
    public static bool IsSystem(this InstName name)
    {
        return name switch
        {
            InstName.Mcr or
            InstName.Mcrr or
            InstName.Mrc or
            InstName.Mrs or
            InstName.MrsBr or
            InstName.MsrBr or
            InstName.MsrI or
            InstName.MsrR or
            InstName.Mrrc or
            InstName.Svc => true,
            _ => false,
        };
    }

    /// <summary>
    /// Checks whether the instruction satisfies <see cref="IsSystem"/> or <see cref="IsCall"/>.
    /// </summary>
    /// <param name="name">Instruction name</param>
    /// <returns>True if the instruction is a system or call instruction</returns>
    public static bool IsSystemOrCall(this InstName name)
    {
        if (name.IsSystem())
        {
            return true;
        }

        return name.IsCall();
    }
}
|
||||
}
|
1194
src/Ryujinx.Cpu/LightningJit/Arm32/InstTableA32.cs
Normal file
1194
src/Ryujinx.Cpu/LightningJit/Arm32/InstTableA32.cs
Normal file
File diff suppressed because it is too large
Load Diff
146
src/Ryujinx.Cpu/LightningJit/Arm32/InstTableT16.cs
Normal file
146
src/Ryujinx.Cpu/LightningJit/Arm32/InstTableT16.cs
Normal file
@ -0,0 +1,146 @@
|
||||
using Ryujinx.Cpu.LightningJit.Table;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
static class InstTableT16<T> where T : IInstEmit
|
||||
{
|
||||
private static readonly InstTableLevel<InstInfoForTable> _table;
|
||||
|
||||
static InstTableT16()
|
||||
{
|
||||
InstEncoding[] rmRdndnConstraints = new InstEncoding[]
|
||||
{
|
||||
new(0x00680000, 0x00780000),
|
||||
new(0x00850000, 0x00870000),
|
||||
};
|
||||
|
||||
InstEncoding[] rmConstraints = new InstEncoding[]
|
||||
{
|
||||
new(0x00680000, 0x00780000),
|
||||
};
|
||||
|
||||
InstEncoding[] condCondConstraints = new InstEncoding[]
|
||||
{
|
||||
new(0x0E000000, 0x0F000000),
|
||||
new(0x0F000000, 0x0F000000),
|
||||
};
|
||||
|
||||
InstEncoding[] maskConstraints = new InstEncoding[]
|
||||
{
|
||||
new(0x00000000, 0x000F0000),
|
||||
};
|
||||
|
||||
InstEncoding[] opConstraints = new InstEncoding[]
|
||||
{
|
||||
new(0x18000000, 0x18000000),
|
||||
};
|
||||
|
||||
InstEncoding[] opOpOpOpConstraints = new InstEncoding[]
|
||||
{
|
||||
new(0x00000000, 0x03C00000),
|
||||
new(0x00400000, 0x03C00000),
|
||||
new(0x01400000, 0x03C00000),
|
||||
new(0x01800000, 0x03C00000),
|
||||
};
|
||||
|
||||
List<InstInfoForTable> insts = new()
|
||||
{
|
||||
new(0x41400000, 0xFFC00000, InstName.AdcR, T.AdcRT1, IsaVersion.v80, InstFlags.Rdn),
|
||||
new(0x1C000000, 0xFE000000, InstName.AddI, T.AddIT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0x30000000, 0xF8000000, InstName.AddI, T.AddIT2, IsaVersion.v80, InstFlags.Rdn),
|
||||
new(0x18000000, 0xFE000000, InstName.AddR, T.AddRT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0x44000000, 0xFF000000, rmRdndnConstraints, InstName.AddR, T.AddRT2, IsaVersion.v80, InstFlags.RdnDn),
|
||||
new(0xA8000000, 0xF8000000, InstName.AddSpI, T.AddSpIT1, IsaVersion.v80, InstFlags.RdRd16),
|
||||
new(0xB0000000, 0xFF800000, InstName.AddSpI, T.AddSpIT2, IsaVersion.v80, InstFlags.None),
|
||||
new(0x44680000, 0xFF780000, InstName.AddSpR, T.AddSpRT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x44850000, 0xFF870000, rmConstraints, InstName.AddSpR, T.AddSpRT2, IsaVersion.v80, InstFlags.None),
|
||||
new(0xA0000000, 0xF8000000, InstName.Adr, T.AdrT1, IsaVersion.v80, InstFlags.RdRd16),
|
||||
new(0x40000000, 0xFFC00000, InstName.AndR, T.AndRT1, IsaVersion.v80, InstFlags.Rdn),
|
||||
new(0xD0000000, 0xF0000000, condCondConstraints, InstName.B, T.BT1, IsaVersion.v80, InstFlags.Cond),
|
||||
new(0xE0000000, 0xF8000000, InstName.B, T.BT2, IsaVersion.v80, InstFlags.None),
|
||||
new(0x43800000, 0xFFC00000, InstName.BicR, T.BicRT1, IsaVersion.v80, InstFlags.Rdn),
|
||||
new(0xBE000000, 0xFF000000, InstName.Bkpt, T.BkptT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x47800000, 0xFF870000, InstName.BlxR, T.BlxRT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x47000000, 0xFF870000, InstName.Bx, T.BxT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xB1000000, 0xF5000000, InstName.Cbnz, T.CbnzT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x42C00000, 0xFFC00000, InstName.CmnR, T.CmnRT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x28000000, 0xF8000000, InstName.CmpI, T.CmpIT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x42800000, 0xFFC00000, InstName.CmpR, T.CmpRT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x45000000, 0xFF000000, InstName.CmpR, T.CmpRT2, IsaVersion.v80, InstFlags.None),
|
||||
new(0xB6600000, 0xFFE80000, InstName.Cps, T.CpsT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x40400000, 0xFFC00000, InstName.EorR, T.EorRT1, IsaVersion.v80, InstFlags.Rdn),
|
||||
new(0xBA800000, 0xFFC00000, InstName.Hlt, T.HltT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xBF000000, 0xFF000000, maskConstraints, InstName.It, T.ItT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xC8000000, 0xF8000000, InstName.Ldm, T.LdmT1, IsaVersion.v80, InstFlags.Rlist),
|
||||
new(0x78000000, 0xF8000000, InstName.LdrbI, T.LdrbIT1, IsaVersion.v80, InstFlags.Rt),
|
||||
new(0x5C000000, 0xFE000000, InstName.LdrbR, T.LdrbRT1, IsaVersion.v80, InstFlags.Rt),
|
||||
new(0x88000000, 0xF8000000, InstName.LdrhI, T.LdrhIT1, IsaVersion.v80, InstFlags.Rt),
|
||||
new(0x5A000000, 0xFE000000, InstName.LdrhR, T.LdrhRT1, IsaVersion.v80, InstFlags.Rt),
|
||||
new(0x56000000, 0xFE000000, InstName.LdrsbR, T.LdrsbRT1, IsaVersion.v80, InstFlags.Rt),
|
||||
new(0x5E000000, 0xFE000000, InstName.LdrshR, T.LdrshRT1, IsaVersion.v80, InstFlags.Rt),
|
||||
new(0x68000000, 0xF8000000, InstName.LdrI, T.LdrIT1, IsaVersion.v80, InstFlags.Rt),
|
||||
new(0x98000000, 0xF8000000, InstName.LdrI, T.LdrIT2, IsaVersion.v80, InstFlags.RtRd16),
|
||||
new(0x48000000, 0xF8000000, InstName.LdrL, T.LdrLT1, IsaVersion.v80, InstFlags.RtRd16),
|
||||
new(0x58000000, 0xFE000000, InstName.LdrR, T.LdrRT1, IsaVersion.v80, InstFlags.Rt),
|
||||
new(0x20000000, 0xF8000000, InstName.MovI, T.MovIT1, IsaVersion.v80, InstFlags.RdRd16),
|
||||
new(0x46000000, 0xFF000000, InstName.MovR, T.MovRT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0x00000000, 0xE0000000, opConstraints, InstName.MovR, T.MovRT2, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0x40000000, 0xFE000000, opOpOpOpConstraints, InstName.MovRr, T.MovRrT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x43400000, 0xFFC00000, InstName.Mul, T.MulT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x43C00000, 0xFFC00000, InstName.MvnR, T.MvnRT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0xBF000000, 0xFFFF0000, InstName.Nop, T.NopT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0x43000000, 0xFFC00000, InstName.OrrR, T.OrrRT1, IsaVersion.v80, InstFlags.Rdn),
|
||||
new(0xBC000000, 0xFE000000, InstName.Pop, T.PopT1, IsaVersion.v80, InstFlags.Rlist),
|
||||
new(0xB4000000, 0xFE000000, InstName.Push, T.PushT1, IsaVersion.v80, InstFlags.RlistRead),
|
||||
new(0xBA000000, 0xFFC00000, InstName.Rev, T.RevT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0xBA400000, 0xFFC00000, InstName.Rev16, T.Rev16T1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0xBAC00000, 0xFFC00000, InstName.Revsh, T.RevshT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0x42400000, 0xFFC00000, InstName.RsbI, T.RsbIT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0x41800000, 0xFFC00000, InstName.SbcR, T.SbcRT1, IsaVersion.v80, InstFlags.Rdn),
|
||||
new(0xB6500000, 0xFFF70000, InstName.Setend, T.SetendT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xB6100000, 0xFFF70000, InstName.Setpan, T.SetpanT1, IsaVersion.v81, IsaFeature.FeatPan, InstFlags.None),
|
||||
new(0xBF400000, 0xFFFF0000, InstName.Sev, T.SevT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xBF500000, 0xFFFF0000, InstName.Sevl, T.SevlT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xC0000000, 0xF8000000, InstName.Stm, T.StmT1, IsaVersion.v80, InstFlags.RlistRead),
|
||||
new(0x70000000, 0xF8000000, InstName.StrbI, T.StrbIT1, IsaVersion.v80, InstFlags.RtRead),
|
||||
new(0x54000000, 0xFE000000, InstName.StrbR, T.StrbRT1, IsaVersion.v80, InstFlags.RtRead),
|
||||
new(0x80000000, 0xF8000000, InstName.StrhI, T.StrhIT1, IsaVersion.v80, InstFlags.RtRead),
|
||||
new(0x52000000, 0xFE000000, InstName.StrhR, T.StrhRT1, IsaVersion.v80, InstFlags.RtRead),
|
||||
new(0x60000000, 0xF8000000, InstName.StrI, T.StrIT1, IsaVersion.v80, InstFlags.RtRead),
|
||||
new(0x90000000, 0xF8000000, InstName.StrI, T.StrIT2, IsaVersion.v80, InstFlags.RtReadRd16),
|
||||
new(0x50000000, 0xFE000000, InstName.StrR, T.StrRT1, IsaVersion.v80, InstFlags.RtRead),
|
||||
new(0x1E000000, 0xFE000000, InstName.SubI, T.SubIT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0x38000000, 0xF8000000, InstName.SubI, T.SubIT2, IsaVersion.v80, InstFlags.Rdn),
|
||||
new(0x1A000000, 0xFE000000, InstName.SubR, T.SubRT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0xB0800000, 0xFF800000, InstName.SubSpI, T.SubSpIT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xDF000000, 0xFF000000, InstName.Svc, T.SvcT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xB2400000, 0xFFC00000, InstName.Sxtb, T.SxtbT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0xB2000000, 0xFFC00000, InstName.Sxth, T.SxthT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0x42000000, 0xFFC00000, InstName.TstR, T.TstRT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xDE000000, 0xFF000000, InstName.Udf, T.UdfT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xB2C00000, 0xFFC00000, InstName.Uxtb, T.UxtbT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0xB2800000, 0xFFC00000, InstName.Uxth, T.UxthT1, IsaVersion.v80, InstFlags.Rd),
|
||||
new(0xBF200000, 0xFFFF0000, InstName.Wfe, T.WfeT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xBF300000, 0xFFFF0000, InstName.Wfi, T.WfiT1, IsaVersion.v80, InstFlags.None),
|
||||
new(0xBF100000, 0xFFFF0000, InstName.Yield, T.YieldT1, IsaVersion.v80, InstFlags.None),
|
||||
};
|
||||
|
||||
_table = new(insts);
|
||||
}
|
||||
|
||||
/// <summary>
/// Looks up the metadata of the table entry that matches an instruction encoding.
/// </summary>
/// <param name="encoding">Instruction encoding to look up</param>
/// <param name="version">ISA version passed to the table lookup</param>
/// <param name="features">ISA features passed to the table lookup</param>
/// <param name="meta">Metadata of the matching entry, or metadata describing <c>Udf</c> if nothing matched</param>
/// <returns>True if a matching entry was found, false otherwise</returns>
public static bool TryGetMeta(uint encoding, IsaVersion version, IsaFeature features, out InstMeta meta)
{
    if (!_table.TryFind(encoding, version, features, out InstInfoForTable info))
    {
        // Nothing matched: hand back "undefined instruction" metadata so the caller always gets a valid value.
        meta = new(InstName.Udf, T.UdfA1, IsaVersion.v80, IsaFeature.None, InstFlags.None);

        return false;
    }

    meta = info.Meta;

    return true;
}
|
||||
}
|
||||
}
|
1212
src/Ryujinx.Cpu/LightningJit/Arm32/InstTableT32.cs
Normal file
1212
src/Ryujinx.Cpu/LightningJit/Arm32/InstTableT32.cs
Normal file
File diff suppressed because it is too large
Load Diff
31
src/Ryujinx.Cpu/LightningJit/Arm32/MultiBlock.cs
Normal file
31
src/Ryujinx.Cpu/LightningJit/Arm32/MultiBlock.cs
Normal file
@ -0,0 +1,31 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// A list of blocks, together with properties aggregated from those blocks.
/// </summary>
class MultiBlock
{
    /// <summary>
    /// The blocks, exactly as passed to the constructor.
    /// </summary>
    public readonly List<Block> Blocks;

    /// <summary>
    /// True if at least one of the blocks has its <c>HasHostCall</c> flag set.
    /// </summary>
    public readonly bool HasHostCall;

    /// <summary>
    /// Value of the <c>IsTruncated</c> flag of the last block.
    /// </summary>
    public readonly bool IsTruncated;

    /// <summary>
    /// Creates a new multi-block from a non-empty list of blocks.
    /// </summary>
    /// <param name="blocks">Blocks to group, must contain at least one element</param>
    public MultiBlock(List<Block> blocks)
    {
        Blocks = blocks;

        bool anyHostCall = false;

        foreach (Block current in blocks)
        {
            anyHostCall |= current.HasHostCall;
        }

        HasHostCall = anyHostCall;

        // Indexing from the end throws on an empty list, an empty multi-block is not valid.
        IsTruncated = blocks[^1].IsTruncated;
    }
}
|
||||
}
|
20
src/Ryujinx.Cpu/LightningJit/Arm32/PendingBranch.cs
Normal file
20
src/Ryujinx.Cpu/LightningJit/Arm32/PendingBranch.cs
Normal file
@ -0,0 +1,20 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Describes a branch placeholder that was written during code generation and has to be
/// rewritten once the whole function has been emitted.
/// </summary>
readonly struct PendingBranch
{
    /// <summary>
    /// Kind of branch, selects how the placeholder is rewritten.
    /// </summary>
    public readonly BranchType BranchType;

    /// <summary>
    /// Branch target. For indirect branches, the compiler passes this value on as a register index rather than an address.
    /// </summary>
    public readonly uint TargetAddress;

    /// <summary>
    /// Second address associated with the branch; the compiler uses it as the return address for calls.
    /// </summary>
    public readonly uint NextAddress;

    /// <summary>
    /// Name of the guest instruction that produced the branch.
    /// </summary>
    public readonly InstName Name;

    /// <summary>
    /// Index of the placeholder instruction inside the code writer.
    /// </summary>
    public readonly int WriterPointer;

    /// <summary>
    /// Creates a new pending branch description.
    /// </summary>
    /// <param name="branchType">Kind of branch</param>
    /// <param name="targetAddress">Branch target (or target register for indirect branches)</param>
    /// <param name="nextAddress">Second address associated with the branch</param>
    /// <param name="name">Name of the guest instruction that produced the branch</param>
    /// <param name="writerPointer">Index of the placeholder instruction inside the code writer</param>
    public PendingBranch(BranchType branchType, uint targetAddress, uint nextAddress, InstName name, int writerPointer)
    {
        (BranchType, TargetAddress, NextAddress, Name, WriterPointer) =
            (branchType, targetAddress, nextAddress, name, writerPointer);
    }
}
|
||||
}
|
169
src/Ryujinx.Cpu/LightningJit/Arm32/RegisterAllocator.cs
Normal file
169
src/Ryujinx.Cpu/LightningJit/Arm32/RegisterAllocator.cs
Normal file
@ -0,0 +1,169 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Tracks which host general purpose and FP/SIMD registers are in use, and hands out
/// temporary registers from the ones that are currently free.
/// </summary>
class RegisterAllocator
{
    public const int MaxTemps = 1;

    // Bit N set means register N is currently unavailable for temporary allocation.
    private uint _gprMask;
    private uint _fpSimdMask;

    /// <summary>
    /// Register reserved by the constructor to hold the context pointer.
    /// </summary>
    public int FixedContextRegister { get; }

    /// <summary>
    /// Register reserved by the constructor to hold the page table pointer.
    /// </summary>
    public int FixedPageTableRegister { get; }

    /// <summary>
    /// Mask of every general purpose register that was marked as used so far.
    /// </summary>
    public uint UsedGprsMask { get; private set; }

    /// <summary>
    /// Mask of every FP/SIMD register that was marked as used so far.
    /// </summary>
    public uint UsedFpSimdMask { get; private set; }

    /// <summary>
    /// Creates a new allocator with the first 16 registers of each bank unavailable for
    /// temporary allocation, and reserves the two fixed registers.
    /// </summary>
    public RegisterAllocator()
    {
        _gprMask = ushort.MaxValue;
        _fpSimdMask = ushort.MaxValue;

        // NOTE(review): the fixed registers are reserved in _gprMask but are not recorded in
        // UsedGprsMask here. Confirm that whoever saves/restores callee saved registers accounts for them.
        FixedContextRegister = AllocateTempRegisterWithPreferencing();
        FixedPageTableRegister = AllocateTempRegisterWithPreferencing();
    }

    /// <summary>
    /// Records that a general purpose register is used by the function.
    /// </summary>
    /// <param name="index">Register index</param>
    public void MarkGprAsUsed(int index)
    {
        UsedGprsMask |= 1u << index;
    }

    /// <summary>
    /// Records that a FP/SIMD register is used by the function.
    /// </summary>
    /// <param name="index">Register index</param>
    public void MarkFpSimdAsUsed(int index)
    {
        UsedFpSimdMask |= 1u << index;
    }

    /// <summary>
    /// Records that a run of consecutive FP/SIMD registers is used by the function.
    /// </summary>
    /// <param name="index">Index of the first register of the run</param>
    /// <param name="count">Number of registers in the run, nothing is marked if it is zero or negative</param>
    public void MarkFpSimdRangeAsUsed(int index, int count)
    {
        // C# only uses the low 5 bits of a 32-bit shift count, so "uint.MaxValue >> (32 - count)"
        // produces all ones (rather than zero) for an empty range. Handle that case explicitly.
        if (count <= 0)
        {
            return;
        }

        uint rangeMask = count >= 32 ? uint.MaxValue : (1u << count) - 1;

        UsedFpSimdMask |= rangeMask << index;
    }

    /// <summary>
    /// Marks a general purpose register as used and returns a 32-bit integer register operand for it.
    /// </summary>
    /// <param name="index">Register index</param>
    /// <returns>Register operand</returns>
    public Operand RemapGprRegister(int index)
    {
        MarkGprAsUsed(index);

        return new Operand(OperandKind.Register, OperandType.I32, (ulong)index);
    }

    /// <summary>
    /// Marks a FP/SIMD register as used and returns a scalar floating point register operand for it.
    /// </summary>
    /// <param name="index">Register index</param>
    /// <param name="isFP32">True for a <see cref="OperandType.FP32"/> operand, false for <see cref="OperandType.FP64"/></param>
    /// <returns>Register operand</returns>
    public Operand RemapFpRegister(int index, bool isFP32)
    {
        MarkFpSimdAsUsed(index);

        return new Operand(OperandKind.Register, isFP32 ? OperandType.FP32 : OperandType.FP64, (ulong)index);
    }

    /// <summary>
    /// Marks a FP/SIMD register as used and returns a 128-bit vector register operand for it.
    /// </summary>
    /// <param name="index">Register index</param>
    /// <returns>Register operand</returns>
    public Operand RemapSimdRegister(int index)
    {
        MarkFpSimdAsUsed(index);

        return new Operand(OperandKind.Register, OperandType.V128, (ulong)index);
    }

    /// <summary>
    /// Marks a run of FP/SIMD registers as used and returns a 128-bit vector register operand for the first one.
    /// </summary>
    /// <param name="index">Index of the first register of the run</param>
    /// <param name="count">Number of registers in the run</param>
    /// <returns>Register operand for the first register</returns>
    public Operand RemapSimdRegister(int index, int count)
    {
        MarkFpSimdRangeAsUsed(index, count);

        return new Operand(OperandKind.Register, OperandType.V128, (ulong)index);
    }

    /// <summary>
    /// Allocates and immediately frees the requested number of temporary general purpose registers,
    /// which leaves them recorded in <see cref="UsedGprsMask"/> while keeping them available.
    /// </summary>
    /// <param name="count">Number of temporary registers to ensure</param>
    public void EnsureTempGprRegisters(int count)
    {
        if (count != 0)
        {
            Span<int> registers = stackalloc int[count];

            // All registers are allocated before any is freed, otherwise the same one would be returned every time.
            for (int index = 0; index < count; index++)
            {
                registers[index] = AllocateTempGprRegister();
            }

            for (int index = 0; index < count; index++)
            {
                FreeTempGprRegister(registers[index]);
            }
        }
    }

    /// <summary>
    /// Allocates a temporary general purpose register and marks it as used.
    /// </summary>
    /// <returns>Index of the allocated register</returns>
    /// <exception cref="InvalidOperationException">There are no free registers</exception>
    public int AllocateTempGprRegister()
    {
        int index = AllocateTempRegister(ref _gprMask, AbiConstants.ReservedRegsMask);

        MarkGprAsUsed(index);

        return index;
    }

    /// <summary>
    /// Allocates a general purpose register, taking the first free callee saved register
    /// if it is not reserved, and falling back to the regular allocation otherwise.
    /// </summary>
    /// <returns>Index of the allocated register</returns>
    private int AllocateTempRegisterWithPreferencing()
    {
        int firstCalleeSaved = BitOperations.TrailingZeroCount(~_gprMask & AbiConstants.GprCalleeSavedRegsMask);
        if (firstCalleeSaved < 32)
        {
            uint regMask = 1u << firstCalleeSaved;
            if ((regMask & AbiConstants.ReservedRegsMask) == 0)
            {
                _gprMask |= regMask;

                return firstCalleeSaved;
            }
        }

        return AllocateTempRegister(ref _gprMask, AbiConstants.ReservedRegsMask);
    }

    /// <summary>
    /// Allocates a temporary FP/SIMD register and marks it as used.
    /// </summary>
    /// <returns>Index of the allocated register</returns>
    /// <exception cref="InvalidOperationException">There are no free registers</exception>
    public int AllocateTempFpSimdRegister()
    {
        int index = AllocateTempRegister(ref _fpSimdMask, 0);

        MarkFpSimdAsUsed(index);

        return index;
    }

    /// <summary>
    /// Allocates a temporary 32-bit integer register that is freed when the returned value is disposed.
    /// </summary>
    /// <returns>Scoped register</returns>
    public ScopedRegister AllocateTempGprRegisterScoped()
    {
        return new(this, new(OperandKind.Register, OperandType.I32, (ulong)AllocateTempGprRegister()));
    }

    /// <summary>
    /// Allocates a temporary scalar floating point register that is freed when the returned value is disposed.
    /// </summary>
    /// <param name="isFP32">True for a <see cref="OperandType.FP32"/> operand, false for <see cref="OperandType.FP64"/></param>
    /// <returns>Scoped register</returns>
    public ScopedRegister AllocateTempFpRegisterScoped(bool isFP32)
    {
        return new(this, new(OperandKind.Register, isFP32 ? OperandType.FP32 : OperandType.FP64, (ulong)AllocateTempFpSimdRegister()));
    }

    /// <summary>
    /// Allocates a temporary 128-bit vector register that is freed when the returned value is disposed.
    /// </summary>
    /// <returns>Scoped register</returns>
    public ScopedRegister AllocateTempSimdRegisterScoped()
    {
        return new(this, new(OperandKind.Register, OperandType.V128, (ulong)AllocateTempFpSimdRegister()));
    }

    /// <summary>
    /// Makes a general purpose register available for temporary allocation again.
    /// </summary>
    /// <param name="index">Register index</param>
    public void FreeTempGprRegister(int index)
    {
        FreeTempRegister(ref _gprMask, index);
    }

    /// <summary>
    /// Makes a FP/SIMD register available for temporary allocation again.
    /// </summary>
    /// <param name="index">Register index</param>
    public void FreeTempFpSimdRegister(int index)
    {
        FreeTempRegister(ref _fpSimdMask, index);
    }

    /// <summary>
    /// Takes the lowest numbered register that is neither in use nor reserved.
    /// </summary>
    /// <param name="mask">Mask of registers in use, updated with the allocated register</param>
    /// <param name="reservedMask">Mask of registers that must never be allocated</param>
    /// <returns>Index of the allocated register</returns>
    /// <exception cref="InvalidOperationException">There are no free registers</exception>
    private static int AllocateTempRegister(ref uint mask, uint reservedMask)
    {
        // TrailingZeroCount returns the bit width (32) when no bit is set, meaning that nothing is free.
        int index = BitOperations.TrailingZeroCount(~(mask | reservedMask));
        if (index == sizeof(uint) * 8)
        {
            throw new InvalidOperationException("No free registers.");
        }

        mask |= 1u << index;

        return index;
    }

    /// <summary>
    /// Clears the in-use bit of a register.
    /// </summary>
    /// <param name="mask">Mask of registers in use</param>
    /// <param name="index">Register index</param>
    private static void FreeTempRegister(ref uint mask, int index)
    {
        mask &= ~(1u << index);
    }
}
|
||||
}
|
109
src/Ryujinx.Cpu/LightningJit/Arm32/RegisterUtils.cs
Normal file
109
src/Ryujinx.Cpu/LightningJit/Arm32/RegisterUtils.cs
Normal file
@ -0,0 +1,109 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Helpers to extract register indices from Arm32 and Thumb instruction encodings.
/// </summary>
static class RegisterUtils
{
    public const int SpRegister = 13;
    public const int LrRegister = 14;
    public const int PcRegister = 15;

    // Field positions for A32 encodings.
    private const int RmBit = 0;
    private const int RdRtBit = 12;
    private const int RdHiRnBit = 16;

    // Field positions for 16-bit Thumb encodings. The register fields are read from bit 16 upwards,
    // as the 16-bit instruction occupies the upper half of the 32-bit encoding value.
    private const int RdRtT16Bit = 16;
    private const int RdRtT16AltBit = 24;
    private const int DnT16Bit = 23;

    // Field positions for 32-bit Thumb encodings.
    private const int RdRt2RdHiT32Bit = 8;
    private const int RdT32AltBit = 0;
    private const int RtRdLoT32Bit = 12;

    /// <summary>
    /// Extracts the Rt field (bits 15:12) of an A32 encoding.
    /// </summary>
    public static int ExtractRt(uint encoding)
    {
        return (int)(encoding >> RdRtBit) & 0xf;
    }

    /// <summary>
    /// Gets the second transfer register of an A32 encoding, derived from its Rt field with <see cref="GetRt2"/>.
    /// </summary>
    public static int ExtractRt2(uint encoding)
    {
        return (int)GetRt2((uint)ExtractRt(encoding));
    }

    /// <summary>
    /// Extracts the destination register of an A32 encoding, which is at bits 19:16
    /// if <see cref="InstFlags.Rd16"/> is set, and at bits 15:12 otherwise.
    /// </summary>
    public static int ExtractRd(InstFlags flags, uint encoding)
    {
        return flags.HasFlag(InstFlags.Rd16) ? ExtractRn(encoding) : ExtractRd(encoding);
    }

    /// <summary>
    /// Extracts the Rd field (bits 15:12) of an A32 encoding.
    /// </summary>
    public static int ExtractRd(uint encoding)
    {
        return (int)(encoding >> RdRtBit) & 0xf;
    }

    /// <summary>
    /// Extracts the RdHi field (bits 19:16) of an A32 encoding.
    /// </summary>
    public static int ExtractRdHi(uint encoding)
    {
        return (int)(encoding >> RdHiRnBit) & 0xf;
    }

    /// <summary>
    /// Extracts the Rn field (bits 19:16) of an A32 encoding.
    /// </summary>
    public static int ExtractRn(uint encoding)
    {
        return (int)(encoding >> RdHiRnBit) & 0xf;
    }

    /// <summary>
    /// Extracts the Rm field (bits 3:0) of an A32 encoding.
    /// </summary>
    public static int ExtractRm(uint encoding)
    {
        return (int)(encoding >> RmBit) & 0xf;
    }

    /// <summary>
    /// Gets the register that follows <paramref name="rt"/>, clamped to <see cref="PcRegister"/>.
    /// </summary>
    public static uint GetRt2(uint rt)
    {
        return Math.Min(rt + 1, PcRegister);
    }

    /// <summary>
    /// Extracts the combined destination and first source register of a 16-bit Thumb encoding.
    /// </summary>
    /// <param name="flags">Instruction flags, <see cref="InstFlags.Dn"/> indicates that the register has a fourth (DN) bit</param>
    /// <param name="encoding">Instruction encoding, with the 16-bit instruction on the upper half</param>
    /// <returns>Register index</returns>
    public static int ExtractRdn(InstFlags flags, uint encoding)
    {
        if (flags.HasFlag(InstFlags.Dn))
        {
            // The DN bit is bit 7 of the 16-bit instruction, which is bit 23 of the encoding value
            // since the instruction is on the upper half. It supplies bit 3 of the register index.
            int low = (int)(encoding >> RdRtT16Bit) & 7;
            int high = (int)(encoding >> DnT16Bit) & 1;

            return low | (high << 3);
        }

        return ExtractRdT16(flags, encoding);
    }

    /// <summary>
    /// Extracts the 3-bit destination register of a 16-bit Thumb encoding, from the alternate
    /// position if <see cref="InstFlags.Rd16"/> is set.
    /// </summary>
    public static int ExtractRdT16(InstFlags flags, uint encoding)
    {
        return flags.HasFlag(InstFlags.Rd16) ? (int)(encoding >> RdRtT16AltBit) & 7 : (int)(encoding >> RdRtT16Bit) & 7;
    }

    /// <summary>
    /// Extracts the 3-bit transfer register of a 16-bit Thumb encoding, from the alternate
    /// position if <see cref="InstFlags.Rd16"/> is set.
    /// </summary>
    public static int ExtractRtT16(InstFlags flags, uint encoding)
    {
        return flags.HasFlag(InstFlags.Rd16) ? (int)(encoding >> RdRtT16AltBit) & 7 : (int)(encoding >> RdRtT16Bit) & 7;
    }

    /// <summary>
    /// Extracts the destination register of a 32-bit Thumb encoding, which is at bits 3:0
    /// if <see cref="InstFlags.Rd16"/> is set, and at bits 11:8 otherwise.
    /// </summary>
    public static int ExtractRdT32(InstFlags flags, uint encoding)
    {
        return flags.HasFlag(InstFlags.Rd16) ? (int)(encoding >> RdT32AltBit) & 0xf : (int)(encoding >> RdRt2RdHiT32Bit) & 0xf;
    }

    /// <summary>
    /// Extracts the RdLo field (bits 15:12) of a 32-bit Thumb encoding.
    /// </summary>
    public static int ExtractRdLoT32(uint encoding)
    {
        return (int)(encoding >> RtRdLoT32Bit) & 0xf;
    }

    /// <summary>
    /// Extracts the RdHi field (bits 11:8) of a 32-bit Thumb encoding.
    /// </summary>
    public static int ExtractRdHiT32(uint encoding)
    {
        return (int)(encoding >> RdRt2RdHiT32Bit) & 0xf;
    }

    /// <summary>
    /// Extracts the Rt field (bits 15:12) of a 32-bit Thumb encoding.
    /// </summary>
    public static int ExtractRtT32(uint encoding)
    {
        return (int)(encoding >> RtRdLoT32Bit) & 0xf;
    }

    /// <summary>
    /// Extracts the Rt2 field (bits 11:8) of a 32-bit Thumb encoding.
    /// </summary>
    public static int ExtractRt2T32(uint encoding)
    {
        return (int)(encoding >> RdRt2RdHiT32Bit) & 0xf;
    }
}
|
||||
}
|
39
src/Ryujinx.Cpu/LightningJit/Arm32/ScopedRegister.cs
Normal file
39
src/Ryujinx.Cpu/LightningJit/Arm32/ScopedRegister.cs
Normal file
@ -0,0 +1,39 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32
|
||||
{
|
||||
/// <summary>
/// Pairs a register operand with the allocator it came from, so that the register
/// can be given back to the allocator by disposing the value.
/// </summary>
readonly struct ScopedRegister : IDisposable
{
    private readonly RegisterAllocator _registerAllocator;
    private readonly Operand _operand;
    private readonly bool _isAllocated;

    /// <summary>
    /// The wrapped register operand.
    /// </summary>
    public readonly Operand Operand => _operand;

    /// <summary>
    /// True if disposing the value frees the register, false if disposing does nothing.
    /// </summary>
    public readonly bool IsAllocated => _isAllocated;

    /// <summary>
    /// Creates a new scoped register.
    /// </summary>
    /// <param name="registerAllocator">Allocator that the register is returned to on disposal</param>
    /// <param name="operand">Register operand</param>
    /// <param name="isAllocated">True if the register must be freed on disposal</param>
    public ScopedRegister(RegisterAllocator registerAllocator, Operand operand, bool isAllocated = true)
    {
        (_registerAllocator, _operand, _isAllocated) = (registerAllocator, operand, isAllocated);
    }

    /// <summary>
    /// Frees the register if it was allocated, picking the register bank from the operand type.
    /// </summary>
    public readonly void Dispose()
    {
        if (_isAllocated)
        {
            bool isGpr = _operand.Type.IsInteger();

            if (isGpr)
            {
                _registerAllocator.FreeTempGprRegister(_operand.AsInt32());
            }
            else
            {
                _registerAllocator.FreeTempFpSimdRegister(_operand.AsInt32());
            }
        }
    }
}
|
||||
}
|
789
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/Compiler.cs
Normal file
789
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/Compiler.cs
Normal file
@ -0,0 +1,789 @@
|
||||
using ARMeilleure.Common;
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
static class Compiler
|
||||
{
|
||||
public const uint UsableGprsMask = 0x7fff;
|
||||
public const uint UsableFpSimdMask = 0xffff;
|
||||
public const uint UsablePStateMask = 0xf0000000;
|
||||
|
||||
private const int Encodable26BitsOffsetLimit = 0x2000000;
|
||||
|
||||
/// <summary>
/// State shared by the code that runs after instruction emission: it writes the function prologue
/// and epilogue, and moves register state between host registers and the native context.
/// </summary>
private readonly struct Context
{
    public readonly CodeWriter Writer;
    public readonly RegisterAllocator RegisterAllocator;
    public readonly MemoryManagerType MemoryManagerType;
    public readonly TailMerger TailMerger;
    public readonly AddressTable<ulong> FuncTable;
    public readonly IntPtr DispatchStubPointer;

    private readonly RegisterSaveRestore _registerSaveRestore;
    private readonly IntPtr _pageTablePointer;

    /// <summary>
    /// Creates a new context from the objects produced while compiling a function.
    /// </summary>
    public Context(
        CodeWriter writer,
        RegisterAllocator registerAllocator,
        MemoryManagerType mmType,
        TailMerger tailMerger,
        AddressTable<ulong> funcTable,
        RegisterSaveRestore registerSaveRestore,
        IntPtr dispatchStubPointer,
        IntPtr pageTablePointer)
    {
        Writer = writer;
        RegisterAllocator = registerAllocator;
        MemoryManagerType = mmType;
        TailMerger = tailMerger;
        FuncTable = funcTable;
        DispatchStubPointer = dispatchStubPointer;

        _registerSaveRestore = registerSaveRestore;
        _pageTablePointer = pageTablePointer;
    }

    /// <summary>
    /// Gets the reserved stack offset reported by the register save/restore helper.
    /// </summary>
    public readonly int GetReservedStackOffset()
    {
        return _registerSaveRestore.GetReservedStackOffset();
    }

    /// <summary>
    /// Builds the function prologue on a separate writer and writes it into the main writer.
    /// </summary>
    /// <param name="instructionPointer">Position on the main writer where the prologue is written</param>
    public readonly void WritePrologueAt(int instructionPointer)
    {
        CodeWriter prologueWriter = new();
        Assembler asm = new(prologueWriter);

        _registerSaveRestore.WritePrologue(ref asm);

        // If needed, set up the fixed registers with the pointers we will use.
        // First one is the context pointer (passed as first argument),
        // second one is the page table or address space base, it is at a fixed memory location and considered constant.

        int contextRegister = RegisterAllocator.FixedContextRegister;

        if (contextRegister != 0)
        {
            asm.Mov(Register(contextRegister), Register(0));
        }

        asm.Mov(Register(RegisterAllocator.FixedPageTableRegister), (ulong)_pageTablePointer);

        LoadFromContext(ref asm);

        // Write the prologue at the specified position in our writer.
        Writer.WriteInstructionsAt(instructionPointer, prologueWriter);
    }

    /// <summary>
    /// Writes the function epilogue at the current position, without storing any state to the context.
    /// </summary>
    public readonly void WriteEpilogueWithoutContext()
    {
        Assembler asm = new(Writer);

        _registerSaveRestore.WriteEpilogue(ref asm);
    }

    /// <summary>
    /// Writes code that loads the used registers and flags from the context, at the current position.
    /// </summary>
    public void LoadFromContext()
    {
        Assembler asm = new(Writer);

        LoadFromContext(ref asm);
    }

    private void LoadFromContext(ref Assembler asm)
    {
        uint gprs = RegisterAllocator.UsedGprsMask & UsableGprsMask;
        uint fpSimd = RegisterAllocator.UsedFpSimdMask & UsableFpSimdMask;

        LoadGprFromContext(ref asm, gprs, NativeContextOffsets.GprBaseOffset);
        LoadFpSimdFromContext(ref asm, fpSimd, NativeContextOffsets.FpSimdBaseOffset);
        LoadPStateFromContext(ref asm, UsablePStateMask, NativeContextOffsets.FlagsBaseOffset);
    }

    /// <summary>
    /// Writes code that stores the used registers and flags to the context, at the current position.
    /// </summary>
    public void StoreToContext()
    {
        Assembler asm = new(Writer);

        StoreToContext(ref asm);
    }

    private void StoreToContext(ref Assembler asm)
    {
        uint gprs = RegisterAllocator.UsedGprsMask & UsableGprsMask;
        uint fpSimd = RegisterAllocator.UsedFpSimdMask & UsableFpSimdMask;

        StoreGprToContext(ref asm, gprs, NativeContextOffsets.GprBaseOffset);
        StoreFpSimdToContext(ref asm, fpSimd, NativeContextOffsets.FpSimdBaseOffset);
        StorePStateToContext(ref asm, UsablePStateMask, NativeContextOffsets.FlagsBaseOffset);
    }

    private void LoadGprFromContext(ref Assembler asm, uint mask, int baseOffset)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        for (uint remaining = mask; remaining != 0;)
        {
            int first = BitOperations.TrailingZeroCount(remaining);
            int offset = baseOffset + first * 8;

            // Two consecutive registers are loaded with a single pair load when the offset allows it.
            bool nextIsSet = first < 31 && (remaining & (2u << first)) != 0;

            if (nextIsSet && offset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
            {
                remaining &= ~(3u << first);

                asm.LdpRiUn(Register(first), Register(first + 1), contextPtr, offset);
            }
            else
            {
                remaining &= ~(1u << first);

                asm.LdrRiUn(Register(first), contextPtr, offset);
            }
        }
    }

    private void LoadFpSimdFromContext(ref Assembler asm, uint mask, int baseOffset)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        for (uint remaining = mask; remaining != 0;)
        {
            int vector = BitOperations.TrailingZeroCount(remaining);

            remaining &= ~(1u << vector);

            asm.LdrRiUn(Register(vector, OperandType.V128), contextPtr, baseOffset + vector * 16);
        }
    }

    private void LoadPStateFromContext(ref Assembler asm, uint mask, int baseOffset)
    {
        if (mask != 0)
        {
            Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

            using ScopedRegister flags = RegisterAllocator.AllocateTempGprRegisterScoped();

            asm.LdrRiUn(flags.Operand, contextPtr, baseOffset);
            asm.MsrNzcv(flags.Operand);
        }
    }

    private void StoreGprToContext(ref Assembler asm, uint mask, int baseOffset)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        for (uint remaining = mask; remaining != 0;)
        {
            int first = BitOperations.TrailingZeroCount(remaining);
            int offset = baseOffset + first * 8;

            // Two consecutive registers are stored with a single pair store when the offset allows it.
            bool nextIsSet = first < 31 && (remaining & (2u << first)) != 0;

            if (nextIsSet && offset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
            {
                remaining &= ~(3u << first);

                asm.StpRiUn(Register(first), Register(first + 1), contextPtr, offset);
            }
            else
            {
                remaining &= ~(1u << first);

                asm.StrRiUn(Register(first), contextPtr, offset);
            }
        }
    }

    private void StoreFpSimdToContext(ref Assembler asm, uint mask, int baseOffset)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        for (uint remaining = mask; remaining != 0;)
        {
            int vector = BitOperations.TrailingZeroCount(remaining);

            remaining &= ~(1u << vector);

            asm.StrRiUn(Register(vector, OperandType.V128), contextPtr, baseOffset + vector * 16);
        }
    }

    private void StorePStateToContext(ref Assembler asm, uint mask, int baseOffset)
    {
        if (mask != 0)
        {
            Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

            using ScopedRegister stored = RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister current = RegisterAllocator.AllocateTempGprRegisterScoped();

            // Keep the low 28 bits of the value on the context, and merge them with the value read from NZCV.
            asm.LdrRiUn(stored.Operand, contextPtr, baseOffset);
            asm.MrsNzcv(current.Operand);
            asm.And(stored.Operand, stored.Operand, InstEmitCommon.Const(0xfffffff));
            asm.Orr(stored.Operand, stored.Operand, current.Operand);
            asm.StrRiUn(stored.Operand, contextPtr, baseOffset);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Decodes the guest function at the specified address and compiles it into host code.
/// </summary>
/// <param name="cpuPreset">CPU preset passed to the decoder</param>
/// <param name="memoryManager">Memory manager used to decode the guest code</param>
/// <param name="address">Guest address of the function</param>
/// <param name="funcTable">Function table used by the code emitted for calls</param>
/// <param name="dispatchStubPtr">Pointer to the dispatch stub used by the code emitted for calls</param>
/// <param name="isThumb">True if the guest code is Thumb code</param>
/// <returns>Host code, along with the size in bytes of the guest code that was compiled</returns>
public static CompiledFunction Compile(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address, AddressTable<ulong> funcTable, IntPtr dispatchStubPtr, bool isThumb)
{
    MultiBlock multiBlock = Decoder<InstEmit>.DecodeMulti(cpuPreset, memoryManager, address, isThumb);

    CodeWriter writer = new();
    RegisterAllocator regAlloc = new();
    Assembler asm = new(writer);
    CodeGenContext cgContext = new(writer, asm, regAlloc, memoryManager.Type, isThumb);

    // Required for load/store to context.
    regAlloc.EnsureTempGprRegisters(2);

    // Maps the guest address of each block to the index of its first host instruction.
    Dictionary<ulong, int> blockStarts = new();

    ArmCondition activeCondition = ArmCondition.Al;
    int activeConditionIp = 0;
    ulong pc = address;

    foreach (Block block in multiBlock.Blocks)
    {
        Debug.Assert(block.Address == pc);

        blockStarts.Add(pc, writer.InstructionPointer);

        int lastIndex = block.Instructions.Count - 1;

        for (int index = 0; index <= lastIndex; index++)
        {
            InstInfo instInfo = block.Instructions[index];
            bool isLastInBlock = index == lastIndex;

            if (isLastInBlock)
            {
                cgContext.SetNextInstruction(default);
            }
            else
            {
                cgContext.SetNextInstruction(block.Instructions[index + 1]);
            }

            SetConditionalStart(cgContext, ref activeCondition, ref activeConditionIp, instInfo.Name, instInfo.Flags, instInfo.Encoding);

            if (block.IsLoopEnd && isLastInBlock)
            {
                // If this is a loop, the code might run for a long time uninterrupted.
                // We insert a "sync point" here to ensure the loop can be interrupted if needed.

                cgContext.AddPendingSyncPoint();

                asm.B(0);
            }

            cgContext.SetPc((uint)pc);

            instInfo.EmitFunc(cgContext, instInfo.Encoding);

            if (cgContext.ConsumeNzcvModified())
            {
                ForceConditionalEnd(cgContext, ref activeCondition, activeConditionIp);
            }

            cgContext.UpdateItState();

            pc += instInfo.Flags.HasFlag(InstFlags.Thumb16) ? 2UL : 4UL;
        }

        InstInfo blockTail = block.Instructions[^1];

        if (Decoder<InstEmit>.WritesToPC(blockTail.Encoding, blockTail.Name, blockTail.Flags, block.IsThumb))
        {
            // If the block ends with a PC register write, then we have a branch from register.

            InstEmitCommon.SetThumbFlag(cgContext, regAlloc.RemapGprRegister(RegisterUtils.PcRegister));

            cgContext.AddPendingIndirectBranch(blockTail.Name, RegisterUtils.PcRegister);

            asm.B(0);
        }

        ForceConditionalEnd(cgContext, ref activeCondition, activeConditionIp);
    }

    int callSpillSize = multiBlock.HasHostCall
        ? CalculateStackSizeForCallSpill(regAlloc.UsedGprsMask, regAlloc.UsedFpSimdMask, UsablePStateMask)
        : 0;

    RegisterSaveRestore rsr = new(
        regAlloc.UsedGprsMask & AbiConstants.GprCalleeSavedRegsMask,
        regAlloc.UsedFpSimdMask & AbiConstants.FpSimdCalleeSavedRegsMask,
        OperandType.FP64,
        multiBlock.HasHostCall,
        callSpillSize);

    TailMerger tailMerger = new();

    Context context = new(writer, regAlloc, memoryManager.Type, tailMerger, funcTable, rsr, dispatchStubPtr, memoryManager.PageTablePointer);

    InstInfo lastInstruction = multiBlock.Blocks[^1].Instructions[^1];
    bool lastInstIsConditional = GetCondition(lastInstruction, isThumb) != ArmCondition.Al;

    // Execution can fall off the end of the emitted code in those cases, so continue at the next guest address.
    bool fallsThrough =
        multiBlock.IsTruncated ||
        lastInstIsConditional ||
        lastInstruction.Name.IsCall() ||
        IsConditionalBranch(lastInstruction);

    if (fallsThrough)
    {
        WriteTailCallConstant(context, ref asm, (uint)pc);
    }

    foreach (PendingBranch pendingBranch in cgContext.GetPendingBranches())
    {
        RewriteBranchInstructionWithTarget(context, pendingBranch, blockStarts);
    }

    tailMerger.WriteReturn(writer, context.WriteEpilogueWithoutContext);

    // The prologue is written last, as it depends on the registers used by everything emitted above.
    context.WritePrologueAt(0);

    return new(writer.AsByteSpan(), (int)(pc - address));
}
|
||||
|
||||
/// <summary>
/// Calculates the stack space needed to preserve the registers in use across a call.
/// </summary>
/// <param name="gprUseMask">Mask of general purpose registers in use</param>
/// <param name="fpSimdUseMask">Mask of FP/SIMD registers in use</param>
/// <param name="pStateUseMask">Mask of flags in use</param>
/// <returns>Size in bytes</returns>
private static int CalculateStackSizeForCallSpill(uint gprUseMask, uint fpSimdUseMask, uint pStateUseMask)
{
    // Note that we don't discard callee saved FP/SIMD register because only the lower 64 bits is callee saved,
    // so if the function is using the full register, that won't be enough.
    // We could do better, but it's likely not worth it since this case happens very rarely in practice.

    int gprBytes = BitOperations.PopCount(gprUseMask & ~AbiConstants.GprCalleeSavedRegsMask) * 8;
    int fpSimdBytes = BitOperations.PopCount(fpSimdUseMask) * 16;
    int flagsBytes = pStateUseMask == 0 ? 0 : 8;

    return gprBytes + fpSimdBytes + flagsBytes;
}
|
||||
|
||||
/// <summary>
/// Starts a new conditional region if the condition of the next instruction differs from the active one.
/// The active region, if any, is closed first.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="condition">Active condition, updated with the condition of the instruction</param>
/// <param name="instructionPointer">Index of the branch that opened the active region, updated if a new region is opened</param>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags</param>
/// <param name="encoding">Instruction encoding</param>
private static void SetConditionalStart(
    CodeGenContext context,
    ref ArmCondition condition,
    ref int instructionPointer,
    InstName name,
    InstFlags flags,
    uint encoding)
{
    // A condition coming from the IT state takes precedence over the one on the encoding.
    bool hasItCondition = context.ConsumeItCondition(out ArmCondition newCondition);

    if (!hasItCondition)
    {
        newCondition = GetCondition(name, flags, encoding, context.IsThumb);
    }

    if (newCondition == condition)
    {
        return;
    }

    WriteConditionalEnd(context, condition, instructionPointer);

    condition = newCondition;

    if (newCondition == ArmCondition.Al)
    {
        return;
    }

    // Placeholder branch that skips the region when the condition is false, patched when the region ends.
    instructionPointer = context.CodeWriter.InstructionPointer;
    context.Arm64Assembler.B(newCondition.Invert(), 0);
}
|
||||
|
||||
/// <summary>
/// Checks if an instruction is a <c>B</c> instruction whose 4 most significant encoding bits are not the "always" condition.
/// </summary>
/// <param name="instInfo">Instruction to check</param>
/// <returns>True if the instruction is a conditional branch, false otherwise</returns>
private static bool IsConditionalBranch(in InstInfo instInfo)
{
    if (instInfo.Name != InstName.B)
    {
        return false;
    }

    ArmCondition branchCondition = (ArmCondition)(instInfo.Encoding >> 28);

    return branchCondition != ArmCondition.Al;
}
|
||||
|
||||
/// <summary>
/// Gets the condition that must be handled around an instruction with a conditional region.
/// </summary>
/// <param name="instInfo">Instruction</param>
/// <param name="isThumb">True if the instruction is a Thumb instruction</param>
/// <returns>Condition of the instruction, or "always" if no conditional region is needed</returns>
private static ArmCondition GetCondition(in InstInfo instInfo, bool isThumb)
{
    InstName name = instInfo.Name;
    InstFlags flags = instInfo.Flags;
    uint encoding = instInfo.Encoding;

    return GetCondition(name, flags, encoding, isThumb);
}
|
||||
|
||||
/// <summary>
/// Gets the condition that must be handled around an instruction with a conditional region.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="isThumb">True if the instruction is a Thumb instruction</param>
/// <returns>Condition of the instruction, or "always" if no conditional region is needed</returns>
private static ArmCondition GetCondition(InstName name, InstFlags flags, uint encoding, bool isThumb)
{
    // For branch, we handle conditional execution on the instruction itself.
    if (!flags.HasFlag(InstFlags.Cond) || CanHandleConditionalInstruction(name, encoding, isThumb))
    {
        return ArmCondition.Al;
    }

    // The condition is on the 4 most significant bits of the encoding.
    return (ArmCondition)(encoding >> 28);
}
|
||||
|
||||
/// <summary>
/// Checks if the code emitted for an instruction handles the condition by itself,
/// in which case no conditional region is needed around it.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="isThumb">True if the instruction is a Thumb instruction</param>
/// <returns>True if the instruction handles its own condition, false otherwise</returns>
private static bool CanHandleConditionalInstruction(InstName name, uint encoding, bool isThumb)
{
    return name switch
    {
        InstName.B => true,

        // We can use CSEL for conditional MOV from registers, as long the instruction is not setting flags.
        // We don't handle thumb right now because the condition comes from the IT block which would be more complicated to handle.
        InstName.MovR => !isThumb && (encoding & (1u << 20)) == 0,

        _ => false,
    };
}
|
||||
|
||||
/// <summary>
/// Closes the active conditional region, if any, and resets the active condition to "always".
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="condition">Active condition, reset to "always" on return</param>
/// <param name="instructionPointer">Index of the branch that opened the active region</param>
private static void ForceConditionalEnd(CodeGenContext context, ref ArmCondition condition, int instructionPointer)
{
    ArmCondition closing = condition;

    condition = ArmCondition.Al;

    WriteConditionalEnd(context, closing, instructionPointer);
}
|
||||
|
||||
/// <summary>
/// Closes a conditional region, by patching the branch that opened it to jump to the current position.
/// Does nothing if the condition is "always".
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="condition">Condition of the region being closed</param>
/// <param name="instructionPointer">Index of the branch that opened the region</param>
private static void WriteConditionalEnd(CodeGenContext context, ArmCondition condition, int instructionPointer)
{
    if (condition == ArmCondition.Al)
    {
        return;
    }

    CodeWriter writer = context.CodeWriter;

    // The offset, in instructions, goes on the 19 bits starting at bit 5 of the branch.
    int delta = writer.InstructionPointer - instructionPointer;
    uint offsetField = ((uint)delta & 0x7ffff) << 5;
    uint branchInst = writer.ReadInstructionAt(instructionPointer) | offsetField;

    Debug.Assert((int)((branchInst & ~0x1fu) << 8) >> 11 == delta * 4);

    writer.WriteInstructionAt(instructionPointer, branchInst);
}
|
||||
|
||||
/// <summary>
/// Rewrites the placeholder of a pending branch, using the rewrite method that matches the branch type.
/// </summary>
/// <param name="context">Compilation context</param>
/// <param name="pendingBranch">Pending branch to rewrite</param>
/// <param name="targets">Map from guest block address to the index of its first host instruction</param>
private static void RewriteBranchInstructionWithTarget(in Context context, in PendingBranch pendingBranch, Dictionary<ulong, int> targets)
{
    BranchType type = pendingBranch.BranchType;
    InstName name = pendingBranch.Name;
    uint target = pendingBranch.TargetAddress;
    uint next = pendingBranch.NextAddress;
    int writerPointer = pendingBranch.WriterPointer;

    switch (type)
    {
        case BranchType.Branch:
            RewriteBranchInstructionWithTarget(context, name, target, writerPointer, targets);
            break;

        case BranchType.Call:
            RewriteCallInstructionWithTarget(context, target, next, writerPointer);
            break;

        case BranchType.IndirectBranch:
            RewriteIndirectBranchInstructionWithTarget(context, name, target, writerPointer);
            break;

        case BranchType.TableBranchByte:
        case BranchType.TableBranchHalfword:
            bool isHalfword = type == BranchType.TableBranchHalfword;

            RewriteTableBranchInstructionWithTarget(context, isHalfword, target, next, writerPointer);
            break;

        case BranchType.IndirectCall:
            RewriteIndirectCallInstructionWithTarget(context, target, next, writerPointer);
            break;

        case BranchType.SyncPoint:
        case BranchType.SoftwareInterrupt:
        case BranchType.ReadCntpct:
            RewriteHostCall(context, name, type, target, next, writerPointer);
            break;

        default:
            Debug.Fail($"Invalid branch type '{pendingBranch.BranchType}'");
            break;
    }
}
|
||||
|
||||
/// <summary>
/// Rewrites a direct branch placeholder so that it jumps to its target.
/// If the target is part of the function and in range, the branch jumps directly to it.
/// Otherwise, the branch jumps to code written at the current position, which leaves the function towards the target.
/// </summary>
/// <param name="context">Compilation context</param>
/// <param name="name">Name of the guest instruction that produced the branch</param>
/// <param name="targetAddress">Guest address of the branch target</param>
/// <param name="branchIndex">Index of the placeholder instruction</param>
/// <param name="targets">Map from guest block address to the index of its first host instruction</param>
private static void RewriteBranchInstructionWithTarget(in Context context, InstName name, uint targetAddress, int branchIndex, Dictionary<ulong, int> targets)
{
    const uint UnconditionalBranch = 0x14000000;
    const int UnconditionalOffsetMask = 0x3ffffff;

    CodeWriter writer = context.Writer;
    Assembler asm = new(writer);

    uint encoding = writer.ReadInstructionAt(branchIndex);
    bool isLocalTarget = targets.TryGetValue(targetAddress, out int localIndex);

    if (encoding == UnconditionalBranch)
    {
        // Unconditional branch.

        if (isLocalTarget)
        {
            int localDelta = localIndex - branchIndex;

            if (localDelta >= -Encodable26BitsOffsetLimit && localDelta < Encodable26BitsOffsetLimit)
            {
                writer.WriteInstructionAt(branchIndex, encoding | (uint)(localDelta & UnconditionalOffsetMask));

                return;
            }
        }

        int tailDelta = writer.InstructionPointer - branchIndex;

        writer.WriteInstructionAt(branchIndex, encoding | (uint)(tailDelta & UnconditionalOffsetMask));
        WriteTailCallConstant(context, ref asm, targetAddress);
    }
    else
    {
        // Conditional branch.

        uint branchMask = 0x7ffff;
        int branchMax = (int)(branchMask + 1) / 2;

        // Places an offset, in instructions, on the offset field of the original conditional branch.
        uint WithOffset(int offset) => encoding | (uint)((offset & branchMask) << 5);

        bool IsInRange(int offset) => offset >= -branchMax && offset < branchMax;

        if (isLocalTarget)
        {
            int localDelta = localIndex - branchIndex;

            if (IsInRange(localDelta))
            {
                writer.WriteInstructionAt(branchIndex, WithOffset(localDelta));

                return;
            }
        }

        int tailDelta = writer.InstructionPointer - branchIndex;

        if (IsInRange(tailDelta))
        {
            writer.WriteInstructionAt(branchIndex, WithOffset(tailDelta));
            WriteTailCallConstant(context, ref asm, targetAddress);
        }
        else
        {
            // If the branch target is too far away, we use a regular unconditional branch
            // instruction instead which has a much higher range.
            // We branch directly to the end of the function, where we put the conditional branch,
            // and then branch back to the next instruction or return the branch target depending
            // on the branch being taken or not.

            uint branchInst = 0x14000000u | ((uint)tailDelta & 0x3ffffff);
            Debug.Assert((int)(branchInst << 6) >> 4 == tailDelta * 4);

            writer.WriteInstructionAt(branchIndex, branchInst);

            int movedBranchIndex = writer.InstructionPointer;

            writer.WriteInstruction(0u); // Placeholder
            asm.B((branchIndex + 1 - writer.InstructionPointer) * 4);

            int movedDelta = writer.InstructionPointer - movedBranchIndex;

            writer.WriteInstructionAt(movedBranchIndex, WithOffset(movedDelta));
            WriteTailCallConstant(context, ref asm, targetAddress);
        }
    }

    Debug.Assert(name == InstName.B || name == InstName.Cbnz, $"Unknown branch instruction \"{name}\".");
}
|
||||
|
||||
/// <summary>
/// Emits the out-of-line code for a direct guest call at the current write position and makes the
/// placeholder branch at <paramref name="branchIndex"/> reach it.
/// </summary>
/// <param name="context">Translation context supplying the writer, register allocator and call helpers</param>
/// <param name="targetAddress">Guest address passed (as a constant) as the call target</param>
/// <param name="nextAddress">Guest address written to the remapped LR register before the call</param>
/// <param name="branchIndex">Instruction index of the placeholder branch</param>
private static void RewriteCallInstructionWithTarget(in Context context, uint targetAddress, uint nextAddress, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    Assembler assembler = new(codeWriter);

    WriteBranchToCurrentPosition(context, branchIndex);

    // LR receives the address of the instruction following the call.
    Operand linkRegister = context.RegisterAllocator.RemapGprRegister(RegisterUtils.LrRegister);
    assembler.Mov(linkRegister, nextAddress);

    context.StoreToContext();
    InstEmitFlow.WriteCallWithGuestAddress(
        codeWriter, ref assembler,
        context.RegisterAllocator, context.TailMerger, context.WriteEpilogueWithoutContext,
        context.FuncTable, context.DispatchStubPointer, context.GetReservedStackOffset(),
        nextAddress,
        InstEmitCommon.Const((int)targetAddress));
    context.LoadFromContext();

    // Resume at the instruction that follows the original placeholder branch.
    int returnOffset = (branchIndex + 1 - codeWriter.InstructionPointer) * 4;
    assembler.B(returnOffset);
}
|
||||
|
||||
/// <summary>
/// Emits the out-of-line code for an indirect guest branch at the current write position and makes
/// the placeholder branch at <paramref name="branchIndex"/> reach it.
/// </summary>
/// <param name="context">Translation context supplying the writer, register allocator and call helpers</param>
/// <param name="name">Instruction that produced the branch; selects between the return and tail call paths</param>
/// <param name="targetRegister">Guest register holding the branch destination</param>
/// <param name="branchIndex">Instruction index of the placeholder branch</param>
private static void RewriteIndirectBranchInstructionWithTarget(in Context context, InstName name, uint targetRegister, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    Assembler assembler = new(codeWriter);

    WriteBranchToCurrentPosition(context, branchIndex);

    using ScopedRegister destination = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // The destination is the register value with bit 0 cleared.
    Operand source = context.RegisterAllocator.RemapGprRegister((int)targetRegister);
    assembler.And(destination.Operand, source, InstEmitCommon.Const(~1));

    context.StoreToContext();

    // Arm32 has no dedicated return instruction; returns are written either as BX LR
    // (leaf functions) or as POP { ... PC }.
    bool isReturn = name switch
    {
        InstName.Bx => targetRegister == RegisterUtils.LrRegister,
        InstName.Ldm or InstName.Ldmda or InstName.Ldmdb or InstName.Ldmib => true,
        _ => false,
    };

    if (!isReturn)
    {
        InstEmitFlow.WriteCallWithGuestAddress(
            codeWriter, ref assembler,
            context.RegisterAllocator, context.TailMerger, context.WriteEpilogueWithoutContext,
            context.FuncTable, context.DispatchStubPointer, context.GetReservedStackOffset(),
            0u,
            destination.Operand,
            isTail: true);

        return;
    }

    // Hand the destination back in host register 0 and take the shared return path.
    assembler.Mov(Register(0), destination.Operand);
    context.TailMerger.AddUnconditionalReturn(codeWriter, assembler);
}
|
||||
|
||||
/// <summary>
/// Emits the out-of-line code for a table branch at the current write position and makes the
/// placeholder branch at <paramref name="branchIndex"/> reach it. The emitted code loads a byte or
/// halfword entry from guest memory at rn + (rm scaled by the entry size), doubles it, adds it to the
/// remapped PC register and tail calls the resulting guest address.
/// </summary>
/// <param name="context">Translation context supplying the writer, register allocator and call helpers</param>
/// <param name="halfword">True when table entries are halfwords, false when they are bytes</param>
/// <param name="rn">Guest register holding the table base address</param>
/// <param name="rm">Guest register holding the table index</param>
/// <param name="branchIndex">Instruction index of the placeholder branch</param>
private static void RewriteTableBranchInstructionWithTarget(in Context context, bool halfword, uint rn, uint rm, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    Assembler assembler = new(codeWriter);

    WriteBranchToCurrentPosition(context, branchIndex);

    using ScopedRegister entry = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // Entry address = base + (index << log2(entry size)).
    int indexShift = halfword ? 1 : 0;
    Operand tableBase = context.RegisterAllocator.RemapGprRegister((int)rn);
    Operand tableIndex = context.RegisterAllocator.RemapGprRegister((int)rm);

    assembler.Add(entry.Operand, tableBase, tableIndex, ArmShiftType.Lsl, indexShift);

    InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, assembler, entry.Operand, entry.Operand);

    if (!halfword)
    {
        assembler.LdrbRiUn(entry.Operand, entry.Operand, 0);
    }
    else
    {
        assembler.LdrhRiUn(entry.Operand, entry.Operand, 0);
    }

    // Destination = PC + (entry << 1).
    Operand pc = context.RegisterAllocator.RemapGprRegister(RegisterUtils.PcRegister);
    assembler.Add(entry.Operand, pc, entry.Operand, ArmShiftType.Lsl, 1);

    context.StoreToContext();

    InstEmitFlow.WriteCallWithGuestAddress(
        codeWriter, ref assembler,
        context.RegisterAllocator, context.TailMerger, context.WriteEpilogueWithoutContext,
        context.FuncTable, context.DispatchStubPointer, context.GetReservedStackOffset(),
        0u,
        entry.Operand,
        isTail: true);
}
|
||||
|
||||
/// <summary>
/// Emits the out-of-line code for an indirect guest call at the current write position and makes the
/// placeholder branch at <paramref name="branchIndex"/> reach it.
/// </summary>
/// <param name="context">Translation context supplying the writer, register allocator and call helpers</param>
/// <param name="targetRegister">Guest register holding the call destination</param>
/// <param name="nextAddress">Guest address written to the remapped LR register before the call</param>
/// <param name="branchIndex">Instruction index of the placeholder branch</param>
private static void RewriteIndirectCallInstructionWithTarget(in Context context, uint targetRegister, uint nextAddress, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    Assembler assembler = new(codeWriter);

    WriteBranchToCurrentPosition(context, branchIndex);

    using ScopedRegister destination = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // Capture the destination (bit 0 cleared) before LR is overwritten, then set LR.
    Operand source = context.RegisterAllocator.RemapGprRegister((int)targetRegister);
    assembler.And(destination.Operand, source, InstEmitCommon.Const(~1));

    Operand linkRegister = context.RegisterAllocator.RemapGprRegister(RegisterUtils.LrRegister);
    assembler.Mov(linkRegister, nextAddress);

    context.StoreToContext();
    InstEmitFlow.WriteCallWithGuestAddress(
        codeWriter, ref assembler,
        context.RegisterAllocator, context.TailMerger, context.WriteEpilogueWithoutContext,
        context.FuncTable, context.DispatchStubPointer, context.GetReservedStackOffset(),
        nextAddress & ~1u,
        destination.Operand);
    context.LoadFromContext();

    // Resume at the instruction that follows the original placeholder branch.
    int returnOffset = (branchIndex + 1 - codeWriter.InstructionPointer) * 4;
    assembler.B(returnOffset);
}
|
||||
|
||||
/// <summary>
/// Emits, at the current write position, the out-of-line code for an instruction that is handled by
/// one of the <see cref="InstEmitSystem"/> writers, patches the placeholder branch at
/// <paramref name="branchIndex"/> to jump to it, and ends with a branch back to the instruction that
/// follows the placeholder.
/// </summary>
/// <param name="context">Translation context supplying the writer, register allocator and tail merger</param>
/// <param name="name">Instruction name; selects the handler for <see cref="BranchType.SoftwareInterrupt"/></param>
/// <param name="type">Kind of host call to emit</param>
/// <param name="imm">Immediate value forwarded to the selected writer</param>
/// <param name="pc">Guest PC value forwarded to the selected writer</param>
/// <param name="branchIndex">Instruction index of the placeholder branch</param>
private static void RewriteHostCall(in Context context, InstName name, BranchType type, uint imm, uint pc, int branchIndex)
{
    CodeWriter writer = context.Writer;
    Assembler asm = new(writer);

    // Point the placeholder branch at the code emitted below; the offset (in instructions)
    // is OR'ed into the low 26 bits of the existing encoding.
    uint encoding = writer.ReadInstructionAt(branchIndex);
    int targetIndex = writer.InstructionPointer;
    int delta = targetIndex - branchIndex;

    writer.WriteInstructionAt(branchIndex, encoding | (uint)(delta & 0x3ffffff));

    switch (type)
    {
        case BranchType.SyncPoint:
            InstEmitSystem.WriteSyncPoint(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset());
            break;
        case BranchType.SoftwareInterrupt:
            context.StoreToContext();
            switch (name)
            {
                case InstName.Bkpt:
                    InstEmitSystem.WriteBkpt(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset(), pc, imm);
                    break;
                case InstName.Svc:
                    InstEmitSystem.WriteSvc(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset(), pc, imm);
                    break;
                case InstName.Udf:
                    InstEmitSystem.WriteUdf(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset(), pc, imm);
                    break;
                default:
                    // Mirrors the outer default: an unexpected instruction here would otherwise
                    // silently emit no handler at all.
                    Debug.Fail($"Invalid software interrupt instruction '{name}'");
                    break;
            }
            context.LoadFromContext();
            break;
        case BranchType.ReadCntpct:
            InstEmitSystem.WriteReadCntpct(context.Writer, context.RegisterAllocator, context.GetReservedStackOffset(), (int)imm, (int)pc);
            break;
        default:
            Debug.Fail($"Invalid branch type '{type}'");
            break;
    }

    // Branch back to the next instruction.
    asm.B((branchIndex + 1 - writer.InstructionPointer) * 4);
}
|
||||
|
||||
/// <summary>
/// Makes the placeholder branch at <paramref name="branchIndex"/> reach the current write position.
/// If the placeholder is the last instruction written, it is removed instead, since execution falls
/// through to the current position anyway.
/// </summary>
/// <param name="context">Translation context supplying the code writer</param>
/// <param name="branchIndex">Instruction index of the placeholder branch</param>
private static void WriteBranchToCurrentPosition(in Context context, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    int currentIndex = codeWriter.InstructionPointer;

    if (currentIndex == branchIndex + 1)
    {
        codeWriter.RemoveLastInstruction();

        return;
    }

    // The offset (in instructions) is OR'ed into the low 26 bits of the existing encoding.
    uint offsetField = (uint)((currentIndex - branchIndex) & 0x3ffffff);
    uint placeholder = codeWriter.ReadInstructionAt(branchIndex);

    codeWriter.WriteInstructionAt(branchIndex, placeholder | offsetField);
}
|
||||
|
||||
/// <summary>
/// Stores the guest state to the context and emits a tail call to a constant guest address.
/// </summary>
/// <param name="context">Translation context supplying the writer, register allocator and call helpers</param>
/// <param name="asm">Assembler used to emit the call</param>
/// <param name="address">Guest address to call</param>
private static void WriteTailCallConstant(in Context context, ref Assembler asm, uint address)
{
    context.StoreToContext();

    InstEmitFlow.WriteCallWithGuestAddress(
        context.Writer, ref asm,
        context.RegisterAllocator, context.TailMerger, context.WriteEpilogueWithoutContext,
        context.FuncTable, context.DispatchStubPointer, context.GetReservedStackOffset(),
        0u,
        InstEmitCommon.Const((int)address),
        isTail: true);
}
|
||||
|
||||
/// <summary>
/// Creates an integer register operand.
/// </summary>
/// <param name="register">Register index</param>
/// <param name="type">Operand type; defaults to <see cref="OperandType.I64"/></param>
/// <returns>Operand referring to the requested integer register</returns>
private static Operand Register(int register, OperandType type = OperandType.I64)
    => new(register, RegisterType.Integer, type);
|
||||
|
||||
/// <summary>
/// Statistics hook; the body is empty, so calling it has no effect.
/// </summary>
public static void PrintStats() { }
|
||||
}
|
||||
}
|
8502
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmit.cs
Normal file
8502
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmit.cs
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,87 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the byte-wise sum of absolute differences instructions (USAD8 and USADA8).
/// </summary>
static class InstEmitAbsDiff
{
    /// <summary>
    /// Emits rd = sum over the four bytes of |rn.byte[i] - rm.byte[i]|.
    /// </summary>
    public static void Usad8(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        using ScopedRegister sum = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister term = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister byteN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister byteM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
        Operand left = InstEmitCommon.GetInputGpr(context, rn);
        Operand right = InstEmitCommon.GetInputGpr(context, rm);

        // Byte 0 seeds the running sum directly.
        EmitByteAbsDiff(context, sum.Operand, byteN.Operand, byteM.Operand, left, right, 0);

        for (int b = 1; b < 4; b++)
        {
            EmitByteAbsDiff(context, term.Operand, byteN.Operand, byteM.Operand, left, right, b);

            // The last addition writes straight into the output register.
            Operand target = b == 3 ? destination : sum.Operand;

            context.Arm64Assembler.Add(target, sum.Operand, term.Operand);
        }
    }

    /// <summary>
    /// Emits rd = ra + sum over the four bytes of |rn.byte[i] - rm.byte[i]|.
    /// </summary>
    public static void Usada8(CodeGenContext context, uint rd, uint rn, uint rm, uint ra)
    {
        using ScopedRegister sum = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister term = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister byteN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister byteM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
        Operand left = InstEmitCommon.GetInputGpr(context, rn);
        Operand right = InstEmitCommon.GetInputGpr(context, rm);
        Operand addend = InstEmitCommon.GetInputGpr(context, ra);

        // Byte 0 seeds the running sum directly.
        EmitByteAbsDiff(context, sum.Operand, byteN.Operand, byteM.Operand, left, right, 0);

        for (int b = 1; b < 4; b++)
        {
            EmitByteAbsDiff(context, term.Operand, byteN.Operand, byteM.Operand, left, right, b);

            context.Arm64Assembler.Add(sum.Operand, sum.Operand, term.Operand);
        }

        context.Arm64Assembler.Add(destination, sum.Operand, addend);
    }

    /// <summary>
    /// Emits dest = |rn.byte[byteIndex] - rm.byte[byteIndex]|, using tempN and tempM as scratch
    /// registers for the extracted bytes.
    /// </summary>
    private static void EmitByteAbsDiff(CodeGenContext context, Operand dest, Operand tempN, Operand tempM, Operand rn, Operand rm, int byteIndex)
    {
        int lsb = byteIndex * 8;

        context.Arm64Assembler.Ubfx(tempN, rn, lsb, 8);
        context.Arm64Assembler.Ubfx(tempM, rm, lsb, 8);
        context.Arm64Assembler.Sub(dest, tempN, tempM);

        EmitAbs(context, dest);
    }

    /// <summary>
    /// Emits an in-place absolute value of a 32-bit signed value.
    /// </summary>
    private static void EmitAbs(CodeGenContext context, Operand value)
    {
        using ScopedRegister biased = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // With s = (int)value >> 31 (all ones when negative, zero otherwise):
        //   r = (value + s) ^ s
        // For a negative value this subtracts 1 and then inverts all bits, which is a negation;
        // for a non-negative value both steps are no-ops.
        context.Arm64Assembler.Add(biased.Operand, value, value, ArmShiftType.Asr, 31);
        context.Arm64Assembler.Eor(value, biased.Operand, value, ArmShiftType.Asr, 31);
    }
}
|
||||
}
|
1105
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitAlu.cs
Normal file
1105
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitAlu.cs
Normal file
File diff suppressed because it is too large
Load Diff
103
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitBit.cs
Normal file
103
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitBit.cs
Normal file
@ -0,0 +1,103 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the Arm32 bit field, bit count and byte reversal instructions.
/// </summary>
static class InstEmitBit
{
    /// <summary>
    /// Emits a bit field clear of bits [msb:lsb] of rd. Nothing is emitted when msb is below lsb.
    /// </summary>
    public static void Bfc(CodeGenContext context, uint rd, uint lsb, uint msb)
    {
        // msb < lsb is documented as "unpredictable".
        if (msb >= lsb)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            int width = (int)(msb - lsb + 1);

            context.Arm64Assembler.Bfc(destination, (int)lsb, width);
        }
    }

    /// <summary>
    /// Emits a bit field insert of the low bits of rn into bits [msb:lsb] of rd. Nothing is emitted
    /// when msb is below lsb.
    /// </summary>
    public static void Bfi(CodeGenContext context, uint rd, uint rn, uint lsb, uint msb)
    {
        // msb < lsb is documented as "unpredictable".
        if (msb >= lsb)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rn);
            int width = (int)(msb - lsb + 1);

            context.Arm64Assembler.Bfi(destination, source, (int)lsb, width);
        }
    }

    /// <summary>
    /// Emits a CLZ from rm into rd.
    /// </summary>
    public static void Clz(CodeGenContext context, uint rd, uint rm)
    {
        Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
        Operand source = InstEmitCommon.GetInputGpr(context, rm);

        context.Arm64Assembler.Clz(destination, source);
    }

    /// <summary>
    /// Emits an RBIT from rm into rd.
    /// </summary>
    public static void Rbit(CodeGenContext context, uint rd, uint rm)
    {
        Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
        Operand source = InstEmitCommon.GetInputGpr(context, rm);

        context.Arm64Assembler.Rbit(destination, source);
    }

    /// <summary>
    /// Emits a REV from rm into rd.
    /// </summary>
    public static void Rev(CodeGenContext context, uint rd, uint rm)
    {
        Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
        Operand source = InstEmitCommon.GetInputGpr(context, rm);

        context.Arm64Assembler.Rev(destination, source);
    }

    /// <summary>
    /// Emits a REV16 from rm into rd.
    /// </summary>
    public static void Rev16(CodeGenContext context, uint rd, uint rm)
    {
        Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
        Operand source = InstEmitCommon.GetInputGpr(context, rm);

        context.Arm64Assembler.Rev16(destination, source);
    }

    /// <summary>
    /// Emits REVSH as a REV16 from rm into rd followed by a sign extension of the low halfword of rd.
    /// </summary>
    public static void Revsh(CodeGenContext context, uint rd, uint rm)
    {
        Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
        Operand source = InstEmitCommon.GetInputGpr(context, rm);

        context.Arm64Assembler.Rev16(destination, source);
        context.Arm64Assembler.Sxth(destination, destination);
    }

    /// <summary>
    /// Emits a signed bit field extract of (widthMinus1 + 1) bits of rn, starting at lsb, into rd.
    /// Nothing is emitted when the field would extend past bit 31.
    /// </summary>
    public static void Sbfx(CodeGenContext context, uint rd, uint rn, uint lsb, uint widthMinus1)
    {
        // A field that ends past bit 31 is documented as "unpredictable".
        if (lsb + widthMinus1 <= 31)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rn);
            int width = (int)widthMinus1 + 1;

            context.Arm64Assembler.Sbfx(destination, source, (int)lsb, width);
        }
    }

    /// <summary>
    /// Emits an unsigned bit field extract of (widthMinus1 + 1) bits of rn, starting at lsb, into rd.
    /// Nothing is emitted when the field would extend past bit 31.
    /// </summary>
    public static void Ubfx(CodeGenContext context, uint rd, uint rn, uint lsb, uint widthMinus1)
    {
        // A field that ends past bit 31 is documented as "unpredictable".
        if (lsb + widthMinus1 <= 31)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rn);
            int width = (int)widthMinus1 + 1;

            context.Arm64Assembler.Ubfx(destination, source, (int)lsb, width);
        }
    }
}
|
||||
}
|
@ -0,0 +1,263 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Helpers shared by the Arm32 to Arm64 instruction emitters: operand access, flag handling and
/// element-wise emission for packed 8-bit and 16-bit operations.
/// </summary>
static class InstEmitCommon
{
    // Bit of the flags word (at NativeContextOffsets.FlagsBaseOffset) updated by the thumb flag helpers.
    private const int ThumbFlagBit = 5;

    /// <summary>
    /// Creates a 32-bit constant operand.
    /// </summary>
    public static Operand Const(int value)
    {
        return new(OperandKind.Constant, OperandType.I32, (uint)value);
    }

    /// <summary>
    /// Gets the operand for a guest register that is about to be read. When the register is PC,
    /// a move of the current PC value into the operand is emitted first.
    /// </summary>
    public static Operand GetInputGpr(CodeGenContext context, uint register)
    {
        Operand mapped = context.RegisterAllocator.RemapGprRegister((int)register);

        if (register != RegisterUtils.PcRegister)
        {
            return mapped;
        }

        context.Arm64Assembler.Mov(mapped, context.Pc);

        return mapped;
    }

    /// <summary>
    /// Gets the operand for a guest register that is about to be written.
    /// </summary>
    public static Operand GetOutputGpr(CodeGenContext context, uint register)
    {
        return context.RegisterAllocator.RemapGprRegister((int)register);
    }

    /// <summary>
    /// Emits a read of NZCV into <paramref name="flagsOut"/>, shifted right by 28 bits.
    /// </summary>
    public static void GetCurrentFlags(CodeGenContext context, Operand flagsOut)
    {
        context.Arm64Assembler.MrsNzcv(flagsOut);
        context.Arm64Assembler.Lsr(flagsOut, flagsOut, Const(28));
    }

    /// <summary>
    /// Emits a write of NZCV from <paramref name="nzcvFlags"/> shifted left by 28 bits; the inverse
    /// of the shift applied by <see cref="GetCurrentFlags"/>.
    /// </summary>
    public static void RestoreNzcvFlags(CodeGenContext context, Operand nzcvFlags)
    {
        using ScopedRegister shifted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Lsl(shifted.Operand, nzcvFlags, Const(28));
        context.Arm64Assembler.MsrNzcv(shifted.Operand);
    }

    /// <summary>
    /// Emits code that replaces bits 28 and 29 of NZCV with the two low bits of
    /// <paramref name="cvFlags"/>, leaving the other flag bits as they are.
    /// </summary>
    public static void RestoreCvFlags(CodeGenContext context, Operand cvFlags)
    {
        // Logical operations zero the carry and overflow flags on Arm64, while Arm32 leaves them
        // untouched. This puts the saved carry and overflow back after such an operation.

        using ScopedRegister nzcv = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.MrsNzcv(nzcv.Operand);
        context.Arm64Assembler.Bfi(nzcv.Operand, cvFlags, 28, 2);
        context.Arm64Assembler.MsrNzcv(nzcv.Operand);
    }

    /// <summary>
    /// Emits code that sets the thumb flag bit of the flags word in the native context.
    /// </summary>
    public static void SetThumbFlag(CodeGenContext context)
    {
        Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

        using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.LdrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
        context.Arm64Assembler.Orr(flags.Operand, flags.Operand, Const(1 << ThumbFlagBit));
        context.Arm64Assembler.StrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
    }

    /// <summary>
    /// Emits code that copies bit 0 of <paramref name="value"/> into the thumb flag bit of the flags
    /// word in the native context.
    /// </summary>
    public static void SetThumbFlag(CodeGenContext context, Operand value)
    {
        Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

        using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.LdrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
        context.Arm64Assembler.Bfi(flags.Operand, value, ThumbFlagBit, 1);
        context.Arm64Assembler.StrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
    }

    /// <summary>
    /// Emits code that clears the thumb flag bit of the flags word in the native context.
    /// </summary>
    public static void ClearThumbFlag(CodeGenContext context)
    {
        Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

        using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.LdrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
        context.Arm64Assembler.Bfc(flags.Operand, ThumbFlagBit, 1);
        context.Arm64Assembler.StrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
    }

    /// <summary>
    /// Applies a unary element action to both sign-extended halfwords of rn and packs the low
    /// 16 bits of each result into rd.
    /// </summary>
    public static void EmitSigned16BitPair(CodeGenContext context, uint rd, uint rn, Action<Operand, Operand> elementAction)
    {
        using ScopedRegister result = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister lowResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister element = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand destination = GetOutputGpr(context, rd);
        Operand source = GetInputGpr(context, rn);

        // Low halfword.
        EmitLowHalf(context, element.Operand, source, unsigned: false);
        elementAction(result.Operand, element.Operand);
        context.Arm64Assembler.Uxth(lowResult.Operand, result.Operand);

        // High halfword, merged with the saved low result.
        EmitHighHalf(context, element.Operand, source, unsigned: false);
        elementAction(result.Operand, element.Operand);
        context.Arm64Assembler.Orr(destination, lowResult.Operand, result.Operand, ArmShiftType.Lsl, 16);
    }

    /// <summary>
    /// Applies a binary element action to the matching sign-extended halfwords of rn and rm and
    /// packs the low 16 bits of each result into rd.
    /// </summary>
    public static void EmitSigned16BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction)
    {
        Emit16BitPair(context, rd, rn, rm, elementAction, unsigned: false);
    }

    /// <summary>
    /// Applies an element action to the crossed sign-extended halfwords of rn and rm (low of rn with
    /// high of rm as element 0, high of rn with low of rm as element 1) and packs the results into rd.
    /// </summary>
    public static void EmitSigned16BitXPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand, int> elementAction)
    {
        Emit16BitXPair(context, rd, rn, rm, elementAction, unsigned: false);
    }

    /// <summary>
    /// Applies a binary element action to the matching sign-extended bytes of rn and rm and packs
    /// the low 8 bits of each result into rd.
    /// </summary>
    public static void EmitSigned8BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction)
    {
        Emit8BitPair(context, rd, rn, rm, elementAction, unsigned: false);
    }

    /// <summary>
    /// Applies a binary element action to the matching zero-extended halfwords of rn and rm and
    /// packs the low 16 bits of each result into rd.
    /// </summary>
    public static void EmitUnsigned16BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction)
    {
        Emit16BitPair(context, rd, rn, rm, elementAction, unsigned: true);
    }

    /// <summary>
    /// Applies an element action to the crossed zero-extended halfwords of rn and rm (low of rn with
    /// high of rm as element 0, high of rn with low of rm as element 1) and packs the results into rd.
    /// </summary>
    public static void EmitUnsigned16BitXPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand, int> elementAction)
    {
        Emit16BitXPair(context, rd, rn, rm, elementAction, unsigned: true);
    }

    /// <summary>
    /// Applies a binary element action to the matching zero-extended bytes of rn and rm and packs
    /// the low 8 bits of each result into rd.
    /// </summary>
    public static void EmitUnsigned8BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction)
    {
        Emit8BitPair(context, rd, rn, rm, elementAction, unsigned: true);
    }

    /// <summary>
    /// Emits the extraction of the low halfword of <paramref name="source"/> into
    /// <paramref name="dest"/>, zero-extended or sign-extended.
    /// </summary>
    private static void EmitLowHalf(CodeGenContext context, Operand dest, Operand source, bool unsigned)
    {
        if (unsigned)
        {
            context.Arm64Assembler.Uxth(dest, source);
        }
        else
        {
            context.Arm64Assembler.Sxth(dest, source);
        }
    }

    /// <summary>
    /// Emits the extraction of the high halfword of <paramref name="source"/> into
    /// <paramref name="dest"/>, using a logical or an arithmetic shift right by 16.
    /// </summary>
    private static void EmitHighHalf(CodeGenContext context, Operand dest, Operand source, bool unsigned)
    {
        if (unsigned)
        {
            context.Arm64Assembler.Lsr(dest, source, Const(16));
        }
        else
        {
            context.Arm64Assembler.Asr(dest, source, Const(16));
        }
    }

    /// <summary>
    /// Shared implementation of the signed and unsigned 16-bit pair emitters.
    /// </summary>
    private static void Emit16BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction, bool unsigned)
    {
        using ScopedRegister result = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister lowResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister elementN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister elementM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand destination = GetOutputGpr(context, rd);
        Operand left = GetInputGpr(context, rn);
        Operand right = GetInputGpr(context, rm);

        // Low halfwords.
        EmitLowHalf(context, elementN.Operand, left, unsigned);
        EmitLowHalf(context, elementM.Operand, right, unsigned);
        elementAction(result.Operand, elementN.Operand, elementM.Operand);
        context.Arm64Assembler.Uxth(lowResult.Operand, result.Operand);

        // High halfwords, merged with the saved low result.
        EmitHighHalf(context, elementN.Operand, left, unsigned);
        EmitHighHalf(context, elementM.Operand, right, unsigned);
        elementAction(result.Operand, elementN.Operand, elementM.Operand);
        context.Arm64Assembler.Orr(destination, lowResult.Operand, result.Operand, ArmShiftType.Lsl, 16);
    }

    /// <summary>
    /// Shared implementation of the signed and unsigned crossed 16-bit pair emitters.
    /// </summary>
    private static void Emit16BitXPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand, int> elementAction, bool unsigned)
    {
        using ScopedRegister result = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister lowResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister elementN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister elementM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand destination = GetOutputGpr(context, rd);
        Operand left = GetInputGpr(context, rn);
        Operand right = GetInputGpr(context, rm);

        // Element 0: low half of rn with high half of rm.
        EmitLowHalf(context, elementN.Operand, left, unsigned);
        EmitHighHalf(context, elementM.Operand, right, unsigned);
        elementAction(result.Operand, elementN.Operand, elementM.Operand, 0);
        context.Arm64Assembler.Uxth(lowResult.Operand, result.Operand);

        // Element 1: high half of rn with low half of rm, merged with the saved low result.
        EmitHighHalf(context, elementN.Operand, left, unsigned);
        EmitLowHalf(context, elementM.Operand, right, unsigned);
        elementAction(result.Operand, elementN.Operand, elementM.Operand, 1);
        context.Arm64Assembler.Orr(destination, lowResult.Operand, result.Operand, ArmShiftType.Lsl, 16);
    }

    /// <summary>
    /// Shared implementation of the signed and unsigned 8-bit emitters. Processes the four bytes
    /// from least to most significant and writes rd only with the final merge.
    /// </summary>
    private static void Emit8BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction, bool unsigned)
    {
        using ScopedRegister result = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister packed = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister elementN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister elementM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand destination = GetOutputGpr(context, rd);
        Operand left = GetInputGpr(context, rn);
        Operand right = GetInputGpr(context, rm);

        for (int b = 0; b < 4; b++)
        {
            int lsb = b * 8;

            if (unsigned)
            {
                context.Arm64Assembler.Ubfx(elementN.Operand, left, lsb, 8);
                context.Arm64Assembler.Ubfx(elementM.Operand, right, lsb, 8);
            }
            else
            {
                context.Arm64Assembler.Sbfx(elementN.Operand, left, lsb, 8);
                context.Arm64Assembler.Sbfx(elementM.Operand, right, lsb, 8);
            }

            elementAction(result.Operand, elementN.Operand, elementM.Operand);

            switch (b)
            {
                case 0:
                    // The first byte initializes the packed value.
                    context.Arm64Assembler.Uxtb(packed.Operand, result.Operand);
                    break;
                case 3:
                    // The last byte is merged straight into the output register.
                    context.Arm64Assembler.Orr(destination, packed.Operand, result.Operand, ArmShiftType.Lsl, 24);
                    break;
                default:
                    context.Arm64Assembler.Uxtb(result.Operand, result.Operand);
                    context.Arm64Assembler.Orr(packed.Operand, packed.Operand, result.Operand, ArmShiftType.Lsl, lsb);
                    break;
            }
        }
    }

    /// <summary>
    /// Combines a 4-bit and a 1-bit field into a 5-bit value. When <paramref name="size"/> is 3 the
    /// single bit is the most significant bit, otherwise it is the least significant bit.
    /// </summary>
    public static uint CombineV(uint low4, uint high1, uint size)
    {
        if (size == 3)
        {
            return CombineV(low4, high1);
        }

        return CombineVF(high1, low4);
    }

    /// <summary>
    /// Combines the fields as (high1 &lt;&lt; 4) | low4.
    /// </summary>
    public static uint CombineV(uint low4, uint high1) => (high1 << 4) | low4;

    /// <summary>
    /// Combines the fields as (high4 &lt;&lt; 1) | low1.
    /// </summary>
    public static uint CombineVF(uint low1, uint high4) => (high4 << 1) | low1;
}
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the Arm32 CRC32 and CRC32C instructions.
/// </summary>
static class InstEmitCrc32
{
    /// <summary>
    /// Emits a CRC32 of rn and rm into rd, with the size field capped at 2.
    /// </summary>
    public static void Crc32(CodeGenContext context, uint rd, uint rn, uint rm, uint sz)
    {
        Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
        Operand accumulator = InstEmitCommon.GetInputGpr(context, rn);
        Operand data = InstEmitCommon.GetInputGpr(context, rm);

        context.Arm64Assembler.Crc32(destination, accumulator, data, Math.Min(2, sz));
    }

    /// <summary>
    /// Emits a CRC32C of rn and rm into rd, with the size field capped at 2.
    /// </summary>
    public static void Crc32c(CodeGenContext context, uint rd, uint rn, uint rm, uint sz)
    {
        Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
        Operand accumulator = InstEmitCommon.GetInputGpr(context, rn);
        Operand data = InstEmitCommon.GetInputGpr(context, rm);

        context.Arm64Assembler.Crc32c(destination, accumulator, data, Math.Min(2, sz));
    }
}
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the Arm32 integer division instructions.
/// </summary>
static class InstEmitDivide
{
    /// <summary>
    /// Emits an SDIV of rn by rm into rd.
    /// </summary>
    public static void Sdiv(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        Operand quotient = InstEmitCommon.GetOutputGpr(context, rd);
        Operand dividend = InstEmitCommon.GetInputGpr(context, rn);
        Operand divisor = InstEmitCommon.GetInputGpr(context, rm);

        context.Arm64Assembler.Sdiv(quotient, dividend, divisor);
    }

    /// <summary>
    /// Emits a UDIV of rn by rm into rd.
    /// </summary>
    public static void Udiv(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        Operand quotient = InstEmitCommon.GetOutputGpr(context, rd);
        Operand dividend = InstEmitCommon.GetInputGpr(context, rn);
        Operand divisor = InstEmitCommon.GetInputGpr(context, rm);

        context.Arm64Assembler.Udiv(quotient, dividend, divisor);
    }
}
|
||||
}
|
@ -0,0 +1,191 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
static class InstEmitExtension
|
||||
{
|
||||
/// <summary>
/// Emits SXTAB through the rotated extend-and-add helper, using the Sxtb extension type.
/// </summary>
public static void Sxtab(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
    => EmitRotated(context, ArmExtensionType.Sxtb, rd, rn, rm, rotate);
|
||||
|
||||
/// <summary>
/// Emits SXTAB16 through the 8-bit extend-and-accumulate helper in its signed form.
/// </summary>
public static void Sxtab16(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
    => EmitExtendAccumulate8(context, rd, rn, rm, rotate, unsigned: false);
|
||||
|
||||
public static void Sxtah(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
|
||||
{
|
||||
EmitRotated(context, ArmExtensionType.Sxth, rd, rn, rm, rotate);
|
||||
}
|
||||
|
||||
public static void Sxtb(CodeGenContext context, uint rd, uint rm, uint rotate)
|
||||
{
|
||||
EmitRotated(context, context.Arm64Assembler.Sxtb, rd, rm, rotate);
|
||||
}
|
||||
|
||||
public static void Sxtb16(CodeGenContext context, uint rd, uint rm, uint rotate)
|
||||
{
|
||||
Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
|
||||
Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
|
||||
|
||||
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
|
||||
if (rotate != 0)
|
||||
{
|
||||
context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)rotate * 8));
|
||||
context.Arm64Assembler.And(rdOperand, tempRegister.Operand, InstEmitCommon.Const(0xff00ff));
|
||||
}
|
||||
else
|
||||
{
|
||||
context.Arm64Assembler.And(rdOperand, rmOperand, InstEmitCommon.Const(0xff00ff));
|
||||
}
|
||||
|
||||
// Sign-extend by broadcasting sign bits.
|
||||
context.Arm64Assembler.And(tempRegister.Operand, rdOperand, InstEmitCommon.Const(0x800080));
|
||||
context.Arm64Assembler.Lsl(tempRegister2.Operand, tempRegister.Operand, InstEmitCommon.Const(9));
|
||||
context.Arm64Assembler.Sub(tempRegister.Operand, tempRegister2.Operand, tempRegister.Operand);
|
||||
context.Arm64Assembler.Orr(rdOperand, rdOperand, tempRegister.Operand);
|
||||
}
|
||||
|
||||
public static void Sxth(CodeGenContext context, uint rd, uint rm, uint rotate)
|
||||
{
|
||||
EmitRotated(context, context.Arm64Assembler.Sxth, rd, rm, rotate);
|
||||
}
|
||||
|
||||
public static void Uxtab(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
|
||||
{
|
||||
EmitRotated(context, ArmExtensionType.Uxtb, rd, rn, rm, rotate);
|
||||
}
|
||||
|
||||
public static void Uxtab16(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
|
||||
{
|
||||
EmitExtendAccumulate8(context, rd, rn, rm, rotate, unsigned: true);
|
||||
}
|
||||
|
||||
public static void Uxtah(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
|
||||
{
|
||||
EmitRotated(context, ArmExtensionType.Uxth, rd, rn, rm, rotate);
|
||||
}
|
||||
|
||||
public static void Uxtb(CodeGenContext context, uint rd, uint rm, uint rotate)
|
||||
{
|
||||
EmitRotated(context, context.Arm64Assembler.Uxtb, rd, rm, rotate);
|
||||
}
|
||||
|
||||
public static void Uxtb16(CodeGenContext context, uint rd, uint rm, uint rotate)
|
||||
{
|
||||
Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
|
||||
Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
|
||||
|
||||
if (rotate != 0)
|
||||
{
|
||||
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
|
||||
context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)rotate * 8));
|
||||
context.Arm64Assembler.And(rdOperand, tempRegister.Operand, InstEmitCommon.Const(0xff00ff));
|
||||
}
|
||||
else
|
||||
{
|
||||
context.Arm64Assembler.And(rdOperand, rmOperand, InstEmitCommon.Const(0xff00ff));
|
||||
}
|
||||
}
|
||||
|
||||
public static void Uxth(CodeGenContext context, uint rd, uint rm, uint rotate)
|
||||
{
|
||||
EmitRotated(context, context.Arm64Assembler.Uxth, rd, rm, rotate);
|
||||
}
|
||||
|
||||
private static void EmitRotated(CodeGenContext context, Action<Operand, Operand> action, uint rd, uint rm, uint rotate)
|
||||
{
|
||||
Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
|
||||
Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
|
||||
|
||||
if (rotate != 0)
|
||||
{
|
||||
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
|
||||
context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)rotate * 8));
|
||||
action(rdOperand, tempRegister.Operand);
|
||||
}
|
||||
else
|
||||
{
|
||||
action(rdOperand, rmOperand);
|
||||
}
|
||||
}
|
||||
|
||||
private static void EmitRotated(CodeGenContext context, ArmExtensionType extensionType, uint rd, uint rn, uint rm, uint rotate)
|
||||
{
|
||||
Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
|
||||
Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
|
||||
Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
|
||||
|
||||
if (rotate != 0)
|
||||
{
|
||||
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
|
||||
context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)rotate * 8));
|
||||
context.Arm64Assembler.Add(rdOperand, rnOperand, tempRegister.Operand, extensionType);
|
||||
}
|
||||
else
|
||||
{
|
||||
context.Arm64Assembler.Add(rdOperand, rnOperand, rmOperand, extensionType);
|
||||
}
|
||||
}
|
||||
|
||||
private static void EmitExtendAccumulate8(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate, bool unsigned)
|
||||
{
|
||||
Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
|
||||
Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
|
||||
Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
|
||||
|
||||
if (rotate != 0)
|
||||
{
|
||||
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
|
||||
context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)rotate * 8));
|
||||
|
||||
EmitExtendAccumulate8Core(context, rdOperand, rnOperand, tempRegister.Operand, unsigned);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitExtendAccumulate8Core(context, rdOperand, rnOperand, rmOperand, unsigned);
|
||||
}
|
||||
}
|
||||
|
||||
private static void EmitExtendAccumulate8Core(CodeGenContext context, Operand rd, Operand rn, Operand rm, bool unsigned)
|
||||
{
|
||||
using ScopedRegister tempD = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
using ScopedRegister tempD2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
|
||||
if (unsigned)
|
||||
{
|
||||
context.Arm64Assembler.Uxth(tempN.Operand, rn);
|
||||
}
|
||||
else
|
||||
{
|
||||
context.Arm64Assembler.Sxth(tempN.Operand, rn);
|
||||
}
|
||||
|
||||
context.Arm64Assembler.Add(tempD.Operand, tempN.Operand, rm, unsigned ? ArmExtensionType.Uxtb : ArmExtensionType.Sxtb);
|
||||
context.Arm64Assembler.Uxth(tempD2.Operand, tempD.Operand);
|
||||
|
||||
if (unsigned)
|
||||
{
|
||||
context.Arm64Assembler.Lsr(tempN.Operand, rn, InstEmitCommon.Const(16));
|
||||
}
|
||||
else
|
||||
{
|
||||
context.Arm64Assembler.Asr(tempN.Operand, rn, InstEmitCommon.Const(16));
|
||||
}
|
||||
|
||||
context.Arm64Assembler.Lsr(tempD.Operand, rm, InstEmitCommon.Const(16));
|
||||
context.Arm64Assembler.Add(tempD.Operand, tempN.Operand, tempD.Operand, unsigned ? ArmExtensionType.Uxtb : ArmExtensionType.Sxtb);
|
||||
context.Arm64Assembler.Orr(rd, tempD2.Operand, tempD.Operand, ArmShiftType.Lsl, 16);
|
||||
}
|
||||
}
|
||||
}
|
256
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs
Normal file
256
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs
Normal file
@ -0,0 +1,256 @@
|
||||
using ARMeilleure.Common;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the guest branch, call and IT instructions, plus the helper that writes a call to
/// another guest function.
/// </summary>
static class InstEmitFlow
{
    // Register index used as the base register of the spill/fill stores and loads.
    private const int SpIndex = 31;

    /// <summary>
    /// Registers a pending branch and emits a host branch with a zero offset, conditional unless
    /// <paramref name="condition"/> is <see cref="ArmCondition.Al"/>.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="imm">Branch immediate recorded with the pending branch</param>
    /// <param name="condition">Condition of the branch</param>
    public static void B(CodeGenContext context, int imm, ArmCondition condition)
    {
        // The pending entry is recorded before the placeholder branch is written.
        context.AddPendingBranch(InstName.B, imm);

        if (condition != ArmCondition.Al)
        {
            context.Arm64Assembler.B(condition, 0);
        }
        else
        {
            context.Arm64Assembler.B(0);
        }
    }

    /// <summary>
    /// Emits a direct call: updates the Thumb flag when the instruction set changes, registers a
    /// pending call with the target and return addresses, and emits a zero-offset branch.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="imm">Offset added to the (optionally aligned) PC to form the target</param>
    /// <param name="sourceIsThumb">True if the calling code is Thumb</param>
    /// <param name="targetIsThumb">True if the called code is Thumb</param>
    public static void Bl(CodeGenContext context, int imm, bool sourceIsThumb, bool targetIsThumb)
    {
        uint pc = context.Pc;

        // The return address carries bit 0 set when the caller is Thumb.
        uint nextAddress = sourceIsThumb ? pc | 1u : pc - 4;

        // For a non-Thumb target the PC is aligned down to 4 bytes before the offset is added.
        uint targetAddress = (targetIsThumb ? pc : pc & ~3u) + (uint)imm;

        if (targetIsThumb && !sourceIsThumb)
        {
            InstEmitCommon.SetThumbFlag(context);
        }
        else if (sourceIsThumb && !targetIsThumb)
        {
            InstEmitCommon.ClearThumbFlag(context);
        }

        context.AddPendingCall(targetAddress, nextAddress);

        context.Arm64Assembler.B(0);
    }

    /// <summary>
    /// Emits an indirect call through guest register <paramref name="rm"/>: sets the Thumb flag
    /// from the register value, registers a pending indirect call and emits a zero-offset branch.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="rm">Guest register index holding the target</param>
    /// <param name="sourceIsThumb">True if the calling code is Thumb</param>
    public static void Blx(CodeGenContext context, uint rm, bool sourceIsThumb)
    {
        Operand target = InstEmitCommon.GetInputGpr(context, rm);

        InstEmitCommon.SetThumbFlag(context, target);

        uint nextAddress;

        if (sourceIsThumb)
        {
            nextAddress = (context.Pc - 2) | 1u;
        }
        else
        {
            nextAddress = context.Pc - 4;
        }

        context.AddPendingIndirectCall(rm, nextAddress);

        context.Arm64Assembler.B(0);
    }

    /// <summary>
    /// Emits an indirect branch through guest register <paramref name="rm"/>: sets the Thumb flag
    /// from the register value, registers a pending indirect branch and emits a zero-offset branch.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="rm">Guest register index holding the target</param>
    public static void Bx(CodeGenContext context, uint rm)
    {
        Operand target = InstEmitCommon.GetInputGpr(context, rm);

        InstEmitCommon.SetThumbFlag(context, target);

        context.AddPendingIndirectBranch(InstName.Bx, rm);

        context.Arm64Assembler.B(0);
    }

    /// <summary>
    /// Registers a pending branch and emits a zero-offset host CBNZ (when <paramref name="op"/> is
    /// true) or CBZ (when it is false) on guest register <paramref name="rn"/>.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="rn">Guest register index that is compared against zero</param>
    /// <param name="imm">Branch immediate recorded with the pending branch</param>
    /// <param name="op">Selects CBNZ (true) or CBZ (false)</param>
    public static void Cbnz(CodeGenContext context, uint rn, int imm, bool op)
    {
        Operand value = InstEmitCommon.GetInputGpr(context, rn);

        context.AddPendingBranch(InstName.Cbnz, imm);

        if (!op)
        {
            context.Arm64Assembler.Cbz(value, 0);
        }
        else
        {
            context.Arm64Assembler.Cbnz(value, 0);
        }
    }

    /// <summary>
    /// Builds the list of conditions for an IT block and passes it to the context.
    /// </summary>
    /// <remarks>
    /// The list is filled from the lowest significant mask bit up to mask bit 3, and
    /// <paramref name="firstCond"/> itself is stored in the last slot.
    /// </remarks>
    /// <param name="context">Code generation context</param>
    /// <param name="firstCond">Condition field of the IT instruction</param>
    /// <param name="mask">Mask field of the IT instruction, must not be zero</param>
    public static void It(CodeGenContext context, uint firstCond, uint mask)
    {
        Debug.Assert(mask != 0);

        // The lowest set bit of the mask terminates the block and determines its length.
        int instCount = 4 - BitOperations.TrailingZeroCount(mask);

        Span<ArmCondition> conditions = stackalloc ArmCondition[instCount];

        ArmCondition baseCondition = (ArmCondition)firstCond;

        int slot = 0;

        // NOTE(review): inversion is keyed on the raw mask bit. In the architectural encoding the
        // mask bits are relative to bit 0 of firstcond - confirm the decoder normalizes the mask
        // before calling this.
        for (int bit = 5 - instCount; bit < 4; bit++)
        {
            bool invert = (mask & (1u << bit)) != 0;

            conditions[slot++] = invert ? baseCondition.Invert() : baseCondition;
        }

        conditions[slot] = baseCondition;

        context.SetItBlockStart(conditions);
    }

    /// <summary>
    /// Emits a table branch: loads the current PC value into the host register that the PC
    /// register is remapped to, registers a pending table branch and emits a zero-offset branch.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="rn">Guest register index forwarded to the pending table branch</param>
    /// <param name="rm">Guest register index forwarded to the pending table branch</param>
    /// <param name="h">Flag forwarded to the pending table branch</param>
    public static void Tbb(CodeGenContext context, uint rn, uint rm, bool h)
    {
        var pcHostRegister = context.RegisterAllocator.RemapGprRegister(RegisterUtils.PcRegister);

        context.Arm64Assembler.Mov(pcHostRegister, context.Pc);

        context.AddPendingTableBranch(rn, rm, h);

        context.Arm64Assembler.B(0);
    }

    /// <summary>
    /// Writes the host code for a call (or tail call) to the function at a guest address.
    /// </summary>
    /// <remarks>
    /// The guest address is stored at the dispatch address offset of the native context, the context
    /// pointer is moved to register 0, and the call target is either loaded from the function table
    /// slot of a constant address or set to <paramref name="funcPtr"/>. For a non-tail call,
    /// register 0 is compared with <paramref name="nextAddress"/> after the call and a conditional
    /// return is added for the "not equal" case.
    /// </remarks>
    /// <param name="writer">Code writer passed to the tail merger</param>
    /// <param name="asm">Assembler used to emit the host code</param>
    /// <param name="regAlloc">Register allocator of the function being compiled</param>
    /// <param name="tailMerger">Tail merger that receives the conditional return</param>
    /// <param name="writeEpilogue">Callback that writes the epilogue, invoked for tail calls only</param>
    /// <param name="funcTable">Function table, or null to always call <paramref name="funcPtr"/></param>
    /// <param name="funcPtr">Call target used when the function table can not be used</param>
    /// <param name="spillBaseOffset">Stack offset where registers are spilled around the call</param>
    /// <param name="nextAddress">Value register 0 is compared with after a non-tail call</param>
    /// <param name="guestAddress">Guest address of the callee, either a constant or a register</param>
    /// <param name="isTail">True to write the epilogue and branch instead of calling</param>
    public unsafe static void WriteCallWithGuestAddress(
        CodeWriter writer,
        ref Assembler asm,
        RegisterAllocator regAlloc,
        TailMerger tailMerger,
        Action writeEpilogue,
        AddressTable<ulong> funcTable,
        IntPtr funcPtr,
        int spillBaseOffset,
        uint nextAddress,
        Operand guestAddress,
        bool isTail = false)
    {
        bool addressIsConstant = guestAddress.Kind == OperandKind.Constant;

        // Store the guest address at the dispatch address offset of the native context.
        if (addressIsConstant)
        {
            // A constant has to be materialized in a temporary register before it can be stored.
            int scratch = regAlloc.AllocateTempGprRegister();

            asm.Mov(Register(scratch), guestAddress.Value);
            asm.StrRiUn(Register(scratch), Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset);

            regAlloc.FreeTempGprRegister(scratch);
        }
        else
        {
            asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset);
        }

        if (!isTail)
        {
            WriteSpillSkipContext(ref asm, regAlloc, spillBaseOffset);
        }

        // The call target lives in register 1, or register 2 when register 1 holds the context.
        Operand target = Register(regAlloc.FixedContextRegister == 1 ? 2 : 1);

        if (regAlloc.FixedContextRegister != 0)
        {
            asm.Mov(Register(0), Register(regAlloc.FixedContextRegister));
        }

        if (addressIsConstant && funcTable != null)
        {
            // Load the target from the table slot: page-aligned base plus the low 12 bits as offset.
            ulong slotAddress = (ulong)Unsafe.AsPointer(ref funcTable.GetValue(guestAddress.Value));

            asm.Mov(target, slotAddress & ~0xfffUL);
            asm.LdrRiUn(target, target, (int)(slotAddress & 0xfffUL));
        }
        else
        {
            asm.Mov(target, (ulong)funcPtr);
        }

        if (isTail)
        {
            writeEpilogue();
            asm.Br(target);

            return;
        }

        asm.Blr(target);

        // The target register is reused to hold the expected next address for the comparison.
        asm.Mov(target, nextAddress);
        asm.Cmp(Register(0), target);

        tailMerger.AddConditionalReturn(writer, asm, ArmCondition.Ne);

        WriteFillSkipContext(ref asm, regAlloc, spillBaseOffset);
    }

    /// <summary>
    /// Writes the stores that save registers to the stack before a call.
    /// </summary>
    private static void WriteSpillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
        => WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: true);

    /// <summary>
    /// Writes the loads that restore registers from the stack after a call.
    /// </summary>
    private static void WriteFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
        => WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: false);

    /// <summary>
    /// Writes stores (spill) or loads (fill) for the selected registers, starting at
    /// <paramref name="spillOffset"/> from the stack pointer and using register pairs where possible.
    /// </summary>
    /// <param name="asm">Assembler used to emit the host code</param>
    /// <param name="regAlloc">Register allocator providing the used and fixed registers</param>
    /// <param name="spillOffset">Stack offset of the first saved register</param>
    /// <param name="spill">True to store the registers, false to load them</param>
    private static void WriteSpillOrFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset, bool spill)
    {
        // NOTE(review): despite the "SkipContext" name, this mask selects (rather than excludes)
        // the fixed context and page table registers among the used registers - confirm intent.
        uint fixedRegsMask = (1u << regAlloc.FixedContextRegister) | (1u << regAlloc.FixedPageTableRegister);
        uint gprMask = regAlloc.UsedGprsMask & fixedRegsMask;

        while (gprMask != 0)
        {
            int reg = BitOperations.TrailingZeroCount(gprMask);

            // A pair instruction is used when the next register is selected too and the offset
            // is below the encodable limit.
            bool usePair = reg < 31 &&
                (gprMask & (2u << reg)) != 0 &&
                spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit;

            if (usePair)
            {
                if (spill)
                {
                    asm.StpRiUn(Register(reg), Register(reg + 1), Register(SpIndex), spillOffset);
                }
                else
                {
                    asm.LdpRiUn(Register(reg), Register(reg + 1), Register(SpIndex), spillOffset);
                }

                gprMask &= ~(3u << reg);
                spillOffset += 16;
            }
            else
            {
                if (spill)
                {
                    asm.StrRiUn(Register(reg), Register(SpIndex), spillOffset);
                }
                else
                {
                    asm.LdrRiUn(Register(reg), Register(SpIndex), spillOffset);
                }

                gprMask &= ~(1u << reg);
                spillOffset += 8;
            }
        }
    }

    /// <summary>
    /// Creates an integer register operand.
    /// </summary>
    /// <param name="register">Register index</param>
    /// <param name="type">Operand type, 64-bit integer by default</param>
    /// <returns>The register operand</returns>
    private static Operand Register(int register, OperandType type = OperandType.I64)
        => new Operand(register, RegisterType.Integer, type);
}
|
||||
}
|
265
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitGE.cs
Normal file
265
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitGE.cs
Normal file
@ -0,0 +1,265 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the guest parallel add/subtract instructions that update the GE flags, and for SEL,
/// which consumes them. The GE flags are kept as a 4-bit field at bit 16 of the flags word of the
/// native context.
/// </summary>
static class InstEmitGE
{
    /// <summary>Emits the signed dual 16-bit addition.</summary>
    public static void Sadd16(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAddSub(context, rd, rn, rm, is16Bit: true, add: true, unsigned: false);

    /// <summary>Emits the signed quad 8-bit addition.</summary>
    public static void Sadd8(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAddSub(context, rd, rn, rm, is16Bit: false, add: true, unsigned: false);

    /// <summary>Emits the signed add-and-subtract with exchange.</summary>
    public static void Sasx(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAsxSax(context, rd, rn, rm, isAsx: true, unsigned: false);

    /// <summary>
    /// Emits a byte-wise select between rn and rm: each result byte comes from rn when the matching
    /// GE flag is set, and from rm otherwise.
    /// </summary>
    /// <param name="context">Code generation context that owns the assembler and register allocator</param>
    /// <param name="rd">Guest destination register index</param>
    /// <param name="rn">Guest register index selected where the GE flag is set</param>
    /// <param name="rm">Guest register index selected where the GE flag is clear</param>
    public static void Sel(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        Operand dst = InstEmitCommon.GetOutputGpr(context, rd);
        Operand whenSet = InstEmitCommon.GetInputGpr(context, rn);
        Operand whenClear = InstEmitCommon.GetInputGpr(context, rm);

        using ScopedRegister maskRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister scratchRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand mask = maskRegister.Operand;
        Operand scratch = scratchRegister.Operand;

        ExtractGEFlags(context, mask);

        // Spread the 4 flag bits so that flag i lands on bit 0 of byte i: the multiplier has bits
        // 0, 7, 14 and 21 set, and the AND keeps only bit 0 of each byte.
        context.Arm64Assembler.Mov(scratch, 0x204081u);
        context.Arm64Assembler.Mul(mask, mask, scratch);
        context.Arm64Assembler.And(mask, mask, InstEmitCommon.Const(0x1010101));

        // Widen each byte from 0x01 to 0xFF by computing (x << 8) - x.
        context.Arm64Assembler.Lsl(scratch, mask, InstEmitCommon.Const(8));
        context.Arm64Assembler.Sub(mask, scratch, mask);

        // dst = (rn & mask) | (rm & ~mask).
        context.Arm64Assembler.And(scratch, mask, whenSet);
        context.Arm64Assembler.Bic(dst, whenClear, mask);
        context.Arm64Assembler.Orr(dst, dst, scratch);
    }

    /// <summary>Emits the signed subtract-and-add with exchange.</summary>
    public static void Ssax(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAsxSax(context, rd, rn, rm, isAsx: false, unsigned: false);

    /// <summary>Emits the signed dual 16-bit subtraction.</summary>
    public static void Ssub16(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAddSub(context, rd, rn, rm, is16Bit: true, add: false, unsigned: false);

    /// <summary>Emits the signed quad 8-bit subtraction.</summary>
    public static void Ssub8(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAddSub(context, rd, rn, rm, is16Bit: false, add: false, unsigned: false);

    /// <summary>Emits the unsigned dual 16-bit addition.</summary>
    public static void Uadd16(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAddSub(context, rd, rn, rm, is16Bit: true, add: true, unsigned: true);

    /// <summary>Emits the unsigned quad 8-bit addition.</summary>
    public static void Uadd8(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAddSub(context, rd, rn, rm, is16Bit: false, add: true, unsigned: true);

    /// <summary>Emits the unsigned add-and-subtract with exchange.</summary>
    public static void Uasx(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAsxSax(context, rd, rn, rm, isAsx: true, unsigned: true);

    /// <summary>Emits the unsigned subtract-and-add with exchange.</summary>
    public static void Usax(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAsxSax(context, rd, rn, rm, isAsx: false, unsigned: true);

    /// <summary>Emits the unsigned dual 16-bit subtraction.</summary>
    public static void Usub16(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAddSub(context, rd, rn, rm, is16Bit: true, add: false, unsigned: true);

    /// <summary>Emits the unsigned quad 8-bit subtraction.</summary>
    public static void Usub8(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitAddSub(context, rd, rn, rm, is16Bit: false, add: false, unsigned: true);

    /// <summary>
    /// Emits a lane-wise addition or subtraction and accumulates one GE flag per lane, then
    /// writes the flags back to the native context.
    /// </summary>
    /// <param name="context">Code generation context that owns the assembler and register allocator</param>
    /// <param name="rd">Guest destination register index</param>
    /// <param name="rn">Guest register index of the first input</param>
    /// <param name="rm">Guest register index of the second input</param>
    /// <param name="is16Bit">True for two 16-bit lanes, false for four 8-bit lanes</param>
    /// <param name="add">True to add the lanes, false to subtract them</param>
    /// <param name="unsigned">True for unsigned lanes, false for signed lanes</param>
    private static void EmitAddSub(CodeGenContext context, uint rd, uint rn, uint rm, bool is16Bit, bool add, bool unsigned)
    {
        // The operands themselves are not used here (the pair helpers take the register indices),
        // but the calls are kept since their effect on the allocator is not visible from this file.
        _ = InstEmitCommon.GetOutputGpr(context, rd);
        _ = InstEmitCommon.GetInputGpr(context, rn);
        _ = InstEmitCommon.GetInputGpr(context, rm);

        using ScopedRegister geFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        int laneBits = is16Bit ? 16 : 8;
        int flagStep = is16Bit ? 2 : 1;
        bool carryBased = unsigned && add;

        // Bit position of the flag for the lane currently being emitted.
        int flagShift = 0;

        void EmitLane(Operand d, Operand n, Operand m)
        {
            if (add)
            {
                context.Arm64Assembler.Add(d, n, m);
            }
            else
            {
                context.Arm64Assembler.Sub(d, n, m);
            }

            if (carryBased)
            {
                // Unsigned addition: the flag is the bit just above the lane (d >> laneBits).
                if (flagShift != 0)
                {
                    using ScopedRegister carry = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                    context.Arm64Assembler.Lsr(carry.Operand, d, InstEmitCommon.Const(laneBits));
                    context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, carry.Operand, ArmShiftType.Lsl, flagShift);
                }
                else
                {
                    context.Arm64Assembler.Lsr(geFlags.Operand, d, InstEmitCommon.Const(laneBits));
                }
            }
            else
            {
                // Otherwise the flag is the inverted bit 31 of the lane result.
                using ScopedRegister inverted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                context.Arm64Assembler.Mvn(inverted.Operand, d);

                if (flagShift != 0)
                {
                    context.Arm64Assembler.Lsr(inverted.Operand, inverted.Operand, InstEmitCommon.Const(31));
                    context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, inverted.Operand, ArmShiftType.Lsl, flagShift);
                }
                else
                {
                    context.Arm64Assembler.Lsr(geFlags.Operand, inverted.Operand, InstEmitCommon.Const(31));
                }
            }

            flagShift += flagStep;
        }

        if (!is16Bit)
        {
            if (unsigned)
            {
                InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, EmitLane);
            }
            else
            {
                InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, EmitLane);
            }
        }
        else
        {
            if (unsigned)
            {
                InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, EmitLane);
            }
            else
            {
                InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, EmitLane);
            }

            // Each 16-bit lane owns two flag bits; copy bits 0 and 2 into bits 1 and 3.
            context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, geFlags.Operand, ArmShiftType.Lsl, 1);
        }

        UpdateGEFlags(context, geFlags.Operand);
    }

    /// <summary>
    /// Emits the exchanged 16-bit add/subtract pair and accumulates one GE flag per lane, then
    /// writes the flags back to the native context.
    /// </summary>
    /// <param name="context">Code generation context that owns the assembler and register allocator</param>
    /// <param name="rd">Guest destination register index</param>
    /// <param name="rn">Guest register index of the first input</param>
    /// <param name="rm">Guest register index of the second input</param>
    /// <param name="isAsx">True to add on lane 1 and subtract on lane 0, false for the opposite</param>
    /// <param name="unsigned">True for unsigned lanes, false for signed lanes</param>
    private static void EmitAsxSax(CodeGenContext context, uint rd, uint rn, uint rm, bool isAsx, bool unsigned)
    {
        // The operands themselves are not used here (the pair helpers take the register indices),
        // but the calls are kept since their effect on the allocator is not visible from this file.
        _ = InstEmitCommon.GetOutputGpr(context, rd);
        _ = InstEmitCommon.GetInputGpr(context, rn);
        _ = InstEmitCommon.GetInputGpr(context, rm);

        using ScopedRegister geFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        void EmitLane(Operand d, Operand n, Operand m, int e)
        {
            bool add = isAsx ? e == 1 : e == 0;

            if (add)
            {
                context.Arm64Assembler.Add(d, n, m);
            }
            else
            {
                context.Arm64Assembler.Sub(d, n, m);
            }

            // Lane e owns the flag bits starting at position e * 2.
            if (unsigned && add)
            {
                // Unsigned addition: the flag is bit 16 of the lane result.
                if (e != 0)
                {
                    using ScopedRegister carry = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                    context.Arm64Assembler.Lsr(carry.Operand, d, InstEmitCommon.Const(16));
                    context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, carry.Operand, ArmShiftType.Lsl, e * 2);
                }
                else
                {
                    context.Arm64Assembler.Lsr(geFlags.Operand, d, InstEmitCommon.Const(16));
                }
            }
            else
            {
                // Otherwise the flag is the inverted bit 31 of the lane result.
                using ScopedRegister inverted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                context.Arm64Assembler.Mvn(inverted.Operand, d);

                if (e != 0)
                {
                    context.Arm64Assembler.Lsr(inverted.Operand, inverted.Operand, InstEmitCommon.Const(31));
                    context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, inverted.Operand, ArmShiftType.Lsl, e * 2);
                }
                else
                {
                    context.Arm64Assembler.Lsr(geFlags.Operand, inverted.Operand, InstEmitCommon.Const(31));
                }
            }
        }

        if (unsigned)
        {
            InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, EmitLane);
        }
        else
        {
            InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, EmitLane);
        }

        // Each 16-bit lane owns two flag bits; copy bits 0 and 2 into bits 1 and 3.
        context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, geFlags.Operand, ArmShiftType.Lsl, 1);

        UpdateGEFlags(context, geFlags.Operand);
    }

    /// <summary>
    /// Writes the low 4 bits of <paramref name="flags"/> into bits 16 to 19 of the flags word
    /// stored in the native context.
    /// </summary>
    /// <param name="context">Code generation context that owns the assembler and register allocator</param>
    /// <param name="flags">Operand holding the new GE flags in its low 4 bits</param>
    public static void UpdateGEFlags(CodeGenContext context, Operand flags)
    {
        Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

        using ScopedRegister flagsWord = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Read-modify-write of the flags word.
        context.Arm64Assembler.LdrRiUn(flagsWord.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
        context.Arm64Assembler.Bfi(flagsWord.Operand, flags, 16, 4);
        context.Arm64Assembler.StrRiUn(flagsWord.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
    }

    /// <summary>
    /// Loads the flags word from the native context and extracts bits 16 to 19 into the low
    /// 4 bits of <paramref name="flags"/>.
    /// </summary>
    /// <param name="context">Code generation context that owns the assembler and register allocator</param>
    /// <param name="flags">Operand that receives the GE flags</param>
    public static void ExtractGEFlags(CodeGenContext context, Operand flags)
    {
        Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

        context.Arm64Assembler.LdrRiUn(flags, nativeContext, NativeContextOffsets.FlagsBaseOffset);
        context.Arm64Assembler.Ubfx(flags, flags, 16, 4);
    }
}
|
||||
}
|
178
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitHalve.cs
Normal file
178
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitHalve.cs
Normal file
@ -0,0 +1,178 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the guest halving parallel add/subtract instructions.
/// </summary>
static class InstEmitHalve
{
    // Masks that clear the top bit of every 16-bit or 8-bit lane.
    private const int Lanes16Mask = 0x7fff7fff;
    private const int Lanes8Mask = 0x7f7f7f7f;

    /// <summary>Emits the signed halving dual 16-bit addition.</summary>
    public static void Shadd16(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHadd(context, rd, rn, rm, Lanes16Mask, unsigned: false);

    /// <summary>Emits the signed halving quad 8-bit addition.</summary>
    public static void Shadd8(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHadd(context, rd, rn, rm, Lanes8Mask, unsigned: false);

    /// <summary>Emits the signed halving dual 16-bit subtraction.</summary>
    public static void Shsub16(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHsub(context, rd, rn, rm, Lanes16Mask, unsigned: false);

    /// <summary>Emits the signed halving quad 8-bit subtraction.</summary>
    public static void Shsub8(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHsub(context, rd, rn, rm, Lanes8Mask, unsigned: false);

    /// <summary>Emits the signed halving exchange that subtracts on lane 0 and adds on lane 1.</summary>
    public static void Shasx(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHalvingExchange(context, rd, rn, rm, subtractOnLane0: true, unsigned: false);

    /// <summary>Emits the signed halving exchange that adds on lane 0 and subtracts on lane 1.</summary>
    public static void Shsax(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHalvingExchange(context, rd, rn, rm, subtractOnLane0: false, unsigned: false);

    /// <summary>Emits the unsigned halving dual 16-bit addition.</summary>
    public static void Uhadd16(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHadd(context, rd, rn, rm, Lanes16Mask, unsigned: true);

    /// <summary>Emits the unsigned halving quad 8-bit addition.</summary>
    public static void Uhadd8(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHadd(context, rd, rn, rm, Lanes8Mask, unsigned: true);

    /// <summary>Emits the unsigned halving exchange that subtracts on lane 0 and adds on lane 1.</summary>
    public static void Uhasx(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHalvingExchange(context, rd, rn, rm, subtractOnLane0: true, unsigned: true);

    /// <summary>Emits the unsigned halving exchange that adds on lane 0 and subtracts on lane 1.</summary>
    public static void Uhsax(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHalvingExchange(context, rd, rn, rm, subtractOnLane0: false, unsigned: true);

    /// <summary>Emits the unsigned halving dual 16-bit subtraction.</summary>
    public static void Uhsub16(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHsub(context, rd, rn, rm, Lanes16Mask, unsigned: true);

    /// <summary>Emits the unsigned halving quad 8-bit subtraction.</summary>
    public static void Uhsub8(CodeGenContext context, uint rd, uint rn, uint rm)
        => EmitHsub(context, rd, rn, rm, Lanes8Mask, unsigned: true);

    /// <summary>
    /// Emits a halving 16-bit add/subtract pair with exchanged operands: each lane is added or
    /// subtracted and the lane result is then shifted right by one.
    /// </summary>
    /// <param name="context">Code generation context that owns the assembler</param>
    /// <param name="rd">Guest destination register index</param>
    /// <param name="rn">Guest register index of the first input</param>
    /// <param name="rm">Guest register index of the second input</param>
    /// <param name="subtractOnLane0">True to subtract on lane 0 and add on the other lane, false for the opposite</param>
    /// <param name="unsigned">True to use the unsigned pair helper, false to use the signed one</param>
    private static void EmitHalvingExchange(CodeGenContext context, uint rd, uint rn, uint rm, bool subtractOnLane0, bool unsigned)
    {
        void EmitLane(Operand d, Operand n, Operand m, int e)
        {
            bool subtract = (e == 0) == subtractOnLane0;

            if (subtract)
            {
                context.Arm64Assembler.Sub(d, n, m);
            }
            else
            {
                context.Arm64Assembler.Add(d, n, m);
            }

            // Halve the lane result.
            context.Arm64Assembler.Lsr(d, d, InstEmitCommon.Const(1));
        }

        if (unsigned)
        {
            InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, EmitLane);
        }
        else
        {
            InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, EmitLane);
        }
    }

    /// <summary>
    /// Emits a lane-wise halving addition that operates on all lanes of the register at once.
    /// </summary>
    /// <param name="context">Code generation context that owns the assembler and register allocator</param>
    /// <param name="rd">Guest destination register index</param>
    /// <param name="rn">Guest register index of the first input</param>
    /// <param name="rm">Guest register index of the second input</param>
    /// <param name="mask">Mask with the top bit of every lane cleared (0x7f7f7f7f or 0x7fff7fff)</param>
    /// <param name="unsigned">True for unsigned lanes, false for signed lanes</param>
    private static void EmitHadd(CodeGenContext context, uint rd, uint rn, uint rm, int mask, bool unsigned)
    {
        Operand dst = InstEmitCommon.GetOutputGpr(context, rd);
        Operand x = InstEmitCommon.GetInputGpr(context, rn);
        Operand y = InstEmitCommon.GetInputGpr(context, rm);

        using ScopedRegister common = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister differing = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Uses the identity x + y == ((x & y) << 1) + (x ^ y), hence
        // (x + y) / 2 == (x & y) + ((x ^ y) >> 1).
        // After the shift, the mask removes the bit that crossed over from the lane above.
        context.Arm64Assembler.And(common.Operand, y, x);
        context.Arm64Assembler.Eor(differing.Operand, y, x);
        context.Arm64Assembler.Lsr(dst, differing.Operand, InstEmitCommon.Const(1));
        context.Arm64Assembler.And(dst, dst, InstEmitCommon.Const(mask));
        context.Arm64Assembler.Add(dst, dst, common.Operand);

        if (unsigned)
        {
            return;
        }

        // Signed: the top bit of each lane of x ^ y is XORed into the result so that the sign of
        // (x ^ y) >> 1 is propagated upwards by one bit.
        context.Arm64Assembler.And(differing.Operand, differing.Operand, InstEmitCommon.Const(~mask));
        context.Arm64Assembler.Eor(dst, dst, differing.Operand);
    }

    /// <summary>
    /// Emits a lane-wise halving subtraction that operates on all lanes of the register at once.
    /// </summary>
    /// <param name="context">Code generation context that owns the assembler and register allocator</param>
    /// <param name="rd">Guest destination register index</param>
    /// <param name="rn">Guest register index of the minuend</param>
    /// <param name="rm">Guest register index of the subtrahend</param>
    /// <param name="mask">Mask with the top bit of every lane cleared (0x7f7f7f7f or 0x7fff7fff)</param>
    /// <param name="unsigned">True for unsigned lanes, false for signed lanes</param>
    private static void EmitHsub(CodeGenContext context, uint rd, uint rn, uint rm, int mask, bool unsigned)
    {
        Operand dst = InstEmitCommon.GetOutputGpr(context, rd);
        Operand x = InstEmitCommon.GetInputGpr(context, rn);
        Operand y = InstEmitCommon.GetInputGpr(context, rm);

        using ScopedRegister differing = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister minuend = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister subtrahend = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Uses the identity x - y == (x ^ y) - (((x ^ y) & y) << 1), hence
        // (x - y) / 2 == ((x ^ y) >> 1) - ((x ^ y) & y).
        context.Arm64Assembler.Eor(differing.Operand, y, x);
        context.Arm64Assembler.Lsr(minuend.Operand, differing.Operand, InstEmitCommon.Const(1));
        context.Arm64Assembler.And(subtrahend.Operand, differing.Operand, y);

        // Partitioned subtraction: the top bit of every lane of the minuend is set first so that it
        // can be borrowed from, and it is inverted afterwards, which tells whether a borrow happened.
        int laneTopBits = ~mask;

        context.Arm64Assembler.Orr(dst, minuend.Operand, InstEmitCommon.Const(laneTopBits));
        context.Arm64Assembler.Sub(dst, dst, subtrahend.Operand);
        context.Arm64Assembler.Eor(dst, dst, InstEmitCommon.Const(laneTopBits));

        if (unsigned)
        {
            return;
        }

        // Signed: sign-extend the result into the top bit of each lane.
        context.Arm64Assembler.And(differing.Operand, differing.Operand, InstEmitCommon.Const(laneTopBits));
        context.Arm64Assembler.Eor(dst, dst, differing.Operand);
    }
}
|
||||
}
|
1172
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitMemory.cs
Normal file
1172
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitMemory.cs
Normal file
File diff suppressed because it is too large
Load Diff
350
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitMove.cs
Normal file
350
src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitMove.cs
Normal file
@ -0,0 +1,350 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
static class InstEmitMove
|
||||
{
|
||||
/// <summary>
/// Emits Arm64 code that writes the bitwise complement of an immediate into a guest register,
/// optionally updating the flags from the result.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="imm">Immediate value; its complement is what gets written</param>
/// <param name="immRotated">When flags are updated, requests that bit 31 of <paramref name="imm"/> be copied into the saved carry flag</param>
/// <param name="s">True to update the flags from the result</param>
public static void MvnI(CodeGenContext context, uint rd, uint imm, bool immRotated, bool s)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);

    if (s)
    {
        using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Snapshot the flags before TST changes them; the snapshot is handed to RestoreCvFlags afterwards.
        InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

        if (immRotated)
        {
            // The carry flag is patched at bit 1 of the snapshot, the same bit MovI uses
            // for a rotated immediate (the previous code patched bit 29 here).
            if ((imm & (1u << 31)) != 0)
            {
                context.Arm64Assembler.Orr(flagsRegister.Operand, flagsRegister.Operand, InstEmitCommon.Const(2));
            }
            else
            {
                context.Arm64Assembler.Bfc(flagsRegister.Operand, 1, 1);
            }
        }

        context.Arm64Assembler.Mov(rdOperand, ~imm);
        context.Arm64Assembler.Tst(rdOperand, rdOperand);

        InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

        context.SetNzcvModified();
    }
    else
    {
        context.Arm64Assembler.Mov(rdOperand, ~imm);
    }
}
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that writes the bitwise complement of an immediate-shifted register into a guest register,
/// optionally updating the flags from the result.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rm">Index of the source register</param>
/// <param name="sType">Shift type applied to the source</param>
/// <param name="imm5">Shift amount encoding</param>
/// <param name="s">True to update the flags from the result</param>
public static void MvnR(CodeGenContext context, uint rd, uint rm, uint sType, uint imm5, bool s)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    ScopedRegister flagsRegister = default;

    if (s)
    {
        flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

        // The flags snapshot is passed along so the shift helper can record its carry out in it.
        rmOperand = InstEmitAlu.GetMShiftedByImmediate(context, tempRegister.Operand, rmOperand, imm5, sType, flagsRegister.Operand);
    }
    else
    {
        rmOperand = InstEmitAlu.GetMShiftedByImmediate(context, tempRegister.Operand, rmOperand, imm5, sType);
    }

    context.Arm64Assembler.Mvn(rdOperand, rmOperand);

    if (s)
    {
        // MVN itself does not set flags on Arm64, so N and Z are derived from the result with TST,
        // exactly like the other flag-setting moves in this class do.
        context.Arm64Assembler.Tst(rdOperand, rdOperand);

        InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

        flagsRegister.Dispose();

        context.SetNzcvModified();
    }
}
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that writes the bitwise complement of a register-shifted register into a guest register,
/// optionally updating the flags from the result.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rm">Index of the source register</param>
/// <param name="sType">Shift type applied to the source</param>
/// <param name="rs">Index of the register holding the shift amount</param>
/// <param name="s">True to update the flags from the result</param>
public static void MvnRr(CodeGenContext context, uint rd, uint rm, uint sType, uint rs, bool s)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
    Operand rsOperand = InstEmitCommon.GetInputGpr(context, rs);

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    ScopedRegister flagsRegister = default;

    if (s)
    {
        flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

        // The flags snapshot is passed along so the shift helper can record its carry out in it.
        rmOperand = InstEmitAlu.GetMShiftedByReg(context, tempRegister.Operand, rmOperand, rsOperand, sType, flagsRegister.Operand);
    }
    else
    {
        rmOperand = InstEmitAlu.GetMShiftedByReg(context, tempRegister.Operand, rmOperand, rsOperand, sType);
    }

    context.Arm64Assembler.Mvn(rdOperand, rmOperand);

    if (s)
    {
        // MVN itself does not set flags on Arm64, so N and Z are derived from the result with TST,
        // exactly like the other flag-setting moves in this class do.
        context.Arm64Assembler.Tst(rdOperand, rdOperand);

        InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

        flagsRegister.Dispose();

        context.SetNzcvModified();
    }
}
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that loads an immediate into a guest register, optionally updating the flags from the result.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="imm">Immediate value to load</param>
/// <param name="immRotated">When flags are updated, requests that bit 31 of <paramref name="imm"/> be copied into bit 1 of the saved flags</param>
/// <param name="s">True to update the flags from the result</param>
public static void MovI(CodeGenContext context, uint rd, uint imm, bool immRotated, bool s)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);

    if (!s)
    {
        // Plain move, the flags are left alone.
        context.Arm64Assembler.Mov(destination, imm);

        return;
    }

    using ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);

    if (immRotated)
    {
        bool topBitSet = (imm & (1u << 31)) != 0;

        if (topBitSet)
        {
            // Set bit 1 of the saved flags.
            context.Arm64Assembler.Orr(savedFlags.Operand, savedFlags.Operand, InstEmitCommon.Const(2));
        }
        else
        {
            // Clear bit 1 of the saved flags.
            context.Arm64Assembler.Bfc(savedFlags.Operand, 1, 1);
        }
    }

    context.Arm64Assembler.Mov(destination, imm);
    context.Arm64Assembler.Tst(destination, destination);

    InstEmitCommon.RestoreCvFlags(context, savedFlags.Operand);

    context.SetNzcvModified();
}
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that moves an immediate-shifted register into a guest register,
/// optionally updating the flags from the result.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rm">Index of the source register</param>
/// <param name="sType">Shift type applied to the source</param>
/// <param name="imm5">Shift amount encoding</param>
/// <param name="s">True to update the flags from the result</param>
public static void MovR(CodeGenContext context, uint rd, uint rm, uint sType, uint imm5, bool s)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand source = InstEmitCommon.GetInputGpr(context, rm);

    if (InstEmitAlu.CanShift(sType, imm5) && !s)
    {
        // Fast path: a single shift (or plain move) straight into the destination.
        if (imm5 == 0)
        {
            context.Arm64Assembler.Mov(destination, source);

            return;
        }

        switch ((ArmShiftType)sType)
        {
            case ArmShiftType.Lsl:
                context.Arm64Assembler.Lsl(destination, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Lsr:
                context.Arm64Assembler.Lsr(destination, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Asr:
                context.Arm64Assembler.Asr(destination, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Ror:
                context.Arm64Assembler.Ror(destination, source, InstEmitCommon.Const((int)imm5));
                break;
        }

        return;
    }

    using ScopedRegister shiftScratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    if (!s)
    {
        source = InstEmitAlu.GetMShiftedByImmediate(context, shiftScratch.Operand, source, imm5, sType, null);

        context.Arm64Assembler.Mov(destination, source);

        return;
    }

    // Flag-setting path: snapshot the flags, let the shift helper see the snapshot,
    // then derive the flags from the result and hand the snapshot to RestoreCvFlags.
    ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);

    source = InstEmitAlu.GetMShiftedByImmediate(context, shiftScratch.Operand, source, imm5, sType, savedFlags.Operand);

    context.Arm64Assembler.Mov(destination, source);
    context.Arm64Assembler.Tst(destination, destination);

    InstEmitCommon.RestoreCvFlags(context, savedFlags.Operand);

    savedFlags.Dispose();

    context.SetNzcvModified();
}
|
||||
|
||||
/// <summary>
/// Emits Arm64 code for a conditional register move, using CSEL when the move neither sets flags
/// nor is unconditional; otherwise defers to the unconditional overload.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="cond">Condition code of the instruction</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rm">Index of the source register</param>
/// <param name="sType">Shift type applied to the source</param>
/// <param name="imm5">Shift amount encoding</param>
/// <param name="s">True to update the flags from the result</param>
public static void MovR(CodeGenContext context, uint cond, uint rd, uint rm, uint sType, uint imm5, bool s)
{
    // A previous move may already have folded this instruction into its CSEL.
    if (context.ConsumeSkipNextInstruction())
    {
        return;
    }

    ArmCondition condition = (ArmCondition)cond;

    if (condition >= ArmCondition.Al || s)
    {
        MovR(context, rd, rm, sType, imm5, s);

        return;
    }

    using ScopedRegister scratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand source = InstEmitCommon.GetInputGpr(context, rm);

    if (!InstEmitAlu.CanShift(sType, imm5))
    {
        source = InstEmitAlu.GetMShiftedByImmediate(context, scratch.Operand, source, imm5, sType, null);

        context.Arm64Assembler.Csel(destination, source, destination, condition);

        return;
    }

    if (imm5 != 0)
    {
        // Shift into the scratch register, then select between it and the old destination value.
        switch ((ArmShiftType)sType)
        {
            case ArmShiftType.Lsl:
                context.Arm64Assembler.Lsl(scratch.Operand, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Lsr:
                context.Arm64Assembler.Lsr(scratch.Operand, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Asr:
                context.Arm64Assembler.Asr(scratch.Operand, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Ror:
                context.Arm64Assembler.Ror(scratch.Operand, source, InstEmitCommon.Const((int)imm5));
                break;
        }

        context.Arm64Assembler.Csel(destination, scratch.Operand, destination, condition);

        return;
    }

    // Unshifted move: by default the "condition false" value is the current destination value.
    Operand whenFalse = destination;

    InstInfo following = context.PeekNextInstruction();

    if (following.Name == InstName.MovR)
    {
        // When the next instruction is a plain move to the same register under the inverse condition,
        // its source becomes the second CSEL operand and that instruction is skipped.
        InstCondb28w4Sb20w1Rdb12w4Imm5b7w5Stypeb5w2Rmb0w4 decoded = new(following.Encoding);

        bool isInverseMove =
            decoded.Rd == rd &&
            decoded.S == 0 &&
            decoded.Stype == 0 &&
            decoded.Imm5 == 0 &&
            decoded.Cond == (cond ^ 1u) &&
            decoded.Rm != RegisterUtils.PcRegister;

        if (isInverseMove)
        {
            whenFalse = InstEmitCommon.GetInputGpr(context, decoded.Rm);
            context.SetSkipNextInstruction();
        }
    }

    context.Arm64Assembler.Csel(destination, source, whenFalse, condition);
}
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that moves a register-shifted register into a guest register,
/// optionally updating the flags from the result.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rm">Index of the source register</param>
/// <param name="sType">Shift type applied to the source</param>
/// <param name="rs">Index of the register holding the shift amount</param>
/// <param name="s">True to update the flags from the result</param>
public static void MovRr(CodeGenContext context, uint rd, uint rm, uint sType, uint rs, bool s)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand source = InstEmitCommon.GetInputGpr(context, rm);
    Operand shiftAmount = InstEmitCommon.GetInputGpr(context, rs);

    if (!s)
    {
        // The shift helper is given the destination as its output register, so no extra move is emitted here.
        InstEmitAlu.GetMShiftedByReg(context, destination, source, shiftAmount, sType);

        return;
    }

    using ScopedRegister shiftScratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);

    source = InstEmitAlu.GetMShiftedByReg(context, shiftScratch.Operand, source, shiftAmount, sType, savedFlags.Operand);

    context.Arm64Assembler.Mov(destination, source);
    context.Arm64Assembler.Tst(destination, destination);

    InstEmitCommon.RestoreCvFlags(context, savedFlags.Operand);

    context.SetNzcvModified();
}
|
||||
|
||||
/// <summary>
/// Emits an Arm64 MOVK that inserts <paramref name="imm"/> at halfword index 1 of the guest register.
/// </summary>
/// <param name="context">Code generation context providing the assembler</param>
/// <param name="rd">Index of the register being modified</param>
/// <param name="imm">Immediate value to insert</param>
public static void Movt(CodeGenContext context, uint rd, uint imm)
{
    // The register is fetched as an input because MOVK keeps the bits it does not replace.
    Operand target = InstEmitCommon.GetInputGpr(context, rd);
    int halfword = (int)imm;

    context.Arm64Assembler.Movk(target, halfword, 1);
}
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that packs one halfword from each of two registers into the destination,
/// using EXTR followed by a 16-bit rotate.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register, which is shifted before packing</param>
/// <param name="tb">True to arithmetic-shift-right the second source, false to shift it left</param>
/// <param name="imm5">Shift amount encoding</param>
public static void Pkh(CodeGenContext context, uint rd, uint rn, uint rm, bool tb, uint imm5)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand first = InstEmitCommon.GetInputGpr(context, rn);
    Operand second = InstEmitCommon.GetInputGpr(context, rm);

    if (tb)
    {
        using ScopedRegister shifted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // A zero encoding is emitted as a shift by 31.
        int amount = imm5 == 0 ? 31 : (int)imm5;

        context.Arm64Assembler.Asr(shifted.Operand, second, InstEmitCommon.Const(amount));
        context.Arm64Assembler.Extr(destination, shifted.Operand, first, 16);
    }
    else if (imm5 != 0)
    {
        using ScopedRegister shifted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Lsl(shifted.Operand, second, InstEmitCommon.Const((int)imm5));
        context.Arm64Assembler.Extr(destination, first, shifted.Operand, 16);
    }
    else
    {
        // No shift needed, the second source is used directly.
        context.Arm64Assembler.Extr(destination, first, second, 16);
    }

    // EXTR leaves the two halfwords swapped, the rotate puts them in their final positions.
    context.Arm64Assembler.Ror(destination, destination, InstEmitCommon.Const(16));
}
|
||||
}
|
||||
}
|
@ -0,0 +1,603 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
static class InstEmitMultiply
|
||||
{
|
||||
/// <summary>
/// Emits an Arm64 MADD computing a multiply-accumulate of three guest registers into <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context providing the assembler</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first multiplication operand</param>
/// <param name="rm">Index of the second multiplication operand</param>
/// <param name="ra">Index of the accumulator register</param>
public static void Mla(CodeGenContext context, uint rd, uint rn, uint rm, uint ra)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand multiplicand = InstEmitCommon.GetInputGpr(context, rn);
    Operand multiplier = InstEmitCommon.GetInputGpr(context, rm);
    Operand addend = InstEmitCommon.GetInputGpr(context, ra);

    context.Arm64Assembler.Madd(destination, multiplicand, multiplier, addend);
}
|
||||
|
||||
/// <summary>
/// Emits an Arm64 MSUB computing a multiply-subtract of three guest registers into <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context providing the assembler</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first multiplication operand</param>
/// <param name="rm">Index of the second multiplication operand</param>
/// <param name="ra">Index of the register the product is subtracted from</param>
public static void Mls(CodeGenContext context, uint rd, uint rn, uint rm, uint ra)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand multiplicand = InstEmitCommon.GetInputGpr(context, rn);
    Operand multiplier = InstEmitCommon.GetInputGpr(context, rm);
    Operand minuend = InstEmitCommon.GetInputGpr(context, ra);

    context.Arm64Assembler.Msub(destination, multiplicand, multiplier, minuend);
}
|
||||
|
||||
/// <summary>
/// Emits an Arm64 MUL of two guest registers, optionally updating the flags from the result.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first operand</param>
/// <param name="rm">Index of the second operand</param>
/// <param name="s">True to update the flags from the result</param>
public static void Mul(CodeGenContext context, uint rd, uint rn, uint rm, bool s)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand left = InstEmitCommon.GetInputGpr(context, rn);
    Operand right = InstEmitCommon.GetInputGpr(context, rm);

    if (!s)
    {
        context.Arm64Assembler.Mul(destination, left, right);

        return;
    }

    using ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // Snapshot the flags first; the snapshot is handed to RestoreCvFlags after TST.
    InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);

    context.Arm64Assembler.Mul(destination, left, right);
    context.Arm64Assembler.Tst(destination, destination);

    InstEmitCommon.RestoreCvFlags(context, savedFlags.Operand);

    context.SetNzcvModified();
}
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that multiplies one signed halfword of each source register, adds the
/// sign-extended accumulator, runs the overflow check and writes the result to <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
/// <param name="ra">Index of the accumulator register</param>
/// <param name="nHigh">Halfword selector forwarded to SelectSignedHalfword for <paramref name="rn"/></param>
/// <param name="mHigh">Halfword selector forwarded to SelectSignedHalfword for <paramref name="rm"/></param>
public static void Smlabb(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool nHigh, bool mHigh)
{
    using ScopedRegister halfN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister accumulator = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // 64-bit views of the temporaries.
    Operand halfM64 = new(OperandKind.Register, OperandType.I64, halfM.Operand.Value);
    Operand accumulator64 = new(OperandKind.Register, OperandType.I64, accumulator.Operand.Value);

    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand sourceN = InstEmitCommon.GetInputGpr(context, rn);
    Operand sourceM = InstEmitCommon.GetInputGpr(context, rm);
    Operand sourceA = InstEmitCommon.GetInputGpr(context, ra);

    SelectSignedHalfword(context, halfN.Operand, sourceN, nHigh);
    SelectSignedHalfword(context, halfM.Operand, sourceM, mHigh);

    context.Arm64Assembler.Sxtw(accumulator64, sourceA);
    context.Arm64Assembler.Smaddl(halfN.Operand, halfN.Operand, halfM.Operand, accumulator64);

    // halfM is no longer needed as an input and is handed to the overflow check.
    CheckResultOverflow(context, halfM64, halfN.Operand);

    context.Arm64Assembler.Mov(destination, halfN.Operand);
}
|
||||
|
||||
/// <summary>
/// Emits the dual halfword multiply-accumulate, delegating to <see cref="EmitSmladSmlsd"/> with the products added.
/// </summary>
public static void Smlad(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool x)
    => EmitSmladSmlsd(context, rd, rn, rm, ra, x, add: true);
|
||||
|
||||
/// <summary>
/// Emits the long multiply-accumulate, delegating to <see cref="EmitMultiplyAddLong"/> with the assembler's SMADDL emitter.
/// </summary>
public static void Smlal(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
    => EmitMultiplyAddLong(context, context.Arm64Assembler.Smaddl, rdLo, rdHi, rn, rm, s);
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that multiplies one signed halfword of each source register and adds the product
/// to the 64-bit value held in the <paramref name="rdHi"/>:<paramref name="rdLo"/> register pair.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rdLo">Index of the register holding the low word of the accumulator and result</param>
/// <param name="rdHi">Index of the register holding the high word of the accumulator and result</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
/// <param name="nHigh">Halfword selector forwarded to SelectSignedHalfword for <paramref name="rn"/></param>
/// <param name="mHigh">Halfword selector forwarded to SelectSignedHalfword for <paramref name="rm"/></param>
public static void Smlalbb(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool nHigh, bool mHigh)
{
    Operand lowWord = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand highWord = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand sourceN = InstEmitCommon.GetInputGpr(context, rn);
    Operand sourceM = InstEmitCommon.GetInputGpr(context, rm);

    Operand lowWord64 = new(OperandKind.Register, OperandType.I64, lowWord.Value);
    Operand highWord64 = new(OperandKind.Register, OperandType.I64, highWord.Value);

    using ScopedRegister halfN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister accumulator = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    SelectSignedHalfword(context, halfN.Operand, sourceN, nHigh);
    SelectSignedHalfword(context, halfM.Operand, sourceM, mHigh);

    Operand accumulator64 = new(OperandKind.Register, OperandType.I64, accumulator.Operand.Value);

    // Build the 64-bit accumulator from the register pair: (rdHi << 32) | rdLo.
    context.Arm64Assembler.Lsl(accumulator64, highWord64, InstEmitCommon.Const(32));
    context.Arm64Assembler.Orr(accumulator64, accumulator64, lowWord);

    context.Arm64Assembler.Smaddl(lowWord64, halfN.Operand, halfM.Operand, accumulator64);

    // Split the 64-bit result back into the pair.
    if (rdLo != rdHi)
    {
        context.Arm64Assembler.Lsr(highWord64, lowWord64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(lowWord, lowWord); // Zero-extend.
}
|
||||
|
||||
/// <summary>
/// Emits the dual halfword long multiply-accumulate, delegating to <see cref="EmitSmlaldSmlsld"/> with the products added.
/// </summary>
public static void Smlald(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool x)
    => EmitSmlaldSmlsld(context, rdLo, rdHi, rn, rm, x, add: true);
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that multiplies a full register by one signed halfword of another, adds the
/// accumulator shifted left by 16, shifts the 64-bit sum right by 16, runs the overflow check and
/// writes the result to <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the word source register</param>
/// <param name="rm">Index of the halfword source register</param>
/// <param name="ra">Index of the accumulator register</param>
/// <param name="mHigh">Halfword selector forwarded to SelectSignedHalfword for <paramref name="rm"/></param>
public static void Smlawb(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool mHigh)
{
    using ScopedRegister product = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister accumulator = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // 64-bit views of the temporaries.
    Operand product64 = new(OperandKind.Register, OperandType.I64, product.Operand.Value);
    Operand halfM64 = new(OperandKind.Register, OperandType.I64, halfM.Operand.Value);
    Operand accumulator64 = new(OperandKind.Register, OperandType.I64, accumulator.Operand.Value);

    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand sourceN = InstEmitCommon.GetInputGpr(context, rn);
    Operand sourceM = InstEmitCommon.GetInputGpr(context, rm);
    Operand sourceA = InstEmitCommon.GetInputGpr(context, ra);

    SelectSignedHalfword(context, halfM.Operand, sourceM, mHigh);

    // The accumulator is pre-shifted by 16 so it lines up with the product before the final shift.
    context.Arm64Assembler.Sxtw(accumulator64, sourceA);
    context.Arm64Assembler.Lsl(accumulator64, accumulator64, InstEmitCommon.Const(16));
    context.Arm64Assembler.Smaddl(product.Operand, sourceN, halfM.Operand, accumulator64);
    context.Arm64Assembler.Asr(product64, product64, InstEmitCommon.Const(16));

    // halfM is no longer needed as an input and is handed to the overflow check.
    CheckResultOverflow(context, halfM64, product.Operand);

    context.Arm64Assembler.Mov(destination, product.Operand);
}
|
||||
|
||||
/// <summary>
/// Emits the dual halfword multiply-subtract with accumulate, delegating to <see cref="EmitSmladSmlsd"/> with the products subtracted.
/// </summary>
public static void Smlsd(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool x)
    => EmitSmladSmlsd(context, rd, rn, rm, ra, x, add: false);
|
||||
|
||||
/// <summary>
/// Emits the dual halfword long multiply-subtract with accumulate, delegating to <see cref="EmitSmlaldSmlsld"/> with the products subtracted.
/// </summary>
public static void Smlsld(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool x)
    => EmitSmlaldSmlsld(context, rdLo, rdHi, rn, rm, x, add: false);
|
||||
|
||||
/// <summary>
/// Emits the most-significant-word multiply-accumulate, delegating to <see cref="EmitSmmlaSmmls"/> in add mode.
/// </summary>
public static void Smmla(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool r)
    => EmitSmmlaSmmls(context, rd, rn, rm, ra, r, add: true);
|
||||
|
||||
/// <summary>
/// Emits the most-significant-word multiply-subtract, delegating to <see cref="EmitSmmlaSmmls"/> in subtract mode.
/// </summary>
public static void Smmls(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool r)
    => EmitSmmlaSmmls(context, rd, rn, rm, ra, r, add: false);
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that performs a signed 32x32 to 64-bit multiply and keeps the upper 32 bits
/// of the product, optionally adding 0x80000000 first.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first operand</param>
/// <param name="rm">Index of the second operand</param>
/// <param name="r">True to add 0x80000000 to the product before the upper word is extracted</param>
public static void Smmul(CodeGenContext context, uint rd, uint rn, uint rm, bool r)
{
    const uint RoundingBias = 0x80000000u;

    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand left = InstEmitCommon.GetInputGpr(context, rn);
    Operand right = InstEmitCommon.GetInputGpr(context, rm);

    // The destination is used as a 64-bit register while the product is being processed.
    Operand destination64 = new(OperandKind.Register, OperandType.I64, destination.Value);

    context.Arm64Assembler.Smull(destination64, left, right);

    if (r)
    {
        using ScopedRegister bias = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Mov(bias.Operand, RoundingBias);
        context.Arm64Assembler.Add(destination64, destination64, bias.Operand);
    }

    // Bring the upper word down; the logical shift leaves the upper half of the register zeroed.
    context.Arm64Assembler.Lsr(destination64, destination64, InstEmitCommon.Const(32));
}
|
||||
|
||||
/// <summary>
/// Emits the dual halfword multiply with the products added, delegating to <see cref="EmitSmuadSmusd"/>.
/// </summary>
public static void Smuad(CodeGenContext context, uint rd, uint rn, uint rm, bool x)
    => EmitSmuadSmusd(context, rd, rn, rm, x, add: true);
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that multiplies one signed halfword of each source register and writes
/// the low 32 bits of the product to <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
/// <param name="nHigh">Halfword selector forwarded to SelectSignedHalfword for <paramref name="rn"/></param>
/// <param name="mHigh">Halfword selector forwarded to SelectSignedHalfword for <paramref name="rm"/></param>
public static void Smulbb(CodeGenContext context, uint rd, uint rn, uint rm, bool nHigh, bool mHigh)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand sourceN = InstEmitCommon.GetInputGpr(context, rn);
    Operand sourceM = InstEmitCommon.GetInputGpr(context, rm);

    Operand destination64 = new(OperandKind.Register, OperandType.I64, destination.Value);

    using ScopedRegister halfN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    SelectSignedHalfword(context, halfN.Operand, sourceN, nHigh);
    SelectSignedHalfword(context, halfM.Operand, sourceM, mHigh);

    context.Arm64Assembler.Smull(destination64, halfN.Operand, halfM.Operand);

    // SMULL wrote all 64 bits, the 32-bit self-move clears the upper half again.
    context.Arm64Assembler.Mov(destination, destination); // Zero-extend.
}
|
||||
|
||||
/// <summary>
/// Emits the signed long multiply, delegating to <see cref="EmitMultiplyLong"/> with the assembler's SMULL emitter.
/// </summary>
public static void Smull(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
    => EmitMultiplyLong(context, context.Arm64Assembler.Smull, rdLo, rdHi, rn, rm, s);
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that multiplies a full register by one signed halfword of another, shifts the
/// 64-bit product right by 16, runs the overflow check and writes the result to <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the word source register</param>
/// <param name="rm">Index of the halfword source register</param>
/// <param name="mHigh">Halfword selector forwarded to SelectSignedHalfword for <paramref name="rm"/></param>
public static void Smulwb(CodeGenContext context, uint rd, uint rn, uint rm, bool mHigh)
{
    using ScopedRegister product = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // 64-bit views of the temporaries.
    Operand product64 = new(OperandKind.Register, OperandType.I64, product.Operand.Value);
    Operand halfM64 = new(OperandKind.Register, OperandType.I64, halfM.Operand.Value);

    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand sourceN = InstEmitCommon.GetInputGpr(context, rn);
    Operand sourceM = InstEmitCommon.GetInputGpr(context, rm);

    SelectSignedHalfword(context, halfM.Operand, sourceM, mHigh);

    context.Arm64Assembler.Smull(product.Operand, sourceN, halfM.Operand);
    context.Arm64Assembler.Asr(product64, product64, InstEmitCommon.Const(16));

    // halfM is no longer needed as an input and is handed to the overflow check.
    CheckResultOverflow(context, halfM64, product.Operand);

    context.Arm64Assembler.Mov(destination, product.Operand);
}
|
||||
|
||||
/// <summary>
/// Emits the dual halfword multiply with the products subtracted, delegating to <see cref="EmitSmuadSmusd"/>.
/// </summary>
public static void Smusd(CodeGenContext context, uint rd, uint rn, uint rm, bool x)
    => EmitSmuadSmusd(context, rd, rn, rm, x, add: false);
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that computes rn * rm + rdLo + rdHi as an unsigned 64-bit value and writes
/// it back to the <paramref name="rdHi"/>:<paramref name="rdLo"/> register pair.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rdLo">Index of the register holding the first addend and receiving the low result word</param>
/// <param name="rdHi">Index of the register holding the second addend and receiving the high result word</param>
/// <param name="rn">Index of the first multiplication operand</param>
/// <param name="rm">Index of the second multiplication operand</param>
public static void Umaal(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm)
{
    Operand lowWord = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand highWord = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand left = InstEmitCommon.GetInputGpr(context, rn);
    Operand right = InstEmitCommon.GetInputGpr(context, rm);

    Operand lowWord64 = new(OperandKind.Register, OperandType.I64, lowWord.Value);
    Operand highWord64 = new(OperandKind.Register, OperandType.I64, highWord.Value);

    if (rdLo != rdHi)
    {
        context.Arm64Assembler.Umaddl(lowWord64, left, right, lowWord64);
        context.Arm64Assembler.Add(lowWord64, lowWord64, highWord64);

        // Split the 64-bit sum: the upper word goes to rdHi.
        context.Arm64Assembler.Lsr(highWord64, lowWord64, InstEmitCommon.Const(32));
    }
    else
    {
        // Both addends live in the same register, so the intermediate sum goes through a temporary.
        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand scratch64 = new(OperandKind.Register, OperandType.I64, scratch.Operand.Value);

        context.Arm64Assembler.Umaddl(scratch64, left, right, lowWord64);
        context.Arm64Assembler.Add(lowWord64, scratch64, highWord64);
    }

    context.Arm64Assembler.Mov(lowWord, lowWord); // Zero-extend.
}
|
||||
|
||||
/// <summary>
/// Emits the unsigned long multiply-accumulate, delegating to <see cref="EmitMultiplyAddLong"/> with the assembler's UMADDL emitter.
/// </summary>
public static void Umlal(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
    => EmitMultiplyAddLong(context, context.Arm64Assembler.Umaddl, rdLo, rdHi, rn, rm, s);
|
||||
|
||||
/// <summary>
/// Emits the unsigned long multiply, delegating to <see cref="EmitMultiplyLong"/> with the assembler's UMULL emitter.
/// </summary>
public static void Umull(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
    => EmitMultiplyLong(context, context.Arm64Assembler.Umull, rdLo, rdHi, rn, rm, s);
|
||||
|
||||
/// <summary>
/// Emits a 32x32 to 64-bit multiply and splits the product across the
/// <paramref name="rdHi"/>:<paramref name="rdLo"/> register pair, optionally updating the flags.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="action">Assembler emitter performing the multiply (destination, first operand, second operand)</param>
/// <param name="rdLo">Index of the register receiving the low result word</param>
/// <param name="rdHi">Index of the register receiving the high result word</param>
/// <param name="rn">Index of the first operand</param>
/// <param name="rm">Index of the second operand</param>
/// <param name="s">True to update the flags from the 64-bit result</param>
private static void EmitMultiplyLong(CodeGenContext context, Action<Operand, Operand, Operand> action, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
{
    Operand rdLoOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand rdHiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand rdLoOperand64 = new(OperandKind.Register, OperandType.I64, rdLoOperand.Value);
    Operand rdHiOperand64 = new(OperandKind.Register, OperandType.I64, rdHiOperand.Value);

    if (s)
    {
        using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

        action(rdLoOperand64, rnOperand, rmOperand);

        // The full 64-bit product is still in rdLo at this point, so the flags reflect the whole result.
        context.Arm64Assembler.Tst(rdLoOperand64, rdLoOperand64);

        InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

        // Every other flag-setting emitter reports the flag change to the context; this path did not.
        context.SetNzcvModified();
    }
    else
    {
        action(rdLoOperand64, rnOperand, rmOperand);
    }

    if (rdLo != rdHi)
    {
        context.Arm64Assembler.Lsr(rdHiOperand64, rdLoOperand64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(rdLoOperand, rdLoOperand); // Zero-extend.
}
|
||||
|
||||
/// <summary>
/// Emits a 32x32 to 64-bit multiply-accumulate where the accumulator and the result both live in the
/// <paramref name="rdHi"/>:<paramref name="rdLo"/> register pair, optionally updating the flags.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="action">Assembler emitter performing the multiply-add (destination, first operand, second operand, addend)</param>
/// <param name="rdLo">Index of the register holding the low word of the accumulator and result</param>
/// <param name="rdHi">Index of the register holding the high word of the accumulator and result</param>
/// <param name="rn">Index of the first operand</param>
/// <param name="rm">Index of the second operand</param>
/// <param name="s">True to update the flags from the 64-bit result</param>
private static void EmitMultiplyAddLong(CodeGenContext context, Action<Operand, Operand, Operand, Operand> action, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
{
    Operand lowWord = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand highWord = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand left = InstEmitCommon.GetInputGpr(context, rn);
    Operand right = InstEmitCommon.GetInputGpr(context, rm);

    Operand lowWord64 = new(OperandKind.Register, OperandType.I64, lowWord.Value);
    Operand highWord64 = new(OperandKind.Register, OperandType.I64, highWord.Value);

    using ScopedRegister accumulator = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand accumulator64 = new(OperandKind.Register, OperandType.I64, accumulator.Operand.Value);

    // Build the 64-bit accumulator from the register pair: (rdHi << 32) | rdLo.
    context.Arm64Assembler.Lsl(accumulator64, highWord64, InstEmitCommon.Const(32));
    context.Arm64Assembler.Orr(accumulator64, accumulator64, lowWord);

    if (!s)
    {
        action(lowWord64, left, right, accumulator64);
    }
    else
    {
        using ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);

        action(lowWord64, left, right, accumulator64);
        context.Arm64Assembler.Tst(lowWord64, lowWord64);

        InstEmitCommon.RestoreCvFlags(context, savedFlags.Operand);

        context.SetNzcvModified();
    }

    // Split the 64-bit result back into the pair.
    if (rdLo != rdHi)
    {
        context.Arm64Assembler.Lsr(highWord64, lowWord64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(lowWord, lowWord); // Zero-extend.
}
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that multiplies the low halfwords and the high halfwords of two registers,
/// adds or subtracts the high product to/from the low product, adds the sign-extended accumulator,
/// runs the overflow check and leaves the low 32 bits of the sum in <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
/// <param name="ra">Index of the accumulator register</param>
/// <param name="x">True to rotate <paramref name="rm"/> by 16 bits (swapping its halfwords) before multiplying</param>
/// <param name="add">True to add the high product to the low product, false to subtract it</param>
private static void EmitSmladSmlsd(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool x, bool add)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand sourceN = InstEmitCommon.GetInputGpr(context, rn);
    Operand sourceM = InstEmitCommon.GetInputGpr(context, rm);
    Operand sourceA = InstEmitCommon.GetInputGpr(context, ra);

    Operand destination64 = new(OperandKind.Register, OperandType.I64, destination.Value);

    using ScopedRegister halfN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister accumulator = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand halfN64 = new(OperandKind.Register, OperandType.I64, halfN.Operand.Value);
    Operand halfM64 = new(OperandKind.Register, OperandType.I64, halfM.Operand.Value);
    Operand accumulator64 = new(OperandKind.Register, OperandType.I64, accumulator.Operand.Value);

    // Only allocated when the halfwords of rm have to be swapped.
    ScopedRegister swapped = default;

    if (x)
    {
        swapped = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Ror(swapped.Operand, sourceM, InstEmitCommon.Const(16));

        sourceM = swapped.Operand;
    }

    // Low halfwords, sign-extended, plus the sign-extended accumulator.
    context.Arm64Assembler.Sxth(halfN64, sourceN);
    context.Arm64Assembler.Sxth(halfM64, sourceM);
    context.Arm64Assembler.Sxtw(accumulator64, sourceA);

    // NOTE(review): rd is written here while rn/rm are still read below; if rd can share a host
    // register with rn or rm the high halfwords would be taken from the product - confirm.
    context.Arm64Assembler.Mul(destination64, halfN64, halfM64);

    // High halfwords.
    context.Arm64Assembler.Asr(halfN.Operand, sourceN, InstEmitCommon.Const(16));
    context.Arm64Assembler.Asr(halfM.Operand, sourceM, InstEmitCommon.Const(16));

    if (!add)
    {
        context.Arm64Assembler.Smsubl(destination64, halfN.Operand, halfM.Operand, destination64);
    }
    else
    {
        context.Arm64Assembler.Smaddl(destination64, halfN.Operand, halfM.Operand, destination64);
    }

    context.Arm64Assembler.Add(destination64, destination64, accumulator64);

    // halfM is no longer needed as an input and is handed to the overflow check.
    CheckResultOverflow(context, halfM64, destination64);

    context.Arm64Assembler.Mov(destination, destination); // Zero-extend.

    if (x)
    {
        swapped.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// Emits Arm64 code that multiplies the low halfwords and the high halfwords of two registers,
/// adds or subtracts the high product to/from the low product, and accumulates the result into the
/// 64-bit value held in the <paramref name="rdHi"/>:<paramref name="rdLo"/> register pair.
/// </summary>
/// <param name="context">Code generation context providing the assembler and the register allocator</param>
/// <param name="rdLo">Index of the register holding the low word of the accumulator and result</param>
/// <param name="rdHi">Index of the register holding the high word of the accumulator and result</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
/// <param name="x">True to rotate <paramref name="rm"/> by 16 bits (swapping its halfwords) before multiplying</param>
/// <param name="add">True to add the high product to the low product, false to subtract it</param>
private static void EmitSmlaldSmlsld(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool x, bool add)
{
    Operand rdLoOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand rdHiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand rdLoOperand64 = new(OperandKind.Register, OperandType.I64, rdLoOperand.Value);
    Operand rdHiOperand64 = new(OperandKind.Register, OperandType.I64, rdHiOperand.Value);

    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempA = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand tempN64 = new(OperandKind.Register, OperandType.I64, tempN.Operand.Value);
    Operand tempM64 = new(OperandKind.Register, OperandType.I64, tempM.Operand.Value);
    Operand tempA64 = new(OperandKind.Register, OperandType.I64, tempA.Operand.Value);

    ScopedRegister swapTemp = default;

    if (x)
    {
        swapTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Ror(swapTemp.Operand, rmOperand, InstEmitCommon.Const(16));

        rmOperand = swapTemp.Operand;
    }

    // Capture the 64-bit accumulator (rdHi << 32 | rdLo) BEFORE rdLo is used as scratch for the
    // products below. Reading it afterwards, as the code previously did, picks up the product sum
    // instead of the original low word.
    context.Arm64Assembler.Lsl(tempA64, rdHiOperand64, InstEmitCommon.Const(32));
    context.Arm64Assembler.Orr(tempA64, tempA64, rdLoOperand);

    // Low halfwords, sign-extended.
    context.Arm64Assembler.Sxth(tempN64, rnOperand);
    context.Arm64Assembler.Sxth(tempM64, rmOperand);

    // NOTE(review): rdLo is written here while rn/rm are still read below; if rdLo can share a host
    // register with rn or rm the high halfwords would be taken from the product - confirm.
    context.Arm64Assembler.Mul(rdLoOperand64, tempN64, tempM64);

    // High halfwords.
    context.Arm64Assembler.Asr(tempN.Operand, rnOperand, InstEmitCommon.Const(16));
    context.Arm64Assembler.Asr(tempM.Operand, rmOperand, InstEmitCommon.Const(16));

    if (add)
    {
        context.Arm64Assembler.Smaddl(rdLoOperand64, tempN.Operand, tempM.Operand, rdLoOperand64);
    }
    else
    {
        context.Arm64Assembler.Smsubl(rdLoOperand64, tempN.Operand, tempM.Operand, rdLoOperand64);
    }

    context.Arm64Assembler.Add(rdLoOperand64, rdLoOperand64, tempA64);

    // Split the 64-bit result back into the pair.
    if (rdLo != rdHi)
    {
        context.Arm64Assembler.Lsr(rdHiOperand64, rdLoOperand64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(rdLoOperand, rdLoOperand); // Zero-extend.

    if (x)
    {
        swapTemp.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// Emits the most-significant-word multiply accumulate/subtract:
/// rd = ((ra &lt;&lt; 32) +/- rn * rm [+ 0x80000000 when rounding]) &gt;&gt; 32.
/// </summary>
/// <param name="context">Code generation context the A64 instructions are written to</param>
/// <param name="rd">Guest destination register</param>
/// <param name="rn">First guest multiplicand register</param>
/// <param name="rm">Second guest multiplicand register</param>
/// <param name="ra">Guest register supplying the accumulator word</param>
/// <param name="r">True to add the 0x80000000 rounding constant before taking the high word</param>
/// <param name="add">True to add the product to the accumulator, false to subtract it</param>
private static void EmitSmmlaSmmls(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool r, bool add)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand multiplicand = InstEmitCommon.GetInputGpr(context, rn);
    Operand multiplier = InstEmitCommon.GetInputGpr(context, rm);
    Operand addend = InstEmitCommon.GetInputGpr(context, ra);

    Operand destination64 = new(OperandKind.Register, OperandType.I64, destination.Value);
    Operand addend64 = new(OperandKind.Register, OperandType.I64, addend.Value);

    using ScopedRegister scratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand scratch64 = new(OperandKind.Register, OperandType.I64, scratch.Operand.Value);

    // Place the accumulator in the upper word so it lines up with the high half of the product.
    context.Arm64Assembler.Lsl(scratch64, addend64, InstEmitCommon.Const(32));

    if (!add)
    {
        context.Arm64Assembler.Smsubl(destination64, multiplicand, multiplier, scratch64);
    }
    else
    {
        context.Arm64Assembler.Smaddl(destination64, multiplicand, multiplier, scratch64);
    }

    if (r)
    {
        // The scratch register is free again; reuse it for the rounding constant.
        context.Arm64Assembler.Mov(scratch.Operand, 0x80000000u);
        context.Arm64Assembler.Add(destination64, destination64, scratch64);
    }

    // Keep only the upper 32 bits of the 64-bit result.
    context.Arm64Assembler.Lsr(destination64, destination64, InstEmitCommon.Const(32));
}
|
||||
|
||||
/// <summary>
/// Emits the dual 16x16 signed multiply with add or subtract of the two products:
/// rd = (Rn[15:0] * Rm[15:0]) +/- (Rn[31:16] * Rm[31:16]).
/// For the additive form the Q flag is set when the sum does not fit in a signed 32-bit value.
/// </summary>
/// <param name="context">Code generation context the A64 instructions are written to</param>
/// <param name="rd">Guest destination register</param>
/// <param name="rn">First guest source register</param>
/// <param name="rm">Second guest source register</param>
/// <param name="x">True to swap the halfwords of <paramref name="rm"/> (rotate right by 16) before multiplying</param>
/// <param name="add">True to add the two products, false to subtract the top-halfword product</param>
private static void EmitSmuadSmusd(CodeGenContext context, uint rd, uint rn, uint rm, bool x, bool add)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand rdOperand64 = new(OperandKind.Register, OperandType.I64, rdOperand.Value);

    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand tempN64 = new(OperandKind.Register, OperandType.I64, tempN.Operand.Value);
    Operand tempM64 = new(OperandKind.Register, OperandType.I64, tempM.Operand.Value);

    ScopedRegister swapTemp = default;

    if (x)
    {
        swapTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Ror(swapTemp.Operand, rmOperand, InstEmitCommon.Const(16));

        rmOperand = swapTemp.Operand;
    }

    // Bottom-halfword product, kept in a temporary so that rd is not written while
    // rn/rm are still needed (rd may name the same register as rn or rm).
    context.Arm64Assembler.Sxth(tempN64, rnOperand);
    context.Arm64Assembler.Sxth(tempM64, rmOperand);
    context.Arm64Assembler.Mul(tempN64, tempN64, tempM64);

    // Top halfwords. The write to rd happens in the instruction that performs the last
    // read of rn, and rm has already been fully consumed at that point.
    context.Arm64Assembler.Asr(tempM.Operand, rmOperand, InstEmitCommon.Const(16));
    context.Arm64Assembler.Asr(rdOperand, rnOperand, InstEmitCommon.Const(16));

    if (add)
    {
        context.Arm64Assembler.Smaddl(rdOperand64, rdOperand, tempM.Operand, tempN64);

        // Only the sum can exceed the signed 32-bit range; the difference cannot.
        // tempM is no longer needed and serves as the scratch register for the check.
        CheckResultOverflow(context, tempM64, rdOperand64);
    }
    else
    {
        context.Arm64Assembler.Smsubl(rdOperand64, rdOperand, tempM.Operand, tempN64);
    }

    context.Arm64Assembler.Mov(rdOperand, rdOperand); // Zero-extend.

    if (x)
    {
        swapTemp.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// Writes one sign-extended halfword of <paramref name="source"/> to <paramref name="dest"/>.
/// </summary>
/// <param name="context">Code generation context the A64 instruction is written to</param>
/// <param name="dest">Register receiving the selected halfword</param>
/// <param name="source">Register the halfword is taken from</param>
/// <param name="high">True to take the upper halfword (arithmetic shift right by 16), false to sign-extend the lower one</param>
private static void SelectSignedHalfword(CodeGenContext context, Operand dest, Operand source, bool high)
{
    if (!high)
    {
        context.Arm64Assembler.Sxth(dest, source);

        return;
    }

    context.Arm64Assembler.Asr(dest, source, InstEmitCommon.Const(16));
}
|
||||
|
||||
/// <summary>
/// Emits code that sets the Q flag when <paramref name="result"/> differs from the sign extension
/// of its own low 32 bits.
/// </summary>
/// <param name="context">Code generation context the A64 instructions are written to</param>
/// <param name="temp64">Scratch register; it is overwritten</param>
/// <param name="result">Register holding the value to check</param>
private static void CheckResultOverflow(CodeGenContext context, Operand temp64, Operand result)
{
    // temp64 = SignExtend(result[31:0]) - result, which is zero when nothing was lost.
    context.Arm64Assembler.Sxtw(temp64, result);
    context.Arm64Assembler.Sub(temp64, temp64, result);

    // Emit the branch with a zero offset for now; the real offset is only known
    // once the flag update has been emitted.
    int cbzIndex = context.CodeWriter.InstructionPointer;

    context.Arm64Assembler.Cbz(temp64, 0);

    // Set Q flag if we had an overflow.
    InstEmitSaturate.SetQFlag(context);

    // Patch the distance to the current position into the 19-bit field at bit 5 of the branch.
    int skipDistance = context.CodeWriter.InstructionPointer - cbzIndex;
    uint offsetField = (uint)((skipDistance & 0x7ffff) << 5);

    context.CodeWriter.WriteInstructionAt(cbzIndex, context.CodeWriter.ReadInstructionAt(cbzIndex) | offsetField);
}
|
||||
}
|
||||
}
|
@ -0,0 +1,344 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for the vector arithmetic instructions. Each method picks the matching
    /// <c>Arm64Assembler</c> instruction emitter(s) and forwards to an <see cref="InstEmitNeonCommon"/> helper.
    /// </summary>
    static class InstEmitNeonArithmetic
    {
        // UABA when u is set, SABA otherwise.
        public static void Vaba(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uaba : context.Arm64Assembler.Saba, null);

        // UABAL when u is set, SABAL otherwise.
        public static void Vabal(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uabal : context.Arm64Assembler.Sabal);

        // Floating-point absolute difference (FABD).
        public static void VabdF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FabdV, context.Arm64Assembler.FabdVH);

        // UABD when u is set, SABD otherwise.
        public static void VabdI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uabd : context.Arm64Assembler.Sabd, null);

        // UABDL when u is set, SABDL otherwise.
        public static void Vabdl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uabdl : context.Arm64Assembler.Sabdl);

        // Integer ABS, or FABS when f is set.
        public static void Vabs(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.AbsV);

                return;
            }

            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FabsSingleAndDouble, context.Arm64Assembler.FabsHalf);
        }

        // Floating-point add (FADD).
        public static void VaddF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FaddSingleAndDouble, context.Arm64Assembler.FaddHalf);

        // Integer add; both the AddV and AddS emitters are supplied.
        public static void VaddI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.AddV, context.Arm64Assembler.AddS);

        // Narrowing add (ADDHN).
        public static void Vaddhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
            InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Addhn);

        // UADDL when u is set, SADDL otherwise.
        public static void Vaddl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uaddl : context.Arm64Assembler.Saddl);

        // UADDW when u is set, SADDW otherwise.
        public static void Vaddw(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorBinaryWide(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uaddw : context.Arm64Assembler.Saddw);

        // Fused multiply-add into rd (FMLA).
        public static void VfmaF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorTernaryRdF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmlaVecSingleAndDouble, context.Arm64Assembler.FmlaVecHalf);

        // Fused multiply-subtract from rd (FMLS).
        public static void VfmsF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorTernaryRdF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmlsVecSingleAndDouble, context.Arm64Assembler.FmlsVecHalf);

        // UHADD when u is set, SHADD otherwise.
        public static void Vhadd(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uhadd : context.Arm64Assembler.Shadd, null);

        // UHSUB when u is set, SHSUB otherwise.
        public static void Vhsub(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uhsub : context.Arm64Assembler.Shsub, null);

        // Floating-point maximum number (FMAXNM).
        public static void Vmaxnm(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmaxnmSingleAndDouble, context.Arm64Assembler.FmaxnmHalf);

        // Floating-point maximum (FMAX).
        public static void VmaxF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmaxSingleAndDouble, context.Arm64Assembler.FmaxHalf);

        // UMAX when u is set, SMAX otherwise.
        public static void VmaxI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Umax : context.Arm64Assembler.Smax, null);

        // Floating-point minimum number (FMINNM).
        public static void Vminnm(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FminnmSingleAndDouble, context.Arm64Assembler.FminnmHalf);

        // Floating-point minimum (FMIN).
        public static void VminF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FminSingleAndDouble, context.Arm64Assembler.FminHalf);

        // UMIN when u is set, SMIN otherwise.
        public static void VminI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Umin : context.Arm64Assembler.Smin, null);

        // Floating-point multiply accumulate; the product is not negated.
        public static void VmlaF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorTernaryMulNegRdF(context, rd, rn, rm, sz, q, negProduct: false);

        // Integer multiply accumulate (MLA, vector form).
        public static void VmlaI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.MlaVec);

        // Multiply accumulate by scalar: integer MLA (element form), or the floating-point helper when f is set.
        public static void VmlaS(CodeGenContext context, uint rd, uint rn, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.MlaElt);

                return;
            }

            InstEmitNeonCommon.EmitVectorTernaryMulNegRdByScalarAnyF(context, rd, rn, rm, size, q, negProduct: false);
        }

        // UMLAL when u is set, SMLAL otherwise (vector form).
        public static void VmlalI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorTernaryRdLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlalVec : context.Arm64Assembler.SmlalVec);

        // UMLAL when u is set, SMLAL otherwise (element form).
        public static void VmlalS(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorTernaryRdLongByScalar(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlalElt : context.Arm64Assembler.SmlalElt);

        // Floating-point multiply subtract; the product is negated.
        public static void VmlsF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorTernaryMulNegRdF(context, rd, rn, rm, sz, q, negProduct: true);

        // Integer multiply subtract (MLS, vector form).
        public static void VmlsI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.MlsVec);

        // Multiply subtract by scalar: integer MLS (element form), or the floating-point helper when f is set.
        public static void VmlsS(CodeGenContext context, uint rd, uint rn, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.MlsElt);

                return;
            }

            InstEmitNeonCommon.EmitVectorTernaryMulNegRdByScalarAnyF(context, rd, rn, rm, size, q, negProduct: true);
        }

        // UMLSL when u is set, SMLSL otherwise (vector form).
        public static void VmlslI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorTernaryRdLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlslVec : context.Arm64Assembler.SmlslVec);

        // UMLSL when u is set, SMLSL otherwise (element form).
        public static void VmlslS(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorTernaryRdLongByScalar(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlslElt : context.Arm64Assembler.SmlslElt);

        // Floating-point multiply (FMUL, vector form).
        public static void VmulF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmulVecSingleAndDouble, context.Arm64Assembler.FmulVecHalf);

        // Integer multiply (MUL), or polynomial multiply (PMUL) when op is set.
        public static void VmulI(CodeGenContext context, uint rd, uint rn, uint rm, bool op, uint size, uint q)
        {
            if (!op)
            {
                InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.MulVec, null);

                return;
            }

            // TODO: Feature check, emulation if not supported.

            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.Pmul, null);
        }

        // Multiply by scalar: integer MUL (element form), or FMUL (element form) when f is set.
        public static void VmulS(CodeGenContext context, uint rd, uint rn, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                InstEmitNeonCommon.EmitVectorBinaryByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.MulElt);

                return;
            }

            InstEmitNeonCommon.EmitVectorBinaryByScalarAnyF(context, rd, rn, rm, size, q, context.Arm64Assembler.FmulElt2regElementSingleAndDouble, context.Arm64Assembler.FmulElt2regElementHalf);
        }

        // Long multiply: UMULL/SMULL selected by u, or PMULL when op is set.
        public static void VmullI(CodeGenContext context, uint rd, uint rn, uint rm, bool op, bool u, uint size)
        {
            if (!op)
            {
                InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmullVec : context.Arm64Assembler.SmullVec);

                return;
            }

            // TODO: Feature check, emulation if not supported.

            // A size of 2 is passed to the PMULL emitter as 3; other sizes are forwarded unchanged.
            InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size == 2 ? 3 : size, context.Arm64Assembler.Pmull);
        }

        // UMULL when u is set, SMULL otherwise (element form).
        public static void VmullS(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmullElt : context.Arm64Assembler.SmullElt);

        // Integer NEG, or FNEG when f is set.
        public static void Vneg(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.NegV);

                return;
            }

            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FnegSingleAndDouble, context.Arm64Assembler.FnegHalf);
        }

        // UADALP when op is set, SADALP otherwise.
        public static void Vpadal(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryRd(context, rd, rm, size, q, op ? context.Arm64Assembler.Uadalp : context.Arm64Assembler.Sadalp);

        // Floating-point pairwise add (FADDP).
        public static void VpaddF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FaddpVecSingleAndDouble, context.Arm64Assembler.FaddpVecHalf);

        // Integer pairwise add (ADDP).
        public static void VpaddI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.AddpVec, null);

        // UADDLP when op is set, SADDLP otherwise.
        public static void Vpaddl(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, op ? context.Arm64Assembler.Uaddlp : context.Arm64Assembler.Saddlp);

        // Floating-point pairwise maximum (FMAXP).
        public static void VpmaxF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmaxpVecSingleAndDouble, context.Arm64Assembler.FmaxpVecHalf);

        // UMAXP when u is set, SMAXP otherwise.
        public static void VpmaxI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Umaxp : context.Arm64Assembler.Smaxp, null);

        // Floating-point pairwise minimum (FMINP).
        public static void VpminF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FminpVecSingleAndDouble, context.Arm64Assembler.FminpVecHalf);

        // UMINP when u is set, SMINP otherwise.
        public static void VpminI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uminp : context.Arm64Assembler.Sminp, null);

        // Reciprocal estimate; only the floating-point form (FRECPE) is implemented.
        public static void Vrecpe(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                throw new NotImplementedException();
            }

            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrecpeV, context.Arm64Assembler.FrecpeVH);
        }

        // Reciprocal step (FRECPS).
        public static void Vrecps(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FrecpsV, context.Arm64Assembler.FrecpsVH);

        // Reciprocal square root estimate; only the floating-point form (FRSQRTE) is implemented.
        public static void Vrsqrte(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                throw new NotImplementedException();
            }

            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrsqrteV, context.Arm64Assembler.FrsqrteVH);
        }

        // Reciprocal square root step (FRSQRTS).
        public static void Vrsqrts(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FrsqrtsV, context.Arm64Assembler.FrsqrtsVH);

        // Floating-point subtract (FSUB).
        public static void VsubF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FsubSingleAndDouble, context.Arm64Assembler.FsubHalf);

        // Integer subtract; both the SubV and SubS emitters are supplied.
        public static void VsubI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SubV, context.Arm64Assembler.SubS);

        // Narrowing subtract (SUBHN).
        public static void Vsubhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
            InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Subhn);

        // USUBL when u is set, SSUBL otherwise.
        public static void Vsubl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Usubl : context.Arm64Assembler.Ssubl);

        // USUBW when u is set, SSUBW otherwise.
        public static void Vsubw(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
            InstEmitNeonCommon.EmitVectorBinaryWide(context, rd, rn, rm, size, u ? context.Arm64Assembler.Usubw : context.Arm64Assembler.Ssubw);
    }
}
|
@ -0,0 +1,35 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for the vector bit counting and element reversal instructions.
    /// Every method forwards to <see cref="InstEmitNeonCommon.EmitVectorUnary"/> with the matching A64 emitter.
    /// </summary>
    static class InstEmitNeonBit
    {
        // Count leading sign bits (CLS).
        public static void Vcls(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Cls);

        // Count leading zeros (CLZ).
        public static void Vclz(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Clz);

        // Count set bits (CNT).
        public static void Vcnt(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Cnt);

        // Reverse within 16-bit containers (REV16).
        public static void Vrev16(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Rev16);

        // Reverse within 32-bit containers (REV32).
        public static void Vrev32(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Rev32);

        // Reverse within 64-bit containers (REV64).
        public static void Vrev64(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Rev64);
    }
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,126 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for the vector compare instructions. Methods taking only <c>rm</c> use the
    /// compare-against-zero A64 emitters; methods taking <c>rn</c> and <c>rm</c> use the
    /// register-register emitters.
    /// </summary>
    static class InstEmitNeonCompare
    {
        // Floating-point absolute compare greater than or equal (FACGE).
        public static void Vacge(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FacgeV, context.Arm64Assembler.FacgeVH);

        // Floating-point absolute compare greater than (FACGT).
        public static void Vacgt(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FacgtV, context.Arm64Assembler.FacgtVH);

        // Compare equal to zero: CMEQ, or FCMEQ when f is set.
        public static void VceqI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmeqZeroV);

                return;
            }

            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmeqZeroV, context.Arm64Assembler.FcmeqZeroVH);
        }

        // Integer compare equal, register form (CMEQ).
        public static void VceqR(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.CmeqRegV, context.Arm64Assembler.CmeqRegS);

        // Floating-point compare equal, register form (FCMEQ).
        public static void VceqFR(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FcmeqRegV, context.Arm64Assembler.FcmeqRegVH);

        // Compare greater than or equal to zero: CMGE, or FCMGE when f is set.
        public static void VcgeI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmgeZeroV);

                return;
            }

            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmgeZeroV, context.Arm64Assembler.FcmgeZeroVH);
        }

        // Integer compare greater than or equal, register form: CMHS when u is set, CMGE otherwise.
        public static void VcgeR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(
                context,
                rd,
                rn,
                rm,
                size,
                q,
                u ? context.Arm64Assembler.CmhsV : context.Arm64Assembler.CmgeRegV,
                u ? context.Arm64Assembler.CmhsS : context.Arm64Assembler.CmgeRegS);

        // Floating-point compare greater than or equal, register form (FCMGE).
        public static void VcgeFR(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FcmgeRegV, context.Arm64Assembler.FcmgeRegVH);

        // Compare greater than zero: CMGT, or FCMGT when f is set.
        public static void VcgtI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmgtZeroV);

                return;
            }

            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmgtZeroV, context.Arm64Assembler.FcmgtZeroVH);
        }

        // Integer compare greater than, register form: CMHI when u is set, CMGT otherwise.
        public static void VcgtR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(
                context,
                rd,
                rn,
                rm,
                size,
                q,
                u ? context.Arm64Assembler.CmhiV : context.Arm64Assembler.CmgtRegV,
                u ? context.Arm64Assembler.CmhiS : context.Arm64Assembler.CmgtRegS);

        // Floating-point compare greater than, register form (FCMGT).
        public static void VcgtFR(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
            InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FcmgtRegV, context.Arm64Assembler.FcmgtRegVH);

        // Compare less than or equal to zero: CMLE, or FCMLE when f is set.
        public static void VcleI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmleV);

                return;
            }

            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmleV, context.Arm64Assembler.FcmleVH);
        }

        // Compare less than zero: CMLT, or FCMLT when f is set.
        public static void VcltI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
        {
            if (!f)
            {
                InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmltV);

                return;
            }

            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmltV, context.Arm64Assembler.FcmltVH);
        }

        // Bitwise test (CMTST).
        public static void Vtst(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.CmtstV, context.Arm64Assembler.CmtstS);
    }
}
|
@ -0,0 +1,137 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
static class InstEmitNeonConvert
|
||||
{
|
||||
public static void Vcvta(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q)
|
||||
{
|
||||
if (op)
|
||||
{
|
||||
InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtauV, context.Arm64Assembler.FcvtauVH);
|
||||
}
|
||||
else
|
||||
{
|
||||
InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtasV, context.Arm64Assembler.FcvtasVH);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Vcvtm(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q)
|
||||
{
|
||||
if (op)
|
||||
{
|
||||
InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtmuV, context.Arm64Assembler.FcvtmuVH);
|
||||
}
|
||||
else
|
||||
{
|
||||
InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtmsV, context.Arm64Assembler.FcvtmsVH);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Vcvtn(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q)
|
||||
{
|
||||
if (op)
|
||||
{
|
||||
InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtnuV, context.Arm64Assembler.FcvtnuVH);
|
||||
}
|
||||
else
|
||||
{
|
||||
InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtnsV, context.Arm64Assembler.FcvtnsVH);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Vcvtp(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q)
|
||||
{
|
||||
if (op)
|
||||
{
|
||||
InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtpuV, context.Arm64Assembler.FcvtpuVH);
|
||||
}
|
||||
else
|
||||
{
|
||||
InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtpsV, context.Arm64Assembler.FcvtpsVH);
|
||||
}
|
||||
}
|
||||
|
||||
public static void VcvtHs(CodeGenContext context, uint rd, uint rm, bool op)
|
||||
{
|
||||
bool halfToSingle = op;
|
||||
if (halfToSingle)
|
||||
{
|
||||
// Half to single.
|
||||
|
||||
InstEmitNeonCommon.EmitVectorUnaryLong(context, rd, rm, 0, context.Arm64Assembler.Fcvtl);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Single to half.
|
||||
|
||||
InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, 0, context.Arm64Assembler.Fcvtn);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Emits a vector conversion between floating point and integer values.
/// Bit 0 of <paramref name="op"/> selects the unsigned encoders; any bit above it selects
/// the float-to-integer direction (<c>Fcvtz*</c>) instead of integer-to-float (<c>*cvtf</c>).
/// </summary>
public static void VcvtIs(CodeGenContext context, uint rd, uint rm, uint op, uint size, uint q)
{
    Debug.Assert(op >> 2 == 0);

    bool isUnsigned = (op & 1) != 0;
    bool floatToInt = (op >> 1) != 0;

    if (floatToInt && isUnsigned)
    {
        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtzuIntV, context.Arm64Assembler.FcvtzuIntVH);
    }
    else if (floatToInt)
    {
        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtzsIntV, context.Arm64Assembler.FcvtzsIntVH);
    }
    else if (isUnsigned)
    {
        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.UcvtfIntV, context.Arm64Assembler.UcvtfIntVH);
    }
    else
    {
        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.ScvtfIntV, context.Arm64Assembler.ScvtfIntVH);
    }
}
|
||||
|
||||
/// <summary>
/// Emits a vector conversion between floating point and fixed point values.
/// Bit 0 of <paramref name="op"/> selects the float-to-fixed direction, the remaining bit
/// selects the element size (1 + op/2), and <paramref name="u"/> selects the unsigned encoders.
/// The fraction bit count is 64 - imm6, clamped to the range 1 .. element width in bits.
/// </summary>
public static void VcvtXs(CodeGenContext context, uint rd, uint rm, uint imm6, uint op, bool u, uint q)
{
    Debug.Assert(op >> 2 == 0);

    uint size = 1 + (op >> 1);
    uint elementBits = 8u << (int)size;
    uint fbits = Math.Clamp(64u - imm6, 1, elementBits);
    bool toFixed = (op & 1) != 0;

    if (toFixed && u)
    {
        InstEmitNeonCommon.EmitVectorUnaryFixedAnyF(context, rd, rm, fbits, size, q, context.Arm64Assembler.FcvtzuFixV);
    }
    else if (toFixed)
    {
        InstEmitNeonCommon.EmitVectorUnaryFixedAnyF(context, rd, rm, fbits, size, q, context.Arm64Assembler.FcvtzsFixV);
    }
    else if (u)
    {
        InstEmitNeonCommon.EmitVectorUnaryFixedAnyF(context, rd, rm, fbits, size, q, context.Arm64Assembler.UcvtfFixV);
    }
    else
    {
        InstEmitNeonCommon.EmitVectorUnaryFixedAnyF(context, rd, rm, fbits, size, q, context.Arm64Assembler.ScvtfFixV);
    }
}
|
||||
}
|
||||
}
|
@ -0,0 +1,43 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the AES instructions. Each one asserts that the size field is zero and
/// forwards to <c>InstEmitNeonCommon.EmitVectorUnary</c> with the matching Arm64 encoder.
/// </summary>
/// <remarks>
/// TODO: Feature check, emulation if not supported (applies to every method here).
/// </remarks>
static class InstEmitNeonCrypto
{
    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Aesd</c>.</summary>
    public static void Aesd(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(size == 0);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, assembler.Aesd);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Aese</c>.</summary>
    public static void Aese(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(size == 0);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, assembler.Aese);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Aesimc</c>.</summary>
    public static void Aesimc(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(size == 0);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, assembler.Aesimc);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Aesmc</c>.</summary>
    public static void Aesmc(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(size == 0);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, assembler.Aesmc);
    }
}
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the SHA1 and SHA256 instructions. Three-register forms assert <c>q == 1</c>
/// and go through <c>EmitVectorBinary</c>; two-register forms assert <c>size == 2</c> and go
/// through <c>EmitVectorUnary</c>.
/// </summary>
/// <remarks>
/// TODO: Feature check, emulation if not supported (applies to every method here).
/// </remarks>
static class InstEmitNeonHash
{
    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Sha1c</c>.</summary>
    public static void Sha1c(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(q == 1);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, assembler.Sha1c);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Sha1h</c>.</summary>
    public static void Sha1h(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(size == 2);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, assembler.Sha1h);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Sha1m</c>.</summary>
    public static void Sha1m(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(q == 1);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, assembler.Sha1m);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Sha1p</c>.</summary>
    public static void Sha1p(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(q == 1);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, assembler.Sha1p);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Sha1su0</c>.</summary>
    public static void Sha1su0(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(q == 1);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, assembler.Sha1su0);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Sha1su1</c>.</summary>
    public static void Sha1su1(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(size == 2);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, assembler.Sha1su1);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Sha256h</c>.</summary>
    public static void Sha256h(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(q == 1);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, assembler.Sha256h);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Sha256h2</c>.</summary>
    public static void Sha256h2(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(q == 1);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, assembler.Sha256h2);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Sha256su0</c>.</summary>
    public static void Sha256su0(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(size == 2);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, assembler.Sha256su0);
    }

    /// <summary>Emits the operation encoded by <c>Arm64Assembler.Sha256su1</c>.</summary>
    public static void Sha256su1(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        // TODO: Feature check, emulation if not supported.
        Debug.Assert(q == 1);

        var assembler = context.Arm64Assembler;

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, assembler.Sha256su1);
    }
}
|
||||
}
|
@ -0,0 +1,79 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the NEON bitwise instructions. Register forms forward to the shared
/// binary/ternary vector helpers; immediate forms go through <see cref="EmitMovi"/>.
/// </summary>
static class InstEmitNeonLogical
{
    /// <summary>Emits a register bitwise operation using <c>Arm64Assembler.And</c>.</summary>
    public static void VandR(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.And);

    /// <summary>Emits the immediate form by calling <see cref="EmitMovi"/> with op = 1.</summary>
    public static void VbicI(CodeGenContext context, uint rd, uint cmode, uint imm8, uint q)
        => EmitMovi(context, rd, cmode, imm8, 1, q);

    /// <summary>Emits a register bitwise operation using <c>Arm64Assembler.BicReg</c>.</summary>
    public static void VbicR(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.BicReg);

    /// <summary>Emits a ternary (destination also read) operation using <c>Arm64Assembler.Bif</c>.</summary>
    public static void VbifR(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, q, context.Arm64Assembler.Bif);

    /// <summary>Emits a ternary (destination also read) operation using <c>Arm64Assembler.Bit</c>.</summary>
    public static void VbitR(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, q, context.Arm64Assembler.Bit);

    /// <summary>Emits a ternary (destination also read) operation using <c>Arm64Assembler.Bsl</c>.</summary>
    public static void VbslR(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, q, context.Arm64Assembler.Bsl);

    /// <summary>Emits a register bitwise operation using <c>Arm64Assembler.Eor</c>.</summary>
    public static void VeorR(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.Eor);

    /// <summary>Emits a register bitwise operation using <c>Arm64Assembler.Orn</c>.</summary>
    public static void VornR(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.Orn);

    /// <summary>Emits the immediate form by calling <see cref="EmitMovi"/> with op = 0.</summary>
    public static void VorrI(CodeGenContext context, uint rd, uint cmode, uint imm8, uint q)
        => EmitMovi(context, rd, cmode, imm8, 0, q);

    /// <summary>Emits a register bitwise operation using <c>Arm64Assembler.OrrReg</c>.</summary>
    public static void VorrR(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.OrrReg);

    /// <summary>
    /// Emits <c>Movi</c> with the immediate split into its eight parts by <c>InstEmitNeonMove.Split</c>.
    /// When <paramref name="q"/> is non-zero the remapped register for <c>rd / 2</c> is the direct target.
    /// Otherwise the current value of <paramref name="rd"/> is first moved into a scratch register,
    /// <c>Movi</c> is emitted on that scratch register, and the result is inserted back.
    /// </summary>
    private static void EmitMovi(CodeGenContext context, uint rd, uint cmode, uint imm8, uint op, uint q)
    {
        (uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = InstEmitNeonMove.Split(imm8);

        if (q != 0)
        {
            Operand target = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

            context.Arm64Assembler.Movi(target, h, g, f, e, d, cmode, c, b, a, op, q);

            return;
        }

        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        // The scratch register is seeded with rd before Movi is emitted on it.
        InstEmitNeonCommon.MoveScalarToSide(context, scratch.Operand, rd, false);

        context.Arm64Assembler.Movi(scratch.Operand, h, g, f, e, d, cmode, c, b, a, op, q);

        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
    }
}
|
||||
}
|
@ -0,0 +1,797 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
static class InstEmitNeonMemory
|
||||
{
|
||||
/// <summary>
/// Emits a single-lane load using <c>Ld1SnglAsNoPostIndex</c> with one register.
/// The lane index is <paramref name="indexAlign"/> shifted right by (size + 1), and the
/// write-back amount handed to the core helper is one element (1 shifted left by size bytes).
/// </summary>
public static void Vld11(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    int elementBytes = 1 << (int)size;
    uint lane = indexAlign >> ((int)size + 1);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        elementBytes,
        address => EmitMemoryLoad1234SingleInstruction(context, address, rd, lane, size, 1, 1, context.Arm64Assembler.Ld1SnglAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a load-and-replicate using <c>Ld1rAsNoPostIndex</c> into (t + 1) registers with a step of 1.
/// The write-back amount handed to the core helper is one element. <paramref name="a"/> is unused here.
/// </summary>
public static void Vld1A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
{
    int elementBytes = 1 << (int)size;
    uint registerCount = t + 1;

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        elementBytes,
        address => EmitMemoryLoad1SingleReplicateInstruction(context, address, rd, size, registerCount, 1, context.Arm64Assembler.Ld1rAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a multiple-register load using <c>Ld1MultAsNoPostIndex</c> with a register step of 1.
/// The write-back amount is 8 bytes per register. <paramref name="align"/> is unused here.
/// </summary>
public static void Vld1M(CodeGenContext context, uint rd, uint rn, uint rm, uint registersCount, uint align, uint size)
{
    int totalBytes = 8 * (int)registersCount;

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryLoad1234MultipleInstruction(context, address, rd, size, registersCount, 1, context.Arm64Assembler.Ld1MultAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a single-lane, two-register load using <c>Ld2SnglAsNoPostIndex</c>.
/// The register step is 2 when size is non-zero and bit <c>size</c> of
/// <paramref name="indexAlign"/> is set, otherwise 1. Write-back amount is two elements.
/// </summary>
public static void Vld21(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    uint lane = indexAlign >> ((int)size + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << (int)size)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int totalBytes = 2 * (1 << (int)size);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryLoad1234SingleInstruction(context, address, rd, lane, size, 2, step, context.Arm64Assembler.Ld2SnglAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a two-register load-and-replicate using <c>Ld2rAsNoPostIndex</c> with a register
/// step of (t + 1). Write-back amount is two elements. <paramref name="a"/> is unused here.
/// </summary>
public static void Vld2A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
{
    int totalBytes = 2 * (1 << (int)size);
    uint step = t + 1;

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryLoad234SingleReplicateInstruction(context, address, rd, size, 2, step, context.Arm64Assembler.Ld2rAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a two-register multiple load using <c>Ld2MultAsNoPostIndex</c>.
/// The register step is 1 or 2 depending on bit 0 of <paramref name="type"/>; write-back
/// amount is 16 bytes. <paramref name="align"/> is unused here.
/// </summary>
public static void Vld2M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int TotalBytes = 16;

    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        TotalBytes,
        address => EmitMemoryLoad1234MultipleInstruction(context, address, rd, size, 2, step, context.Arm64Assembler.Ld2MultAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits the 2x2 register form of the multiple load using <c>Ld2MultAsNoPostIndex</c>.
/// Write-back amount is 32 bytes. <paramref name="align"/> is unused here.
/// </summary>
public static void Vld2M(CodeGenContext context, uint rd, uint rn, uint rm, uint align, uint size)
{
    const int TotalBytes = 32;

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        TotalBytes,
        address => EmitMemoryLoad1234Multiple2x2Instruction(context, address, rd, size, context.Arm64Assembler.Ld2MultAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a single-lane, three-register load using <c>Ld3SnglAsNoPostIndex</c>.
/// The register step is 2 when size is non-zero and bit <c>size</c> of
/// <paramref name="indexAlign"/> is set, otherwise 1. Write-back amount is three elements.
/// </summary>
public static void Vld31(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    uint lane = indexAlign >> ((int)size + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << (int)size)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int totalBytes = 3 * (1 << (int)size);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryLoad1234SingleInstruction(context, address, rd, lane, size, 3, step, context.Arm64Assembler.Ld3SnglAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a three-register load-and-replicate using <c>Ld3rAsNoPostIndex</c> with a register
/// step of (t + 1). Write-back amount is three elements. <paramref name="a"/> is unused here.
/// </summary>
public static void Vld3A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
{
    int totalBytes = 3 * (1 << (int)size);
    uint step = t + 1;

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryLoad234SingleReplicateInstruction(context, address, rd, size, 3, step, context.Arm64Assembler.Ld3rAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a three-register multiple load using <c>Ld3MultAsNoPostIndex</c>.
/// The register step is 1 or 2 depending on bit 0 of <paramref name="type"/>; write-back
/// amount is 24 bytes. <paramref name="align"/> is unused here.
/// </summary>
public static void Vld3M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int TotalBytes = 24;

    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        TotalBytes,
        address => EmitMemoryLoad1234MultipleInstruction(context, address, rd, size, 3, step, context.Arm64Assembler.Ld3MultAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a single-lane, four-register load using <c>Ld4SnglAsNoPostIndex</c>.
/// The register step is 2 when size is non-zero and bit <c>size</c> of
/// <paramref name="indexAlign"/> is set, otherwise 1. Write-back amount is four elements.
/// </summary>
public static void Vld41(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    uint lane = indexAlign >> ((int)size + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << (int)size)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int totalBytes = 4 * (1 << (int)size);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryLoad1234SingleInstruction(context, address, rd, lane, size, 4, step, context.Arm64Assembler.Ld4SnglAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a four-register load-and-replicate using <c>Ld4rAsNoPostIndex</c> with a register
/// step of (t + 1). Write-back amount is four elements. <paramref name="a"/> is unused here.
/// </summary>
public static void Vld4A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
{
    int totalBytes = 4 * (1 << (int)size);
    uint step = t + 1;

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryLoad234SingleReplicateInstruction(context, address, rd, size, 4, step, context.Arm64Assembler.Ld4rAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a four-register multiple load using <c>Ld4MultAsNoPostIndex</c>.
/// The register step is 1 or 2 depending on bit 0 of <paramref name="type"/>; write-back
/// amount is 32 bytes. <paramref name="align"/> is unused here.
/// </summary>
public static void Vld4M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int TotalBytes = 32;

    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        TotalBytes,
        address => EmitMemoryLoad1234MultipleInstruction(context, address, rd, size, 4, step, context.Arm64Assembler.Ld4MultAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a multiple-register load by forwarding to <c>EmitMemoryMultipleInstruction</c>
/// with <c>isStore</c> false; <paramref name="u"/> is passed as the add flag and
/// <paramref name="w"/> as the write-back flag.
/// </summary>
public static void Vldm(CodeGenContext context, uint rd, uint rn, uint registerCount, bool u, bool w, bool singleRegs)
    => EmitMemoryMultipleInstruction(context, rd, rn, registerCount, add: u, wBack: w, singleRegs, isStore: false);
|
||||
|
||||
/// <summary>
/// Emits a single-register load by forwarding to <c>EmitMemoryInstruction</c> with
/// <c>isStore</c> false; <paramref name="u"/> is passed as the add flag.
/// </summary>
public static void Vldr(CodeGenContext context, uint rd, uint rn, uint imm8, bool u, uint size)
    => EmitMemoryInstruction(context, rd, rn, imm8, add: u, size, isStore: false);
|
||||
|
||||
/// <summary>
/// Emits a single-lane store using <c>St1SnglAsNoPostIndex</c> with one register.
/// The lane index is <paramref name="indexAlign"/> shifted right by (size + 1), and the
/// write-back amount handed to the core helper is one element.
/// </summary>
public static void Vst11(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    int elementBytes = 1 << (int)size;
    uint lane = indexAlign >> ((int)size + 1);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        elementBytes,
        address => EmitMemoryStore1234SingleInstruction(context, address, rd, lane, size, 1, 1, context.Arm64Assembler.St1SnglAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a multiple-register store using <c>St1MultAsNoPostIndex</c> with a register step of 1.
/// The write-back amount is 8 bytes per register. <paramref name="align"/> is unused here.
/// </summary>
public static void Vst1M(CodeGenContext context, uint rd, uint rn, uint rm, uint registersCount, uint align, uint size)
{
    int totalBytes = 8 * (int)registersCount;

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryStore1234MultipleInstruction(context, address, rd, size, registersCount, 1, context.Arm64Assembler.St1MultAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a single-lane, two-register store using <c>St2SnglAsNoPostIndex</c>.
/// The register step is 2 when size is non-zero and bit <c>size</c> of
/// <paramref name="indexAlign"/> is set, otherwise 1. Write-back amount is two elements.
/// </summary>
public static void Vst21(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    uint lane = indexAlign >> ((int)size + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << (int)size)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int totalBytes = 2 * (1 << (int)size);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryStore1234SingleInstruction(context, address, rd, lane, size, 2, step, context.Arm64Assembler.St2SnglAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a two-register multiple store using <c>St2MultAsNoPostIndex</c>.
/// The register step is 1 or 2 depending on bit 0 of <paramref name="type"/>; write-back
/// amount is 16 bytes. <paramref name="align"/> is unused here.
/// </summary>
public static void Vst2M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int TotalBytes = 16;

    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        TotalBytes,
        address => EmitMemoryStore1234MultipleInstruction(context, address, rd, size, 2, step, context.Arm64Assembler.St2MultAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits the 2x2 register form of the multiple store using <c>St2MultAsNoPostIndex</c>.
/// Write-back amount is 32 bytes. <paramref name="align"/> is unused here.
/// </summary>
public static void Vst2M(CodeGenContext context, uint rd, uint rn, uint rm, uint align, uint size)
{
    const int TotalBytes = 32;

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        TotalBytes,
        address => EmitMemoryStore1234Multiple2x2Instruction(context, address, rd, size, context.Arm64Assembler.St2MultAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a single-lane, three-register store using <c>St3SnglAsNoPostIndex</c>.
/// The register step is 2 when size is non-zero and bit <c>size</c> of
/// <paramref name="indexAlign"/> is set, otherwise 1. Write-back amount is three elements.
/// </summary>
public static void Vst31(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    uint lane = indexAlign >> ((int)size + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << (int)size)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int totalBytes = 3 * (1 << (int)size);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryStore1234SingleInstruction(context, address, rd, lane, size, 3, step, context.Arm64Assembler.St3SnglAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a three-register multiple store using <c>St3MultAsNoPostIndex</c>.
/// The register step is 1 or 2 depending on bit 0 of <paramref name="type"/>; write-back
/// amount is 24 bytes. <paramref name="align"/> is unused here.
/// </summary>
public static void Vst3M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int TotalBytes = 24;

    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        TotalBytes,
        address => EmitMemoryStore1234MultipleInstruction(context, address, rd, size, 3, step, context.Arm64Assembler.St3MultAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a single-lane, four-register store using <c>St4SnglAsNoPostIndex</c>.
/// The register step is 2 when size is non-zero and bit <c>size</c> of
/// <paramref name="indexAlign"/> is set, otherwise 1. Write-back amount is four elements.
/// </summary>
public static void Vst41(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    uint lane = indexAlign >> ((int)size + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << (int)size)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int totalBytes = 4 * (1 << (int)size);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        totalBytes,
        address => EmitMemoryStore1234SingleInstruction(context, address, rd, lane, size, 4, step, context.Arm64Assembler.St4SnglAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a four-register multiple store using <c>St4MultAsNoPostIndex</c>.
/// The register step is 1 or 2 depending on bit 0 of <paramref name="type"/>; write-back
/// amount is 32 bytes. <paramref name="align"/> is unused here.
/// </summary>
public static void Vst4M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int TotalBytes = 32;

    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(
        context,
        rn,
        rm,
        TotalBytes,
        address => EmitMemoryStore1234MultipleInstruction(context, address, rd, size, 4, step, context.Arm64Assembler.St4MultAsNoPostIndex));
}
|
||||
|
||||
/// <summary>
/// Emits a multiple-register store by forwarding to <c>EmitMemoryMultipleInstruction</c>
/// with <c>isStore</c> true; <paramref name="u"/> is passed as the add flag and
/// <paramref name="w"/> as the write-back flag.
/// </summary>
public static void Vstm(CodeGenContext context, uint rd, uint rn, uint registerCount, bool u, bool w, bool singleRegs)
    => EmitMemoryMultipleInstruction(context, rd, rn, registerCount, add: u, wBack: w, singleRegs, isStore: true);
|
||||
|
||||
/// <summary>
/// Emits a single-register store by forwarding to <c>EmitMemoryInstruction</c> with
/// <c>isStore</c> true; <paramref name="u"/> is passed as the add flag.
/// </summary>
public static void Vstr(CodeGenContext context, uint rd, uint rn, uint imm8, bool u, uint size)
    => EmitMemoryInstruction(context, rd, rn, imm8, add: u, size, isStore: true);
|
||||
|
||||
/// <summary>
/// Emits a load or store of <paramref name="registerCount"/> consecutive registers starting at
/// <paramref name="rd"/>, addressed through guest register <paramref name="rn"/>.
/// </summary>
/// <remarks>
/// The transfer size is 4 bytes per register when <paramref name="singleRegs"/> is set, 8 otherwise.
/// In the decrementing form (<paramref name="add"/> false) the size is subtracted from the base
/// before the access, directly in the base register when <paramref name="wBack"/> is set.
/// In the incrementing form the base is used as-is and, with write-back, advanced afterwards.
/// </remarks>
private static void EmitMemoryMultipleInstruction(
    CodeGenContext context,
    uint rd,
    uint rn,
    uint registerCount,
    bool add,
    bool wBack,
    bool singleRegs,
    bool isStore)
{
    Operand guestBase = InstEmitCommon.GetInputGpr(context, rn);

    using ScopedRegister hostAddress = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    int bytesPerRegister = singleRegs ? 4 : 8;
    Operand totalSize = InstEmitCommon.Const((int)registerCount * bytesPerRegister);

    if (add)
    {
        // Incrementing: access starts at the unmodified base.
        InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, hostAddress.Operand, guestBase);
    }
    else if (wBack)
    {
        // Decrementing with write-back: the base register itself is lowered first.
        InstEmitMemory.WriteAddShiftOffset(context.Arm64Assembler, guestBase, guestBase, totalSize, false, ArmShiftType.Lsl, 0);
        InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, hostAddress.Operand, guestBase);
    }
    else
    {
        // Decrementing without write-back: the lowered address only lives in the scratch register.
        InstEmitMemory.WriteAddShiftOffset(context.Arm64Assembler, hostAddress.Operand, guestBase, totalSize, false, ArmShiftType.Lsl, 0);
        InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, hostAddress.Operand, hostAddress.Operand);
    }

    EmitMemoryMultipleInstructionCore(context, hostAddress.Operand, rd, registerCount, singleRegs, isStore);

    if (add && wBack)
    {
        context.Arm64Assembler.Add(guestBase, guestBase, totalSize);
    }
}
|
||||
|
||||
/// <summary>
/// Emits the individual accesses for a multiple-register load or store relative to
/// <paramref name="baseAddress"/>, covering registers from <paramref name="rd"/> up to
/// (but not including) min(rd + registerCount, 32).
/// </summary>
/// <remarks>
/// Three phases: leading registers that do not start a full vector are transferred one at a
/// time; then whole vectors are transferred, two at once when enough registers remain and the
/// running offset is 16-byte aligned; finally any trailing registers are transferred one at a time.
/// </remarks>
private static void EmitMemoryMultipleInstructionCore(CodeGenContext context, Operand baseAddress, uint rd, uint registerCount, bool singleRegs, bool isStore)
{
    int registerBytes = singleRegs ? 4 : 8;
    uint registersPerVector = singleRegs ? 4u : 2u;
    int vectorIndexShift = singleRegs ? 2 : 1;
    uint misalignMask = singleRegs ? 3u : 1u;

    uint end = Math.Min(rd + registerCount, 32u);
    uint reg = rd;
    int byteOffset = 0;

    // Read/write misaligned elements first.
    while ((reg & misalignMask) != 0 && reg < end)
    {
        EmitMemoryInstruction(context, baseAddress, reg, byteOffset, singleRegs, isStore);

        byteOffset += registerBytes;
        reg++;
    }

    // Read/write aligned, full vectors.
    while (end - reg >= registersPerVector)
    {
        int vectorIndex = (int)(reg >> vectorIndexShift);

        Operand firstVector = context.RegisterAllocator.RemapSimdRegister(vectorIndex);

        bool offsetAligned = (byteOffset & 0xf) == 0;

        if (end - reg >= registersPerVector * 2 && offsetAligned)
        {
            // Two full vectors left and an aligned offset: use a pair access.
            Operand secondVector = context.RegisterAllocator.RemapSimdRegister(vectorIndex + 1);

            if (isStore)
            {
                context.Arm64Assembler.StpRiUn(firstVector, secondVector, baseAddress, byteOffset);
            }
            else
            {
                context.Arm64Assembler.LdpRiUn(firstVector, secondVector, baseAddress, byteOffset);
            }

            reg += registersPerVector * 2;
            byteOffset += 32;

            continue;
        }

        if (offsetAligned)
        {
            if (isStore)
            {
                context.Arm64Assembler.StrRiUn(firstVector, baseAddress, byteOffset);
            }
            else
            {
                context.Arm64Assembler.LdrRiUn(firstVector, baseAddress, byteOffset);
            }
        }
        else if (isStore)
        {
            context.Arm64Assembler.Stur(firstVector, baseAddress, byteOffset);
        }
        else
        {
            context.Arm64Assembler.Ldur(firstVector, baseAddress, byteOffset);
        }

        reg += registersPerVector;
        byteOffset += 16;
    }

    // Read/write last misaligned elements.
    while (reg < end)
    {
        EmitMemoryInstruction(context, baseAddress, reg, byteOffset, singleRegs, isStore);

        byteOffset += registerBytes;
        reg++;
    }
}
|
||||
|
||||
/// <summary>
/// Emits the transfer of one scalar register <paramref name="r"/> at
/// <paramref name="baseAddress"/> + <paramref name="offs"/>.
/// A store first moves the scalar into a scratch register; a load reads into a scratch
/// register and then inserts the value into <paramref name="r"/>.
/// </summary>
private static void EmitMemoryInstruction(CodeGenContext context, Operand baseAddress, uint r, int offs, bool singleRegs, bool isStore)
{
    if (!isStore)
    {
        using ScopedRegister loaded = context.RegisterAllocator.AllocateTempFpRegisterScoped(singleRegs);

        context.Arm64Assembler.LdrRiUn(loaded.Operand, baseAddress, offs);

        InstEmitNeonCommon.InsertResult(context, loaded.Operand, r, singleRegs);
    }
    else
    {
        using ScopedRegister source = InstEmitNeonCommon.MoveScalarToSide(context, r, singleRegs);

        context.Arm64Assembler.StrRiUn(source.Operand, baseAddress, offs);
    }
}
|
||||
|
||||
/// <summary>
/// Emits a single scalar load or store of <paramref name="rd"/> at the address formed from
/// guest register <paramref name="rn"/> and a scaled <paramref name="imm8"/> offset.
/// </summary>
/// <remarks>
/// The immediate is scaled by 2 when <paramref name="size"/> is 1 and by 4 otherwise, and is
/// subtracted when <paramref name="add"/> is false. For a PC base the final address is
/// materialized as a constant from the word-aligned PC. For other bases the offset is kept for
/// the access itself when <c>CanFoldOffset</c> allows it, otherwise it is added to the base up
/// front. The unscaled access form (<c>Ldur</c>/<c>Stur</c>) is used for negative offsets and
/// for size 3 offsets that are not a multiple of 8.
/// </remarks>
private static void EmitMemoryInstruction(CodeGenContext context, uint rd, uint rn, uint imm8, bool add, uint size, bool isStore)
{
    bool singleRegs = size != 3;
    int offs = (int)imm8 << (size == 1 ? 1 : 2);
    int signedOffs = add ? offs : -offs;

    using ScopedRegister address = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    if (rn == RegisterUtils.PcRegister)
    {
        context.Arm64Assembler.Mov(address.Operand, (context.Pc & ~3u) + (uint)signedOffs);

        InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, address.Operand, address.Operand);

        // Offset already folded into the constant address.
        offs = 0;
    }
    else
    {
        Operand rnOperand = context.RegisterAllocator.RemapGprRegister((int)rn);

        if (InstEmitMemory.CanFoldOffset(context.MemoryManagerType, signedOffs, (int)size, true, out _))
        {
            InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, address.Operand, rnOperand);

            // Offset stays on the access instruction.
            offs = signedOffs;
        }
        else
        {
            InstEmitMemory.WriteAddShiftOffset(context.Arm64Assembler, address.Operand, rnOperand, InstEmitCommon.Const(offs), add, ArmShiftType.Lsl, 0);
            InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, address.Operand, address.Operand);

            offs = 0;
        }
    }

    bool useUnscaledForm = (size == 3 && (offs & 7) != 0) || offs < 0;

    if (isStore)
    {
        using ScopedRegister source = InstEmitNeonCommon.MoveScalarToSide(context, rd, singleRegs);

        if (useUnscaledForm)
        {
            context.Arm64Assembler.Stur(source.Operand, address.Operand, offs, size);
        }
        else
        {
            context.Arm64Assembler.StrRiUn(source.Operand, address.Operand, offs, size);
        }
    }
    else
    {
        using ScopedRegister loaded = context.RegisterAllocator.AllocateTempFpRegisterScoped(singleRegs);

        if (useUnscaledForm)
        {
            context.Arm64Assembler.Ldur(loaded.Operand, address.Operand, offs, size);
        }
        else
        {
            context.Arm64Assembler.LdrRiUn(loaded.Operand, address.Operand, offs, size);
        }

        InstEmitNeonCommon.InsertResult(context, loaded.Operand, rd, singleRegs);
    }
}
|
||||
|
||||
/// <summary>
/// Shared frame for the element/structure loads and stores: translates the address held in
/// guest register <paramref name="rn"/>, invokes <paramref name="callback"/> with the
/// translated address, then applies base register write-back.
/// </summary>
/// <remarks>
/// No write-back happens when <paramref name="rm"/> is the PC register. When it is the SP
/// register the base advances by <paramref name="bytes"/>; for any other register the base
/// advances by that register's value.
/// </remarks>
private static void EmitMemory1234InstructionCore(CodeGenContext context, uint rn, uint rm, int bytes, Action<Operand> callback)
{
    Operand guestBase = InstEmitCommon.GetInputGpr(context, rn);

    using ScopedRegister hostAddress = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, hostAddress.Operand, guestBase);

    callback(hostAddress.Operand);

    if (rm == RegisterUtils.PcRegister)
    {
        // No write-back requested.
        return;
    }

    Operand increment = rm != RegisterUtils.SpRegister
        ? InstEmitCommon.GetInputGpr(context, rm)
        : InstEmitCommon.Const(bytes);

    context.Arm64Assembler.Add(guestBase, guestBase, increment);
}
|
||||
|
||||
/// <summary>
/// Emits a single-lane load into <paramref name="registerCount"/> registers spaced
/// <paramref name="step"/> apart, starting at <paramref name="rd"/>.
/// The current register contents are first copied into sequential scratch registers,
/// <paramref name="action"/> is invoked on the first of them with the lane index and size,
/// and the scratch contents are then copied back.
/// </summary>
private static void EmitMemoryLoad1234SingleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint index,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    ScopedRegister[] scratch = AllocateSequentialRegisters(context, (int)registerCount);

    // Seed the scratch registers so lanes other than the loaded one keep their values.
    MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, scratch);

    Operand firstScratch = scratch[0].Operand;

    action(firstScratch, baseAddress, index, size);

    MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, scratch);

    FreeSequentialRegisters(scratch);
}
|
||||
|
||||
/// <summary>
/// Emits a one-element load-and-replicate into <paramref name="registerCount"/> registers
/// starting at <paramref name="rd"/>.
/// </summary>
/// <remarks>
/// When <paramref name="rd"/> is even and two registers are requested, the remapped register
/// for rd / 2 is written directly with the last argument of <paramref name="action"/> set to 1.
/// Otherwise the load goes to scratch registers (last argument 1 only if more than one register
/// is requested — presumably the 128-bit flag of the encoder, confirm against the assembler)
/// and the result is copied out with <c>MoveQuadwordsToDoublewords</c>.
/// </remarks>
private static void EmitMemoryLoad1SingleReplicateInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    bool coversWholeVector = (rd & 1) == 0 && registerCount == 2;

    if (coversWholeVector)
    {
        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        action(destination, baseAddress, size, 1);

        return;
    }

    int vectorCount = (int)((registerCount + 1) >> 1);

    ScopedRegister[] scratch = AllocateSequentialRegisters(context, vectorCount);

    uint wideFlag = registerCount > 1 ? 1u : 0u;

    action(scratch[0].Operand, baseAddress, size, wideFlag);

    MoveQuadwordsToDoublewords(context, rd, registerCount, step, scratch);

    FreeSequentialRegisters(scratch);
}
|
||||
|
||||
/// <summary>
/// Emits a multi-element load-and-replicate: <paramref name="action"/> is invoked on the first
/// of <paramref name="registerCount"/> sequential scratch registers (last argument 0), and the
/// results are then copied to the registers starting at <paramref name="rd"/>, spaced
/// <paramref name="step"/> apart.
/// </summary>
private static void EmitMemoryLoad234SingleReplicateInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    ScopedRegister[] scratch = AllocateSequentialRegisters(context, (int)registerCount);

    Operand firstScratch = scratch[0].Operand;

    action(firstScratch, baseAddress, size, 0u);

    MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, scratch);

    FreeSequentialRegisters(scratch);
}
|
||||
|
||||
/// <summary>
/// Emits a multiple-structure load for encoders taking four arguments:
/// <paramref name="action"/> is invoked on the first of <paramref name="registerCount"/>
/// sequential scratch registers (last argument 0), and the results are then copied to the
/// registers starting at <paramref name="rd"/>, spaced <paramref name="step"/> apart.
/// </summary>
private static void EmitMemoryLoad1234MultipleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    ScopedRegister[] scratch = AllocateSequentialRegisters(context, (int)registerCount);

    Operand firstScratch = scratch[0].Operand;

    action(firstScratch, baseAddress, size, 0);

    MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, scratch);

    FreeSequentialRegisters(scratch);
}
|
||||
|
||||
/// <summary>
/// Emits a multiple-structure load for encoders taking five arguments (the register count is
/// passed to <paramref name="action"/> as well). The load targets sequential scratch
/// registers, whose contents are then copied to the registers starting at
/// <paramref name="rd"/>, spaced <paramref name="step"/> apart.
/// </summary>
private static void EmitMemoryLoad1234MultipleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint, uint> action)
{
    ScopedRegister[] scratch = AllocateSequentialRegisters(context, (int)registerCount);

    Operand firstScratch = scratch[0].Operand;

    action(firstScratch, baseAddress, registerCount, size, 0);

    MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, scratch);

    FreeSequentialRegisters(scratch);
}
|
||||
|
||||
/// <summary>
/// Emits a load covering four consecutive guest doubleword registers starting at <paramref name="rd"/>.
/// When <paramref name="rd"/> is even the callback targets the remapped host registers directly;
/// otherwise it loads into two temporaries that are then scattered into the guest registers.
/// </summary>
/// <param name="context">Code generation context providing the register allocator and assembler.</param>
/// <param name="baseAddress">Operand forwarded to <paramref name="action"/> as the address.</param>
/// <param name="rd">First guest destination register.</param>
/// <param name="size">Size value forwarded unchanged to <paramref name="action"/>.</param>
/// <param name="action">Emitter callback; its last argument is always 1 here.</param>
private static void EmitMemoryLoad1234Multiple2x2Instruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    Action<Operand, Operand, uint, uint> action)
{
    bool startsOnEvenRegister = (rd & 1) == 0;

    if (startsOnEvenRegister)
    {
        Operand direct = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1), 2);

        action(direct, baseAddress, size, 1);

        return;
    }

    ScopedRegister[] scratch = AllocateSequentialRegisters(context, 2);

    action(scratch[0].Operand, baseAddress, size, 1);

    MoveQuadwordsToDoublewords2x2(context, rd, scratch);
    FreeSequentialRegisters(scratch);
}
|
||||
|
||||
/// <summary>
/// Copies the selected guest registers into a run of temporary SIMD registers and then emits
/// a single-element store through <paramref name="action"/> using those temporaries.
/// </summary>
/// <param name="context">Code generation context providing the register allocator and assembler.</param>
/// <param name="baseAddress">Operand forwarded to <paramref name="action"/> as the address.</param>
/// <param name="rd">First guest source register.</param>
/// <param name="index">Element index forwarded to <paramref name="action"/>.</param>
/// <param name="size">Size value forwarded unchanged to <paramref name="action"/>.</param>
/// <param name="registerCount">Number of temporaries to allocate and of guest registers to read.</param>
/// <param name="step">Distance between consecutive guest source registers.</param>
/// <param name="action">Emitter callback; it is invoked with the first temporary, the address, the index and the size.</param>
private static void EmitMemoryStore1234SingleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint index,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    ScopedRegister[] staging = AllocateSequentialRegisters(context, (int)registerCount);

    // The data has to be staged before the store itself is emitted.
    MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, staging);

    Operand firstStaging = staging[0].Operand;

    action(firstStaging, baseAddress, index, size);

    FreeSequentialRegisters(staging);
}
|
||||
|
||||
/// <summary>
/// Copies the selected guest registers into a run of temporary SIMD registers and then emits
/// a multi-register store through <paramref name="action"/> using those temporaries.
/// </summary>
/// <param name="context">Code generation context providing the register allocator and assembler.</param>
/// <param name="baseAddress">Operand forwarded to <paramref name="action"/> as the address.</param>
/// <param name="rd">First guest source register.</param>
/// <param name="size">Size value forwarded unchanged to <paramref name="action"/>.</param>
/// <param name="registerCount">Number of temporaries to allocate and of guest registers to read.</param>
/// <param name="step">Distance between consecutive guest source registers.</param>
/// <param name="action">Emitter callback; it is invoked with the first temporary, the address, the size and 0.</param>
private static void EmitMemoryStore1234MultipleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    ScopedRegister[] staging = AllocateSequentialRegisters(context, (int)registerCount);

    MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, staging);

    Operand firstStaging = staging[0].Operand;

    action(firstStaging, baseAddress, size, 0);

    FreeSequentialRegisters(staging);
}
|
||||
|
||||
/// <summary>
/// Variant of the multi-register store helper whose callback also receives the register count.
/// The selected guest registers are first copied into temporary SIMD registers, which the callback then stores.
/// </summary>
/// <param name="context">Code generation context providing the register allocator and assembler.</param>
/// <param name="baseAddress">Operand forwarded to <paramref name="action"/> as the address.</param>
/// <param name="rd">First guest source register.</param>
/// <param name="size">Size value forwarded unchanged to <paramref name="action"/>.</param>
/// <param name="registerCount">Number of temporaries to allocate; also forwarded to <paramref name="action"/>.</param>
/// <param name="step">Distance between consecutive guest source registers.</param>
/// <param name="action">Emitter callback; it is invoked with the first temporary, the address, the register count, the size and 0.</param>
private static void EmitMemoryStore1234MultipleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint, uint> action)
{
    ScopedRegister[] staging = AllocateSequentialRegisters(context, (int)registerCount);

    MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, staging);

    Operand firstStaging = staging[0].Operand;

    action(firstStaging, baseAddress, registerCount, size, 0);

    FreeSequentialRegisters(staging);
}
|
||||
|
||||
/// <summary>
/// Emits a store covering four consecutive guest doubleword registers starting at <paramref name="rd"/>.
/// When <paramref name="rd"/> is even the callback reads the remapped host registers directly;
/// otherwise the guest registers are first gathered into two temporaries.
/// </summary>
/// <param name="context">Code generation context providing the register allocator and assembler.</param>
/// <param name="baseAddress">Operand forwarded to <paramref name="action"/> as the address.</param>
/// <param name="rd">First guest source register.</param>
/// <param name="size">Size value forwarded unchanged to <paramref name="action"/>.</param>
/// <param name="action">Emitter callback; its last argument is always 1 here.</param>
private static void EmitMemoryStore1234Multiple2x2Instruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    Action<Operand, Operand, uint, uint> action)
{
    bool startsOnEvenRegister = (rd & 1) == 0;

    if (startsOnEvenRegister)
    {
        Operand direct = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1), 2);

        action(direct, baseAddress, size, 1);

        return;
    }

    ScopedRegister[] staging = AllocateSequentialRegisters(context, 2);

    MoveDoublewordsToQuadwords2x2(context, rd, staging);

    action(staging[0].Operand, baseAddress, size, 1);

    FreeSequentialRegisters(staging);
}
|
||||
|
||||
/// <summary>
/// Allocates <paramref name="count"/> temporary SIMD registers one after another.
/// Debug builds additionally assert that the returned registers have consecutive indices.
/// </summary>
/// <param name="context">Code generation context providing the register allocator.</param>
/// <param name="count">Number of registers to allocate.</param>
/// <returns>The allocated registers, in allocation order.</returns>
private static ScopedRegister[] AllocateSequentialRegisters(CodeGenContext context, int count)
{
    var allocated = new ScopedRegister[count];
    int slot = 0;

    while (slot < count)
    {
        allocated[slot] = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
        slot++;
    }

    AssertSequentialRegisters(allocated);

    return allocated;
}
|
||||
|
||||
/// <summary>
/// Disposes every register in <paramref name="registers"/>, front to back.
/// </summary>
/// <param name="registers">Registers to release.</param>
private static void FreeSequentialRegisters(ReadOnlySpan<ScopedRegister> registers)
{
    foreach (ref readonly ScopedRegister register in registers)
    {
        register.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// Debug-only check that the register at position i has index equal to the first register's index plus i.
/// </summary>
/// <param name="registers">Registers expected to be consecutive.</param>
[Conditional("DEBUG")]
private static void AssertSequentialRegisters(ReadOnlySpan<ScopedRegister> registers)
{
    int position = 1;

    while (position < registers.Length)
    {
        var actualIndex = registers[position].Operand.GetRegister().Index;
        var expectedIndex = registers[0].Operand.GetRegister().Index + position;

        Debug.Assert(actualIndex == expectedIndex);

        position++;
    }
}
|
||||
|
||||
/// <summary>
/// For each of the <paramref name="registerCount"/> entries, inserts element 0 of the corresponding
/// temporary into the guest doubleword register <paramref name="rd"/> + i * <paramref name="step"/>.
/// </summary>
/// <param name="context">Code generation context providing the register allocator and assembler.</param>
/// <param name="rd">First guest destination register.</param>
/// <param name="registerCount">Number of registers to copy.</param>
/// <param name="step">Distance between consecutive guest destination registers.</param>
/// <param name="registers">Source temporaries, one per guest register.</param>
private static void MoveQuadwordsLowerToDoublewords(CodeGenContext context, uint rd, uint registerCount, uint step, ReadOnlySpan<ScopedRegister> registers)
{
    for (uint i = 0; i < registerCount; i++)
    {
        uint guestRegister = rd + i * step;

        // A host register holds two guest doublewords: bit 0 selects the half, the rest selects the register.
        Operand hostRegister = context.RegisterAllocator.RemapSimdRegister((int)(guestRegister >> 1));
        uint destinationImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(guestRegister & 1u, false);

        context.Arm64Assembler.InsElt(hostRegister, registers[(int)i].Operand, 0, destinationImm5);
    }
}
|
||||
|
||||
/// <summary>
/// For each of the <paramref name="registerCount"/> entries, copies the guest doubleword register
/// <paramref name="rd"/> + i * <paramref name="step"/> into the corresponding temporary
/// via <c>InstEmitNeonCommon.MoveScalarToSide</c>.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">First guest source register.</param>
/// <param name="registerCount">Number of registers to copy.</param>
/// <param name="step">Distance between consecutive guest source registers.</param>
/// <param name="registers">Destination temporaries, one per guest register.</param>
private static void MoveDoublewordsToQuadwordsLower(CodeGenContext context, uint rd, uint registerCount, uint step, ReadOnlySpan<ScopedRegister> registers)
{
    for (uint i = 0; i < registerCount; i++)
    {
        uint guestRegister = rd + i * step;
        Operand destination = registers[(int)i].Operand;

        InstEmitNeonCommon.MoveScalarToSide(context, destination, guestRegister, false);
    }
}
|
||||
|
||||
/// <summary>
/// Gathers four consecutive guest doubleword registers starting at <paramref name="rd"/> into two
/// temporaries: temporary i receives guest registers rd + 2i (element 0) and rd + 2i + 1 (element 1).
/// </summary>
/// <param name="context">Code generation context providing the register allocator and assembler.</param>
/// <param name="rd">First guest source register.</param>
/// <param name="registers">The two destination temporaries.</param>
private static void MoveDoublewordsToQuadwords2x2(CodeGenContext context, uint rd, ReadOnlySpan<ScopedRegister> registers)
{
    for (int pair = 0; pair < 2; pair++)
    {
        for (uint half = 0; half < 2; half++)
        {
            uint guestRegister = rd + (uint)pair * 2 + half;

            Operand hostSource = context.RegisterAllocator.RemapSimdRegister((int)(guestRegister >> 1));
            uint destinationImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(half, false);

            // Bit 0 of the guest register picks which half of the host register is read.
            context.Arm64Assembler.InsElt(registers[pair].Operand, hostSource, (guestRegister & 1u) << 3, destinationImm5);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Scatters packed temporaries back into guest doubleword registers: entry i is read from
/// temporary i / 2 (even i from one half, odd i from the other) and written to guest register
/// <paramref name="rd"/> + i * <paramref name="step"/>.
/// </summary>
/// <param name="context">Code generation context providing the register allocator and assembler.</param>
/// <param name="rd">First guest destination register.</param>
/// <param name="registerCount">Number of guest registers to write.</param>
/// <param name="step">Distance between consecutive guest destination registers.</param>
/// <param name="registers">Source temporaries, each supplying two entries.</param>
private static void MoveQuadwordsToDoublewords(CodeGenContext context, uint rd, uint registerCount, uint step, ReadOnlySpan<ScopedRegister> registers)
{
    for (uint i = 0; i < registerCount; i++)
    {
        uint guestRegister = rd + i * step;

        Operand hostDestination = context.RegisterAllocator.RemapSimdRegister((int)(guestRegister >> 1));
        uint destinationImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(guestRegister & 1u, false);

        Operand packedSource = registers[(int)(i >> 1)].Operand;
        uint sourceSelector = (i & 1u) << 3;

        context.Arm64Assembler.InsElt(hostDestination, packedSource, sourceSelector, destinationImm5);
    }
}
|
||||
|
||||
/// <summary>
/// Scatters two temporaries into four consecutive guest doubleword registers starting at
/// <paramref name="rd"/>: temporary i supplies guest registers rd + 2i and rd + 2i + 1.
/// </summary>
/// <param name="context">Code generation context providing the register allocator and assembler.</param>
/// <param name="rd">First guest destination register.</param>
/// <param name="registers">The two source temporaries.</param>
private static void MoveQuadwordsToDoublewords2x2(CodeGenContext context, uint rd, ReadOnlySpan<ScopedRegister> registers)
{
    for (int pair = 0; pair < 2; pair++)
    {
        for (uint half = 0; half < 2; half++)
        {
            uint guestRegister = rd + (uint)pair * 2 + half;

            Operand hostDestination = context.RegisterAllocator.RemapSimdRegister((int)(guestRegister >> 1));
            uint destinationImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(guestRegister & 1u, false);

            // The source selector is 0 for the first entry of the pair and 1 << 3 for the second.
            context.Arm64Assembler.InsElt(hostDestination, registers[pair].Operand, half << 3, destinationImm5);
        }
    }
}
|
||||
}
|
||||
}
|
@ -0,0 +1,665 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
static class InstEmitNeonMove
|
||||
{
|
||||
/// <summary>
/// Emits VDUP (general-purpose register): replicates GPR <paramref name="rt"/> across a vector.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="rt">Guest source general-purpose register.</param>
/// <param name="b">Encoding bit combined with <paramref name="e"/> to derive the element size.</param>
/// <param name="e">Encoding bit combined with <paramref name="b"/> to derive the element size.</param>
/// <param name="q">0 for a 64-bit destination, non-zero for a 128-bit destination.</param>
public static void VdupR(CodeGenContext context, uint rd, uint rt, uint b, uint e, uint q)
{
    uint elementSize = 2 - (e | (b << 1));

    Debug.Assert(elementSize < 3);

    Operand source = InstEmitCommon.GetInputGpr(context, rt);
    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(0, elementSize);

    if (q != 0)
    {
        // Quadword destinations must be even-numbered; they map directly onto a host register.
        Debug.Assert((rd & 1) == 0);

        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.DupGen(destination, source, imm5, q);

        return;
    }

    // Doubleword destination: build the result in a scratch register, then insert it into rd.
    using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    context.Arm64Assembler.DupGen(scratch.Operand, source, imm5, q);

    InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
}
|
||||
|
||||
/// <summary>
/// Emits VDUP (scalar): replicates one element of guest register <paramref name="rm"/> across a vector.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="rm">Guest source doubleword register.</param>
/// <param name="imm4">Encoded element size (trailing zero count) and element index (remaining upper bits).</param>
/// <param name="q">0 for a 64-bit destination, non-zero for a 128-bit destination.</param>
public static void VdupS(CodeGenContext context, uint rd, uint rm, uint imm4, uint q)
{
    uint elementSize = (uint)BitOperations.TrailingZeroCount(imm4);

    Debug.Assert(elementSize < 3);

    uint guestIndex = imm4 >> (int)(elementSize + 1);

    Operand source = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

    // Odd guest registers are offset by (1 << (3 - size)) elements within the host register.
    uint hostIndex = guestIndex | ((rm & 1) << (int)(3 - elementSize));
    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(hostIndex, elementSize);

    if (q != 0)
    {
        Debug.Assert((rd & 1) == 0);

        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.DupEltVectorFromElement(destination, source, imm5, q);

        return;
    }

    using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    context.Arm64Assembler.DupEltVectorFromElement(scratch.Operand, source, imm5, q);

    InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
}
|
||||
|
||||
/// <summary>
/// Emits VEXT using the AArch64 EXT instruction.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="rn">First guest source register.</param>
/// <param name="rm">Second guest source register.</param>
/// <param name="imm4">Immediate forwarded to EXT.</param>
/// <param name="q">0 for 64-bit operands, non-zero for 128-bit operands.</param>
public static void Vext(CodeGenContext context, uint rd, uint rn, uint rm, uint imm4, uint q)
{
    if (q != 0)
    {
        // All quadword operands must be even-numbered guest registers.
        Debug.Assert(((rd | rn | rm) & 1) == 0);

        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        Operand first = context.RegisterAllocator.RemapSimdRegister((int)(rn >> 1));
        Operand second = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

        context.Arm64Assembler.Ext(destination, first, imm4, second, q);

        return;
    }

    using ScopedRegister firstSide = InstEmitNeonCommon.MoveScalarToSide(context, rn, false);
    using ScopedRegister secondSide = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);

    using ScopedRegister result = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, firstSide, secondSide);

    context.Arm64Assembler.Ext(result.Operand, firstSide.Operand, imm4, secondSide.Operand, q);

    InstEmitNeonCommon.InsertResult(context, result.Operand, rd, false);
}
|
||||
|
||||
/// <summary>
/// Emits VMOVL as a long shift left by zero (USHLL when <paramref name="u"/> is set, SSHLL otherwise).
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="rm">Guest source register.</param>
/// <param name="u">Selects the unsigned variant.</param>
/// <param name="imm3h">Encoded element size; its trailing zero count is used as the size.</param>
public static void Vmovl(CodeGenContext context, uint rd, uint rm, bool u, uint imm3h)
{
    uint elementSize = (uint)BitOperations.TrailingZeroCount(imm3h);

    Debug.Assert(elementSize < 3);

    const uint ShiftAmount = 0;

    InstEmitNeonCommon.EmitVectorBinaryLongShift(
        context,
        rd,
        rm,
        ShiftAmount,
        elementSize,
        isShl: true,
        u ? context.Arm64Assembler.Ushll : context.Arm64Assembler.Sshll);
}
|
||||
|
||||
/// <summary>
/// Emits VMOVN through the AArch64 XTN instruction.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="rm">Guest source register.</param>
/// <param name="size">Element size selector; must be below 3.</param>
public static void Vmovn(CodeGenContext context, uint rd, uint rm, uint size)
{
    bool sizeIsValid = size < 3;

    Debug.Assert(sizeIsValid);

    InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, size, context.Arm64Assembler.Xtn);
}
|
||||
|
||||
/// <summary>
/// Emits VMOVX as a scalar unsigned shift right by 16 (USHR) of <paramref name="rm"/> into <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="rm">Guest source register.</param>
public static void Vmovx(CodeGenContext context, uint rd, uint rm)
    => InstEmitNeonCommon.EmitScalarBinaryShift(context, rd, rm, 16, 2, isShl: false, context.Arm64Assembler.UshrS);
|
||||
|
||||
/// <summary>
/// Emits VMOV between a guest doubleword register and a pair of general-purpose registers.
/// <paramref name="rt"/> carries the low 32 bits and <paramref name="rt2"/> the high 32 bits.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rt">Guest GPR for the low word.</param>
/// <param name="rt2">Guest GPR for the high word.</param>
/// <param name="rm">Guest doubleword register.</param>
/// <param name="op">True to move SIMD to GPRs, false to move GPRs to SIMD.</param>
public static void VmovD(CodeGenContext context, uint rt, uint rt2, uint rm, bool op)
{
    Operand hostVector = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

    // Bit 0 of rm selects which half of the host register is accessed.
    uint upperHalf = rm & 1;
    uint fmovType = upperHalf + 1;

    if (op)
    {
        Operand low = InstEmitCommon.GetOutputGpr(context, rt);
        Operand high = InstEmitCommon.GetOutputGpr(context, rt2);

        Operand low64 = new(OperandKind.Register, OperandType.I64, low.Value);
        Operand high64 = new(OperandKind.Register, OperandType.I64, high.Value);

        // Read all 64 bits into rt, derive rt2 from the top half, then zero-extend rt with a 32-bit self-move.
        context.Arm64Assembler.FmovFloatGen(low64, hostVector, fmovType, 1, 0, upperHalf);
        context.Arm64Assembler.Lsr(high64, low64, InstEmitCommon.Const(32));
        context.Arm64Assembler.Mov(low, low);

        return;
    }

    Operand lowSource = InstEmitCommon.GetInputGpr(context, rt);
    Operand highSource = InstEmitCommon.GetInputGpr(context, rt2);

    using ScopedRegister combined = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand combined64 = new(OperandKind.Register, OperandType.I64, combined.Operand.Value);

    // combined = (rt2 << 32) | rt
    context.Arm64Assembler.Lsl(combined64, highSource, InstEmitCommon.Const(32));
    context.Arm64Assembler.Orr(combined64, combined64, lowSource);

    if (upperHalf != 0)
    {
        context.Arm64Assembler.FmovFloatGen(hostVector, combined64, fmovType, 1, 1, upperHalf);
    }
    else
    {
        // An FMOV straight into the bottom of Rm would clear its high bits,
        // so go through a scratch register and insert the result instead.
        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.FmovFloatGen(scratch.Operand, combined64, fmovType, 1, 1, upperHalf);

        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rm, false);
    }
}
|
||||
|
||||
/// <summary>
/// Emits VMOV between a general-purpose register and a SIMD scalar, using FMOV with ftype 3.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rt">Guest general-purpose register.</param>
/// <param name="rn">Guest SIMD scalar register.</param>
/// <param name="op">True to move SIMD to GPR, false to move GPR to SIMD.</param>
public static void VmovH(CodeGenContext context, uint rt, uint rn, bool op)
{
    if (!op)
    {
        Operand source = InstEmitCommon.GetInputGpr(context, rt);

        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.FmovFloatGen(scratch.Operand, source, 3, 0, 1, 0);

        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rn, true);

        return;
    }

    Operand destination = InstEmitCommon.GetOutputGpr(context, rt);

    using ScopedRegister scalar = InstEmitNeonCommon.MoveScalarToSide(context, rn, true);

    context.Arm64Assembler.FmovFloatGen(destination, scalar.Operand, 3, 0, 0, 0);
}
|
||||
|
||||
/// <summary>
/// Emits VMOV (immediate) through the AArch64 MOVI instruction.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="op">Op field forwarded to MOVI.</param>
/// <param name="cmode">Cmode field forwarded to MOVI.</param>
/// <param name="imm8">8-bit immediate; it is split into individual bits for the encoder.</param>
/// <param name="q">0 for a 64-bit destination, non-zero for a 128-bit destination.</param>
public static void VmovI(CodeGenContext context, uint rd, uint op, uint cmode, uint imm8, uint q)
{
    (uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = Split(imm8);

    if (q != 0)
    {
        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.Movi(destination, h, g, f, e, d, cmode, c, b, a, op, q);

        return;
    }

    // Doubleword destination: materialize in a scratch register, then insert into rd.
    using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    context.Arm64Assembler.Movi(scratch.Operand, h, g, f, e, d, cmode, c, b, a, op, q);

    InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
}
|
||||
|
||||
/// <summary>
/// Emits VMOV (floating-point immediate): builds the constant in a scratch register with FMOV
/// and inserts it into <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="imm8">Encoded floating-point immediate.</param>
/// <param name="size">Guest size field; XORed with 2 to form the FMOV type, and 3 selects a non-single insert.</param>
public static void VmovFI(CodeGenContext context, uint rd, uint imm8, uint size)
{
    uint fmovType = size ^ 2u;
    bool isSingle = size != 3;

    using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    context.Arm64Assembler.FmovFloatImm(scratch.Operand, imm8, fmovType);

    InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, isSingle);
}
|
||||
|
||||
/// <summary>
/// Emits VMOV (register) as a single INS (element) between the host registers backing
/// <paramref name="rm"/> and <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="rm">Guest source register.</param>
/// <param name="size">2 selects single-precision registers; any other value selects doublewords.</param>
public static void VmovR(CodeGenContext context, uint rd, uint rm, uint size)
{
    bool isSingle = size == 2;

    int hostShift;
    uint elementMask;
    int selectorShift;

    if (isSingle)
    {
        // Four singles per host register.
        hostShift = 2;
        elementMask = 3u;
        selectorShift = 2;
    }
    else
    {
        // Two doublewords per host register.
        hostShift = 1;
        elementMask = 1u;
        selectorShift = 3;
    }

    uint imm4 = (rm & elementMask) << selectorShift;
    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(rd & elementMask, isSingle);

    Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> hostShift));
    Operand source = context.RegisterAllocator.RemapSimdRegister((int)(rm >> hostShift));

    context.Arm64Assembler.InsElt(destination, source, imm4, imm5);
}
|
||||
|
||||
/// <summary>
/// Emits VMOV (general-purpose register to scalar) as INS (general) into one element of <paramref name="rd"/>.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination doubleword register.</param>
/// <param name="rt">Guest source general-purpose register.</param>
/// <param name="opc1">Encoding field contributing to element size and index.</param>
/// <param name="opc2">Encoding field contributing to element size and index.</param>
public static void VmovRs(CodeGenContext context, uint rd, uint rt, uint opc1, uint opc2)
{
    bool isByte = (opc1 & 2u) != 0;
    bool isHalfword = !isByte && (opc2 & 1u) != 0;

    uint elementIndex;
    uint elementSize;

    if (isByte)
    {
        elementSize = 0;
        elementIndex = opc2 | ((opc1 & 1u) << 2);
    }
    else if (isHalfword)
    {
        elementSize = 1;
        elementIndex = (opc2 >> 1) | ((opc1 & 1u) << 1);
    }
    else
    {
        Debug.Assert(opc1 == 0 || opc1 == 1);
        Debug.Assert(opc2 == 0);

        elementSize = 2;
        elementIndex = opc1 & 1u;
    }

    // Odd guest registers sit in the upper half of the host register.
    elementIndex |= (rd & 1u) << (int)(3 - elementSize);

    Operand source = InstEmitCommon.GetInputGpr(context, rt);
    Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

    context.Arm64Assembler.InsGen(destination, source, InstEmitNeonCommon.GetImm5ForElementIndex(elementIndex, elementSize));
}
|
||||
|
||||
/// <summary>
/// Emits VMOV between a general-purpose register and a single-precision register, using FMOV with ftype 0.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rt">Guest general-purpose register.</param>
/// <param name="rn">Guest single-precision register.</param>
/// <param name="op">True to move SIMD to GPR, false to move GPR to SIMD.</param>
public static void VmovS(CodeGenContext context, uint rt, uint rn, bool op)
{
    if (!op)
    {
        Operand source = InstEmitCommon.GetInputGpr(context, rt);

        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.FmovFloatGen(scratch.Operand, source, 0, 0, 1, 0);

        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rn, true);

        return;
    }

    Operand destination = InstEmitCommon.GetOutputGpr(context, rt);

    using ScopedRegister scalar = InstEmitNeonCommon.MoveScalarToSide(context, rn, true);

    context.Arm64Assembler.FmovFloatGen(destination, scalar.Operand, 0, 0, 0, 0);
}
|
||||
|
||||
/// <summary>
/// Emits VMOV (scalar to general-purpose register) as UMOV or SMOV from one element of <paramref name="rn"/>.
/// UMOV is used when <paramref name="u"/> is set or the element is a word; SMOV otherwise.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rt">Guest destination general-purpose register.</param>
/// <param name="rn">Guest source doubleword register.</param>
/// <param name="u">Selects the unsigned variant.</param>
/// <param name="opc1">Encoding field contributing to element size and index.</param>
/// <param name="opc2">Encoding field contributing to element size and index.</param>
public static void VmovSr(CodeGenContext context, uint rt, uint rn, bool u, uint opc1, uint opc2)
{
    bool isByte = (opc1 & 2u) != 0;
    bool isHalfword = !isByte && (opc2 & 1u) != 0;

    uint elementIndex;
    uint elementSize;

    if (isByte)
    {
        elementSize = 0;
        elementIndex = opc2 | ((opc1 & 1u) << 2);
    }
    else if (isHalfword)
    {
        elementSize = 1;
        elementIndex = (opc2 >> 1) | ((opc1 & 1u) << 1);
    }
    else
    {
        Debug.Assert(opc1 == 0 || opc1 == 1);
        Debug.Assert(opc2 == 0);
        Debug.Assert(!u);

        elementSize = 2;
        elementIndex = opc1 & 1u;
    }

    // Odd guest registers sit in the upper half of the host register.
    elementIndex |= (rn & 1u) << (int)(3 - elementSize);

    Operand destination = InstEmitCommon.GetOutputGpr(context, rt);
    Operand source = context.RegisterAllocator.RemapSimdRegister((int)(rn >> 1));

    bool signExtend = !u && elementSize <= 1;

    if (signExtend)
    {
        context.Arm64Assembler.Smov(destination, source, (int)elementIndex, (int)elementSize);
    }
    else
    {
        context.Arm64Assembler.Umov(destination, source, (int)elementIndex, (int)elementSize);
    }
}
|
||||
|
||||
/// <summary>
/// Emits VMOV between two general-purpose registers and two consecutive single-precision registers
/// (<paramref name="rm"/> and <paramref name="rm"/> + 1).
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rt">Guest GPR paired with <paramref name="rm"/>.</param>
/// <param name="rt2">Guest GPR paired with <paramref name="rm"/> + 1.</param>
/// <param name="rm">First guest single-precision register.</param>
/// <param name="op">True to move SIMD to GPRs, false to move GPRs to SIMD.</param>
public static void VmovSs(CodeGenContext context, uint rt, uint rt2, uint rm, bool op)
{
    bool isAlignedPair = (rm & 1) == 0;

    if (isAlignedPair)
    {
        // An aligned pair of singles is exactly one doubleword register, so reuse the doubleword path.
        VmovD(context, rt, rt2, rm >> 1, op);

        return;
    }

    if (!op)
    {
        Operand firstSource = InstEmitCommon.GetInputGpr(context, rt);
        Operand secondSource = InstEmitCommon.GetInputGpr(context, rt2);

        // One scratch register is reused for both halves.
        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.FmovFloatGen(scratch.Operand, firstSource, 0, 0, 1, 0);
        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rm, true);

        context.Arm64Assembler.FmovFloatGen(scratch.Operand, secondSource, 0, 0, 1, 0);
        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rm + 1, true);

        return;
    }

    Operand firstDestination = InstEmitCommon.GetOutputGpr(context, rt);
    Operand secondDestination = InstEmitCommon.GetOutputGpr(context, rt2);

    using ScopedRegister firstSingle = InstEmitNeonCommon.MoveScalarToSide(context, rm, true);
    using ScopedRegister secondSingle = InstEmitNeonCommon.MoveScalarToSide(context, rm + 1, true);

    context.Arm64Assembler.FmovFloatGen(firstDestination, firstSingle.Operand, 0, 0, 0, 0);
    context.Arm64Assembler.FmovFloatGen(secondDestination, secondSingle.Operand, 0, 0, 0, 0);
}
|
||||
|
||||
/// <summary>
/// Emits VMVN (immediate) through the AArch64 MVNI instruction.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="cmode">Cmode field forwarded to MVNI.</param>
/// <param name="imm8">8-bit immediate; it is split into individual bits for the encoder.</param>
/// <param name="q">0 for a 64-bit destination, non-zero for a 128-bit destination.</param>
public static void VmvnI(CodeGenContext context, uint rd, uint cmode, uint imm8, uint q)
{
    (uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = Split(imm8);

    if (q != 0)
    {
        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.Mvni(destination, h, g, f, e, d, cmode, c, b, a, q);

        return;
    }

    // Doubleword destination: materialize in a scratch register, then insert into rd.
    using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    context.Arm64Assembler.Mvni(scratch.Operand, h, g, f, e, d, cmode, c, b, a, q);

    InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
}
|
||||
|
||||
/// <summary>
/// Emits VMVN (register) through the AArch64 NOT instruction.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="rm">Guest source register.</param>
/// <param name="size">Not used by this emitter.</param>
/// <param name="q">0 for 64-bit operands, non-zero for 128-bit operands.</param>
public static void VmvnR(CodeGenContext context, uint rd, uint rm, uint size, uint q)
    => InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, q, context.Arm64Assembler.Not);
|
||||
|
||||
/// <summary>
/// Emits VSWP: exchanges the contents of <paramref name="rd"/> and <paramref name="rm"/> through a scratch register.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">First guest register.</param>
/// <param name="rm">Second guest register.</param>
/// <param name="q">0 for 64-bit operands, non-zero for 128-bit operands.</param>
public static void Vswp(CodeGenContext context, uint rd, uint rm, uint q)
{
    using ScopedRegister saved = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    if (q != 0)
    {
        Operand first = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        Operand second = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

        // ORR of a register with itself acts as a register move.
        context.Arm64Assembler.Orr(saved.Operand, first, first); // saved = Rd
        context.Arm64Assembler.Orr(first, second, second); // Rd = Rm
        context.Arm64Assembler.Orr(second, saved.Operand, saved.Operand); // Rm = saved

        return;
    }

    // Keep a copy of Rd, then write Rm into Rd and the saved copy into Rm.
    InstEmitNeonCommon.MoveScalarToSide(context, saved.Operand, rd, false);

    using ScopedRegister secondSide = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);

    InstEmitNeonCommon.InsertResult(context, secondSide.Operand, rd, false);
    InstEmitNeonCommon.InsertResult(context, saved.Operand, rm, false);
}
|
||||
|
||||
/// <summary>
/// Emits VTBL (<paramref name="op"/> false) or VTBX (<paramref name="op"/> true) using AArch64 TBL/TBX.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination doubleword register.</param>
/// <param name="rn">First guest table register.</param>
/// <param name="rm">Guest register holding the indices.</param>
/// <param name="op">False for VTBL, true for VTBX.</param>
/// <param name="len">Number of table registers minus one (0 to 3).</param>
public static void Vtbl(CodeGenContext context, uint rd, uint rn, uint rm, bool op, uint len)
{
    // AArch32 tables are made of 64-bit registers while AArch64 TBL/TBX operates on 128-bit ones,
    // so the 64-bit table registers are packed in pairs into 128-bit registers where needed.

    // TODO: Peephole optimization to combine adjacent TBL instructions?

    Debug.Assert(len <= 3);

    bool isTbx = op;

    len = Math.Min(len, 31 - rn);

    // TBX with an odd number of 64-bit table registers: indices that are out of range for the guest
    // would still be in range for the (larger) host table, so they must be forced out of range.
    bool clampIndices = isTbx && (len & 1) == 0;

    using ScopedRegister indices = InstEmitNeonCommon.MoveScalarToSide(context, rm, false, clampIndices);

    if (clampIndices)
    {
        // The index register is modified below, so it has to be a private copy.
        Debug.Assert(indices.IsAllocated);

        using ScopedRegister mask = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        if (len != 0)
        {
            // mask = (index >= 8 * regs) ? ~0 : 0; index |= mask
            (uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = Split(8u * (len + 1));

            context.Arm64Assembler.Movi(mask.Operand, h, g, f, e, d, 0xe, c, b, a, 0, 0);
            context.Arm64Assembler.CmgeRegV(mask.Operand, indices.Operand, mask.Operand, 0, 0);
            context.Arm64Assembler.OrrReg(indices.Operand, indices.Operand, mask.Operand, 0);
        }
        else
        {
            // mask = ((index >> 3) == 0) ? ~0 : 0; index |= ~mask
            (uint immb, uint immh) = InstEmitNeonCommon.GetImmbImmhForShift(3, 0, isShl: false);

            context.Arm64Assembler.UshrV(mask.Operand, indices.Operand, immb, immh, 0);
            context.Arm64Assembler.CmeqZeroV(mask.Operand, mask.Operand, 0, 0);
            context.Arm64Assembler.Orn(indices.Operand, indices.Operand, mask.Operand, 0);
        }
    }

    ScopedRegister firstTable = default;
    ScopedRegister secondTable = default;

    if (len == 0)
    {
        firstTable = MoveHalfToSideZeroUpper(context, rn);
    }
    else if (len == 1)
    {
        firstTable = MoveDoublewords(context, rn, rn + 1);
    }
    else if (len == 2)
    {
        firstTable = MoveDoublewords(context, rn, rn + 1, isOdd: true);
        secondTable = MoveHalfToSideZeroUpper(context, rn + 2);
    }
    else if (len == 3)
    {
        firstTable = MoveDoublewords(context, rn, rn + 1);
        secondTable = MoveDoublewords(context, rn + 2, rn + 3);
    }

    // TBL/TBX need consecutive table registers; this relies on two back-to-back allocator calls
    // returning consecutive registers.
    Debug.Assert(len < 2 || firstTable.Operand.GetRegister().Index + 1 == secondTable.Operand.GetRegister().Index);

    using ScopedRegister result = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    if (isTbx)
    {
        // TBX keeps destination bytes for out-of-range indices, so seed the result with the current Rd.
        InstEmitNeonCommon.MoveScalarToSide(context, result.Operand, rd, false);

        context.Arm64Assembler.Tbx(result.Operand, firstTable.Operand, len >> 1, indices.Operand, 0);
    }
    else
    {
        context.Arm64Assembler.Tbl(result.Operand, firstTable.Operand, len >> 1, indices.Operand, 0);
    }

    InstEmitNeonCommon.InsertResult(context, result.Operand, rd, false);

    firstTable.Dispose();

    if (len > 1)
    {
        secondTable.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// Emits VTRN using the AArch64 TRN1/TRN2 pair.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">First guest register (read and written).</param>
/// <param name="rm">Second guest register (read and written).</param>
/// <param name="size">Element size selector.</param>
/// <param name="q">0 for 64-bit operands, non-zero for 128-bit operands.</param>
public static void Vtrn(CodeGenContext context, uint rd, uint rm, uint size, uint q)
    => EmitVectorBinaryInterleavedTrn(context, rd, rm, size, q, context.Arm64Assembler.Trn1, context.Arm64Assembler.Trn2);
|
||||
|
||||
/// <summary>
/// Emits VUZP using the AArch64 UZP1/UZP2 pair.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">First guest register (read and written).</param>
/// <param name="rm">Second guest register (read and written).</param>
/// <param name="size">Element size selector.</param>
/// <param name="q">0 for 64-bit operands, non-zero for 128-bit operands.</param>
public static void Vuzp(CodeGenContext context, uint rd, uint rm, uint size, uint q)
    => EmitVectorBinaryInterleaved(context, rd, rm, size, q, context.Arm64Assembler.Uzp1, context.Arm64Assembler.Uzp2);
|
||||
|
||||
/// <summary>
/// Emits VZIP using the AArch64 ZIP1/ZIP2 pair.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">First guest register (read and written).</param>
/// <param name="rm">Second guest register (read and written).</param>
/// <param name="size">Element size selector.</param>
/// <param name="q">0 for 64-bit operands, non-zero for 128-bit operands.</param>
public static void Vzip(CodeGenContext context, uint rd, uint rm, uint size, uint q)
    => EmitVectorBinaryInterleaved(context, rd, rm, size, q, context.Arm64Assembler.Zip1, context.Arm64Assembler.Zip2);
|
||||
|
||||
/// <summary>
/// Breaks the low 8 bits of <paramref name="imm8"/> into individual bits.
/// </summary>
/// <param name="imm8">Value to split; bits above bit 7 are ignored.</param>
/// <returns>The bits ordered from most significant (bit 7) to least significant (bit 0), each 0 or 1.</returns>
public static (uint, uint, uint, uint, uint, uint, uint, uint) Split(uint imm8)
{
    uint Bit(int position) => (imm8 >> position) & 1;

    return (Bit(7), Bit(6), Bit(5), Bit(4), Bit(3), Bit(2), Bit(1), Bit(0));
}
|
||||
|
||||
/// <summary>
/// Copies guest doubleword register <paramref name="srcReg"/> into a newly allocated temporary using a
/// scalar DUP (element). Per the method name, the upper half of the temporary is expected to end up zeroed.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="srcReg">Guest doubleword register to copy.</param>
/// <returns>The temporary holding the copy; the caller is responsible for disposing it.</returns>
private static ScopedRegister MoveHalfToSideZeroUpper(CodeGenContext context, uint srcReg)
{
    // Bit 0 selects the half of the host register, the remaining bits select the host register.
    uint half = srcReg & 1u;

    Operand hostSource = context.RegisterAllocator.RemapSimdRegister((int)(srcReg >> 1));
    ScopedRegister copy = context.RegisterAllocator.AllocateTempFpRegisterScoped(false);

    uint sourceImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(half, false);

    context.Arm64Assembler.DupEltScalarFromElement(copy.Operand, hostSource, sourceImm5);

    return copy;
}
|
||||
|
||||
/// <summary>
/// Produces a register whose low doubleword is guest register <paramref name="lowerReg"/> and whose high
/// doubleword is guest register <paramref name="upperReg"/>.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="lowerReg">Guest doubleword register for the low half.</param>
/// <param name="upperReg">Guest doubleword register for the high half.</param>
/// <param name="isOdd">When true, a temporary is always allocated even if the pair already shares a host register.</param>
/// <returns>
/// Either a wrapper around the existing host register (when the pair is even-aligned, adjacent and
/// <paramref name="isOdd"/> is false) or a newly allocated temporary.
/// </returns>
private static ScopedRegister MoveDoublewords(CodeGenContext context, uint lowerReg, uint upperReg, bool isOdd = false)
{
    bool alreadyPacked = !isOdd && (lowerReg & 1) == 0 && upperReg == lowerReg + 1;

    if (alreadyPacked)
    {
        Operand existing = context.RegisterAllocator.RemapSimdRegister((int)(lowerReg >> 1));

        return new ScopedRegister(context.RegisterAllocator, existing, false);
    }

    Operand lowerHost = context.RegisterAllocator.RemapSimdRegister((int)(lowerReg >> 1));
    Operand upperHost = context.RegisterAllocator.RemapSimdRegister((int)(upperReg >> 1));
    ScopedRegister packed = context.RegisterAllocator.AllocateTempFpRegisterScoped(false);

    // Low half: scalar DUP from whichever half of the host register holds lowerReg.
    uint lowerImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(lowerReg & 1u, false);

    context.Arm64Assembler.DupEltScalarFromElement(packed.Operand, lowerHost, lowerImm5);

    // High half: insert into element 1 from whichever half of the host register holds upperReg.
    uint upperImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(1, false);

    context.Arm64Assembler.InsElt(packed.Operand, upperHost, (upperReg & 1u) << 3, upperImm5);

    return packed;
}
|
||||
|
||||
/// <summary>
/// Emits a two-output interleave for VTRN. It defers to <see cref="EmitVectorBinaryInterleaved"/> unless both
/// operands are the same register, in which case the second operation consumes the first operation's result.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">First guest register.</param>
/// <param name="rm">Second guest register.</param>
/// <param name="size">Element size selector forwarded to the actions.</param>
/// <param name="q">0 for 64-bit operands, non-zero for 128-bit operands.</param>
/// <param name="action1">Emitter for the first operation.</param>
/// <param name="action2">Emitter for the second operation.</param>
private static void EmitVectorBinaryInterleavedTrn(
    CodeGenContext context,
    uint rd,
    uint rm,
    uint size,
    uint q,
    Action<Operand, Operand, Operand, uint, uint> action1,
    Action<Operand, Operand, Operand, uint, uint> action2)
{
    if (rd != rm)
    {
        EmitVectorBinaryInterleaved(context, rd, rm, size, q, action1, action2);

        return;
    }

    // The manual describes the behaviour with identical registers as "unpredictable".

    if (q != 0)
    {
        Operand first = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        Operand second = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

        using ScopedRegister intermediate = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        action1(intermediate.Operand, first, second, size, q);
        action2(second, first, intermediate.Operand, size, q);

        return;
    }

    using ScopedRegister firstSide = InstEmitNeonCommon.MoveScalarToSide(context, rd, false);
    using ScopedRegister secondSide = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);

    using ScopedRegister firstResult = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
    using ScopedRegister secondResult = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, firstSide, secondSide);

    action1(firstResult.Operand, firstSide.Operand, secondSide.Operand, size, q);
    action2(secondResult.Operand, firstSide.Operand, firstResult.Operand, size, q);

    InstEmitNeonCommon.InsertResult(context, secondResult.Operand, rd, false);
}
|
||||
|
||||
/// <summary>
/// Emits a two-output interleave: <paramref name="action1"/> produces the new value of <paramref name="rd"/>
/// and <paramref name="action2"/> the new value of <paramref name="rm"/>, both computed from the original inputs.
/// When both registers are the same, only the second result is written back.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">First guest register.</param>
/// <param name="rm">Second guest register.</param>
/// <param name="size">Element size selector forwarded to the actions.</param>
/// <param name="q">0 for 64-bit operands, non-zero for 128-bit operands.</param>
/// <param name="action1">Emitter for the first operation.</param>
/// <param name="action2">Emitter for the second operation.</param>
private static void EmitVectorBinaryInterleaved(
    CodeGenContext context,
    uint rd,
    uint rm,
    uint size,
    uint q,
    Action<Operand, Operand, Operand, uint, uint> action1,
    Action<Operand, Operand, Operand, uint, uint> action2)
{
    bool distinctRegisters = rd != rm;

    if (q != 0)
    {
        Operand first = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        Operand second = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

        using ScopedRegister pending = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        // The first result is parked in a temporary because the second operation still reads Rd.
        action1(pending.Operand, first, second, size, q);
        action2(second, first, second, size, q);

        if (distinctRegisters)
        {
            context.Arm64Assembler.OrrReg(first, pending.Operand, pending.Operand, 1);
        }

        return;
    }

    using ScopedRegister firstSide = InstEmitNeonCommon.MoveScalarToSide(context, rd, false);
    using ScopedRegister secondSide = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);

    using ScopedRegister firstResult = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
    using ScopedRegister secondResult = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, firstSide, secondSide);

    action1(firstResult.Operand, firstSide.Operand, secondSide.Operand, size, q);
    action2(secondResult.Operand, firstSide.Operand, secondSide.Operand, size, q);

    if (distinctRegisters)
    {
        InstEmitNeonCommon.InsertResult(context, firstResult.Operand, rd, false);
    }

    InstEmitNeonCommon.InsertResult(context, secondResult.Operand, rm, false);
}
|
||||
}
|
||||
}
|
@ -0,0 +1,105 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for the rounding family of vector operations. Every method forwards to a helper in
    /// <see cref="InstEmitNeonCommon"/>, passing the matching <c>Arm64Assembler</c> emitter(s).
    /// </summary>
    static class InstEmitNeonRound
    {
        /// <summary>Forwards to EmitVectorBinaryNarrow with the Raddhn emitter.</summary>
        public static void Vraddhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Raddhn);

        /// <summary>Forwards to EmitVectorBinary with Urhadd when <paramref name="u"/> is set, Srhadd otherwise. No second emitter is supplied.</summary>
        public static void Vrhadd(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        {
            if (u)
            {
                InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.Urhadd, null);
            }
            else
            {
                InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.Srhadd, null);
            }
        }

        /// <summary>
        /// Forwards to EmitVectorBinary with the Urshl/Srshl emitters.
        /// Note that rm is passed before rn to the helper.
        /// </summary>
        public static void Vrshl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        {
            if (u)
            {
                InstEmitNeonCommon.EmitVectorBinary(context, rd, rm, rn, size, q, context.Arm64Assembler.UrshlV, context.Arm64Assembler.UrshlS);
            }
            else
            {
                InstEmitNeonCommon.EmitVectorBinary(context, rd, rm, rn, size, q, context.Arm64Assembler.SrshlV, context.Arm64Assembler.SrshlS);
            }
        }

        /// <summary>
        /// Derives size and right shift amount from imm6 (with <paramref name="l"/> as bit 6) and
        /// forwards to EmitVectorBinaryShift with the Urshr/Srshr emitters.
        /// </summary>
        public static void Vrshr(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
            uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

            if (u)
            {
                InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: false, context.Arm64Assembler.UrshrV, context.Arm64Assembler.UrshrS);
            }
            else
            {
                InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: false, context.Arm64Assembler.SrshrV, context.Arm64Assembler.SrshrS);
            }
        }

        /// <summary>Derives size and right shift amount from imm6 and forwards to EmitVectorBinaryNarrowShift with the Rshrn emitter.</summary>
        public static void Vrshrn(CodeGenContext context, uint rd, uint rm, uint imm6)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
            uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

            InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.Rshrn);
        }

        /// <summary>
        /// Derives size and right shift amount from imm6 (with <paramref name="l"/> as bit 6) and
        /// forwards to EmitVectorTernaryRdShift with the Ursra/Srsra emitters.
        /// </summary>
        public static void Vrsra(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
            uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

            if (u)
            {
                InstEmitNeonCommon.EmitVectorTernaryRdShift(context, rd, rm, shift, size, q, isShl: false, context.Arm64Assembler.UrsraV, context.Arm64Assembler.UrsraS);
            }
            else
            {
                InstEmitNeonCommon.EmitVectorTernaryRdShift(context, rd, rm, shift, size, q, isShl: false, context.Arm64Assembler.SrsraV, context.Arm64Assembler.SrsraS);
            }
        }

        /// <summary>Forwards to EmitVectorBinaryNarrow with the Rsubhn emitter.</summary>
        public static void Vrsubhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Rsubhn);

        /// <summary>Forwards to EmitVectorUnaryAnyF with the Frinta emitters.</summary>
        public static void Vrinta(CodeGenContext context, uint rd, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintaSingleAndDouble, context.Arm64Assembler.FrintaHalf);

        /// <summary>Forwards to EmitVectorUnaryAnyF with the Frintm emitters.</summary>
        public static void Vrintm(CodeGenContext context, uint rd, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintmSingleAndDouble, context.Arm64Assembler.FrintmHalf);

        /// <summary>Forwards to EmitVectorUnaryAnyF with the Frintn emitters.</summary>
        public static void Vrintn(CodeGenContext context, uint rd, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintnSingleAndDouble, context.Arm64Assembler.FrintnHalf);

        /// <summary>Forwards to EmitVectorUnaryAnyF with the Frintp emitters.</summary>
        public static void Vrintp(CodeGenContext context, uint rd, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintpSingleAndDouble, context.Arm64Assembler.FrintpHalf);

        /// <summary>Forwards to EmitVectorUnaryAnyF with the Frintx emitters.</summary>
        public static void Vrintx(CodeGenContext context, uint rd, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintxSingleAndDouble, context.Arm64Assembler.FrintxHalf);

        /// <summary>Forwards to EmitVectorUnaryAnyF with the Frintz emitters.</summary>
        public static void Vrintz(CodeGenContext context, uint rd, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintzSingleAndDouble, context.Arm64Assembler.FrintzHalf);
    }
}
|
@ -0,0 +1,205 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for the saturating family of vector operations. Every method selects the
    /// <c>Arm64Assembler</c> emitter(s) to use and forwards to a helper in <see cref="InstEmitNeonCommon"/>.
    /// </summary>
    static class InstEmitNeonSaturate
    {
        /// <summary>Forwards to EmitVectorUnary with the SqabsV emitter.</summary>
        public static void Vqabs(CodeGenContext context, uint rd, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.SqabsV);

        /// <summary>Forwards to EmitVectorBinary with the Uqadd emitters when <paramref name="u"/> is set, Sqadd otherwise.</summary>
        public static void Vqadd(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        {
            if (u)
            {
                InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.UqaddV, context.Arm64Assembler.UqaddS);
            }
            else
            {
                InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SqaddV, context.Arm64Assembler.SqaddS);
            }
        }

        /// <summary>Forwards to EmitVectorBinaryLong with the SqdmlalVecV emitter.</summary>
        public static void Vqdmlal(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlalVecV);

        /// <summary>Forwards to EmitVectorBinaryLongByScalar with the SqdmlalElt2regElement emitter.</summary>
        public static void VqdmlalS(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlalElt2regElement);

        /// <summary>Forwards to EmitVectorBinaryLong with the SqdmlslVecV emitter.</summary>
        public static void Vqdmlsl(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlslVecV);

        /// <summary>Forwards to EmitVectorBinaryLongByScalar with the SqdmlslElt2regElement emitter.</summary>
        public static void VqdmlslS(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlslElt2regElement);

        /// <summary>Forwards to EmitVectorBinary with the SqdmulhVecV and SqdmulhVecS emitters.</summary>
        public static void Vqdmulh(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SqdmulhVecV, context.Arm64Assembler.SqdmulhVecS);

        /// <summary>Forwards to EmitVectorBinaryByScalar with the SqdmulhElt2regElement emitter.</summary>
        public static void VqdmulhS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorBinaryByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqdmulhElt2regElement);

        /// <summary>Forwards to EmitVectorBinaryLong with the SqdmullVecV emitter.</summary>
        public static void Vqdmull(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, context.Arm64Assembler.SqdmullVecV);

        /// <summary>Forwards to EmitVectorBinaryLongByScalar with the SqdmullElt2regElement emitter.</summary>
        public static void VqdmullS(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, context.Arm64Assembler.SqdmullElt2regElement);

        /// <summary>
        /// Forwards to EmitVectorUnaryNarrow, picking the emitter from <paramref name="op"/>:
        /// 3 selects UqxtnV, 1 selects SqxtunV and any other value selects SqxtnV.
        /// </summary>
        public static void Vqmovn(CodeGenContext context, uint rd, uint rm, uint op, uint size)
        {
            switch (op)
            {
                case 3:
                    InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, size, context.Arm64Assembler.UqxtnV);
                    break;
                case 1:
                    InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, size, context.Arm64Assembler.SqxtunV);
                    break;
                default:
                    InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, size, context.Arm64Assembler.SqxtnV);
                    break;
            }
        }

        /// <summary>Forwards to EmitVectorUnary with the SqnegV emitter.</summary>
        public static void Vqneg(CodeGenContext context, uint rd, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.SqnegV);

        /// <summary>Forwards to EmitVectorTernaryRd with the SqrdmlahVecV emitter.</summary>
        public static void Vqrdmlah(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlahVecV);

        /// <summary>Forwards to EmitVectorTernaryRdByScalar with the SqrdmlahElt2regElement emitter.</summary>
        public static void VqrdmlahS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlahElt2regElement);

        /// <summary>Forwards to EmitVectorTernaryRd with the SqrdmlshVecV emitter.</summary>
        public static void Vqrdmlsh(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlshVecV);

        /// <summary>Forwards to EmitVectorTernaryRdByScalar with the SqrdmlshElt2regElement emitter.</summary>
        public static void VqrdmlshS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlshElt2regElement);

        /// <summary>Forwards to EmitVectorBinary with the SqrdmulhVecV and SqrdmulhVecS emitters.</summary>
        public static void Vqrdmulh(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmulhVecV, context.Arm64Assembler.SqrdmulhVecS);

        /// <summary>Forwards to EmitVectorBinaryByScalar with the SqrdmulhElt2regElement emitter.</summary>
        public static void VqrdmulhS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
            => InstEmitNeonCommon.EmitVectorBinaryByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmulhElt2regElement);

        /// <summary>
        /// Forwards to EmitVectorBinary with the SqrshlV and SqrshlS emitters.
        /// Note that rm is passed before rn to the helper.
        /// </summary>
        public static void Vqrshl(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        {
            // NOTE(review): only the signed emitters are used and no unsigned flag is accepted here,
            // unlike VqshlR below; confirm that this is intended.
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rm, rn, size, q, context.Arm64Assembler.SqrshlV, context.Arm64Assembler.SqrshlS);
        }

        /// <summary>
        /// Derives size and right shift amount from imm6 and forwards to EmitVectorBinaryNarrowShift.
        /// (u, op == 0) selects SqrshrunV, (!u, op == 1) selects SqrshrnV, and the remaining case,
        /// asserted to be (u, op == 1), selects UqrshrnV.
        /// </summary>
        public static void Vqrshrn(CodeGenContext context, uint rd, uint rm, bool u, uint op, uint imm6)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
            uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

            if (u && op == 0)
            {
                InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqrshrunV);

                return;
            }

            if (!u && op == 1)
            {
                InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqrshrnV);

                return;
            }

            Debug.Assert(u && op == 1); // !u && op == 0 is the encoding for another instruction.

            InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.UqrshrnV);
        }

        /// <summary>
        /// Derives size and left shift amount from imm6 (with <paramref name="l"/> as bit 6) and forwards to
        /// EmitVectorBinaryShift. (u, op == 0) selects the Sqshlu emitters, (!u, op == 1) the SqshlImm
        /// emitters, and the remaining case, asserted to be (u, op == 1), the UqshlImm emitters.
        /// </summary>
        public static void VqshlI(CodeGenContext context, uint rd, uint rm, bool u, uint op, uint l, uint imm6, uint q)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
            uint shift = InstEmitNeonShift.GetShiftLeft(imm6, size);

            if (u && op == 0)
            {
                InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.SqshluV, context.Arm64Assembler.SqshluS);

                return;
            }

            if (!u && op == 1)
            {
                InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.SqshlImmV, context.Arm64Assembler.SqshlImmS);

                return;
            }

            Debug.Assert(u && op == 1); // !u && op == 0 is the encoding for another instruction.

            InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.UqshlImmV, context.Arm64Assembler.UqshlImmS);
        }

        /// <summary>
        /// Forwards to EmitVectorBinary with the UqshlReg emitters when <paramref name="u"/> is set,
        /// SqshlReg otherwise. Note that rm is passed before rn to the helper.
        /// </summary>
        public static void VqshlR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        {
            if (!u)
            {
                InstEmitNeonCommon.EmitVectorBinary(context, rd, rm, rn, size, q, context.Arm64Assembler.SqshlRegV, context.Arm64Assembler.SqshlRegS);

                return;
            }

            InstEmitNeonCommon.EmitVectorBinary(context, rd, rm, rn, size, q, context.Arm64Assembler.UqshlRegV, context.Arm64Assembler.UqshlRegS);
        }

        /// <summary>
        /// Derives size and right shift amount from imm6 and forwards to EmitVectorBinaryNarrowShift.
        /// (u, op == 0) selects SqshrunV, (!u, op == 1) selects SqshrnV, and the remaining case,
        /// asserted to be (u, op == 1), selects UqshrnV.
        /// </summary>
        public static void Vqshrn(CodeGenContext context, uint rd, uint rm, bool u, uint op, uint imm6)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
            uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

            if (u && op == 0)
            {
                InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqshrunV);

                return;
            }

            if (!u && op == 1)
            {
                InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqshrnV);

                return;
            }

            Debug.Assert(u && op == 1); // !u && op == 0 is the encoding for another instruction.

            InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.UqshrnV);
        }

        /// <summary>Forwards to EmitVectorBinary with the Uqsub emitters when <paramref name="u"/> is set, Sqsub otherwise.</summary>
        public static void Vqsub(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        {
            if (u)
            {
                InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.UqsubV, context.Arm64Assembler.UqsubS);
            }
            else
            {
                InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SqsubV, context.Arm64Assembler.SqsubS);
            }
        }
    }
}
|
@ -0,0 +1,123 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for vector shift operations, plus the helpers that turn an imm6 field into a shift amount.
    /// </summary>
    static class InstEmitNeonShift
    {
        /// <summary>
        /// Derives size and left shift amount from imm6 and forwards to EmitVectorBinaryLongShift,
        /// using Ushll when <paramref name="u"/> is set and Sshll otherwise.
        /// </summary>
        public static void Vshll(CodeGenContext context, uint rd, uint rm, uint imm6, bool u)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6);
            uint shift = GetShiftLeft(imm6, size);

            InstEmitNeonCommon.EmitVectorBinaryLongShift(context, rd, rm, shift, size, isShl: true, u ? context.Arm64Assembler.Ushll : context.Arm64Assembler.Sshll);
        }

        /// <summary>
        /// Emits a long left shift by the full element width (8 &lt;&lt; size), split into two Sshll steps.
        /// </summary>
        public static void Vshll2(CodeGenContext context, uint rd, uint rm, uint size)
        {
            // Shift can't be encoded, so shift by value - 1 first, then shift again by 1.
            // Doesn't matter if we do a signed or unsigned shift in this case since all sign bits will be shifted out.

            uint shift = 8u << (int)size;

            InstEmitNeonCommon.EmitVectorBinaryLongShift(context, rd, rm, shift - 1, size, isShl: true, context.Arm64Assembler.Sshll);

            // NOTE(review): this second step reads rd with the same (narrow) size although rd presumably
            // already holds widened elements after the first step; confirm EmitVectorBinaryLongShift
            // produces the intended result here.
            InstEmitNeonCommon.EmitVectorBinaryLongShift(context, rd, rd, 1, size, isShl: true, context.Arm64Assembler.Sshll);
        }

        /// <summary>
        /// Derives size and left shift amount from imm6 (with <paramref name="l"/> as bit 6) and
        /// forwards to EmitVectorBinaryShift with the ShlV and ShlS emitters.
        /// </summary>
        public static void VshlI(CodeGenContext context, uint rd, uint rm, uint l, uint imm6, uint q)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
            uint shift = GetShiftLeft(imm6, size);

            InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.ShlV, context.Arm64Assembler.ShlS);
        }

        /// <summary>
        /// Forwards to EmitVectorBinary with the Ushl emitters when <paramref name="u"/> is set,
        /// Sshl otherwise. Note that rm is passed before rn to the helper.
        /// </summary>
        public static void VshlR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        {
            InstEmitNeonCommon.EmitVectorBinary(
                context,
                rd,
                rm,
                rn,
                size,
                q,
                u ? context.Arm64Assembler.UshlV : context.Arm64Assembler.SshlV,
                u ? context.Arm64Assembler.UshlS : context.Arm64Assembler.SshlS);
        }

        /// <summary>
        /// Derives size and right shift amount from imm6 (with <paramref name="l"/> as bit 6) and
        /// forwards to EmitVectorBinaryShift with the Ushr emitters when <paramref name="u"/> is set, Sshr otherwise.
        /// </summary>
        public static void Vshr(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
            uint shift = GetShiftRight(imm6, size);

            InstEmitNeonCommon.EmitVectorBinaryShift(
                context,
                rd,
                rm,
                shift,
                size,
                q,
                isShl: false,
                u ? context.Arm64Assembler.UshrV : context.Arm64Assembler.SshrV,
                u ? context.Arm64Assembler.UshrS : context.Arm64Assembler.SshrS);
        }

        /// <summary>Derives size and right shift amount from imm6 and forwards to EmitVectorBinaryNarrowShift with the Shrn emitter.</summary>
        public static void Vshrn(CodeGenContext context, uint rd, uint rm, uint imm6)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
            uint shift = GetShiftRight(imm6, size);

            InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.Shrn);
        }

        /// <summary>
        /// Derives size and left shift amount from imm6 (with <paramref name="l"/> as bit 6) and
        /// forwards to EmitVectorBinaryShift with the SliV and SliS emitters.
        /// </summary>
        public static void Vsli(CodeGenContext context, uint rd, uint rm, uint l, uint imm6, uint q)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
            uint shift = GetShiftLeft(imm6, size);

            InstEmitNeonCommon.EmitVectorBinaryShift(
                context,
                rd,
                rm,
                shift,
                size,
                q,
                isShl: true,
                context.Arm64Assembler.SliV,
                context.Arm64Assembler.SliS);
        }

        /// <summary>
        /// Derives size and right shift amount from imm6 (with <paramref name="l"/> as bit 6) and
        /// forwards to EmitVectorTernaryRdShift with the Usra emitters when <paramref name="u"/> is set, Ssra otherwise.
        /// </summary>
        public static void Vsra(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
            uint shift = GetShiftRight(imm6, size);

            InstEmitNeonCommon.EmitVectorTernaryRdShift(
                context,
                rd,
                rm,
                shift,
                size,
                q,
                isShl: false,
                u ? context.Arm64Assembler.UsraV : context.Arm64Assembler.SsraV,
                u ? context.Arm64Assembler.UsraS : context.Arm64Assembler.SsraS);
        }

        /// <summary>
        /// Derives size and right shift amount from imm6 (with <paramref name="l"/> as bit 6) and
        /// forwards to EmitVectorBinaryShift with the SriV and SriS emitters.
        /// </summary>
        public static void Vsri(CodeGenContext context, uint rd, uint rm, uint l, uint imm6, uint q)
        {
            uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
            uint shift = GetShiftRight(imm6, size);

            InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: false, context.Arm64Assembler.SriV, context.Arm64Assembler.SriS);
        }

        /// <summary>
        /// Converts an imm6 field into a left shift amount.
        /// </summary>
        /// <param name="imm6">Immediate field value</param>
        /// <param name="size">Element size index</param>
        /// <returns>imm6 minus (8 &lt;&lt; size) for sizes below 3, imm6 unchanged otherwise</returns>
        public static uint GetShiftLeft(uint imm6, uint size)
        {
            return size < 3 ? imm6 - (8u << (int)size) : imm6;
        }

        /// <summary>
        /// Converts an imm6 field into a right shift amount.
        /// </summary>
        /// <param name="imm6">Immediate field value</param>
        /// <param name="size">Element size index</param>
        /// <returns>64 minus imm6 for size 3, (16 &lt;&lt; size) minus imm6 otherwise</returns>
        public static uint GetShiftRight(uint imm6, uint size)
        {
            return (size == 3 ? 64u : (16u << (int)size)) - imm6;
        }
    }
}
|
@ -0,0 +1,77 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for the VMRS and VMSR system register moves.
    /// </summary>
    static class InstEmitNeonSystem
    {
        /// <summary>
        /// Emits a read of the system register selected by <paramref name="reg"/> into <paramref name="rt"/>.
        /// Only register 1 (FPSCR) is backed by real state; any other register reads as zero.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="rt">Destination register; the PC register index selects the "copy to NZCV flags" form</param>
        /// <param name="reg">System register selector</param>
        public static void Vmrs(CodeGenContext context, uint rt, uint reg)
        {
            if (context.ConsumeSkipNextInstruction())
            {
                // A preceding VCMP was combined with this VMRS, so FCMP/FCMPE has already
                // written PSTATE.NZCV and there is no code left to emit.
                context.SetNzcvModified();

                return;
            }

            if (reg != 1)
            {
                Operand zeroedRt = InstEmitCommon.GetOutputGpr(context, rt);

                context.Arm64Assembler.Mov(zeroedRt, 0u);

                return;
            }

            // FPSCR

            Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

            if (rt == RegisterUtils.PcRegister)
            {
                using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                // Load the stored flags word and bring its top four bits down to bits 3:0.
                context.Arm64Assembler.LdrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FpFlagsBaseOffset);
                context.Arm64Assembler.Lsr(flags.Operand, flags.Operand, InstEmitCommon.Const(28));

                InstEmitCommon.RestoreNzcvFlags(context, flags.Operand);

                context.SetNzcvModified();

                return;
            }

            // FPSCR is a combination of the FPCR and FPSR registers.
            // We also need to set the FPSR NZCV bits that no longer exist on AArch64.

            using ScopedRegister scratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            Operand destination = InstEmitCommon.GetOutputGpr(context, rt);

            context.Arm64Assembler.MrsFpsr(destination);
            context.Arm64Assembler.MrsFpcr(scratch.Operand);
            context.Arm64Assembler.Orr(destination, destination, scratch.Operand);

            // Merge in only the top four bits of the stored flags word (the low 28 bits are cleared first).
            context.Arm64Assembler.LdrRiUn(scratch.Operand, nativeContext, NativeContextOffsets.FpFlagsBaseOffset);
            context.Arm64Assembler.Bfc(scratch.Operand, 0, 28);
            context.Arm64Assembler.Orr(destination, destination, scratch.Operand);
        }

        /// <summary>
        /// Emits a write of <paramref name="rt"/> to the system register selected by <paramref name="reg"/>.
        /// Only register 1 (FPSCR) is handled; writes to any other register emit nothing.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="rt">Source register</param>
        /// <param name="reg">System register selector</param>
        public static void Vmsr(CodeGenContext context, uint rt, uint reg)
        {
            if (reg != 1)
            {
                return;
            }

            // FPSCR

            // TODO: Do not set bits related to features that are not supported (like FP16)?

            Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);
            Operand source = InstEmitCommon.GetInputGpr(context, rt);

            // The same value goes to FPCR, FPSR and the flags word kept in the native context.
            context.Arm64Assembler.MsrFpcr(source);
            context.Arm64Assembler.MsrFpsr(source);
            context.Arm64Assembler.StrRiUn(source, nativeContext, NativeContextOffsets.FpFlagsBaseOffset);
        }
    }
}
|
@ -0,0 +1,452 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
static class InstEmitSaturate
|
||||
{
|
||||
/// <summary>
/// Emits a saturating add of rn and rm into rd (no doubling), via EmitAddSubSaturate.
/// </summary>
public static void Qadd(CodeGenContext context, uint rd, uint rn, uint rm)
    => EmitAddSubSaturate(context, rd, rn, rm, doubling: false, add: true);
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitSigned16BitPair, emits an add followed by
/// a signed saturation to 16 bits. The Q flag is not set on saturation.
/// </summary>
public static void Qadd16(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Add(dest, lhs, rhs);

        // Clamp the sum in place.
        EmitSaturateRange(context, result: dest, value: dest, saturateTo: 16, unsigned: false, setQ: false);
    });
}
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitSigned8BitPair, emits an add followed by
/// a signed saturation to 8 bits. The Q flag is not set on saturation.
/// </summary>
public static void Qadd8(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Add(dest, lhs, rhs);

        // Clamp the sum in place.
        EmitSaturateRange(context, result: dest, value: dest, saturateTo: 8, unsigned: false, setQ: false);
    });
}
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitSigned16BitXPair, emits a subtract when the
/// element index is 0 and an add otherwise, followed by a signed saturation to 16 bits.
/// The Q flag is not set on saturation.
/// </summary>
public static void Qasx(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, (dest, lhs, rhs, element) =>
    {
        if (element != 0)
        {
            context.Arm64Assembler.Add(dest, lhs, rhs);
        }
        else
        {
            context.Arm64Assembler.Sub(dest, lhs, rhs);
        }

        EmitSaturateRange(context, result: dest, value: dest, saturateTo: 16, unsigned: false, setQ: false);
    });
}
|
||||
|
||||
/// <summary>
/// Emits a saturating add into rd where rn is doubled (and saturated) first, via EmitAddSubSaturate.
/// </summary>
public static void Qdadd(CodeGenContext context, uint rd, uint rn, uint rm)
    => EmitAddSubSaturate(context, rd, rn, rm, doubling: true, add: true);
|
||||
|
||||
/// <summary>
/// Emits a saturating subtract into rd where rn is doubled (and saturated) first, via EmitAddSubSaturate.
/// </summary>
public static void Qdsub(CodeGenContext context, uint rd, uint rn, uint rm)
    => EmitAddSubSaturate(context, rd, rn, rm, doubling: true, add: false);
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitSigned16BitXPair, emits an add when the
/// element index is 0 and a subtract otherwise, followed by a signed saturation to 16 bits.
/// The Q flag is not set on saturation.
/// </summary>
public static void Qsax(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, (dest, lhs, rhs, element) =>
    {
        if (element != 0)
        {
            context.Arm64Assembler.Sub(dest, lhs, rhs);
        }
        else
        {
            context.Arm64Assembler.Add(dest, lhs, rhs);
        }

        EmitSaturateRange(context, result: dest, value: dest, saturateTo: 16, unsigned: false, setQ: false);
    });
}
|
||||
|
||||
/// <summary>
/// Emits a saturating subtract of rm from rn into rd (no doubling), via EmitAddSubSaturate.
/// </summary>
public static void Qsub(CodeGenContext context, uint rd, uint rn, uint rm)
    => EmitAddSubSaturate(context, rd, rn, rm, doubling: false, add: false);
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitSigned16BitPair, emits a subtract followed by
/// a signed saturation to 16 bits. The Q flag is not set on saturation.
/// </summary>
public static void Qsub16(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Sub(dest, lhs, rhs);

        // Clamp the difference in place.
        EmitSaturateRange(context, result: dest, value: dest, saturateTo: 16, unsigned: false, setQ: false);
    });
}
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitSigned8BitPair, emits a subtract followed by
/// a signed saturation to 8 bits. The Q flag is not set on saturation.
/// </summary>
public static void Qsub8(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Sub(dest, lhs, rhs);

        // Clamp the difference in place.
        EmitSaturateRange(context, result: dest, value: dest, saturateTo: 8, unsigned: false, setQ: false);
    });
}
|
||||
|
||||
/// <summary>
/// Emits a signed saturation of the (optionally shifted) rn into rd, via EmitSaturate.
/// The saturation width passed on is <paramref name="imm"/> + 1.
/// </summary>
public static void Ssat(CodeGenContext context, uint rd, uint imm, uint rn, bool sh, uint shift)
{
    uint saturateTo = imm + 1;

    EmitSaturate(context, rd, saturateTo, rn, sh, shift, unsigned: false);
}
|
||||
|
||||
/// <summary>
/// For each operand pair produced by InstEmitCommon.EmitSigned16BitPair, emits a signed saturation
/// to <paramref name="imm"/> + 1 bits. The Q flag is set on saturation (EmitSaturateRange default).
/// </summary>
public static void Ssat16(CodeGenContext context, uint rd, uint imm, uint rn)
{
    InstEmitCommon.EmitSigned16BitPair(context, rd, rn, (dest, source) =>
    {
        EmitSaturateRange(context, result: dest, value: source, saturateTo: imm + 1, unsigned: false);
    });
}
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitUnsigned16BitPair, emits an add followed by
/// an unsigned saturation to 16 bits via EmitSaturateUnsignedRange.
/// </summary>
public static void Uqadd16(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Add(dest, lhs, rhs);

        EmitSaturateUnsignedRange(context, value: dest, saturateTo: 16);
    });
}
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitUnsigned8BitPair, emits an add followed by
/// an unsigned saturation to 8 bits via EmitSaturateUnsignedRange.
/// </summary>
public static void Uqadd8(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Add(dest, lhs, rhs);

        EmitSaturateUnsignedRange(context, value: dest, saturateTo: 8);
    });
}
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitUnsigned16BitXPair, emits a subtract when the
/// element index is 0 and an add otherwise, followed by an unsigned saturation to 16 bits.
/// </summary>
public static void Uqasx(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, (dest, lhs, rhs, element) =>
    {
        if (element != 0)
        {
            context.Arm64Assembler.Add(dest, lhs, rhs);
        }
        else
        {
            context.Arm64Assembler.Sub(dest, lhs, rhs);
        }

        EmitSaturateUnsignedRange(context, value: dest, saturateTo: 16);
    });
}
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitUnsigned16BitXPair, emits an add when the
/// element index is 0 and a subtract otherwise, followed by an unsigned saturation to 16 bits.
/// </summary>
public static void Uqsax(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, (dest, lhs, rhs, element) =>
    {
        if (element != 0)
        {
            context.Arm64Assembler.Sub(dest, lhs, rhs);
        }
        else
        {
            context.Arm64Assembler.Add(dest, lhs, rhs);
        }

        EmitSaturateUnsignedRange(context, value: dest, saturateTo: 16);
    });
}
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitSigned16BitPair, emits a subtract followed by
/// an unsigned saturation to 16 bits via EmitSaturateUnsignedRange.
/// </summary>
public static void Uqsub16(CodeGenContext context, uint rd, uint rn, uint rm)
{
    // NOTE(review): this goes through the signed pair helper while Uqadd16 uses the unsigned one;
    // confirm that the signed extraction is intended for an unsigned subtract.
    InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Sub(dest, lhs, rhs);

        EmitSaturateUnsignedRange(context, value: dest, saturateTo: 16);
    });
}
|
||||
|
||||
/// <summary>
/// For each operand set produced by InstEmitCommon.EmitSigned8BitPair, emits a subtract followed by
/// an unsigned saturation to 8 bits via EmitSaturateUnsignedRange.
/// </summary>
public static void Uqsub8(CodeGenContext context, uint rd, uint rn, uint rm)
{
    // NOTE(review): this goes through the signed pair helper while Uqadd8 uses the unsigned one;
    // confirm that the signed extraction is intended for an unsigned subtract.
    InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Sub(dest, lhs, rhs);

        EmitSaturateUnsignedRange(context, value: dest, saturateTo: 8);
    });
}
|
||||
|
||||
/// <summary>
/// Emits an unsigned saturation of the (optionally shifted) rn into rd, via EmitSaturate.
/// The saturation width passed on is <paramref name="imm"/> itself.
/// </summary>
public static void Usat(CodeGenContext context, uint rd, uint imm, uint rn, bool sh, uint shift)
    => EmitSaturate(context, rd, imm, rn, sh, shift, unsigned: true);
|
||||
|
||||
/// <summary>
/// For each operand pair produced by InstEmitCommon.EmitSigned16BitPair, emits an unsigned saturation
/// to <paramref name="imm"/> bits. The Q flag is set on saturation (EmitSaturateRange default).
/// </summary>
public static void Usat16(CodeGenContext context, uint rd, uint imm, uint rn)
{
    InstEmitCommon.EmitSigned16BitPair(context, rd, rn, (dest, source) =>
    {
        EmitSaturateRange(context, result: dest, value: source, saturateTo: imm, unsigned: true);
    });
}
|
||||
|
||||
/// <summary>
/// Emits a 32-bit saturating add or subtract of rn and rm into rd, computed in 64-bit temporaries.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Destination register</param>
/// <param name="rn">First operand register (the one that is optionally doubled)</param>
/// <param name="rm">Second operand register</param>
/// <param name="doubling">When true, rn is shifted left by one and saturated with EmitSaturateLongToInt before the add/subtract</param>
/// <param name="add">True emits rn + rm, false emits rn - rm</param>
private static void EmitAddSubSaturate(CodeGenContext context, uint rd, uint rn, uint rm, bool doubling, bool add)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand firstInput = InstEmitCommon.GetInputGpr(context, rn);
    Operand secondInput = InstEmitCommon.GetInputGpr(context, rm);

    using ScopedRegister scratchN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister scratchM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // 64-bit views of the two temporaries, so that the intermediate result cannot wrap.
    Operand wideN = new Operand(OperandKind.Register, OperandType.I64, scratchN.Operand.Value);
    Operand wideM = new Operand(OperandKind.Register, OperandType.I64, scratchM.Operand.Value);

    context.Arm64Assembler.Sxtw(wideN, firstInput);
    context.Arm64Assembler.Sxtw(wideM, secondInput);

    if (doubling)
    {
        context.Arm64Assembler.Lsl(wideN, wideN, InstEmitCommon.Const(1));

        EmitSaturateLongToInt(context, wideN, wideN);
    }

    if (!add)
    {
        context.Arm64Assembler.Sub(wideN, wideN, wideM);
    }
    else
    {
        context.Arm64Assembler.Add(wideN, wideN, wideM);
    }

    EmitSaturateLongToInt(context, destination, wideN);
}
|
||||
|
||||
/// <summary>
/// Emits a saturation of rn to <paramref name="imm"/> bits into rd, applying an optional shift to rn first.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Destination register</param>
/// <param name="imm">Saturation width in bits, forwarded to EmitSaturateRange</param>
/// <param name="rn">Source register</param>
/// <param name="sh">True selects an arithmetic right shift, false a left shift</param>
/// <param name="shift">Shift amount; a zero amount with <paramref name="sh"/> set is treated as 31</param>
/// <param name="unsigned">True for unsigned saturation, false for signed</param>
private static void EmitSaturate(CodeGenContext context, uint rd, uint imm, uint rn, bool sh, uint shift, bool unsigned)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand source = InstEmitCommon.GetInputGpr(context, rn);

    if (sh && shift == 0)
    {
        shift = 31;
    }

    if (shift == 0)
    {
        // Nothing to shift, saturate the source directly.
        EmitSaturateRange(context, destination, source, imm, unsigned);

        return;
    }

    using ScopedRegister shifted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand amount = InstEmitCommon.Const((int)shift);

    if (sh)
    {
        context.Arm64Assembler.Asr(shifted.Operand, source, amount);
    }
    else
    {
        context.Arm64Assembler.Lsl(shifted.Operand, source, amount);
    }

    EmitSaturateRange(context, destination, shifted.Operand, imm, unsigned);
}
|
||||
|
||||
/// <summary>
/// Emits code that clamps <paramref name="value"/> to a <paramref name="saturateTo"/>-bit signed or
/// unsigned range and writes it to <paramref name="result"/>.
/// The value is first truncated to the target width; if truncation changed it, the saturated value is
/// produced instead and, when <paramref name="setQ"/> is true, SetQFlag is emitted.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="result">Destination operand; may be the same register as <paramref name="value"/></param>
/// <param name="value">Operand to be saturated</param>
/// <param name="saturateTo">Target width in bits; at most 32, and below 32 for the unsigned case</param>
/// <param name="unsigned">True to clamp to an unsigned range, false to clamp to a signed range</param>
/// <param name="setQ">Whether SetQFlag is emitted on the saturating path</param>
private static void EmitSaturateRange(CodeGenContext context, Operand result, Operand value, uint saturateTo, bool unsigned, bool setQ = true)
{
    Debug.Assert(saturateTo <= 32);
    Debug.Assert(!unsigned || saturateTo < 32);

    using ScopedRegister difference = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    ScopedRegister valueCopy = default;

    bool inPlace = result.Value == value.Value;

    if (saturateTo == 32 && !unsigned)
    {
        // No saturation possible for this case.

        if (!inPlace)
        {
            context.Arm64Assembler.Mov(result, value);
        }

        return;
    }

    if (saturateTo == 0)
    {
        // Result is always zero if we saturate 0 bits.

        context.Arm64Assembler.Mov(result, 0u);

        return;
    }

    if (inPlace)
    {
        // The original value is still needed after result is written, so keep a copy of it.
        valueCopy = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Mov(valueCopy.Operand, value);
        value = valueCopy.Operand;
    }

    int width = (int)saturateTo;

    if (unsigned)
    {
        // Negative values always saturate (to zero).
        // So we must always ignore the sign bit when masking, so that the truncated value will differ from the original one.

        uint keepMask = uint.MaxValue >> (32 - width);

        context.Arm64Assembler.And(result, value, InstEmitCommon.Const((int)keepMask));
    }
    else
    {
        context.Arm64Assembler.Sbfx(result, value, 0, width);
    }

    context.Arm64Assembler.Sub(difference.Operand, value, result);

    int cbzIndex = context.CodeWriter.InstructionPointer;

    // If the result is 0, the values are equal and we don't need saturation.
    // The branch offset is left at zero here and patched in once the saturating path has been emitted.
    context.Arm64Assembler.Cbz(difference.Operand, 0);

    // Saturate and set Q flag.
    if (!unsigned)
    {
        if (saturateTo == 1)
        {
            context.Arm64Assembler.Asr(result, value, InstEmitCommon.Const(31));
        }
        else
        {
            context.Arm64Assembler.Mov(result, uint.MaxValue >> (33 - width));
            context.Arm64Assembler.Eor(result, result, value, ArmShiftType.Asr, 31);
        }
    }
    else if (saturateTo == 31)
    {
        // Only saturation case possible when going from 32 bits signed to 32 or 31 bits unsigned
        // is when the signed input is negative, as all positive values are representable on a 31 bits range.

        context.Arm64Assembler.Mov(result, 0u);
    }
    else
    {
        context.Arm64Assembler.Asr(result, value, InstEmitCommon.Const(31));
        context.Arm64Assembler.Mvn(result, result);
        context.Arm64Assembler.Lsr(result, result, InstEmitCommon.Const(32 - width));
    }

    if (setQ)
    {
        SetQFlag(context);
    }

    int delta = context.CodeWriter.InstructionPointer - cbzIndex;
    uint patchedCbz = context.CodeWriter.ReadInstructionAt(cbzIndex) | (uint)((delta & 0x7ffff) << 5);

    context.CodeWriter.WriteInstructionAt(cbzIndex, patchedCbz);

    if (inPlace)
    {
        valueCopy.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// Emits code that clamps, in place, an unsigned 32-bit value to the largest value
/// representable with <paramref name="saturateTo"/> bits. The Q flag is not modified.
/// </summary>
/// <param name="context">Code generation context used to emit instructions and allocate temporaries</param>
/// <param name="value">Operand holding the value, overwritten with the clamped value</param>
/// <param name="saturateTo">Width in bits of the target range (at most 32)</param>
private static void EmitSaturateUnsignedRange(CodeGenContext context, Operand value, uint saturateTo)
{
    Debug.Assert(saturateTo <= 32);

    using ScopedRegister overflowBits = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    switch (saturateTo)
    {
        case 32:
            // Every 32-bit value fits on the range, there is nothing to emit.
            return;
        case 0:
            // A range of 0 bits can only hold zero.
            context.Arm64Assembler.Mov(value, 0u);
            return;
    }

    int discardedBits = 32 - (int)saturateTo;

    // Isolate the bits above the target width, the value fits only if all of them are zero.
    context.Arm64Assembler.Lsr(overflowBits.Operand, value, InstEmitCommon.Const(discardedBits));

    int cbzIndex = context.CodeWriter.InstructionPointer;

    // Skips the saturation when no bit was discarded.
    // Written with a zero offset, the real offset is patched in below.
    context.Arm64Assembler.Cbz(overflowBits.Operand, 0);

    // Replace the value with the maximum of the range.
    context.Arm64Assembler.Mov(value, uint.MaxValue >> discardedBits);

    int skipDistance = context.CodeWriter.InstructionPointer - cbzIndex;
    uint patchedCbz = context.CodeWriter.ReadInstructionAt(cbzIndex) | (uint)((skipDistance & 0x7ffff) << 5);

    context.CodeWriter.WriteInstructionAt(cbzIndex, patchedCbz);
}
|
||||
|
||||
/// <summary>
/// Emits code that saturates a 64-bit signed value to the 32-bit signed range,
/// setting the Q flag when the value did not fit.
/// </summary>
/// <param name="context">Code generation context used to emit instructions and allocate temporaries</param>
/// <param name="result">Operand that receives the saturated value</param>
/// <param name="value">Operand with the 64-bit value, may be the same register as <paramref name="result"/></param>
private static void EmitSaturateLongToInt(CodeGenContext context, Operand result, Operand value)
{
    using ScopedRegister difference = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    ScopedRegister valueCopy = default;

    bool sameRegister = result.Value == value.Value;

    if (sameRegister)
    {
        // The input is read again after the result register is written, so work on a copy of it.
        valueCopy = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand valueCopy64 = new(OperandKind.Register, OperandType.I64, valueCopy.Operand.Value);

        context.Arm64Assembler.Mov(valueCopy64, value);

        value = valueCopy64;
    }

    // 64-bit views of the temporary and of the result register.
    Operand difference64 = new(OperandKind.Register, OperandType.I64, difference.Operand.Value);
    Operand result64 = new(OperandKind.Register, OperandType.I64, result.Value);

    // Sign-extend the low word and compare it with the full value.
    context.Arm64Assembler.Sxtw(result64, value);
    context.Arm64Assembler.Sub(difference64, value, result64);

    int cbzIndex = context.CodeWriter.InstructionPointer;

    // A zero difference means the value already fits, so the saturation code is skipped.
    // The branch offset is patched in below.
    context.Arm64Assembler.Cbz(difference64, 0);

    // Maximum positive value, flipped by the sign of the input.
    context.Arm64Assembler.Mov(result, uint.MaxValue >> 1);
    context.Arm64Assembler.Eor(result64, result64, value, ArmShiftType.Asr, 63);

    SetQFlag(context);

    int skipDistance = context.CodeWriter.InstructionPointer - cbzIndex;
    uint patchedCbz = context.CodeWriter.ReadInstructionAt(cbzIndex) | (uint)((skipDistance & 0x7ffff) << 5);

    context.CodeWriter.WriteInstructionAt(cbzIndex, patchedCbz);

    // Zero-extend.
    context.Arm64Assembler.Mov(result, result);

    if (sameRegister)
    {
        valueCopy.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// Emits code that sets bit 27 (the Q flag) on the flags word stored on the native context.
/// </summary>
/// <param name="context">Code generation context used to emit instructions and allocate temporaries</param>
public static void SetQFlag(CodeGenContext context)
{
    const int QFlagBit = 27;

    Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // Read-modify-write of the flags word.
    context.Arm64Assembler.LdrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
    context.Arm64Assembler.Orr(flags.Operand, flags.Operand, InstEmitCommon.Const(1 << QFlagBit));
    context.Arm64Assembler.StrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
}
|
||||
}
|
||||
}
|
@ -0,0 +1,648 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
static class InstEmitSystem
|
||||
{
|
||||
// Signature of the host handlers for BKPT, SVC and UDF (see the Get*HandlerPtr methods).
private delegate void SoftwareInterruptHandler(ulong address, int imm);
// Signature of the host function that reads the counter (see GetCntpctEl0Ptr).
private delegate ulong Get64();
// Signature of the host synchronization check (see CheckSynchronizationPtr).
private delegate bool GetBool();

// Register index used as base (stack pointer) for the spill and fill memory accesses.
private const int SpIndex = 31;
|
||||
|
||||
/// <summary>
/// Emits the code for the BKPT instruction.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="imm">Immediate value of the instruction</param>
public static void Bkpt(CodeGenContext context, uint imm)
{
    // Queue the breakpoint on the context, the host call itself is written by WriteBkpt.
    context.AddPendingBkpt(imm);

    // Branch with a zero offset.
    // NOTE(review): presumably patched later to target the pending breakpoint code, confirm on CodeGenContext.
    context.Arm64Assembler.B(0);
}
|
||||
|
||||
/// <summary>
/// Handles the CPS instruction. No code is emitted for it.
/// </summary>
public static void Cps(CodeGenContext context, uint imod, uint m, uint a, uint i, uint f, uint mode)
{
    // NOP in user mode.
}
|
||||
|
||||
/// <summary>
/// Handles the DBG instruction. No code is emitted for it.
/// </summary>
public static void Dbg(CodeGenContext context, uint option)
{
    // NOP in ARMv8.
}
|
||||
|
||||
/// <summary>
/// Handles the HLT instruction. No code is emitted for it.
/// </summary>
public static void Hlt(CodeGenContext context, uint imm)
{
    // Intentionally empty, the instruction is treated as a NOP.
}
|
||||
|
||||
/// <summary>
/// Emits the code for the MCR instruction (write to a coprocessor register).
/// </summary>
/// <remarks>
/// Anything other than coprocessor 15 with opc1 0 is handled as an undefined instruction.
/// Only CRn 13, CRm 0, opc2 2 is written; writes to every other register are ignored.
/// </remarks>
public static void Mcr(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint crn, uint crm, uint opc2)
{
    bool isSystemRegisterAccess = coproc == 15 && opc1 == 0;

    if (!isSystemRegisterAccess)
    {
        Udf(context, encoding, 0);

        return;
    }

    Operand nativeContext = Register(context.RegisterAllocator.FixedContextRegister);
    Operand source = InstEmitCommon.GetInputGpr(context, rt);

    // CRn 13 is Process and Thread Info.
    if (crn == 13 && crm == 0 && opc2 == 2)
    {
        context.Arm64Assembler.StrRiUn(source, nativeContext, NativeContextOffsets.TpidrEl0Offset);
    }
}
|
||||
|
||||
/// <summary>
/// Emits the code for the MCRR instruction (64-bit write to a coprocessor register).
/// </summary>
/// <remarks>
/// Anything other than coprocessor 15 with opc1 0 is handled as an undefined instruction,
/// everything else is ignored.
/// </remarks>
public static void Mcrr(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint crm)
{
    if (coproc == 15 && opc1 == 0)
    {
        // We don't have any system register that needs to be modified using a 64-bit value.
        return;
    }

    Udf(context, encoding, 0);
}
|
||||
|
||||
/// <summary>
/// Emits the code for the MRC instruction (read from a coprocessor register).
/// </summary>
/// <remarks>
/// Anything other than coprocessor 15 with opc1 0 is handled as an undefined instruction.
/// Only CRn 13, CRm 0 with opc2 2 or 3 is read from the native context, every other register reads as zero.
/// When <paramref name="rt"/> is the PC register, the value read is moved to the NZCV flags instead of a register.
/// </remarks>
public static void Mrc(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint crn, uint crm, uint opc2)
{
    if (coproc != 15 || opc1 != 0)
    {
        Udf(context, encoding, 0);

        return;
    }

    Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);
    Operand rtOperand = InstEmitCommon.GetInputGpr(context, rt);
    bool hasValue = false;

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    Operand dest = rt == RegisterUtils.PcRegister ? tempRegister.Operand : rtOperand;

    switch (crn)
    {
        case 13: // Process and Thread Info.
            if (crm == 0)
            {
                switch (opc2)
                {
                    case 2:
                        context.Arm64Assembler.LdrRiUn(dest, ctx, NativeContextOffsets.TpidrEl0Offset);
                        hasValue = true;
                        break;
                    case 3:
                        context.Arm64Assembler.LdrRiUn(dest, ctx, NativeContextOffsets.TpidrroEl0Offset);
                        hasValue = true;
                        break;
                }
            }
            break;
    }

    if (!hasValue)
    {
        // Unsupported registers read as zero. This must also happen when the destination is the
        // temporary register, otherwise whatever it held before would be moved to the flags below.
        context.Arm64Assembler.Mov(dest, 0u);
    }

    if (rt == RegisterUtils.PcRegister)
    {
        context.Arm64Assembler.MsrNzcv(dest);
        context.SetNzcvModified();
    }
}
|
||||
|
||||
/// <summary>
/// Emits the code for the MRRC instruction (64-bit read from a coprocessor register).
/// </summary>
/// <remarks>
/// Coprocessors other than 15 are handled as an undefined instruction.
/// CRm 14 with opc1 0 queues a counter read, every other register reads as zero on both destinations.
/// </remarks>
public static void Mrrc(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint rt2, uint crm)
{
    if (coproc != 15)
    {
        Udf(context, encoding, 0);

        return;
    }

    bool isCounterRead = crm == 14 && opc1 == 0;

    if (isCounterRead)
    {
        // The host call is written by WriteReadCntpct, here only a branch with a zero offset is emitted.
        context.AddPendingReadCntpct(rt, rt2);
        context.Arm64Assembler.B(0);

        return;
    }

    // Unsupported system register.
    context.Arm64Assembler.Mov(InstEmitCommon.GetOutputGpr(context, rt), 0u);
    context.Arm64Assembler.Mov(InstEmitCommon.GetOutputGpr(context, rt2), 0u);
}
|
||||
|
||||
/// <summary>
/// Emits the code for the MRS instruction (read of the status register).
/// </summary>
/// <param name="context">Code generation context used to emit instructions and allocate temporaries</param>
/// <param name="rd">Destination register</param>
/// <param name="r">True to read SPSR (result is zero), false to read the GE, Q and NZCV flags</param>
public static void Mrs(CodeGenContext context, uint rd, bool r)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);

    if (r)
    {
        // Reads SPSR, unpredictable in user mode.

        context.Arm64Assembler.Mov(rdOperand, 0u);
    }
    else
    {
        Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);

        // Copy GE flags to destination register.
        // They are masked in place rather than extracted, so that they stay on bits 16 to 19,
        // which is where MsrI and MsrR take them from when writing the status register.
        context.Arm64Assembler.And(rdOperand, tempRegister.Operand, InstEmitCommon.Const(0xf << 16));

        // Insert Q flag.
        context.Arm64Assembler.And(tempRegister.Operand, tempRegister.Operand, InstEmitCommon.Const(1 << 27));
        context.Arm64Assembler.Orr(rdOperand, rdOperand, tempRegister.Operand);

        // Insert NZCV flags.
        context.Arm64Assembler.MrsNzcv(tempRegister.Operand);
        context.Arm64Assembler.Orr(rdOperand, rdOperand, tempRegister.Operand);

        // All other flags can't be accessed in user mode or have "unknown" values.
    }
}
|
||||
|
||||
/// <summary>
/// Emits the code for the MRS (banked register) instruction. The destination is always set to zero.
/// </summary>
public static void MrsBr(CodeGenContext context, uint rd, uint m1, bool r)
{
    // Reads banked register, unpredictable in user mode.
    context.Arm64Assembler.Mov(InstEmitCommon.GetOutputGpr(context, rd), 0u);
}
|
||||
|
||||
/// <summary>
/// Handles the MSR (banked register) instruction. No code is emitted for it.
/// </summary>
public static void MsrBr(CodeGenContext context, uint rn, uint m1, bool r)
{
    // Writes banked register, unpredictable in user mode.
}
|
||||
|
||||
/// <summary>
/// Emits the code for the MSR (immediate) instruction (write to the status register).
/// </summary>
/// <param name="context">Code generation context used to emit instructions and allocate temporaries</param>
/// <param name="imm">Immediate value to be written</param>
/// <param name="mask">Field mask: bit 1 selects the endian flag, bit 2 the GE flags and bit 3 the NZCVQ flags</param>
/// <param name="r">True to write SPSR (ignored), false to write the flags</param>
public static void MsrI(CodeGenContext context, uint imm, uint mask, bool r)
{
    if (r)
    {
        // Writes SPSR, unpredictable in user mode.
    }
    else
    {
        Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);

        if ((mask & 2) != 0)
        {
            // Endian flag.

            context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 9) & 1);
            context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 9, 1);
        }

        if ((mask & 4) != 0)
        {
            // GE flags.

            context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 16) & 0xf);
            context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 16, 4);
        }

        if ((mask & 8) != 0)
        {
            // NZCVQ flags.

            context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 27) & 0x1f);
            context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 27, 5);
            context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 28) & 0xf);
            InstEmitCommon.RestoreNzcvFlags(context, tempRegister2.Operand);
            context.SetNzcvModified();
        }

        // Write the updated flags word back to the context.
        // Without the store, the endian, GE and Q changes made above would be discarded.
        context.Arm64Assembler.StrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
    }
}
|
||||
|
||||
/// <summary>
/// Emits the code for the MSR (register) instruction (write to the status register).
/// </summary>
/// <param name="context">Code generation context used to emit instructions and allocate temporaries</param>
/// <param name="rn">Register with the value to be written</param>
/// <param name="mask">Field mask: bit 1 selects the endian flag, bit 2 the GE flags and bit 3 the NZCVQ flags</param>
/// <param name="r">True to write SPSR (ignored), false to write the flags</param>
public static void MsrR(CodeGenContext context, uint rn, uint mask, bool r)
{
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);

    if (r)
    {
        // Writes SPSR, unpredictable in user mode.
    }
    else
    {
        Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);

        if ((mask & 2) != 0)
        {
            // Endian flag.

            context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(9));
            context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 9, 1);
        }

        if ((mask & 4) != 0)
        {
            // GE flags.

            context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(16));
            context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 16, 4);
        }

        if ((mask & 8) != 0)
        {
            // NZCVQ flags.

            context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(27));
            context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 27, 5);
            context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(28));
            InstEmitCommon.RestoreNzcvFlags(context, tempRegister2.Operand);
            context.SetNzcvModified();
        }

        // Write the updated flags word back to the context.
        // Without the store, the endian, GE and Q changes made above would be discarded.
        context.Arm64Assembler.StrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
    }
}
|
||||
|
||||
/// <summary>
/// Emits the code for the SETEND instruction, which sets or clears bit 9 of the flags word on the native context.
/// </summary>
/// <param name="context">Code generation context used to emit instructions and allocate temporaries</param>
/// <param name="e">True to set the bit, false to clear it</param>
public static void Setend(CodeGenContext context, bool e)
{
    const int EndianBit = 9;

    Operand nativeContext = Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    context.Arm64Assembler.LdrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);

    if (!e)
    {
        context.Arm64Assembler.Bfc(flags.Operand, EndianBit, 1);
    }
    else
    {
        context.Arm64Assembler.Orr(flags.Operand, flags.Operand, InstEmitCommon.Const(1 << EndianBit));
    }

    context.Arm64Assembler.StrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
}
|
||||
|
||||
/// <summary>
/// Emits the code for the SVC instruction.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="imm">Immediate value of the instruction</param>
public static void Svc(CodeGenContext context, uint imm)
{
    // Queue the supervisor call on the context, the host call itself is written by WriteSvc.
    context.AddPendingSvc(imm);
    // Branch with a zero offset.
    // NOTE(review): presumably patched later to target the pending call code, confirm on CodeGenContext.
    context.Arm64Assembler.B(0);
}
|
||||
|
||||
/// <summary>
/// Emits the code for an undefined instruction.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="encoding">Encoding of the instruction, this is what is queued on the context</param>
/// <param name="imm">Immediate value of the instruction, not used</param>
public static void Udf(CodeGenContext context, uint encoding, uint imm)
{
    // Queue the undefined instruction on the context, the host call itself is written by WriteUdf.
    context.AddPendingUdf(encoding);
    // Branch with a zero offset.
    // NOTE(review): presumably patched later to target the pending call code, confirm on CodeGenContext.
    context.Arm64Assembler.B(0);
}
|
||||
|
||||
/// <summary>
/// Emits the code for a privileged instruction, which is handled exactly like an undefined instruction.
/// </summary>
public static void PrivilegedInstruction(CodeGenContext context, uint encoding) => Udf(context, encoding, 0);
|
||||
|
||||
/// <summary>
/// Gets a native function pointer for <c>NativeInterface.Break</c>.
/// </summary>
private static IntPtr GetBkptHandlerPtr() =>
    Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.Break);
|
||||
|
||||
/// <summary>
/// Gets a native function pointer for <c>NativeInterface.SupervisorCall</c>.
/// </summary>
private static IntPtr GetSvcHandlerPtr() =>
    Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.SupervisorCall);
|
||||
|
||||
/// <summary>
/// Gets a native function pointer for <c>NativeInterface.Undefined</c>.
/// </summary>
private static IntPtr GetUdfHandlerPtr() =>
    Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.Undefined);
|
||||
|
||||
/// <summary>
/// Gets a native function pointer for <c>NativeInterface.GetCntpctEl0</c>.
/// </summary>
private static IntPtr GetCntpctEl0Ptr() =>
    Marshal.GetFunctionPointerForDelegate<Get64>(NativeInterface.GetCntpctEl0);
|
||||
|
||||
/// <summary>
/// Gets a native function pointer for <c>NativeInterface.CheckSynchronization</c>.
/// </summary>
private static IntPtr CheckSynchronizationPtr() =>
    Marshal.GetFunctionPointerForDelegate<GetBool>(NativeInterface.CheckSynchronization);
|
||||
|
||||
/// <summary>
/// Checks if the code generated for a given instruction might call a host function.
/// </summary>
/// <param name="name">Name of the instruction</param>
/// <returns>True if the instruction might do a host call, false otherwise</returns>
public static bool NeedsCall(InstName name)
{
    // All instructions that might do a host call should be included here.
    // That is required to reserve space on the stack for caller saved registers.
    // NOTE(review): Bkpt queues a pending host call and Mcrr can reach Udf, but neither is listed here.
    // Confirm whether that is intentional.

    return name switch
    {
        InstName.Mcr or InstName.Mrc or InstName.Mrrc or InstName.Svc or InstName.Udf => true,
        _ => false,
    };
}
|
||||
|
||||
/// <summary>
/// Writes the code that calls the host breakpoint handler, followed by a synchronization point.
/// </summary>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator of the function being compiled</param>
/// <param name="tailMerger">Tail merger that receives the conditional return of the synchronization point</param>
/// <param name="spillBaseOffset">Stack offset where the registers are saved across the call</param>
/// <param name="pc">First argument passed to the handler</param>
/// <param name="imm">Second argument passed to the handler</param>
public static void WriteBkpt(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset, uint pc, uint imm)
{
    Assembler asm = new(writer);

    // The handler returns nothing, so no result register is passed.
    WriteCall(ref asm, regAlloc, GetBkptHandlerPtr(), skipContext: true, spillBaseOffset, null, pc, imm);
    WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, skipContext: true, spillBaseOffset);
}
|
||||
|
||||
/// <summary>
/// Writes the code that calls the host supervisor call handler, followed by a synchronization point.
/// </summary>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator of the function being compiled</param>
/// <param name="tailMerger">Tail merger that receives the conditional return of the synchronization point</param>
/// <param name="spillBaseOffset">Stack offset where the registers are saved across the call</param>
/// <param name="pc">First argument passed to the handler</param>
/// <param name="svcId">Second argument passed to the handler</param>
public static void WriteSvc(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset, uint pc, uint svcId)
{
    Assembler asm = new(writer);

    // The handler returns nothing, so no result register is passed.
    WriteCall(ref asm, regAlloc, GetSvcHandlerPtr(), skipContext: true, spillBaseOffset, null, pc, svcId);
    WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, skipContext: true, spillBaseOffset);
}
|
||||
|
||||
/// <summary>
/// Writes the code that calls the host undefined instruction handler, followed by a synchronization point.
/// </summary>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator of the function being compiled</param>
/// <param name="tailMerger">Tail merger that receives the conditional return of the synchronization point</param>
/// <param name="spillBaseOffset">Stack offset where the registers are saved across the call</param>
/// <param name="pc">First argument passed to the handler</param>
/// <param name="imm">Second argument passed to the handler</param>
public static void WriteUdf(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset, uint pc, uint imm)
{
    Assembler asm = new(writer);

    // The handler returns nothing, so no result register is passed.
    WriteCall(ref asm, regAlloc, GetUdfHandlerPtr(), skipContext: true, spillBaseOffset, null, pc, imm);
    WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, skipContext: true, spillBaseOffset);
}
|
||||
|
||||
/// <summary>
/// Writes the code that calls the host counter read function and splits the 64-bit value returned
/// between two 32-bit destination registers.
/// </summary>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator of the function being compiled</param>
/// <param name="spillBaseOffset">Stack offset where the registers are saved across the call</param>
/// <param name="rt">Register that receives the low 32 bits of the counter</param>
/// <param name="rt2">Register that receives the high 32 bits of the counter</param>
public static void WriteReadCntpct(CodeWriter writer, RegisterAllocator regAlloc, int spillBaseOffset, int rt, int rt2)
{
    Assembler asm = new(writer);

    uint resultMask = (1u << rt) | (1u << rt2);
    int tempRegister = 0;

    // Pick the lowest register that is not one of the destinations.
    // The bound is checked first, so that the shift is only done with a valid register index.
    while (tempRegister < 32 && (resultMask & (1u << tempRegister)) != 0)
    {
        tempRegister++;
    }

    Debug.Assert(tempRegister < 32);

    // The destination registers are excluded, as they are overwritten with the result.
    WriteSpill(ref asm, regAlloc, resultMask, skipContext: false, spillBaseOffset, tempRegister);

    Operand rn = Register(tempRegister);

    asm.Mov(rn, (ulong)GetCntpctEl0Ptr());
    asm.Blr(rn);

    // The value returned by the call is on register 0.
    if (rt2 == 0 && rt != 0)
    {
        // The high destination is the register holding the returned value, so the low half
        // has to be copied out before the shift overwrites it.
        asm.Mov(Register(rt, OperandType.I32), Register(0, OperandType.I32)); // Zero-extend.
        asm.Lsr(Register(rt2), Register(0), InstEmitCommon.Const(32));
    }
    else
    {
        if (rt != rt2)
        {
            asm.Lsr(Register(rt2), Register(0), InstEmitCommon.Const(32));
        }

        asm.Mov(Register(rt, OperandType.I32), Register(0, OperandType.I32)); // Zero-extend.
    }

    WriteFill(ref asm, regAlloc, resultMask, skipContext: false, spillBaseOffset, tempRegister);
}
|
||||
|
||||
/// <summary>
/// Writes a synchronization point that saves and restores all registers, context registers included.
/// </summary>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator of the function being compiled</param>
/// <param name="tailMerger">Tail merger that receives the conditional return</param>
/// <param name="spillBaseOffset">Stack offset where the registers are saved across the call</param>
public static void WriteSyncPoint(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset)
{
    Assembler assembler = new(writer);

    WriteSyncPoint(writer, ref assembler, regAlloc, tailMerger, skipContext: false, spillBaseOffset);
}
|
||||
|
||||
/// <summary>
/// Writes a synchronization point: the counter stored on the native context is decremented, and when
/// it was found to be zero the host synchronization check is called before that.
/// </summary>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="asm">Assembler used to write the instructions</param>
/// <param name="regAlloc">Register allocator of the function being compiled</param>
/// <param name="tailMerger">Tail merger that receives the conditional return</param>
/// <param name="skipContext">Forwarded to the spill and fill code, see <see cref="WriteSpillOrFill"/></param>
/// <param name="spillBaseOffset">Stack offset where the registers are saved across the call</param>
private static void WriteSyncPoint(CodeWriter writer, ref Assembler asm, RegisterAllocator regAlloc, TailMerger tailMerger, bool skipContext, int spillBaseOffset)
{
    int tempRegister = regAlloc.AllocateTempGprRegister();

    Operand rt = Register(tempRegister, OperandType.I32);

    asm.LdrRiUn(rt, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

    // Skip the host call while the counter is not zero. The offset is patched below.
    int branchIndex = writer.InstructionPointer;
    asm.Cbnz(rt, 0);

    // The temporary register is excluded, as its value is reloaded after the call.
    WriteSpill(ref asm, regAlloc, 1u << tempRegister, skipContext, spillBaseOffset, tempRegister);

    // Use register 0 for the function address, unless it is the temporary register.
    Operand rn = Register(tempRegister == 0 ? 1 : 0);

    asm.Mov(rn, (ulong)CheckSynchronizationPtr());
    asm.Blr(rn);

    // NOTE(review): presumably returns from the function when the call returned zero (false),
    // confirm on TailMerger.
    tailMerger.AddConditionalZeroReturn(writer, asm, Register(0, OperandType.I32));

    WriteFill(ref asm, regAlloc, 1u << tempRegister, skipContext, spillBaseOffset, tempRegister);

    // Reload the counter on the path that did the call.
    asm.LdrRiUn(rt, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

    // The skip branch lands after the reload, where the value loaded at the start is still on the register.
    uint branchInst = writer.ReadInstructionAt(branchIndex);
    writer.WriteInstructionAt(branchIndex, branchInst | (((uint)(writer.InstructionPointer - branchIndex) & 0x7ffff) << 5));

    // Decrement the counter and store it back.
    asm.Sub(rt, rt, new Operand(OperandKind.Constant, OperandType.I32, 1));
    asm.StrRiUn(rt, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

    regAlloc.FreeTempGprRegister(tempRegister);
}
|
||||
|
||||
/// <summary>
/// Writes a call to a host function, saving the registers before the call and restoring them after it.
/// </summary>
/// <param name="asm">Assembler used to write the instructions</param>
/// <param name="regAlloc">Register allocator of the function being compiled</param>
/// <param name="funcPtr">Address of the host function</param>
/// <param name="skipContext">Forwarded to the spill and fill code, see <see cref="WriteSpillOrFill"/></param>
/// <param name="spillBaseOffset">Stack offset where the registers are saved across the call</param>
/// <param name="resultRegister">Register that receives the value on register 0 after the call, or null if there is none</param>
/// <param name="callArgs">Constant arguments, loaded on registers 0 and up in order</param>
private static void WriteCall(
    ref Assembler asm,
    RegisterAllocator regAlloc,
    IntPtr funcPtr,
    bool skipContext,
    int spillBaseOffset,
    int? resultRegister,
    params ulong[] callArgs)
{
    // The result register is not saved or restored, since it is overwritten by the call result.
    uint resultMask = resultRegister.HasValue ? 1u << resultRegister.Value : 0u;

    // The function address goes on the first register after the arguments,
    // or the one after that if it is the result register.
    int tempRegister = callArgs.Length;

    if (resultRegister == tempRegister)
    {
        tempRegister++;
    }

    WriteSpill(ref asm, regAlloc, resultMask, skipContext, spillBaseOffset, tempRegister);

    // We only support up to 7 arguments right now.
    // ABI defines the first 8 integer arguments to be passed on registers X0-X7.
    // We need at least one register to put the function address on, so that reduces the number of
    // registers we can use for that by one.

    Debug.Assert(callArgs.Length < 8);

    int argRegister = 0;

    foreach (ulong callArg in callArgs)
    {
        asm.Mov(Register(argRegister++), callArg);
    }

    Operand target = Register(tempRegister);

    asm.Mov(target, (ulong)funcPtr);
    asm.Blr(target);

    // Nothing to move when the result is wanted on register 0.
    if (resultRegister is int resultIndex && resultIndex != 0)
    {
        asm.Mov(Register(resultIndex), Register(0));
    }

    WriteFill(ref asm, regAlloc, resultMask, skipContext, spillBaseOffset, tempRegister);
}
|
||||
|
||||
/// <summary>
/// Writes the code that saves the registers to the stack, see <see cref="WriteSpillOrFill"/>.
/// </summary>
private static void WriteSpill(ref Assembler asm, RegisterAllocator regAlloc, uint exceptMask, bool skipContext, int spillOffset, int tempRegister) =>
    WriteSpillOrFill(ref asm, regAlloc, skipContext, exceptMask, spillOffset, tempRegister, spill: true);
|
||||
|
||||
/// <summary>
/// Writes the code that restores the registers from the stack, see <see cref="WriteSpillOrFill"/>.
/// </summary>
private static void WriteFill(ref Assembler asm, RegisterAllocator regAlloc, uint exceptMask, bool skipContext, int spillOffset, int tempRegister) =>
    WriteSpillOrFill(ref asm, regAlloc, skipContext, exceptMask, spillOffset, tempRegister, spill: false);
|
||||
|
||||
/// <summary>
/// Writes the code that saves to the stack (spill) or restores from the stack (fill) the general purpose
/// registers, the NZCV flags and the FP/SIMD registers in use.
/// </summary>
/// <remarks>
/// The stack layout is: general purpose registers (8 bytes each), NZCV flags (8 bytes), padding up to
/// a multiple of 16 bytes relative to the offset bits, then FP/SIMD registers (16 bytes each).
/// Spill and fill must be called with the same arguments to agree on that layout.
/// </remarks>
/// <param name="asm">Assembler used to write the instructions</param>
/// <param name="regAlloc">Register allocator that provides the masks of registers in use</param>
/// <param name="skipContext">True to exclude the registers on <c>Compiler.UsableGprsMask</c> and <c>Compiler.UsableFpSimdMask</c></param>
/// <param name="exceptMask">Mask of general purpose registers that must not be saved or restored</param>
/// <param name="spillOffset">Stack offset of the first saved register</param>
/// <param name="tempRegister">Register used to move the NZCV flags to and from memory</param>
/// <param name="spill">True to save the registers, false to restore them</param>
private static void WriteSpillOrFill(
    ref Assembler asm,
    RegisterAllocator regAlloc,
    bool skipContext,
    uint exceptMask,
    int spillOffset,
    int tempRegister,
    bool spill)
{
    // Callee saved registers are preserved by the called function, so they are left out.
    uint gprMask = regAlloc.UsedGprsMask & ~(AbiConstants.GprCalleeSavedRegsMask | exceptMask);

    if (skipContext)
    {
        gprMask &= ~Compiler.UsableGprsMask;
    }

    if (!spill)
    {
        // We must reload the status register before reloading the GPRs,
        // since we might otherwise trash one of them by using it as temp register.

        Operand rt = Register(tempRegister, OperandType.I32);

        // The flags are stored right after the general purpose registers, 8 bytes per register.
        asm.LdrRiUn(rt, Register(SpIndex), spillOffset + BitOperations.PopCount(gprMask) * 8);
        asm.MsrNzcv(rt);
    }

    while (gprMask != 0)
    {
        int reg = BitOperations.TrailingZeroCount(gprMask);

        // Use a pair access when the next register is also on the mask and the offset is below the limit.
        if (reg < 31 && (gprMask & (2u << reg)) != 0 && spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
        {
            if (spill)
            {
                asm.StpRiUn(Register(reg), Register(reg + 1), Register(SpIndex), spillOffset);
            }
            else
            {
                asm.LdpRiUn(Register(reg), Register(reg + 1), Register(SpIndex), spillOffset);
            }

            gprMask &= ~(3u << reg);
            spillOffset += 16;
        }
        else
        {
            if (spill)
            {
                asm.StrRiUn(Register(reg), Register(SpIndex), spillOffset);
            }
            else
            {
                asm.LdrRiUn(Register(reg), Register(SpIndex), spillOffset);
            }

            gprMask &= ~(1u << reg);
            spillOffset += 8;
        }
    }

    if (spill)
    {
        // Save the flags after the general purpose registers.
        // At this point the temporary register was already saved if it needed to be.
        Operand rt = Register(tempRegister, OperandType.I32);

        asm.MrsNzcv(rt);
        asm.StrRiUn(rt, Register(SpIndex), spillOffset);
    }

    // Skip the flags slot, on fill it was already read before the loop above.
    spillOffset += 8;

    // Add padding so that bit 3 of the offset is clear for the 16 bytes FP/SIMD registers.
    if ((spillOffset & 8) != 0)
    {
        spillOffset += 8;
    }

    uint fpSimdMask = regAlloc.UsedFpSimdMask;

    if (skipContext)
    {
        fpSimdMask &= ~Compiler.UsableFpSimdMask;
    }

    while (fpSimdMask != 0)
    {
        int reg = BitOperations.TrailingZeroCount(fpSimdMask);

        // Same pairing rule as for the general purpose registers.
        if (reg < 31 && (fpSimdMask & (2u << reg)) != 0 && spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
        {
            if (spill)
            {
                asm.StpRiUn(Register(reg, OperandType.V128), Register(reg + 1, OperandType.V128), Register(SpIndex), spillOffset);
            }
            else
            {
                asm.LdpRiUn(Register(reg, OperandType.V128), Register(reg + 1, OperandType.V128), Register(SpIndex), spillOffset);
            }

            fpSimdMask &= ~(3u << reg);
            spillOffset += 32;
        }
        else
        {
            if (spill)
            {
                asm.StrRiUn(Register(reg, OperandType.V128), Register(SpIndex), spillOffset);
            }
            else
            {
                asm.LdrRiUn(Register(reg, OperandType.V128), Register(SpIndex), spillOffset);
            }

            fpSimdMask &= ~(1u << reg);
            spillOffset += 16;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Creates an integer register operand.
/// </summary>
/// <param name="register">Index of the register</param>
/// <param name="type">Type of the operand, 64-bit integer by default</param>
/// <returns>The register operand</returns>
public static Operand Register(int register, OperandType type = OperandType.I64) =>
    new Operand(register, RegisterType.Integer, type);
|
||||
}
|
||||
}
|
@ -0,0 +1,95 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Emitters for the scalar floating-point arithmetic instructions.
/// Each method forwards its operands to a shared helper on <c>InstEmitNeonCommon</c>.
/// </summary>
static class InstEmitVfpArithmetic
{
    // Unary operations.

    public static void VabsF(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FabsFloat);

    // Binary operations.

    public static void VaddF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FaddFloat);

    public static void VdivF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FdivFloat);

    // Fused multiply operations, the destination register is also an input.

    public static void VfmaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FmaddFloat);

    public static void VfmsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FmsubFloat);

    public static void VfnmaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FnmaddFloat);

    public static void VfnmsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FnmsubFloat);

    public static void Vmaxnm(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FmaxnmFloat);

    public static void Vminnm(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FminnmFloat);

    // Multiply and accumulate operations, the variants differ only on the negation flags passed to the helper.

    public static void VmlaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: false, negProduct: false);

    public static void VmlsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: false, negProduct: true);

    public static void VmulF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FmulFloat);

    public static void VnegF(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FnegFloat);

    public static void VnmlaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: true, negProduct: true);

    public static void VnmlsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: true, negProduct: false);

    public static void VnmulF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FnmulFloat);

    public static void VsqrtF(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FsqrtFloat);

    public static void VsubF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FsubFloat);
}
|
||||
}
|
@ -0,0 +1,133 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
static class InstEmitVfpCompare
|
||||
{
|
||||
/// <summary>
/// Emits the code for VCMP comparing a register with zero.
/// </summary>
public static void VcmpI(CodeGenContext context, uint cond, uint rd, uint size) =>
    EmitVcmpVcmpe(context, cond, rd, 0, size, zero: true, e: false);
|
||||
|
||||
/// <summary>
/// Emits the code for VCMP comparing two registers.
/// </summary>
public static void VcmpR(CodeGenContext context, uint cond, uint rd, uint rm, uint size) =>
    EmitVcmpVcmpe(context, cond, rd, rm, size, zero: false, e: false);
|
||||
|
||||
/// <summary>
/// Emits the code for VCMPE comparing a register with zero.
/// </summary>
public static void VcmpeI(CodeGenContext context, uint cond, uint rd, uint size) =>
    EmitVcmpVcmpe(context, cond, rd, 0, size, zero: true, e: true);
|
||||
|
||||
/// <summary>
/// Emits the code for VCMPE comparing two registers.
/// </summary>
public static void VcmpeR(CodeGenContext context, uint cond, uint rd, uint rm, uint size) =>
    EmitVcmpVcmpe(context, cond, rd, rm, size, zero: false, e: true);
|
||||
|
||||
/// <summary>
/// Emits a scalar floating-point compare and stores the resulting flags on the floating-point
/// flags word of the native context.
/// </summary>
/// <param name="context">Code generation context used to emit instructions and allocate temporaries</param>
/// <param name="cond">Condition of the instruction, used to decide if the host flags must be preserved</param>
/// <param name="rd">First register to compare</param>
/// <param name="rm">Second register to compare, not used if <paramref name="zero"/> is true</param>
/// <param name="size">Size of the operands, must be 1, 2 or 3</param>
/// <param name="zero">True to compare with zero rather than with <paramref name="rm"/></param>
/// <param name="e">True to emit FCMPE, false to emit FCMP</param>
private static void EmitVcmpVcmpe(CodeGenContext context, uint cond, uint rd, uint rm, uint size, bool zero, bool e)
{
    Debug.Assert(size is 1 or 2 or 3);

    bool singleRegs = size != 3;
    uint ftype = size ^ 2u;
    uint opc = zero ? 1u : 0u;

    using ScopedRegister lhs = InstEmitNeonCommon.MoveScalarToSide(context, rd, singleRegs);
    ScopedRegister rhs = default;
    Operand rhsOperand;

    if (!zero)
    {
        rhs = InstEmitNeonCommon.MoveScalarToSide(context, rm, singleRegs);
        rhsOperand = rhs.Operand;
    }
    else
    {
        // Only a placeholder operand is needed for the compare with zero form.
        rhsOperand = new Operand(0, RegisterType.Vector, OperandType.V128);
    }

    using ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // The host flags are saved and restored around the compare, unless the compare can be fused.
    bool preserveHostFlags = !CanFuseVcmpVmrs(context, cond);

    if (preserveHostFlags)
    {
        InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);
    }

    if (!e)
    {
        context.Arm64Assembler.FcmpFloat(lhs.Operand, rhsOperand, opc, ftype);
    }
    else
    {
        context.Arm64Assembler.FcmpeFloat(lhs.Operand, rhsOperand, opc, ftype);
    }

    // Save result flags from the FCMP operation on FPSCR register, then restore the old flags if needed.
    WriteUpdateFpsrNzcv(context);

    if (preserveHostFlags)
    {
        InstEmitCommon.RestoreNzcvFlags(context, savedFlags.Operand);
    }

    // The second operand register only exists for the register form.
    if (!zero)
    {
        rhs.Dispose();
    }
}
|
||||
|
||||
private static void WriteUpdateFpsrNzcv(CodeGenContext context)
|
||||
{
|
||||
using ScopedRegister fpsrRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
|
||||
|
||||
Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);
|
||||
|
||||
context.Arm64Assembler.LdrRiUn(fpsrRegister.Operand, ctx, NativeContextOffsets.FpFlagsBaseOffset);
|
||||
|
||||
InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);
|
||||
|
||||
context.Arm64Assembler.Bfi(fpsrRegister.Operand, flagsRegister.Operand, 28, 4);
|
||||
context.Arm64Assembler.StrRiUn(fpsrRegister.Operand, ctx, NativeContextOffsets.FpFlagsBaseOffset);
|
||||
}
|
||||
|
||||
private static bool CanFuseVcmpVmrs(CodeGenContext context, uint vcmpCond)
|
||||
{
|
||||
// Conditions might be different for the VCMP and VMRS instructions if they are inside a IT block,
|
||||
// we don't bother to check right now, so just always skip if inside an IT block.
|
||||
if (context.InITBlock)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
InstInfo nextInfo = context.PeekNextInstruction();
|
||||
|
||||
// We're looking for a VMRS instructions.
|
||||
if (nextInfo.Name != InstName.Vmrs)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Conditions must match.
|
||||
if (vcmpCond != (nextInfo.Encoding >> 28))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reg must be 1, Rt must be PC indicating VMRS to PSTATE.NZCV.
|
||||
if (((nextInfo.Encoding >> 16) & 0xf) != 1 || ((nextInfo.Encoding >> 12) & 0xf) != RegisterUtils.PcRegister)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
context.SetSkipNextInstruction();
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,305 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Arm64 code emitters for the VFP conversion instructions (VCVT and its variants).
/// </summary>
static class InstEmitVfpConvert
{
    /// <summary>
    /// Emits VCVTA using the FCVTAS (<paramref name="op"/> set) or FCVTAU (<paramref name="op"/> clear) family.
    /// </summary>
    public static void Vcvta(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtasS, context.Arm64Assembler.FcvtasSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtauS, context.Arm64Assembler.FcvtauSH);
            }

            return;
        }

        // F64 -> S32/U32 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
        if (op)
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtasFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtauFloat);
        }
    }

    /// <summary>
    /// Emits VCVTB, the half-precision conversion operating on the bottom half (<c>top: false</c>).
    /// </summary>
    public static void Vcvtb(CodeGenContext context, uint rd, uint rm, uint sz, uint op)
        => EmitVcvtbVcvtt(context, rd, rm, sz, op, top: false);

    /// <summary>
    /// Emits VCVTM using the FCVTMS (<paramref name="op"/> set) or FCVTMU (<paramref name="op"/> clear) family.
    /// </summary>
    public static void Vcvtm(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtmsS, context.Arm64Assembler.FcvtmsSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtmuS, context.Arm64Assembler.FcvtmuSH);
            }

            return;
        }

        // F64 -> S32/U32 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
        if (op)
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtmsFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtmuFloat);
        }
    }

    /// <summary>
    /// Emits VCVTN using the FCVTNS (<paramref name="op"/> set) or FCVTNU (<paramref name="op"/> clear) family.
    /// </summary>
    public static void Vcvtn(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtnsS, context.Arm64Assembler.FcvtnsSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtnuS, context.Arm64Assembler.FcvtnuSH);
            }

            return;
        }

        // F64 -> S32/U32 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
        if (op)
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtnsFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtnuFloat);
        }
    }

    /// <summary>
    /// Emits VCVTP using the FCVTPS (<paramref name="op"/> set) or FCVTPU (<paramref name="op"/> clear) family.
    /// </summary>
    public static void Vcvtp(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtpsS, context.Arm64Assembler.FcvtpsSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtpuS, context.Arm64Assembler.FcvtpuSH);
            }

            return;
        }

        // F64 -> S32/U32 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
        if (op)
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtpsFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtpuFloat);
        }
    }

    /// <summary>
    /// Emits the conversion between single and double precision.
    /// A <paramref name="size"/> of 3 converts double to single, any other value converts single to double.
    /// </summary>
    public static void VcvtDs(CodeGenContext context, uint rd, uint rm, uint size)
    {
        bool doubleToSingle = size == 3;

        using ScopedRegister result = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        // The source is a single register only when widening, the destination only when narrowing.
        using ScopedRegister source = InstEmitNeonCommon.MoveScalarToSide(context, rm, !doubleToSingle);

        if (doubleToSingle)
        {
            // Double to single.
            context.Arm64Assembler.FcvtFloat(result.Operand, source.Operand, 0, 1);
        }
        else
        {
            // Single to double.
            context.Arm64Assembler.FcvtFloat(result.Operand, source.Operand, 1, 0);
        }

        InstEmitNeonCommon.InsertResult(context, result.Operand, rd, doubleToSingle);
    }

    /// <summary>
    /// Emits the floating-point to integer conversion using the FCVTZU or FCVTZS family,
    /// depending on <paramref name="unsigned"/>.
    /// </summary>
    public static void VcvtIv(CodeGenContext context, uint rd, uint rm, bool unsigned, uint size)
    {
        if (size != 3)
        {
            if (unsigned)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtzuIntS, context.Arm64Assembler.FcvtzuIntSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtzsIntS, context.Arm64Assembler.FcvtzsIntSH);
            }

            return;
        }

        // F64 -> S32/U32 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
        if (unsigned)
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtzuFloatInt);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtzsFloatInt);
        }
    }

    /// <summary>
    /// Emits the integer to floating-point conversion using the UCVTF or SCVTF family,
    /// depending on <paramref name="unsigned"/>.
    /// </summary>
    public static void VcvtVi(CodeGenContext context, uint rd, uint rm, bool unsigned, uint size)
    {
        if (size != 3)
        {
            if (unsigned)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.UcvtfIntS, context.Arm64Assembler.UcvtfIntSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.ScvtfIntS, context.Arm64Assembler.ScvtfIntSH);
            }

            return;
        }

        // S32/U32 -> F64 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
        if (unsigned)
        {
            InstEmitNeonCommon.EmitScalarUnaryFromGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.UcvtfFloatInt);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryFromGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.ScvtfFloatInt);
        }
    }

    /// <summary>
    /// Emits the conversion between floating-point and fixed-point, operating in place on <paramref name="rd"/>.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="rd">Register index used as both source and destination</param>
    /// <param name="imm5">Immediate subtracted from 32 or 16 to obtain the number of fraction bits</param>
    /// <param name="sx">True to use 32 as the base for the fraction bits, false to use 16</param>
    /// <param name="sf">Encoded floating-point size</param>
    /// <param name="op">1 to convert to fixed-point, 0 to convert from fixed-point</param>
    /// <param name="u">True for the unsigned variant</param>
    public static void VcvtXv(CodeGenContext context, uint rd, uint imm5, bool sx, uint sf, uint op, bool u)
    {
        Debug.Assert(op >> 1 == 0);

        bool toFixed = op == 1;
        uint size = sf;
        uint fbits = Math.Clamp((sx ? 32u : 16u) - imm5, 1, 8u << (int)size);

        if (toFixed && u)
        {
            InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fbits, size, is16Bit: false, context.Arm64Assembler.FcvtzuFixS);
        }
        else if (toFixed)
        {
            InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fbits, size, is16Bit: false, context.Arm64Assembler.FcvtzsFixS);
        }
        else if (u)
        {
            InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fbits, size, is16Bit: !sx, context.Arm64Assembler.UcvtfFixS);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fbits, size, is16Bit: !sx, context.Arm64Assembler.ScvtfFixS);
        }
    }

    /// <summary>
    /// Emits VCVTR: the value is first rounded with FRINTI and then converted with FCVTZU or FCVTZS.
    /// The unsigned variant is selected when bit 0 of <paramref name="op"/> is clear.
    /// </summary>
    public static void VcvtrIv(CodeGenContext context, uint rd, uint rm, uint op, uint size)
    {
        bool unsigned = (op & 1) == 0;

        Debug.Assert(size >= 1 && size <= 3);

        bool singleRegs = size != 3;
        bool isHalf = size == 1;

        using ScopedRegister source = InstEmitNeonCommon.MoveScalarToSide(context, rm, singleRegs);

        using ScopedRegister result = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, source);

        // Round using the FPCR rounding mode first, since the FCVTZ instructions will use the round to zero mode.
        context.Arm64Assembler.FrintiFloat(result.Operand, source.Operand, size ^ 2u);

        if (isHalf)
        {
            if (unsigned)
            {
                context.Arm64Assembler.FcvtzuIntSH(result.Operand, result.Operand);
            }
            else
            {
                context.Arm64Assembler.FcvtzsIntSH(result.Operand, result.Operand);
            }
        }
        else if (unsigned)
        {
            context.Arm64Assembler.FcvtzuIntS(result.Operand, result.Operand, size & 1);
        }
        else
        {
            context.Arm64Assembler.FcvtzsIntS(result.Operand, result.Operand, size & 1);
        }

        InstEmitNeonCommon.InsertResult(context, result.Operand, rd, singleRegs);
    }

    /// <summary>
    /// Emits VCVTT, the half-precision conversion operating on the top half (<c>top: true</c>).
    /// </summary>
    public static void Vcvtt(CodeGenContext context, uint rd, uint rm, uint sz, uint op)
        => EmitVcvtbVcvtt(context, rd, rm, sz, op, top: true);

    /// <summary>
    /// Shared implementation of VCVTB and VCVTT.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="rd">Destination register index</param>
    /// <param name="rm">Source register index</param>
    /// <param name="sz">1 when the non-half operand is double precision, otherwise it is single precision</param>
    /// <param name="op">0 to convert from half precision, any other value to convert to half precision</param>
    /// <param name="top">Forwarded to the 16-bit move and insert helpers to select which half is used</param>
    public static void EmitVcvtbVcvtt(CodeGenContext context, uint rd, uint rm, uint sz, uint op, bool top)
    {
        bool usesDouble = sz == 1;
        bool fromHalf = op == 0;
        uint wideType = usesDouble ? 1u : 0u;

        using ScopedRegister result = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        if (fromHalf)
        {
            // Half to single/double.
            using ScopedRegister halfSource = InstEmitNeonCommon.Move16BitScalarToSide(context, rm, top);

            context.Arm64Assembler.FcvtFloat(result.Operand, halfSource.Operand, wideType, 3u);

            InstEmitNeonCommon.InsertResult(context, result.Operand, rd, !usesDouble);
        }
        else
        {
            // Single/double to half.
            using ScopedRegister wideSource = InstEmitNeonCommon.MoveScalarToSide(context, rm, !usesDouble);

            context.Arm64Assembler.FcvtFloat(result.Operand, wideSource.Operand, 3u, wideType);

            InstEmitNeonCommon.Insert16BitResult(context, result.Operand, rd, top);
        }
    }
}
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Arm64 code emitters for the VFP move instructions.
/// </summary>
static class InstEmitVfpMove
{
    /// <summary>
    /// Emits VSEL as a FCSEL between <paramref name="rn"/> and <paramref name="rm"/>.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="rd">Destination register index</param>
    /// <param name="rn">Register index passed as the first FCSEL source</param>
    /// <param name="rm">Register index passed as the second FCSEL source</param>
    /// <param name="cc">2-bit condition field of the instruction</param>
    /// <param name="size">Encoded operand size (3 selects non-single registers)</param>
    public static void Vsel(CodeGenContext context, uint rd, uint rn, uint rm, uint cc, uint size)
    {
        bool singleRegs = size != 3;

        // Expand the 2-bit field into a 4-bit condition: bits 3:2 are cc itself,
        // bit 1 is cc<1> XOR cc<0>, and bit 0 is always clear.
        uint xorBit = (cc & 2) ^ ((cc << 1) & 2);
        uint cond = (cc << 2) | xorBit;

        using ScopedRegister first = InstEmitNeonCommon.MoveScalarToSide(context, rn, singleRegs);
        using ScopedRegister second = InstEmitNeonCommon.MoveScalarToSide(context, rm, singleRegs);

        using ScopedRegister result = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, first, second);

        context.Arm64Assembler.FcselFloat(result.Operand, first.Operand, cond, second.Operand, size ^ 2u);

        InstEmitNeonCommon.InsertResult(context, result.Operand, rd, singleRegs);
    }
}
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
|
||||
{
|
||||
/// <summary>
/// Arm64 code emitters for the VFP round to integral instructions (VRINT family).
/// Every emitter forwards to the shared scalar unary helper with the matching FRINT assembler routine.
/// </summary>
static class InstEmitVfpRound
{
    /// <summary>Emits VRINTA using FRINTA.</summary>
    public static void Vrinta(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintaFloat);

    /// <summary>Emits VRINTM using FRINTM.</summary>
    public static void Vrintm(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintmFloat);

    /// <summary>Emits VRINTN using FRINTN.</summary>
    public static void Vrintn(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintnFloat);

    /// <summary>Emits VRINTP using FRINTP.</summary>
    public static void Vrintp(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintpFloat);

    /// <summary>Emits VRINTR using FRINTI.</summary>
    public static void Vrintr(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintiFloat);

    /// <summary>Emits VRINTX using FRINTX.</summary>
    public static void Vrintx(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintxFloat);

    /// <summary>Emits VRINTZ using FRINTZ.</summary>
    public static void Vrintz(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintzFloat);
}
|
||||
}
|
29
src/Ryujinx.Cpu/LightningJit/Arm64/A64Compiler.cs
Normal file
29
src/Ryujinx.Cpu/LightningJit/Arm64/A64Compiler.cs
Normal file
@ -0,0 +1,29 @@
|
||||
using ARMeilleure.Common;
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64
|
||||
{
|
||||
/// <summary>
/// Entry point that selects the compiler backend for the requested target architecture.
/// </summary>
static class A64Compiler
{
    /// <summary>
    /// Compiles the function at the specified address for the specified target architecture.
    /// </summary>
    /// <param name="cpuPreset">CPU preset forwarded to the backend compiler</param>
    /// <param name="memoryManager">Memory manager forwarded to the backend compiler</param>
    /// <param name="address">Address of the function to compile</param>
    /// <param name="funcTable">Function address table forwarded to the backend compiler</param>
    /// <param name="dispatchStubPtr">Pointer to the dispatch stub, forwarded to the backend compiler</param>
    /// <param name="targetArch">Architecture to generate code for, only <see cref="Architecture.Arm64"/> is supported</param>
    /// <returns>The compiled function</returns>
    /// <exception cref="PlatformNotSupportedException"><paramref name="targetArch"/> is not <see cref="Architecture.Arm64"/></exception>
    public static CompiledFunction Compile(
        CpuPreset cpuPreset,
        IMemoryManager memoryManager,
        ulong address,
        AddressTable<ulong> funcTable,
        IntPtr dispatchStubPtr,
        Architecture targetArch)
    {
        if (targetArch != Architecture.Arm64)
        {
            throw new PlatformNotSupportedException();
        }

        return Compiler.Compile(cpuPreset, memoryManager, address, funcTable, dispatchStubPtr);
    }
}
|
||||
}
|
138
src/Ryujinx.Cpu/LightningJit/Arm64/Block.cs
Normal file
138
src/Ryujinx.Cpu/LightningJit/Arm64/Block.cs
Normal file
@ -0,0 +1,138 @@
|
||||
using Ryujinx.Cpu.LightningJit.Graph;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64
|
||||
{
|
||||
/// <summary>
/// A contiguous run of instructions, with links to the blocks that precede and follow it.
/// </summary>
class Block : IBlock
{
    /// <summary>
    /// Index of the block, assigned with <see cref="Number"/>.
    /// </summary>
    public int Index { get; private set; }

    private readonly List<Block> _predecessors = new();
    private readonly List<Block> _successors = new();

    public int PredecessorsCount => _predecessors.Count;
    public int SuccessorsCount => _successors.Count;

    public readonly ulong Address;
    public readonly ulong EndAddress;
    public readonly List<InstInfo> Instructions;
    public readonly bool EndsWithBranch;
    public readonly bool IsTruncated;
    public readonly bool IsLoopEnd;

    /// <summary>
    /// Creates a new block.
    /// </summary>
    /// <param name="address">Address of the first instruction</param>
    /// <param name="endAddress">Address right after the last instruction</param>
    /// <param name="instructions">Instructions of the block, one for every 4 bytes of the address range</param>
    /// <param name="endsWithBranch">Stored in <see cref="EndsWithBranch"/></param>
    /// <param name="isTruncated">Stored in <see cref="IsTruncated"/></param>
    /// <param name="isLoopEnd">Stored in <see cref="IsLoopEnd"/></param>
    public Block(ulong address, ulong endAddress, List<InstInfo> instructions, bool endsWithBranch, bool isTruncated, bool isLoopEnd)
    {
        Debug.Assert((int)((endAddress - address) / 4) == instructions.Count);

        Address = address;
        EndAddress = endAddress;
        Instructions = instructions;
        EndsWithBranch = endsWithBranch;
        IsTruncated = isTruncated;
        IsLoopEnd = isLoopEnd;
    }

    /// <summary>
    /// Splits the block in two at the specified address.
    /// </summary>
    /// <param name="address">Address of the first instruction of the right block</param>
    /// <returns>
    /// The block before the address, with all flags cleared, and the block starting at the address,
    /// which inherits the flags of this block
    /// </returns>
    public (Block, Block) SplitAtAddress(ulong address)
    {
        int leftCount = (int)((address - Address) / 4);
        int rightCount = Instructions.Count - leftCount;

        // Technically those are valid, but we don't want to create empty blocks.
        Debug.Assert(leftCount != 0);
        Debug.Assert(rightCount != 0);

        Block left = new(
            Address,
            address,
            Instructions.GetRange(0, leftCount),
            endsWithBranch: false,
            isTruncated: false,
            isLoopEnd: false);

        Block right = new(
            address,
            EndAddress,
            Instructions.GetRange(leftCount, rightCount),
            EndsWithBranch,
            IsTruncated,
            IsLoopEnd);

        return (left, right);
    }

    /// <summary>
    /// Sets the index of the block.
    /// </summary>
    /// <param name="index">New index</param>
    public void Number(int index)
    {
        Index = index;
    }

    /// <summary>
    /// Adds a successor to the block, unless it is already one.
    /// </summary>
    /// <param name="block">Successor block</param>
    public void AddSuccessor(Block block)
    {
        if (_successors.Contains(block))
        {
            return;
        }

        _successors.Add(block);
    }

    /// <summary>
    /// Adds a predecessor to the block, unless it is already one.
    /// </summary>
    /// <param name="block">Predecessor block</param>
    public void AddPredecessor(Block block)
    {
        if (_predecessors.Contains(block))
        {
            return;
        }

        _predecessors.Add(block);
    }

    public IBlock GetSuccessor(int index) => _successors[index];

    public IBlock GetPredecessor(int index) => _predecessors[index];

    /// <summary>
    /// Combines the register use of all the instructions on the block.
    /// </summary>
    /// <returns>
    /// Register use where the read mask only has the registers read before any write
    /// to them inside the block, and the write mask has all registers written on the block
    /// </returns>
    public RegisterUse ComputeUseMasks()
    {
        int count = Instructions.Count;

        if (count == 0)
        {
            return new RegisterUse(0u, 0u, 0u, 0u, 0u, 0u);
        }

        RegisterUse combined = Instructions[0].RegisterUse;

        for (int i = 1; i < count; i++)
        {
            RegisterUse next = Instructions[i].RegisterUse;

            // A read only counts if the register was not written by an earlier instruction of the block.
            combined = new RegisterUse(combined.Read | (next.Read & ~combined.Write), combined.Write | next.Write);
        }

        return combined;
    }

    /// <summary>
    /// Checks if the block is not truncated and its last instruction is a call or exception.
    /// </summary>
    public bool EndsWithContextLoad() => !IsTruncated && EndsWithContextStoreAndLoad();

    /// <summary>
    /// Checks if the last instruction of the block is a call or exception.
    /// </summary>
    public bool EndsWithContextStore() => EndsWithContextStoreAndLoad();

    private bool EndsWithContextStoreAndLoad()
    {
        if (Instructions.Count == 0)
        {
            return false;
        }

        InstName last = Instructions[^1].Name;

        return last.IsCall() || last.IsException();
    }
}
|
||||
}
|
20
src/Ryujinx.Cpu/LightningJit/Arm64/ImmUtils.cs
Normal file
20
src/Ryujinx.Cpu/LightningJit/Arm64/ImmUtils.cs
Normal file
@ -0,0 +1,20 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64
|
||||
{
|
||||
/// <summary>
/// Helpers to extract signed immediates, scaled by 4, from instruction encodings.
/// </summary>
static class ImmUtils
{
    /// <summary>
    /// Extracts the signed 14-bit immediate at bits 5 to 18 of the encoding, multiplied by 4.
    /// </summary>
    /// <param name="encoding">Instruction encoding</param>
    /// <returns>Sign-extended immediate multiplied by 4</returns>
    public static int ExtractSImm14Times4(uint encoding)
    {
        return SignExtendTimes4(encoding >> 5, 14);
    }

    /// <summary>
    /// Extracts the signed 19-bit immediate at bits 5 to 23 of the encoding, multiplied by 4.
    /// </summary>
    /// <param name="encoding">Instruction encoding</param>
    /// <returns>Sign-extended immediate multiplied by 4</returns>
    public static int ExtractSImm19Times4(uint encoding)
    {
        return SignExtendTimes4(encoding >> 5, 19);
    }

    /// <summary>
    /// Extracts the signed 26-bit immediate at bits 0 to 25 of the encoding, multiplied by 4.
    /// </summary>
    /// <param name="encoding">Instruction encoding</param>
    /// <returns>Sign-extended immediate multiplied by 4</returns>
    public static int ExtractSImm26Times4(uint encoding)
    {
        return SignExtendTimes4(encoding, 26);
    }

    /// <summary>
    /// Sign-extends the low <paramref name="bits"/> bits of <paramref name="value"/> and multiplies the result by 4.
    /// </summary>
    private static int SignExtendTimes4(uint value, int bits)
    {
        int unusedBits = 32 - bits;

        // Move the field to the top of the word so that the arithmetic shift replicates its sign bit.
        int signExtended = (int)(value << unusedBits) >> unusedBits;

        return signExtended << 2;
    }
}
|
||||
}
|
108
src/Ryujinx.Cpu/LightningJit/Arm64/InstFlags.cs
Normal file
108
src/Ryujinx.Cpu/LightningJit/Arm64/InstFlags.cs
Normal file
@ -0,0 +1,108 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64
|
||||
{
|
||||
/// <summary>
/// Flags describing the operands and properties of an instruction.
/// The first group defines the individual bits, the second group defines the named combinations.
/// </summary>
[Flags]
enum InstFlags
{
    None = 0,

    // Individual flags. Some of them include the bit of a more general flag,
    // for example RdSP includes Rd and FpSimdFromGpr includes FpSimd.
    Rd = 1 << 0,
    RdSP = Rd | (1 << 1),
    ReadRd = 1 << 2,
    Rt = 1 << 3,
    RtSeq = Rt | (1 << 4),
    ReadRt = 1 << 5,
    Rt2 = 1 << 6,
    Rn = 1 << 7,
    RnSeq = Rn | (1 << 8),
    RnSP = Rn | (1 << 9),
    Rm = 1 << 10,
    Rs = 1 << 11,
    Ra = 1 << 12,
    Nzcv = 1 << 13,
    C = 1 << 14,
    S = 1 << 15,
    Qc = 1 << 16,
    FpSimd = 1 << 17,
    FpSimdFromGpr = FpSimd | (1 << 18),
    FpSimdToGpr = FpSimd | (1 << 19),
    FpSimdFromToGpr = FpSimdFromGpr | FpSimdToGpr,
    Memory = 1 << 20,
    MemWBack = 1 << 21,

    // Combinations, each name is the concatenation of the flags it contains.
    RdFpSimd = Rd | FpSimd,
    RdReadRd = Rd | ReadRd,
    RdReadRdRn = RdReadRd | Rn,
    RdReadRdRnFpSimd = RdReadRdRn | FpSimd,
    RdReadRdRnFpSimdFromGpr = RdReadRdRn | FpSimdFromGpr,
    RdReadRdRnQcFpSimd = RdReadRdRn | Qc | FpSimd,
    RdReadRdRnRmFpSimd = RdReadRdRn | Rm | FpSimd,
    RdReadRdRnRmQcFpSimd = RdReadRdRn | Rm | Qc | FpSimd,
    RdRn = Rd | Rn,
    RdRnFpSimd = RdRn | FpSimd,
    RdRnFpSimdFromGpr = RdRn | FpSimdFromGpr,
    RdRnFpSimdToGpr = RdRn | FpSimdToGpr,
    RdRnQcFpSimd = RdRn | Qc | FpSimd,
    RdRnRm = RdRn | Rm,
    RdRnRmC = RdRnRm | C,
    RdRnRmCS = RdRnRm | C | S,
    RdRnRmFpSimd = RdRnRm | FpSimd,
    RdRnRmNzcv = RdRnRm | Nzcv,
    RdRnRmNzcvFpSimd = RdRnRm | Nzcv | FpSimd,
    RdRnRmQcFpSimd = RdRnRm | Qc | FpSimd,
    RdRnRmRa = RdRnRm | Ra,
    RdRnRmRaFpSimd = RdRnRm | Ra | FpSimd,
    RdRnRmS = RdRnRm | S,
    RdRnRsS = RdRn | Rs | S,
    RdRnS = RdRn | S,
    RdRnSeqRmFpSimd = Rd | RnSeq | Rm | FpSimd,
    RdRnSFpSimd = RdRn | S | FpSimd,
    RdRnSFpSimdFromToGpr = RdRn | S | FpSimdFromToGpr,
    RdRnSP = Rd | RnSP,
    RdRnSPRmS = RdRnSP | Rm | S,
    RdRnSPS = RdRnSP | S,
    RdSPRn = RdSP | Rn,
    RdSPRnSP = RdSP | RnSP,
    RdSPRnSPRm = RdSP | RnSP | Rm,
    RnC = Rn | C,
    RnNzcvS = Rn | Nzcv | S,
    RnRm = Rn | Rm,
    RnRmNzcvS = RnRm | Nzcv | S,
    RnRmNzcvSFpSimd = RnRm | Nzcv | S | FpSimd,
    RnRmSFpSimd = RnRm | S | FpSimd,
    RnSPRm = RnSP | Rm,
    RtFpSimd = Rt | FpSimd,
    RtReadRt = Rt | ReadRt,
    RtReadRtRnSP = RtReadRt | RnSP,
    RtReadRtRnSPFpSimd = RtReadRt | RnSP | FpSimd,
    RtReadRtRnSPFpSimdMemWBack = RtReadRt | RnSP | FpSimd | MemWBack,
    RtReadRtRnSPMemWBack = RtReadRt | RnSP | MemWBack,
    RtReadRtRnSPRm = RtReadRt | RnSP | Rm,
    RtReadRtRnSPRmFpSimd = RtReadRt | RnSP | Rm | FpSimd,
    RtReadRtRnSPRmFpSimdMemWBack = RtReadRt | RnSP | Rm | FpSimd | MemWBack,
    RtReadRtRnSPRs = RtReadRt | RnSP | Rs,
    RtReadRtRnSPRsS = RtReadRt | RnSP | Rs | S,
    RtReadRtRt2RnSP = RtReadRt | Rt2 | RnSP,
    RtReadRtRt2RnSPFpSimd = RtReadRt | Rt2 | RnSP | FpSimd,
    RtReadRtRt2RnSPFpSimdMemWBack = RtReadRt | Rt2 | RnSP | FpSimd | MemWBack,
    RtReadRtRt2RnSPMemWBack = RtReadRt | Rt2 | RnSP | MemWBack,
    RtReadRtRt2RnSPRs = RtReadRt | Rt2 | RnSP | Rs,
    RtReadRtRt2RnSPS = RtReadRt | Rt2 | RnSP | S,
    RtRnSP = Rt | RnSP,
    RtRnSPFpSimd = RtRnSP | FpSimd,
    RtRnSPFpSimdMemWBack = RtRnSP | FpSimd | MemWBack,
    RtRnSPMemWBack = RtRnSP | MemWBack,
    RtRnSPRm = RtRnSP | Rm,
    RtRnSPRmFpSimd = RtRnSP | Rm | FpSimd,
    RtRnSPRmFpSimdMemWBack = RtRnSP | Rm | FpSimd | MemWBack,
    RtRnSPRs = RtRnSP | Rs,
    RtRt2RnSP = Rt | Rt2 | RnSP,
    RtRt2RnSPFpSimd = RtRt2RnSP | FpSimd,
    RtRt2RnSPFpSimdMemWBack = RtRt2RnSP | FpSimd | MemWBack,
    RtRt2RnSPMemWBack = RtRt2RnSP | MemWBack,
    RtSeqReadRtRnSPFpSimd = RtSeq | ReadRt | RnSP | FpSimd,
    RtSeqReadRtRnSPRmFpSimdMemWBack = RtSeq | ReadRt | RnSP | Rm | FpSimd | MemWBack,
    RtSeqRnSPFpSimd = RtSeq | RnSP | FpSimd,
    RtSeqRnSPRmFpSimdMemWBack = RtSeq | RnSP | Rm | FpSimd | MemWBack,
}
|
||||
}
|
22
src/Ryujinx.Cpu/LightningJit/Arm64/InstInfo.cs
Normal file
22
src/Ryujinx.Cpu/LightningJit/Arm64/InstInfo.cs
Normal file
@ -0,0 +1,22 @@
|
||||
using Ryujinx.Cpu.LightningJit.Graph;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64
|
||||
{
|
||||
/// <summary>
/// Immutable description of a single decoded instruction.
/// </summary>
readonly struct InstInfo
{
    public readonly uint Encoding;
    public readonly InstName Name;
    public readonly InstFlags Flags;
    public readonly AddressForm AddressForm;
    public readonly RegisterUse RegisterUse;

    /// <summary>
    /// Creates a new instruction description, every argument is stored on the field of the same name.
    /// </summary>
    /// <param name="encoding">Instruction encoding</param>
    /// <param name="name">Instruction name</param>
    /// <param name="flags">Instruction flags</param>
    /// <param name="addressForm">Address form of the instruction</param>
    /// <param name="registerUse">Registers used by the instruction</param>
    public InstInfo(uint encoding, InstName name, InstFlags flags, AddressForm addressForm, in RegisterUse registerUse)
    {
        (Encoding, Name, Flags) = (encoding, name, flags);
        (AddressForm, RegisterUse) = (addressForm, registerUse);
    }
}
|
||||
}
|
1134
src/Ryujinx.Cpu/LightningJit/Arm64/InstName.cs
Normal file
1134
src/Ryujinx.Cpu/LightningJit/Arm64/InstName.cs
Normal file
File diff suppressed because it is too large
Load Diff
64
src/Ryujinx.Cpu/LightningJit/Arm64/MultiBlock.cs
Normal file
64
src/Ryujinx.Cpu/LightningJit/Arm64/MultiBlock.cs
Normal file
@ -0,0 +1,64 @@
|
||||
using Ryujinx.Cpu.LightningJit.Graph;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64
|
||||
{
|
||||
/// <summary>
/// List of blocks, together with the register use information computed for them.
/// </summary>
class MultiBlock : IBlockList
{
    public readonly List<Block> Blocks;
    public readonly RegisterMask[] ReadMasks;
    public readonly RegisterMask[] WriteMasks;
    public readonly RegisterMask GlobalUseMask;
    public readonly bool HasHostCall;
    public readonly bool HasMemoryInstruction;
    public readonly bool IsTruncated;

    public int Count => Blocks.Count;

    public IBlock this[int index] => Blocks[index];

    /// <summary>
    /// Creates a new list of blocks.
    /// </summary>
    /// <param name="blocks">Blocks of the list, the truncated state is taken from the last one</param>
    /// <param name="globalUseMask">Stored in <see cref="GlobalUseMask"/></param>
    /// <param name="hasHostCall">Stored in <see cref="HasHostCall"/></param>
    /// <param name="hasMemoryInstruction">Stored in <see cref="HasMemoryInstruction"/></param>
    public MultiBlock(List<Block> blocks, RegisterMask globalUseMask, bool hasHostCall, bool hasMemoryInstruction)
    {
        // The blocks must be set before the data flow analysis runs, as it receives this instance.
        Blocks = blocks;

        var (readMasks, writeMasks) = DataFlow.GetGlobalUses(this);

        ReadMasks = readMasks;
        WriteMasks = writeMasks;

        GlobalUseMask = globalUseMask;
        HasHostCall = hasHostCall;
        HasMemoryInstruction = hasMemoryInstruction;
        IsTruncated = blocks[^1].IsTruncated;
    }

    /// <summary>
    /// Writes the index, predecessors, successors, general purpose register masks
    /// and instructions of every block to the console.
    /// </summary>
    public void PrintDebugInfo()
    {
        foreach (Block block in Blocks)
        {
            Console.WriteLine($"bb {block.Index}");

            List<int> predecessors = new();
            List<int> successors = new();

            for (int i = 0; i < block.PredecessorsCount; i++)
            {
                predecessors.Add(block.GetPredecessor(i).Index);
            }

            for (int i = 0; i < block.SuccessorsCount; i++)
            {
                successors.Add(block.GetSuccessor(i).Index);
            }

            Console.WriteLine($" predecessors: {string.Join(' ', predecessors)}");
            Console.WriteLine($" successors: {string.Join(' ', successors)}");

            // The read mask line shows the global mask followed by the mask computed from the block alone.
            Console.WriteLine($" gpr read mask: 0x{ReadMasks[block.Index].GprMask:X} 0x{block.ComputeUseMasks().Read.GprMask:X}");
            Console.WriteLine($" gpr write mask: 0x{WriteMasks[block.Index].GprMask:X}");

            int position = 0;

            foreach (InstInfo instruction in block.Instructions)
            {
                Console.WriteLine($" {position} 0x{instruction.Encoding:X8} {instruction.Name}");

                position++;
            }
        }
    }
}
|
||||
}
|
154
src/Ryujinx.Cpu/LightningJit/Arm64/RegisterAllocator.cs
Normal file
154
src/Ryujinx.Cpu/LightningJit/Arm64/RegisterAllocator.cs
Normal file
@ -0,0 +1,154 @@
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64
|
||||
{
|
||||
class RegisterAllocator
|
||||
{
|
||||
public const int MaxTemps = 1;
|
||||
public const int MaxTempsInclFixed = MaxTemps + 2;
|
||||
|
||||
private uint _gprMask;
|
||||
private readonly uint _fpSimdMask;
|
||||
private readonly uint _pStateMask;
|
||||
|
||||
private uint _tempGprsMask;
|
||||
|
||||
private readonly int[] _registerMap;
|
||||
|
||||
public int FixedContextRegister { get; }
|
||||
public int FixedPageTableRegister { get; }
|
||||
|
||||
public uint AllGprMask => (_gprMask & ~RegisterUtils.ReservedRegsMask) | _tempGprsMask;
|
||||
public uint AllFpSimdMask => _fpSimdMask;
|
||||
public uint AllPStateMask => _pStateMask;
|
||||
|
||||
public RegisterAllocator(uint gprMask, uint fpSimdMask, uint pStateMask, bool hasHostCall)
|
||||
{
|
||||
_gprMask = gprMask;
|
||||
_fpSimdMask = fpSimdMask;
|
||||
_pStateMask = pStateMask;
|
||||
|
||||
if (hasHostCall)
|
||||
{
|
||||
// If the function has calls, we can avoid the need to spill those registers across
|
||||
// calls by puting them on callee saved registers.
|
||||
|
||||
FixedContextRegister = AllocateAndMarkTempGprRegisterWithPreferencing();
|
||||
FixedPageTableRegister = AllocateAndMarkTempGprRegisterWithPreferencing();
|
||||
}
|
||||
else
|
||||
{
|
||||
FixedContextRegister = AllocateAndMarkTempGprRegister();
|
||||
FixedPageTableRegister = AllocateAndMarkTempGprRegister();
|
||||
}
|
||||
|
||||
_tempGprsMask = (1u << FixedContextRegister) | (1u << FixedPageTableRegister);
|
||||
|
||||
_registerMap = new int[32];
|
||||
|
||||
for (int index = 0; index < _registerMap.Length; index++)
|
||||
{
|
||||
_registerMap[index] = index;
|
||||
}
|
||||
|
||||
BuildRegisterMap(_registerMap);
|
||||
|
||||
Span<int> tempRegisters = stackalloc int[MaxTemps];
|
||||
|
||||
for (int index = 0; index < tempRegisters.Length; index++)
|
||||
{
|
||||
tempRegisters[index] = AllocateAndMarkTempGprRegister();
|
||||
}
|
||||
|
||||
for (int index = 0; index < tempRegisters.Length; index++)
|
||||
{
|
||||
FreeTempGprRegister(tempRegisters[index]);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Assigns a replacement register to each reserved register that is set on the in-use mask.
/// </summary>
/// <param name="map">Map indexed by the original register index, receives the replacement indices</param>
private void BuildRegisterMap(Span<int> map)
{
    // "pending &= pending - 1" clears the lowest set bit, which is the one that was just handled.
    for (uint pending = _gprMask & RegisterUtils.ReservedRegsMask; pending != 0; pending &= pending - 1)
    {
        int reservedIndex = BitOperations.TrailingZeroCount(pending);
        int replacementIndex = AllocateAndMarkTempGprRegister();

        map[reservedIndex] = replacementIndex;
        _tempGprsMask |= 1u << replacementIndex;
    }
}
|
||||
|
||||
/// <summary>
/// Looks up the register that should be used in place of the specified register.
/// </summary>
/// <param name="index">Original register index</param>
/// <returns>Entry of the register map for <paramref name="index"/></returns>
public int RemapReservedGprRegister(int index) => _registerMap[index];
|
||||
|
||||
/// <summary>
/// Allocates a temporary general purpose register and records it on the temporaries mask.
/// </summary>
/// <returns>Index of the allocated register</returns>
private int AllocateAndMarkTempGprRegister()
{
    int allocated = AllocateTempGprRegister();

    _tempGprsMask |= 1u << allocated;

    return allocated;
}
|
||||
|
||||
/// <summary>
/// Allocates a temporary general purpose register using the preferencing allocation,
/// and records it on the temporaries mask.
/// </summary>
/// <returns>Index of the allocated register</returns>
private int AllocateAndMarkTempGprRegisterWithPreferencing()
{
    int allocated = AllocateTempRegisterWithPreferencing();

    _tempGprsMask |= 1u << allocated;

    return allocated;
}
|
||||
|
||||
/// <summary>
/// Allocates a free general purpose register, marking it as in use.
/// </summary>
/// <returns>Index of the allocated register</returns>
public int AllocateTempGprRegister() => AllocateTempRegister(ref _gprMask);
|
||||
|
||||
/// <summary>
/// Releases a general purpose register, clearing its in-use bit.
/// </summary>
/// <param name="index">Index of the register to release</param>
public void FreeTempGprRegister(int index) => FreeTempRegister(ref _gprMask, index);
|
||||
|
||||
/// <summary>
/// Allocates a free general purpose register, preferring callee saved registers.
/// If no usable callee saved register is free, any other free register is allocated.
/// </summary>
/// <returns>Index of the allocated register</returns>
/// <exception cref="InvalidOperationException">There are no free registers left</exception>
private int AllocateTempRegisterWithPreferencing()
{
    // Reserved registers are removed from the candidate set before picking one.
    // Looking only at the lowest free callee saved register, and giving up when that one is reserved,
    // would discard the preference even when other callee saved registers are still free.
    uint candidates = ~_gprMask & AbiConstants.GprCalleeSavedRegsMask & ~RegisterUtils.ReservedRegsMask;

    if (candidates != 0)
    {
        int calleeSaved = BitOperations.TrailingZeroCount(candidates);

        _gprMask |= 1u << calleeSaved;

        return calleeSaved;
    }

    return AllocateTempRegister(ref _gprMask);
}
|
||||
|
||||
/// <summary>
/// Allocates the lowest free register that is not reserved, and marks it as in use.
/// </summary>
/// <param name="mask">Mask of registers in use, updated with the allocated register</param>
/// <returns>Index of the allocated register</returns>
/// <exception cref="InvalidOperationException">There are no free registers left</exception>
private static int AllocateTempRegister(ref uint mask)
{
    uint available = ~(mask | RegisterUtils.ReservedRegsMask);

    if (available == 0)
    {
        throw new InvalidOperationException("No free registers.");
    }

    int allocated = BitOperations.TrailingZeroCount(available);

    mask |= 1u << allocated;

    return allocated;
}
|
||||
|
||||
/// <summary>
/// Clears the in-use bit of a register.
/// </summary>
/// <param name="mask">Mask of registers in use, updated with the released register</param>
/// <param name="index">Index of the register to release</param>
private static void FreeTempRegister(ref uint mask, int index)
{
    uint registerBit = 1u << index;

    mask &= ~registerBit;
}
|
||||
}
|
||||
}
|
495
src/Ryujinx.Cpu/LightningJit/Arm64/RegisterUtils.cs
Normal file
495
src/Ryujinx.Cpu/LightningJit/Arm64/RegisterUtils.cs
Normal file
@ -0,0 +1,495 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64
|
||||
{
|
||||
static class RegisterUtils
|
||||
{
|
||||
private const int RdRtBit = 0;
|
||||
private const int RnBit = 5;
|
||||
private const int RmRsBit = 16;
|
||||
private const int RaRt2Bit = 10;
|
||||
|
||||
// Some of these registers have specific roles and can't be used as general purpose registers.
|
||||
// X18 - Reserved for platform specific usage.
|
||||
// X29 - Frame pointer.
|
||||
// X30 - Return address.
|
||||
// X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
|
||||
public const uint ReservedRegsMask = (1u << 18) | (1u << 29) | (1u << 30) | (1u << 31);
|
||||
|
||||
public const int LrIndex = 30;
|
||||
public const int SpIndex = 31;
|
||||
public const int ZrIndex = 31;
|
||||
public const int SpecialZrIndex = 32;
|
||||
|
||||
/// <summary>
/// Rewrites the general purpose register fields of an instruction using the register map of the allocator.
/// </summary>
/// <param name="regAlloc">Register allocator that provides the register map</param>
/// <param name="flags">Flags of the instruction, indicating which register fields it has</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Encoding with the general purpose register fields replaced</returns>
public static uint RemapRegisters(RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    bool isFpSimd = flags.HasFlag(InstFlags.FpSimd);
    bool isMemory = flags.HasFlag(InstFlags.Memory);

    // On FP/SIMD instructions, Rd is only a general purpose register on FP to GPR moves.
    if (flags.HasFlag(InstFlags.Rd) && (!isFpSimd || IsFpToGpr(flags, encoding)))
    {
        encoding = ReplaceGprRegister(regAlloc, encoding, RdRtBit, flags.HasFlag(InstFlags.RdSP));
    }

    // On FP/SIMD instructions, Rn is only a general purpose register on GPR to FP moves and memory accesses.
    if (flags.HasFlag(InstFlags.Rn) && (!isFpSimd || isMemory || IsFpFromGpr(flags, encoding)))
    {
        encoding = ReplaceGprRegister(regAlloc, encoding, RnBit, flags.HasFlag(InstFlags.RnSP));
    }

    if (isFpSimd)
    {
        // The remaining fields are FP/SIMD registers, with the exception of Rm on memory accesses.
        if (isMemory && flags.HasFlag(InstFlags.Rm))
        {
            encoding = ReplaceGprRegister(regAlloc, encoding, RmRsBit);
        }

        return encoding;
    }

    if (flags.HasFlag(InstFlags.Rm) || flags.HasFlag(InstFlags.Rs))
    {
        encoding = ReplaceGprRegister(regAlloc, encoding, RmRsBit);
    }

    if (flags.HasFlag(InstFlags.Ra) || flags.HasFlag(InstFlags.Rt2))
    {
        encoding = ReplaceGprRegister(regAlloc, encoding, RaRt2Bit);
    }

    if (flags.HasFlag(InstFlags.Rt))
    {
        encoding = ReplaceGprRegister(regAlloc, encoding, RdRtBit);
    }

    return encoding;
}
|
||||
|
||||
/// <summary>
/// Replaces the Rt register field of an instruction.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="newIndex">New register index</param>
/// <returns>Encoding with the Rt field replaced</returns>
public static uint ReplaceRt(uint encoding, int newIndex) => ReplaceRegister(encoding, newIndex, RdRtBit);
|
||||
|
||||
/// <summary>
/// Replaces the Rn register field of an instruction.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="newIndex">New register index</param>
/// <returns>Encoding with the Rn field replaced</returns>
public static uint ReplaceRn(uint encoding, int newIndex) => ReplaceRegister(encoding, newIndex, RnBit);
|
||||
|
||||
/// <summary>
/// Replaces a 5-bit register field of an instruction.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="newIndex">New register index</param>
/// <param name="bit">Position of the lowest bit of the register field</param>
/// <returns>Encoding with the register field replaced</returns>
private static uint ReplaceRegister(uint encoding, int newIndex, int bit)
{
    uint withoutField = encoding & ~(0x1fu << bit);

    return withoutField | ((uint)newIndex << bit);
}
|
||||
|
||||
/// <summary>
/// Replaces a general purpose register field of an instruction with the register
/// that the allocator maps it to.
/// </summary>
/// <param name="regAlloc">Register allocator that provides the register map</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="bit">Position of the lowest bit of the register field</param>
/// <param name="hasSP">True if index 31 on this field means SP rather than ZR</param>
/// <returns>Encoding with the register field replaced</returns>
private static uint ReplaceGprRegister(RegisterAllocator regAlloc, uint encoding, int bit, bool hasSP = false)
{
    uint fieldMask = 0x1fu << bit;
    int originalIndex = (int)((encoding & fieldMask) >> bit);

    // The zero register is left as is, only index 31 with SP meaning goes through the map.
    if (!hasSP && originalIndex == ZrIndex)
    {
        return encoding;
    }

    int mappedIndex = regAlloc.RemapReservedGprRegister(originalIndex);

    return (encoding & ~fieldMask) | ((uint)mappedIndex << bit);
}
|
||||
|
||||
/// <summary>
/// Computes the masks of registers that are read by an instruction.
/// </summary>
/// <param name="name">Instruction name, used to find the length of Rt register sequences</param>
/// <param name="flags">Flags of the instruction, indicating which register fields it has</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Mask of general purpose registers read, and mask of FP/SIMD registers read</returns>
public static (uint, uint) PopulateReadMasks(InstName name, InstFlags flags, uint encoding)
{
    uint gprMask = 0;
    uint fpSimdMask = 0;

    if (flags.HasFlag(InstFlags.FpSimd))
    {
        // NOTE(review): The Extract* methods report index 31 as SpecialZrIndex unless the matching SP flag is set,
        // and MaskFromIndex gives an empty mask for it, so a FP/SIMD operand encoded as 31 adds no bit here.
        // Confirm that this is intended for FP/SIMD registers.

        if (flags.HasFlag(InstFlags.Rd) && flags.HasFlag(InstFlags.ReadRd))
        {
            uint mask = MaskFromIndex(ExtractRd(flags, encoding));

            if (IsFpToGpr(flags, encoding))
            {
                gprMask |= mask;
            }
            else
            {
                fpSimdMask |= mask;
            }
        }

        if (flags.HasFlag(InstFlags.Rn))
        {
            uint mask = MaskFromIndex(ExtractRn(flags, encoding));

            if (flags.HasFlag(InstFlags.RnSeq))
            {
                int count = GetRnSequenceCount(encoding);

                // Register sequences wrap around (the register after 31 is 0),
                // so the mask is rotated rather than shifted.
                for (int index = 0; index < count; index++, mask = (mask << 1) | (mask >> 31))
                {
                    fpSimdMask |= mask;
                }
            }
            else if (IsFpFromGpr(flags, encoding) || flags.HasFlag(InstFlags.Memory))
            {
                gprMask |= mask;
            }
            else
            {
                fpSimdMask |= mask;
            }
        }

        if (flags.HasFlag(InstFlags.Rm))
        {
            uint mask = MaskFromIndex(ExtractRm(flags, encoding));

            // On memory accesses, Rm is a general purpose register used for the address.
            if (flags.HasFlag(InstFlags.Memory))
            {
                gprMask |= mask;
            }
            else
            {
                fpSimdMask |= mask;
            }
        }

        if (flags.HasFlag(InstFlags.Ra))
        {
            fpSimdMask |= MaskFromIndex(ExtractRa(flags, encoding));
        }

        if (flags.HasFlag(InstFlags.ReadRt))
        {
            if (flags.HasFlag(InstFlags.Rt))
            {
                uint mask = MaskFromIndex(ExtractRt(flags, encoding));

                if (flags.HasFlag(InstFlags.RtSeq))
                {
                    int count = GetRtSequenceCount(name, encoding);

                    // Same wrap around as the Rn sequence above.
                    for (int index = 0; index < count; index++, mask = (mask << 1) | (mask >> 31))
                    {
                        fpSimdMask |= mask;
                    }
                }
                else
                {
                    fpSimdMask |= mask;
                }
            }

            if (flags.HasFlag(InstFlags.Rt2))
            {
                fpSimdMask |= MaskFromIndex(ExtractRt2(flags, encoding));
            }
        }
    }
    else
    {
        if (flags.HasFlag(InstFlags.Rd) && flags.HasFlag(InstFlags.ReadRd))
        {
            gprMask |= MaskFromIndex(ExtractRd(flags, encoding));
        }

        if (flags.HasFlag(InstFlags.Rn))
        {
            gprMask |= MaskFromIndex(ExtractRn(flags, encoding));
        }

        if (flags.HasFlag(InstFlags.Rm))
        {
            gprMask |= MaskFromIndex(ExtractRm(flags, encoding));
        }

        if (flags.HasFlag(InstFlags.Ra))
        {
            gprMask |= MaskFromIndex(ExtractRa(flags, encoding));
        }

        if (flags.HasFlag(InstFlags.ReadRt))
        {
            if (flags.HasFlag(InstFlags.Rt))
            {
                gprMask |= MaskFromIndex(ExtractRt(flags, encoding));
            }

            if (flags.HasFlag(InstFlags.Rt2))
            {
                gprMask |= MaskFromIndex(ExtractRt2(flags, encoding));
            }
        }
    }

    return (gprMask, fpSimdMask);
}
|
||||
|
||||
/// <summary>
/// Computes the masks of registers that are written by an instruction.
/// </summary>
/// <param name="name">Instruction name, used to find the length of Rt register sequences</param>
/// <param name="flags">Flags of the instruction, indicating which register fields it has</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Mask of general purpose registers written, and mask of FP/SIMD registers written</returns>
public static (uint, uint) PopulateWriteMasks(InstName name, InstFlags flags, uint encoding)
{
    uint gprMask = 0;
    uint fpSimdMask = 0;

    // Memory accesses with write back update the base register.
    if (flags.HasFlag(InstFlags.MemWBack))
    {
        gprMask |= MaskFromIndex(ExtractRn(flags, encoding));
    }

    if (flags.HasFlag(InstFlags.FpSimd))
    {
        // NOTE(review): The Extract* methods report index 31 as SpecialZrIndex unless the matching SP flag is set,
        // and MaskFromIndex gives an empty mask for it, so a FP/SIMD operand encoded as 31 adds no bit here.
        // Confirm that this is intended for FP/SIMD registers.

        if (flags.HasFlag(InstFlags.Rd))
        {
            uint mask = MaskFromIndex(ExtractRd(flags, encoding));

            if (IsFpToGpr(flags, encoding))
            {
                gprMask |= mask;
            }
            else
            {
                fpSimdMask |= mask;
            }
        }

        // Rt and Rt2 are destinations only when the instruction does not read them.
        if (!flags.HasFlag(InstFlags.ReadRt))
        {
            if (flags.HasFlag(InstFlags.Rt))
            {
                uint mask = MaskFromIndex(ExtractRt(flags, encoding));

                if (flags.HasFlag(InstFlags.RtSeq))
                {
                    int count = GetRtSequenceCount(name, encoding);

                    // Register sequences wrap around (the register after 31 is 0),
                    // so the mask is rotated rather than shifted.
                    for (int index = 0; index < count; index++, mask = (mask << 1) | (mask >> 31))
                    {
                        fpSimdMask |= mask;
                    }
                }
                else
                {
                    fpSimdMask |= mask;
                }
            }

            if (flags.HasFlag(InstFlags.Rt2))
            {
                fpSimdMask |= MaskFromIndex(ExtractRt2(flags, encoding));
            }
        }
    }
    else
    {
        if (flags.HasFlag(InstFlags.Rd))
        {
            gprMask |= MaskFromIndex(ExtractRd(flags, encoding));
        }

        // Rt and Rt2 are destinations only when the instruction does not read them.
        if (!flags.HasFlag(InstFlags.ReadRt))
        {
            if (flags.HasFlag(InstFlags.Rt))
            {
                gprMask |= MaskFromIndex(ExtractRt(flags, encoding));
            }

            if (flags.HasFlag(InstFlags.Rt2))
            {
                gprMask |= MaskFromIndex(ExtractRt2(flags, encoding));
            }
        }

        if (flags.HasFlag(InstFlags.Rs))
        {
            gprMask |= MaskFromIndex(ExtractRs(flags, encoding));
        }
    }

    return (gprMask, fpSimdMask);
}
|
||||
|
||||
/// <summary>
/// Converts a register index into a single bit mask.
/// </summary>
/// <param name="index">Register index, or <see cref="SpecialZrIndex"/> for the zero register</param>
/// <returns>Mask with the bit of the register set, or zero if the index is the zero register one</returns>
private static uint MaskFromIndex(int index)
{
    return index < SpecialZrIndex ? 1u << index : 0u;
}
|
||||
|
||||
/// <summary>
/// Checks if the instruction moves a value from a general purpose register to a FP/SIMD register.
/// </summary>
/// <param name="flags">Flags of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if the source is a general purpose register, false otherwise</returns>
private static bool IsFpFromGpr(InstFlags flags, uint encoding)
{
    InstFlags bothDirections = InstFlags.FpSimdFromGpr | InstFlags.FpSimdToGpr;

    // FMOV (general) has both flags set, and bit 16 of the encoding selects the direction.
    bool isFmovGeneral = (flags & bothDirections) == bothDirections;

    return isFmovGeneral
        ? (encoding & (1u << 16)) != 0
        : flags.HasFlag(InstFlags.FpSimdFromGpr);
}
|
||||
|
||||
/// <summary>
/// Checks if the instruction moves a value from a FP/SIMD register to a general purpose register.
/// </summary>
/// <param name="flags">Flags of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if the destination is a general purpose register, false otherwise</returns>
private static bool IsFpToGpr(InstFlags flags, uint encoding)
{
    InstFlags bothDirections = InstFlags.FpSimdFromGpr | InstFlags.FpSimdToGpr;

    // FMOV (general) has both flags set, and bit 16 of the encoding selects the direction.
    bool isFmovGeneral = (flags & bothDirections) == bothDirections;

    return isFmovGeneral
        ? (encoding & (1u << 16)) == 0
        : flags.HasFlag(InstFlags.FpSimdToGpr);
}
|
||||
|
||||
/// <summary>
/// Gets the number of consecutive registers, starting at Rt, that a SIMD structure load or store accesses.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Number of registers on the sequence, 1 for instructions that are not listed</returns>
private static int GetRtSequenceCount(InstName name, uint encoding)
{
    return name switch
    {
        // For the multiple structures form, the count comes from bits 12 to 15 of the encoding.
        InstName.Ld1AdvsimdMultAsNoPostIndex or
        InstName.Ld1AdvsimdMultAsPostIndex or
        InstName.St1AdvsimdMultAsNoPostIndex or
        InstName.St1AdvsimdMultAsPostIndex => ((encoding >> 12) & 0xf) switch
        {
            0b0000 or 0b0010 => 4,
            0b0100 or 0b0110 => 3,
            0b1000 or 0b1010 => 2,
            _ => 1,
        },

        InstName.Ld1rAdvsimdAsNoPostIndex or
        InstName.Ld1rAdvsimdAsPostIndex or
        InstName.Ld1AdvsimdSnglAsNoPostIndex or
        InstName.Ld1AdvsimdSnglAsPostIndex or
        InstName.St1AdvsimdSnglAsNoPostIndex or
        InstName.St1AdvsimdSnglAsPostIndex => 1,

        InstName.Ld2rAdvsimdAsNoPostIndex or
        InstName.Ld2rAdvsimdAsPostIndex or
        InstName.Ld2AdvsimdMultAsNoPostIndex or
        InstName.Ld2AdvsimdMultAsPostIndex or
        InstName.Ld2AdvsimdSnglAsNoPostIndex or
        InstName.Ld2AdvsimdSnglAsPostIndex or
        InstName.St2AdvsimdMultAsNoPostIndex or
        InstName.St2AdvsimdMultAsPostIndex or
        InstName.St2AdvsimdSnglAsNoPostIndex or
        InstName.St2AdvsimdSnglAsPostIndex => 2,

        InstName.Ld3rAdvsimdAsNoPostIndex or
        InstName.Ld3rAdvsimdAsPostIndex or
        InstName.Ld3AdvsimdMultAsNoPostIndex or
        InstName.Ld3AdvsimdMultAsPostIndex or
        InstName.Ld3AdvsimdSnglAsNoPostIndex or
        InstName.Ld3AdvsimdSnglAsPostIndex or
        InstName.St3AdvsimdMultAsNoPostIndex or
        InstName.St3AdvsimdMultAsPostIndex or
        InstName.St3AdvsimdSnglAsNoPostIndex or
        InstName.St3AdvsimdSnglAsPostIndex => 3,

        InstName.Ld4rAdvsimdAsNoPostIndex or
        InstName.Ld4rAdvsimdAsPostIndex or
        InstName.Ld4AdvsimdMultAsNoPostIndex or
        InstName.Ld4AdvsimdMultAsPostIndex or
        InstName.Ld4AdvsimdSnglAsNoPostIndex or
        InstName.Ld4AdvsimdSnglAsPostIndex or
        InstName.St4AdvsimdMultAsNoPostIndex or
        InstName.St4AdvsimdMultAsPostIndex or
        InstName.St4AdvsimdSnglAsNoPostIndex or
        InstName.St4AdvsimdSnglAsPostIndex => 4,

        _ => 1,
    };
}
|
||||
|
||||
/// <summary>
/// Gets the number of consecutive registers, starting at Rn, that the instruction reads.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Value of bits 13 and 14 of the encoding, plus one</returns>
private static int GetRnSequenceCount(uint encoding)
{
    int lengthField = (int)((encoding >> 13) & 3u);

    return lengthField + 1;
}
|
||||
|
||||
/// <summary>
/// Extracts the Rd register index from an instruction.
/// </summary>
/// <param name="flags">Flags of the instruction, must have the Rd flag set</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Register index, or <see cref="SpecialZrIndex"/> if the index is 31 and the RdSP flag is not set</returns>
public static int ExtractRd(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rd));

    int rd = (int)(encoding >> RdRtBit) & 0x1f;
    bool isZeroRegister = rd == ZrIndex && !flags.HasFlag(InstFlags.RdSP);

    return isZeroRegister ? SpecialZrIndex : rd;
}
|
||||
|
||||
/// <summary>
/// Extracts the raw Rn register index from an instruction.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Value of the 5-bit Rn field</returns>
public static int ExtractRn(uint encoding) => (int)(encoding >> RnBit) & 0x1f;
|
||||
|
||||
/// <summary>
/// Extracts the Rn register index from an instruction.
/// </summary>
/// <param name="flags">Flags of the instruction, must have the Rn flag set</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Register index, or <see cref="SpecialZrIndex"/> if the index is 31 and the RnSP flag is not set</returns>
public static int ExtractRn(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rn));

    int rn = ExtractRn(encoding);
    bool isZeroRegister = rn == ZrIndex && !flags.HasFlag(InstFlags.RnSP);

    return isZeroRegister ? SpecialZrIndex : rn;
}
|
||||
|
||||
/// <summary>
/// Extracts the raw Rm register index from an instruction.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Value of the 5-bit Rm field</returns>
public static int ExtractRm(uint encoding) => (int)(encoding >> RmRsBit) & 0x1f;
|
||||
|
||||
/// <summary>
/// Extracts the Rm register index from an instruction.
/// </summary>
/// <param name="flags">Flags of the instruction, must have the Rm flag set</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Register index, or <see cref="SpecialZrIndex"/> if the index is 31</returns>
public static int ExtractRm(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rm));

    int rm = ExtractRm(encoding);

    if (rm == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rm;
}
|
||||
|
||||
/// <summary>
/// Extracts the raw Rs register index from an instruction.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Value of the 5-bit Rs field</returns>
public static int ExtractRs(uint encoding) => (int)(encoding >> RmRsBit) & 0x1f;
|
||||
|
||||
/// <summary>
/// Extracts the Rs register index from an instruction.
/// </summary>
/// <param name="flags">Flags of the instruction, must have the Rs flag set</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Register index, or <see cref="SpecialZrIndex"/> if the index is 31</returns>
public static int ExtractRs(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rs));

    int rs = ExtractRs(encoding);

    if (rs == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rs;
}
|
||||
|
||||
/// <summary>
/// Extracts the Ra register index from an instruction.
/// </summary>
/// <param name="flags">Flags of the instruction, must have the Ra flag set</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Register index, or <see cref="SpecialZrIndex"/> if the index is 31</returns>
public static int ExtractRa(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Ra));

    int ra = (int)(encoding >> RaRt2Bit) & 0x1f;

    if (ra == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return ra;
}
|
||||
|
||||
/// <summary>
/// Extracts the raw Rt register index from an instruction.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Value of the 5-bit Rt field</returns>
public static int ExtractRt(uint encoding) => (int)(encoding >> RdRtBit) & 0x1f;
|
||||
|
||||
/// <summary>
/// Extracts the Rt register index from an instruction.
/// </summary>
/// <param name="flags">Flags of the instruction, must have the Rt flag set</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Register index, or <see cref="SpecialZrIndex"/> if the index is 31</returns>
public static int ExtractRt(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rt));

    int rt = ExtractRt(encoding);

    if (rt == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rt;
}
|
||||
|
||||
/// <summary>
/// Extracts the Rt2 register index from an instruction.
/// </summary>
/// <param name="flags">Flags of the instruction, must have the Rt2 flag set</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Register index, or <see cref="SpecialZrIndex"/> if the index is 31</returns>
public static int ExtractRt2(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rt2));

    int rt2 = (int)(encoding >> RaRt2Bit) & 0x1f;

    if (rt2 == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rt2;
}
|
||||
}
|
||||
}
|
743
src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/Compiler.cs
Normal file
743
src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/Compiler.cs
Normal file
@ -0,0 +1,743 @@
|
||||
using ARMeilleure.Common;
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using Ryujinx.Cpu.LightningJit.Graph;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
|
||||
{
|
||||
static class Compiler
|
||||
{
|
||||
private const int Encodable26BitsOffsetLimit = 0x2000000;
|
||||
|
||||
private readonly struct Context
|
||||
{
|
||||
public readonly CodeWriter Writer;
|
||||
public readonly RegisterAllocator RegisterAllocator;
|
||||
public readonly TailMerger TailMerger;
|
||||
public readonly AddressTable<ulong> FuncTable;
|
||||
public readonly IntPtr DispatchStubPointer;
|
||||
|
||||
private readonly MultiBlock _multiBlock;
|
||||
private readonly RegisterSaveRestore _registerSaveRestore;
|
||||
private readonly IntPtr _pageTablePointer;
|
||||
|
||||
/// <summary>
/// Creates a new compilation context.
/// </summary>
/// <param name="writer">Code writer that receives the generated code</param>
/// <param name="registerAllocator">Register allocator of the function</param>
/// <param name="tailMerger">Tail merger of the function</param>
/// <param name="registerSaveRestore">Writer of the function prologue and epilogue</param>
/// <param name="multiBlock">Decoded blocks of the function</param>
/// <param name="funcTable">Function table</param>
/// <param name="dispatchStubPointer">Pointer to the dispatch stub</param>
/// <param name="pageTablePointer">Pointer to the page table or address space base</param>
public Context(
    CodeWriter writer,
    RegisterAllocator registerAllocator,
    TailMerger tailMerger,
    RegisterSaveRestore registerSaveRestore,
    MultiBlock multiBlock,
    AddressTable<ulong> funcTable,
    IntPtr dispatchStubPointer,
    IntPtr pageTablePointer)
{
    // Publicly visible state.
    Writer = writer;
    RegisterAllocator = registerAllocator;
    TailMerger = tailMerger;
    FuncTable = funcTable;
    DispatchStubPointer = dispatchStubPointer;

    // State that is only used by the methods of the context.
    _multiBlock = multiBlock;
    _registerSaveRestore = registerSaveRestore;
    _pageTablePointer = pageTablePointer;
}
|
||||
|
||||
/// <summary>
/// Gets the index of the register that the link register (X30) is mapped to.
/// </summary>
/// <returns>Mapped register index</returns>
public readonly int GetLrRegisterIndex() => RemapGprRegister(RegisterUtils.LrIndex);
|
||||
|
||||
/// <summary>
/// Gets the index of the register that a general purpose register is mapped to.
/// </summary>
/// <param name="index">Original register index</param>
/// <returns>Mapped register index</returns>
public readonly int RemapGprRegister(int index) => RegisterAllocator.RemapReservedGprRegister(index);
|
||||
|
||||
/// <summary>
/// Gets the reserved stack offset, as reported by the register save/restore helper.
/// </summary>
/// <returns>Reserved stack offset</returns>
public readonly int GetReservedStackOffset() => _registerSaveRestore.GetReservedStackOffset();
|
||||
|
||||
/// <summary>
/// Writes the function prologue, sets up the fixed registers and loads
/// the registers read by the entry block from the context.
/// </summary>
public readonly void WritePrologue()
{
    var asm = new Assembler(Writer);

    _registerSaveRestore.WritePrologue(ref asm);

    // The context pointer is passed as the first argument (register 0),
    // so it only needs to be moved if the fixed context register is a different one.
    int contextRegister = RegisterAllocator.FixedContextRegister;

    if (contextRegister != 0)
    {
        asm.Mov(Register(contextRegister), Register(0));
    }

    // The page table or address space base is at a fixed memory location and considered constant,
    // it is only needed when the function accesses memory.
    if (_multiBlock.HasMemoryInstruction)
    {
        asm.Mov(Register(RegisterAllocator.FixedPageTableRegister), (ulong)_pageTablePointer);
    }

    // This assumes that the block with the index 0 is always the entry block.
    LoadFromContext(ref asm, _multiBlock.ReadMasks[0]);
}
|
||||
|
||||
/// <summary>
/// Writes the function epilogue, without storing any register to the context.
/// </summary>
public readonly void WriteEpilogueWithoutContext()
{
    var asm = new Assembler(Writer);

    _registerSaveRestore.WriteEpilogue(ref asm);
}
|
||||
|
||||
/// <summary>
/// Loads from the context all the registers that are read by the successors of a block.
/// Nothing is written if the block has no successors.
/// </summary>
/// <param name="blockIndex">Index of the block</param>
public void LoadFromContextAfterCall(int blockIndex)
{
    Block block = _multiBlock.Blocks[blockIndex];

    if (block.SuccessorsCount == 0)
    {
        return;
    }

    Assembler asm = new(Writer);

    // Combine the read masks of all the successors, starting with the first one.
    RegisterMask combinedMask = _multiBlock.ReadMasks[block.GetSuccessor(0).Index];

    for (int successorIndex = 1; successorIndex < block.SuccessorsCount; successorIndex++)
    {
        combinedMask |= _multiBlock.ReadMasks[block.GetSuccessor(successorIndex).Index];
    }

    LoadFromContext(ref asm, combinedMask);
}
|
||||
|
||||
/// <summary>
/// Loads general purpose registers, FP/SIMD registers and flags from the context.
/// </summary>
/// <param name="asm">Assembler</param>
/// <param name="readMask">Masks of the registers to load</param>
private void LoadFromContext(ref Assembler asm, RegisterMask readMask)
{
    LoadGprFromContext(ref asm, mask: readMask.GprMask, baseOffset: NativeContextOffsets.GprBaseOffset);
    LoadFpSimdFromContext(ref asm, mask: readMask.FpSimdMask, baseOffset: NativeContextOffsets.FpSimdBaseOffset);
    LoadPStateFromContext(ref asm, mask: readMask.PStateMask, baseOffset: NativeContextOffsets.FlagsBaseOffset);
}
|
||||
|
||||
/// <summary>
/// Stores to the context all the registers that are written by a block.
/// </summary>
/// <param name="blockIndex">Index of the block</param>
/// <param name="newLrValue">Optional value to store as the link register, in place of the register value</param>
public void StoreToContextBeforeCall(int blockIndex, ulong? newLrValue = null)
{
    var asm = new Assembler(Writer);
    RegisterMask writeMask = _multiBlock.WriteMasks[blockIndex];

    StoreToContext(ref asm, writeMask, newLrValue);
}
|
||||
|
||||
/// <summary>
/// Stores general purpose registers, FP/SIMD registers and flags to the context.
/// </summary>
/// <param name="asm">Assembler</param>
/// <param name="writeMask">Masks of the registers to store</param>
/// <param name="newLrValue">Optional value to store as the link register, in place of the register value</param>
private void StoreToContext(ref Assembler asm, RegisterMask writeMask, ulong? newLrValue)
{
    StoreGprToContext(ref asm, mask: writeMask.GprMask, baseOffset: NativeContextOffsets.GprBaseOffset, newLrValue: newLrValue);
    StoreFpSimdToContext(ref asm, mask: writeMask.FpSimdMask, baseOffset: NativeContextOffsets.FpSimdBaseOffset);
    StorePStateToContext(ref asm, mask: writeMask.PStateMask, baseOffset: NativeContextOffsets.FlagsBaseOffset);
}
|
||||
|
||||
/// <summary>
/// Loads general purpose registers from the context, using pair loads for adjacent registers when possible.
/// </summary>
/// <param name="asm">Assembler</param>
/// <param name="mask">Mask of the registers to load</param>
/// <param name="baseOffset">Offset of the first general purpose register on the context</param>
private void LoadGprFromContext(ref Assembler asm, uint mask, int baseOffset)
{
    Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

    for (uint remaining = mask; remaining != 0;)
    {
        int first = BitOperations.TrailingZeroCount(remaining);
        int offset = baseOffset + first * 8;

        // A pair load needs the next register to be on the mask too, and an offset that can be encoded.
        bool nextIsSet = first < 31 && (remaining & (2u << first)) != 0;
        bool canUsePair = nextIsSet && offset < RegisterSaveRestore.Encodable9BitsOffsetLimit;

        if (!canUsePair)
        {
            remaining &= ~(1u << first);

            asm.LdrRiUn(Register(RegisterAllocator.RemapReservedGprRegister(first)), contextPtr, offset);

            continue;
        }

        remaining &= ~(3u << first);

        asm.LdpRiUn(
            Register(RegisterAllocator.RemapReservedGprRegister(first)),
            Register(RegisterAllocator.RemapReservedGprRegister(first + 1)),
            contextPtr,
            offset);
    }
}
|
||||
|
||||
/// <summary>
/// Loads FP/SIMD registers from the context, as 128-bit values.
/// </summary>
/// <param name="asm">Assembler</param>
/// <param name="mask">Mask of the registers to load</param>
/// <param name="baseOffset">Offset of the first FP/SIMD register on the context</param>
private void LoadFpSimdFromContext(ref Assembler asm, uint mask, int baseOffset)
{
    Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

    // "remaining &= remaining - 1" clears the lowest set bit, which is the register that was just loaded.
    for (uint remaining = mask; remaining != 0; remaining &= remaining - 1)
    {
        int vectorIndex = BitOperations.TrailingZeroCount(remaining);

        asm.LdrRiUn(Register(vectorIndex, OperandType.V128), contextPtr, baseOffset + vectorIndex * 16);
    }
}
|
||||
|
||||
/// <summary>
/// Loads the NZCV flags from the context, if any flag is on the mask.
/// </summary>
/// <param name="asm">Assembler</param>
/// <param name="mask">Mask of the flags to load, nothing is written if it is zero</param>
/// <param name="baseOffset">Offset of the flags on the context</param>
private void LoadPStateFromContext(ref Assembler asm, uint mask, int baseOffset)
{
    if (mask != 0)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        // The flags go through a temporary register, since they are set with MSR.
        int scratchIndex = RegisterAllocator.AllocateTempGprRegister();
        Operand scratch = Register(scratchIndex, OperandType.I32);

        asm.LdrRiUn(scratch, contextPtr, baseOffset);
        asm.MsrNzcv(scratch);

        RegisterAllocator.FreeTempGprRegister(scratchIndex);
    }
}
|
||||
|
||||
/// <summary>
/// Stores general purpose registers to the context, using pair stores for adjacent registers when possible.
/// </summary>
/// <param name="asm">Assembler</param>
/// <param name="mask">Mask of the registers to store</param>
/// <param name="baseOffset">Offset of the first general purpose register on the context</param>
/// <param name="newLrValue">Optional value to store as the link register, in place of the register value</param>
private void StoreGprToContext(ref Assembler asm, uint mask, int baseOffset, ulong? newLrValue)
{
    Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

    int lrValueRegister = -1;

    if (newLrValue is ulong lrValue)
    {
        // The new link register value is placed on a temporary register rather than on the link register itself.
        // This is required for BLR X30 instructions, where the target address must still be
        // on the register after the return address is stored on the context.

        lrValueRegister = RegisterAllocator.AllocateTempGprRegister();

        asm.Mov(Register(lrValueRegister), lrValue);
    }

    for (uint remaining = mask; remaining != 0;)
    {
        int first = BitOperations.TrailingZeroCount(remaining);
        int offset = baseOffset + first * 8;

        // A pair store needs the next register to be on the mask too, and an offset that can be encoded.
        bool nextIsSet = first < 31 && (remaining & (2u << first)) != 0;
        bool canUsePair = nextIsSet && offset < RegisterSaveRestore.Encodable9BitsOffsetLimit;

        if (!canUsePair)
        {
            remaining &= ~(1u << first);

            asm.StrRiUn(Register(RemapReservedGprRegister(first, lrValueRegister)), contextPtr, offset);

            continue;
        }

        remaining &= ~(3u << first);

        asm.StpRiUn(
            Register(RemapReservedGprRegister(first, lrValueRegister)),
            Register(RemapReservedGprRegister(first + 1, lrValueRegister)),
            contextPtr,
            offset);
    }

    if (lrValueRegister >= 0)
    {
        RegisterAllocator.FreeTempGprRegister(lrValueRegister);
    }
}
|
||||
|
||||
/// <summary>
/// Gets the register that holds the value to store for a given general purpose register.
/// </summary>
/// <param name="index">Original register index</param>
/// <param name="tempRegister">Register that holds the link register replacement value, or a negative value if there is none</param>
/// <returns>The temporary register for the link register if one was supplied, otherwise the mapped register index</returns>
private int RemapReservedGprRegister(int index, int tempRegister)
{
    bool useTemp = tempRegister >= 0 && index == RegisterUtils.LrIndex;

    return useTemp ? tempRegister : RegisterAllocator.RemapReservedGprRegister(index);
}
|
||||
|
||||
/// <summary>
/// Stores FP/SIMD registers to the context, as 128-bit values.
/// </summary>
/// <param name="asm">Assembler</param>
/// <param name="mask">Mask of the registers to store</param>
/// <param name="baseOffset">Offset of the first FP/SIMD register on the context</param>
private void StoreFpSimdToContext(ref Assembler asm, uint mask, int baseOffset)
{
    Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

    // "remaining &= remaining - 1" clears the lowest set bit, which is the register that was just stored.
    for (uint remaining = mask; remaining != 0; remaining &= remaining - 1)
    {
        int vectorIndex = BitOperations.TrailingZeroCount(remaining);

        asm.StrRiUn(Register(vectorIndex, OperandType.V128), contextPtr, baseOffset + vectorIndex * 16);
    }
}
|
||||
|
||||
/// <summary>
/// Stores the NZCV flags to the context, if any flag is on the mask.
/// </summary>
/// <param name="asm">Assembler</param>
/// <param name="mask">Mask of the flags to store, nothing is written if it is zero</param>
/// <param name="baseOffset">Offset of the flags on the context</param>
private void StorePStateToContext(ref Assembler asm, uint mask, int baseOffset)
{
    if (mask != 0)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        // The flags go through a temporary register, since they are read with MRS.
        int scratchIndex = RegisterAllocator.AllocateTempGprRegister();
        Operand scratch = Register(scratchIndex, OperandType.I32);

        asm.MrsNzcv(scratch);
        asm.StrRiUn(scratch, contextPtr, baseOffset);

        RegisterAllocator.FreeTempGprRegister(scratchIndex);
    }
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Branch instruction whose code is written only after all the blocks were emitted.
/// </summary>
private readonly struct PendingBranch
{
    /// <summary>Index of the block that ends with the branch.</summary>
    public readonly int BlockIndex;

    /// <summary>Address of the branch instruction.</summary>
    public readonly ulong Pc;

    /// <summary>Name of the branch instruction.</summary>
    public readonly InstName Name;

    /// <summary>Encoding of the branch instruction.</summary>
    public readonly uint Encoding;

    /// <summary>Position on the code writer of the placeholder written for the branch.</summary>
    public readonly int WriterPointer;

    /// <summary>
    /// Creates a new pending branch.
    /// </summary>
    /// <param name="blockIndex">Index of the block that ends with the branch</param>
    /// <param name="pc">Address of the branch instruction</param>
    /// <param name="name">Name of the branch instruction</param>
    /// <param name="encoding">Encoding of the branch instruction</param>
    /// <param name="writerPointer">Position on the code writer of the placeholder written for the branch</param>
    public PendingBranch(int blockIndex, ulong pc, InstName name, uint encoding, int writerPointer)
    {
        // Where the branch is.
        BlockIndex = blockIndex;
        Pc = pc;
        WriterPointer = writerPointer;

        // What the branch is.
        Name = name;
        Encoding = encoding;
    }
}
|
||||
|
||||
/// <summary>
/// Compiles the function at the specified address.
/// </summary>
/// <param name="cpuPreset">CPU preset, passed to the decoder</param>
/// <param name="memoryManager">Memory manager used to decode the function and to rewrite memory accesses</param>
/// <param name="address">Address of the function</param>
/// <param name="funcTable">Function table</param>
/// <param name="dispatchStubPtr">Pointer to the dispatch stub</param>
/// <returns>Generated code, along with the size in bytes of the code that was compiled</returns>
public static CompiledFunction Compile(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address, AddressTable<ulong> funcTable, IntPtr dispatchStubPtr)
{
    MultiBlock multiBlock = Decoder.DecodeMulti(cpuPreset, memoryManager, address);

    // Position on the writer of each block, and the branches that are only written once all positions are known.
    Dictionary<ulong, int> targets = new();
    List<PendingBranch> pendingBranches = new();

    CodeWriter writer = new();
    RegisterAllocator regAlloc = new(
        multiBlock.GlobalUseMask.GprMask,
        multiBlock.GlobalUseMask.FpSimdMask,
        multiBlock.GlobalUseMask.PStateMask,
        multiBlock.HasHostCall);

    // Stack space for spilling registers is only needed if the function calls into the host.
    int callSpillStackSize = multiBlock.HasHostCall
        ? CalculateStackSizeForCallSpill(regAlloc.AllGprMask, regAlloc.AllFpSimdMask, regAlloc.AllPStateMask)
        : 0;

    RegisterSaveRestore rsr = new(
        regAlloc.AllGprMask & AbiConstants.GprCalleeSavedRegsMask,
        regAlloc.AllFpSimdMask & AbiConstants.FpSimdCalleeSavedRegsMask,
        OperandType.FP64,
        multiBlock.HasHostCall,
        callSpillStackSize);

    TailMerger tailMerger = new();

    Context context = new(writer, regAlloc, tailMerger, rsr, multiBlock, funcTable, dispatchStubPtr, memoryManager.PageTablePointer);

    context.WritePrologue();

    ulong pc = address;
    int lastBlockPosition = multiBlock.Blocks.Count - 1;

    for (int blockIndex = 0; blockIndex < multiBlock.Blocks.Count; blockIndex++)
    {
        Block block = multiBlock.Blocks[blockIndex];

        Debug.Assert(block.Address == pc);

        targets.Add(pc, writer.InstructionPointer);

        // The branch at the end of the block, if any, is handled separately below.
        int instCount = block.Instructions.Count;

        if (block.EndsWithBranch)
        {
            instCount--;
        }

        for (int instIndex = 0; instIndex < instCount; instIndex++)
        {
            InstInfo instInfo = block.Instructions[instIndex];

            uint encoding = RegisterUtils.RemapRegisters(regAlloc, instInfo.Flags, instInfo.Encoding);

            if (instInfo.AddressForm != AddressForm.None)
            {
                InstEmitMemory.RewriteInstruction(
                    memoryManager.AddressSpaceBits,
                    memoryManager.Type,
                    writer,
                    regAlloc,
                    instInfo.Name,
                    instInfo.Flags,
                    instInfo.AddressForm,
                    pc,
                    encoding);
            }
            else if (instInfo.Name == InstName.Sys)
            {
                InstEmitMemory.RewriteSysInstruction(memoryManager.AddressSpaceBits, memoryManager.Type, writer, regAlloc, encoding);
            }
            else if (instInfo.Name.IsSystem())
            {
                bool needsContextStoreLoad = InstEmitSystem.NeedsContextStoreLoad(instInfo.Name);

                if (needsContextStoreLoad)
                {
                    context.StoreToContextBeforeCall(blockIndex);
                }

                InstEmitSystem.RewriteInstruction(writer, regAlloc, tailMerger, instInfo.Name, pc, encoding, rsr.GetReservedStackOffset());

                if (needsContextStoreLoad)
                {
                    context.LoadFromContextAfterCall(blockIndex);
                }
            }
            else
            {
                // No rewrite needed, the instruction is written with only the registers remapped.
                writer.WriteInstruction(encoding);
            }

            pc += 4UL;
        }

        if (block.IsLoopEnd)
        {
            // If this is a loop, the code might run for a long time uninterrupted.
            // We insert a "sync point" here to ensure the loop can be interrupted if needed.

            InstEmitSystem.WriteSyncPoint(writer, context.RegisterAllocator, tailMerger, context.GetReservedStackOffset());
        }

        // The end of the last block is handled after the loop.
        if (blockIndex >= lastBlockPosition)
        {
            continue;
        }

        InstInfo blockEndInfo = block.Instructions[^1];
        InstName blockEndName = blockEndInfo.Name;
        uint blockEndEncoding = RegisterUtils.RemapRegisters(regAlloc, blockEndInfo.Flags, blockEndInfo.Encoding);

        if (blockEndName.IsCall())
        {
            context.StoreToContextBeforeCall(blockIndex, pc + 4UL);

            InstEmitSystem.RewriteCallInstruction(
                writer,
                regAlloc,
                tailMerger,
                context.WriteEpilogueWithoutContext,
                funcTable,
                dispatchStubPtr,
                blockEndName,
                pc,
                blockEndEncoding,
                context.GetReservedStackOffset());

            context.LoadFromContextAfterCall(blockIndex);

            pc += 4UL;
        }
        else if (blockEndName == InstName.Ret)
        {
            RewriteBranchInstruction(context, blockIndex, blockEndName, pc, blockEndEncoding);

            pc += 4UL;
        }
        else if (block.EndsWithBranch)
        {
            // The target position might not be known yet, so a placeholder is written
            // and the branch is recorded to be written later.
            pendingBranches.Add(new(blockIndex, pc, blockEndName, blockEndEncoding, writer.InstructionPointer));
            writer.WriteInstruction(0u); // Placeholder.

            pc += 4UL;
        }
    }

    int lastBlockIndex = multiBlock.Blocks[^1].Index;

    if (multiBlock.IsTruncated)
    {
        // The function continues past the decoded blocks, so the generated code tail calls the next address.
        Assembler asm = new(writer);

        WriteTailCallConstant(context, ref asm, lastBlockIndex, pc);
    }
    else
    {
        InstInfo finalInfo = multiBlock.Blocks[^1].Instructions[^1];
        InstName finalName = finalInfo.Name;
        uint finalEncoding = RegisterUtils.RemapRegisters(regAlloc, finalInfo.Flags, finalInfo.Encoding);

        RewriteBranchInstruction(context, lastBlockIndex, finalName, pc, finalEncoding);

        pc += 4UL;
    }

    // All block positions are known now, so the placeholders can be replaced.
    foreach (PendingBranch pending in pendingBranches)
    {
        RewriteBranchInstructionWithTarget(
            context,
            pending.BlockIndex,
            pending.Name,
            pending.Pc,
            pending.Encoding,
            pending.WriterPointer,
            targets);
    }

    tailMerger.WriteReturn(writer, context.WriteEpilogueWithoutContext);

    return new CompiledFunction(writer.AsByteSpan(), (int)(pc - address));
}
|
||||
|
||||
/// <summary>
/// Computes the number of stack bytes needed to spill the registers in use around a call.
/// </summary>
/// <param name="gprUseMask">Mask of general purpose registers in use</param>
/// <param name="fpSimdUseMask">Mask of FP/SIMD registers in use</param>
/// <param name="pStateUseMask">Mask of PSTATE flags in use</param>
/// <returns>Spill area size in bytes</returns>
private static int CalculateStackSizeForCallSpill(uint gprUseMask, uint fpSimdUseMask, uint pStateUseMask)
{
    // General purpose registers that are callee saved do not need a slot.
    int gprBytes = BitOperations.PopCount(gprUseMask & ~AbiConstants.GprCalleeSavedRegsMask) * 8;

    // Callee saved FP/SIMD registers are deliberately NOT excluded: only their lower 64 bits
    // are callee saved, which is not enough if the function uses the full register.
    // This could be refined, but the case is rare enough in practice that it is likely not worth it.
    int fpSimdBytes = BitOperations.PopCount(fpSimdUseMask) * 16;

    // A single 8 byte slot is reserved when any flag is in use.
    int pStateBytes = pStateUseMask != 0 ? 8 : 0;

    return gprBytes + fpSimdBytes + pStateBytes;
}
|
||||
|
||||
/// <summary>
/// Emits the host code replacing a guest branch instruction whose destinations are all
/// reached through tail calls (or, for a register return, through the tail merger).
/// </summary>
/// <param name="context">Translation context supplying the writer and call helpers</param>
/// <param name="blockIndex">Index of the block that ends with the branch</param>
/// <param name="name">Name of the branch instruction</param>
/// <param name="pc">Guest address of the branch instruction</param>
/// <param name="encoding">Encoding of the branch instruction</param>
private static void RewriteBranchInstruction(in Context context, int blockIndex, InstName name, ulong pc, uint encoding)
{
    CodeWriter writer = context.Writer;
    Assembler asm = new(writer);

    ulong nextAddress = pc + 4UL;

    switch (name)
    {
        case InstName.BUncond:
        {
            int offset = ImmUtils.ExtractSImm26Times4(encoding);

            WriteTailCallConstant(context, ref asm, blockIndex, pc + (ulong)offset);
            break;
        }

        case InstName.Bl:
        {
            // The link register receives the return address before control is transferred.
            asm.Mov(Register(context.GetLrRegisterIndex()), nextAddress);

            int offset = ImmUtils.ExtractSImm26Times4(encoding);

            WriteTailCallConstant(context, ref asm, blockIndex, pc + (ulong)offset);
            break;
        }

        case InstName.Blr:
        case InstName.Br:
        {
            // Only the call form passes the return address when storing to the context.
            if (name == InstName.Blr)
            {
                context.StoreToContextBeforeCall(blockIndex, nextAddress);
            }
            else
            {
                context.StoreToContextBeforeCall(blockIndex);
            }

            InstEmitSystem.RewriteCallInstruction(
                context.Writer,
                context.RegisterAllocator,
                context.TailMerger,
                context.WriteEpilogueWithoutContext,
                context.FuncTable,
                context.DispatchStubPointer,
                name,
                pc,
                encoding,
                context.GetReservedStackOffset(),
                isTail: true);
            break;
        }

        case InstName.Ret:
        {
            int returnRegister = RegisterUtils.ExtractRn(encoding);

            if (returnRegister == RegisterUtils.ZrIndex)
            {
                // Returning through the zero register means the target address is the constant zero.
                WriteTailCallConstant(context, ref asm, blockIndex, 0UL);
                break;
            }

            returnRegister = context.RemapGprRegister(returnRegister);
            context.StoreToContextBeforeCall(blockIndex);

            // The return address is handed over in register 0.
            if (returnRegister != 0)
            {
                asm.Mov(Register(0), Register(returnRegister));
            }

            context.TailMerger.AddUnconditionalReturn(writer, asm);
            break;
        }

        case InstName.BCond:
        case InstName.Cbnz:
        case InstName.Cbz:
        case InstName.Tbnz:
        case InstName.Tbz:
        {
            bool isTestBranch = name is InstName.Tbnz or InstName.Tbz;

            int offset = isTestBranch
                ? ImmUtils.ExtractSImm14Times4(encoding)
                : ImmUtils.ExtractSImm19Times4(encoding);
            uint branchMask = isTestBranch ? 0x3fffu : 0x7ffffu;

            ulong takenAddress = pc + (ulong)offset;

            // Layout: [conditional branch] [tail call to the next address] [tail call to the taken address].
            // The conditional branch is patched once the position of the second tail call is known.
            int branchIndex = writer.InstructionPointer;

            writer.WriteInstruction(0u); // Reserved for branch.
            WriteTailCallConstant(context, ref asm, blockIndex, nextAddress);

            int takenIndex = writer.InstructionPointer;

            uint immField = (uint)(((takenIndex - branchIndex) & branchMask) << 5);

            writer.WriteInstructionAt(branchIndex, (encoding & ~(branchMask << 5)) | immField);
            WriteTailCallConstant(context, ref asm, blockIndex, takenAddress);
            break;
        }

        default:
            Debug.Fail($"Unknown branch instruction \"{name}\".");
            break;
    }
}
|
||||
|
||||
/// <summary>
/// Patches a previously reserved branch slot, preferring a direct branch to an already
/// translated target and falling back to a tail call stub appended at the current write position.
/// </summary>
/// <param name="context">Translation context supplying the writer and call helpers</param>
/// <param name="blockIndex">Index of the block that ends with the branch</param>
/// <param name="name">Name of the branch instruction</param>
/// <param name="pc">Guest address of the branch instruction</param>
/// <param name="encoding">Encoding of the branch instruction</param>
/// <param name="branchIndex">Instruction index of the reserved branch slot</param>
/// <param name="targets">Map from guest address to the instruction index of its translated code</param>
private static void RewriteBranchInstructionWithTarget(
    in Context context,
    int blockIndex,
    InstName name,
    ulong pc,
    uint encoding,
    int branchIndex,
    Dictionary<ulong, int> targets)
{
    // Replaces the 26-bit immediate of an unconditional branch.
    static uint WithImm26(uint inst, int delta)
    {
        return (inst & ~0x3ffffffu) | (uint)(delta & 0x3ffffff);
    }

    // Replaces the immediate of a conditional/compare/test branch (field starts at bit 5).
    static uint WithCondImm(uint inst, uint mask, int delta)
    {
        return (inst & ~(mask << 5)) | (uint)((delta & mask) << 5);
    }

    CodeWriter writer = context.Writer;
    Assembler asm = new(writer);

    switch (name)
    {
        case InstName.BUncond:
        {
            ulong targetAddress = pc + (ulong)ImmUtils.ExtractSImm26Times4(encoding);

            if (targets.TryGetValue(targetAddress, out int knownIndex))
            {
                int knownDelta = knownIndex - branchIndex;

                if (knownDelta >= -Encodable26BitsOffsetLimit && knownDelta < Encodable26BitsOffsetLimit)
                {
                    writer.WriteInstructionAt(branchIndex, WithImm26(encoding, knownDelta));
                    break;
                }
            }

            // Target unknown or out of range: branch to a tail call stub emitted right here.
            int stubDelta = writer.InstructionPointer - branchIndex;

            writer.WriteInstructionAt(branchIndex, WithImm26(encoding, stubDelta));
            WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
            break;
        }

        case InstName.BCond:
        case InstName.Cbnz:
        case InstName.Cbz:
        case InstName.Tbnz:
        case InstName.Tbz:
        {
            bool isTestBranch = name is InstName.Tbnz or InstName.Tbz;

            int originalOffset = isTestBranch
                ? ImmUtils.ExtractSImm14Times4(encoding)
                : ImmUtils.ExtractSImm19Times4(encoding);
            uint branchMask = isTestBranch ? 0x3fffu : 0x7ffffu;

            int branchMax = (int)(branchMask + 1) / 2;

            ulong targetAddress = pc + (ulong)originalOffset;

            if (targets.TryGetValue(targetAddress, out int knownIndex))
            {
                int knownDelta = knownIndex - branchIndex;

                if (knownDelta >= -branchMax && knownDelta < branchMax)
                {
                    writer.WriteInstructionAt(branchIndex, WithCondImm(encoding, branchMask, knownDelta));
                    break;
                }
            }

            int stubDelta = writer.InstructionPointer - branchIndex;

            if (stubDelta >= -branchMax && stubDelta < branchMax)
            {
                writer.WriteInstructionAt(branchIndex, WithCondImm(encoding, branchMask, stubDelta));
                WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
                break;
            }

            // The stub is too far away for the conditional form, so the slot becomes a regular
            // unconditional branch, which has a much higher range. It jumps to the end of the
            // function, where the conditional branch is placed, followed by a branch back to the
            // instruction after the slot (not taken) and the tail call to the target (taken).
            uint branchInst = 0x14000000u | ((uint)stubDelta & 0x3ffffff);
            Debug.Assert(ImmUtils.ExtractSImm26Times4(branchInst) == stubDelta * 4);

            writer.WriteInstructionAt(branchIndex, branchInst);

            int movedBranchIndex = writer.InstructionPointer;

            writer.WriteInstruction(0u); // Placeholder
            asm.B((branchIndex + 1 - writer.InstructionPointer) * 4);

            int movedDelta = writer.InstructionPointer - movedBranchIndex;

            writer.WriteInstructionAt(movedBranchIndex, WithCondImm(encoding, branchMask, movedDelta));
            WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
            break;
        }

        default:
            Debug.Fail($"Unknown branch instruction \"{name}\".");
            break;
    }
}
|
||||
|
||||
/// <summary>
/// Stores the block state to the context, then emits a tail call to a constant guest address.
/// </summary>
/// <param name="context">Translation context supplying the writer and call helpers</param>
/// <param name="asm">Assembler used to emit the call</param>
/// <param name="blockIndex">Index of the block the call is emitted for</param>
/// <param name="address">Guest address to call</param>
private static void WriteTailCallConstant(in Context context, ref Assembler asm, int blockIndex, ulong address)
{
    context.StoreToContextBeforeCall(blockIndex);

    Operand guestTarget = new Operand(OperandKind.Constant, OperandType.I64, address);

    InstEmitSystem.WriteCallWithGuestAddress(
        context.Writer,
        ref asm,
        context.RegisterAllocator,
        context.TailMerger,
        context.WriteEpilogueWithoutContext,
        context.FuncTable,
        context.DispatchStubPointer,
        context.GetReservedStackOffset(),
        0UL,
        guestTarget,
        isTail: true);
}
|
||||
|
||||
/// <summary>
/// Creates an integer register operand.
/// </summary>
/// <param name="register">Register index</param>
/// <param name="type">Operand type, 64-bit integer by default</param>
/// <returns>The register operand</returns>
private static Operand Register(int register, OperandType type = OperandType.I64)
    => new Operand(register, RegisterType.Integer, type);
|
||||
}
|
||||
}
|
384
src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/Decoder.cs
Normal file
384
src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/Decoder.cs
Normal file
@ -0,0 +1,384 @@
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Cpu.LightningJit.Graph;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
|
||||
{
|
||||
static class Decoder
|
||||
{
|
||||
// Upper bound on the number of instructions decoded into one block; Decode truncates the block once it is reached.
private const int MaxInstructionsPerBlock = 1000;
|
||||
|
||||
// PSTATE mask with bits 28 to 31 set; Decode records it for instructions that read NZCV or write the flags.
private const uint NzcvFlags = 0xfu << 28;
|
||||
// PSTATE mask with only bit 29 set; Decode records it for instructions flagged with InstFlags.C.
private const uint CFlag = 0x1u << 29;
|
||||
|
||||
/// <summary>
/// Decodes consecutive blocks starting at <paramref name="address"/>, then splits them at
/// the collected branch targets and numbers and links the result.
/// </summary>
/// <param name="cpuPreset">CPU version and features used for instruction lookup</param>
/// <param name="memoryManager">Memory manager the instructions are read from</param>
/// <param name="address">Guest address of the first instruction</param>
/// <returns>The decoded blocks together with the accumulated use mask and flags</returns>
public static MultiBlock DecodeMulti(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address)
{
    List<Block> blocks = new();
    List<ulong> branchTargets = new();

    RegisterMask useMask = RegisterMask.Zero;

    bool hasHostCall = false;
    bool hasMemoryInstruction = false;

    Block block;

    do
    {
        block = Decode(cpuPreset, memoryManager, address, ref useMask, ref hasHostCall, ref hasMemoryInstruction);

        // Truncated blocks do not contribute a branch target.
        if (!block.IsTruncated && TryGetBranchTarget(block, out ulong targetAddress))
        {
            branchTargets.Add(targetAddress);
        }

        blocks.Add(block);

        // The next block, if any, starts right where this one ended.
        address = block.EndAddress;
    }
    while (!block.IsTruncated && HasNextBlock(block, address - 4UL, branchTargets));

    branchTargets.Sort();
    SplitBlocks(blocks, branchTargets);
    NumberAndLinkBlocks(blocks);

    return new(blocks, useMask, hasHostCall, hasMemoryInstruction);
}
|
||||
|
||||
/// <summary>
/// Gets the branch target of the last instruction of a block, if it has one.
/// </summary>
/// <param name="block">Block to inspect</param>
/// <param name="targetAddress">Branch target address, or zero when there is none</param>
/// <returns>True if the last instruction is a branch with an immediate target</returns>
private static bool TryGetBranchTarget(Block block, out ulong targetAddress)
{
    InstInfo lastInstruction = block.Instructions[^1];
    ulong lastInstructionAddress = block.EndAddress - 4UL;

    return TryGetBranchTarget(lastInstruction.Name, lastInstructionAddress, lastInstruction.Encoding, out targetAddress);
}
|
||||
|
||||
/// <summary>
/// Computes the target of a branch instruction with an immediate offset.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="pc">Guest address of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="targetAddress">Branch target address, or zero when the instruction is not handled</param>
/// <returns>True if <paramref name="name"/> is one of the handled immediate branches</returns>
private static bool TryGetBranchTarget(InstName name, ulong pc, uint encoding, out ulong targetAddress)
{
    int offset;

    if (name == InstName.BUncond)
    {
        offset = ImmUtils.ExtractSImm26Times4(encoding);
    }
    else if (name is InstName.Tbnz or InstName.Tbz)
    {
        offset = ImmUtils.ExtractSImm14Times4(encoding);
    }
    else if (name is InstName.BCond or InstName.Cbnz or InstName.Cbz)
    {
        offset = ImmUtils.ExtractSImm19Times4(encoding);
    }
    else
    {
        targetAddress = 0;

        return false;
    }

    targetAddress = pc + (ulong)offset;

    return true;
}
|
||||
|
||||
/// <summary>
/// Splits blocks so that every branch target that falls inside a block becomes the start of a block.
/// </summary>
/// <param name="blocks">Block list, modified in place</param>
/// <param name="branchTargets">Branch target addresses; the caller sorts them before this call</param>
private static void SplitBlocks(List<Block> blocks, List<ulong> branchTargets)
{
    int targetCursor = 0;

    while (targetCursor < branchTargets.Count)
    {
        for (int i = 0; i < blocks.Count && targetCursor < branchTargets.Count; i++)
        {
            Block current = blocks[i];
            ulong target = branchTargets[targetCursor];

            while (target >= current.Address && target < current.EndAddress)
            {
                if (target != current.Address)
                {
                    // The left half takes the current slot and the right half follows it,
                    // so the right half is visited on the next iteration of the for loop.
                    (Block head, Block tail) = current.SplitAtAddress(target);

                    blocks.Insert(i, head);
                    blocks[i + 1] = tail;

                    current = head;
                }

                // Move past this target and any duplicates of it.
                do
                {
                    targetCursor++;
                }
                while (targetCursor < branchTargets.Count && branchTargets[targetCursor] == target);

                if (targetCursor >= branchTargets.Count)
                {
                    break;
                }

                target = branchTargets[targetCursor];
            }
        }

        // The current target, if any, matched no block: skip it and scan again.
        Debug.Assert(targetCursor < int.MaxValue);
        targetCursor++;
    }
}
|
||||
|
||||
/// <summary>
/// Assigns each block its list index and links blocks to their fall-through and branch successors.
/// </summary>
/// <param name="blocks">Blocks to number and link</param>
private static void NumberAndLinkBlocks(List<Block> blocks)
{
    Dictionary<ulong, Block> blocksByAddress = new();

    foreach (Block candidate in blocks)
    {
        blocksByAddress.Add(candidate.Address, candidate);
    }

    for (int index = 0; index < blocks.Count; index++)
    {
        Block block = blocks[index];

        block.Number(index);

        // Truncated blocks are numbered but get no successors.
        if (block.IsTruncated)
        {
            continue;
        }

        InstName lastName = block.Instructions[^1].Name;

        bool isConditional = lastName is InstName.BCond or InstName.Cbnz or InstName.Cbz or InstName.Tbnz or InstName.Tbz;
        bool isCall = lastName is InstName.Bl or InstName.Blr;

        // Conditional branches and calls always continue on the next block, a return never does,
        // anything else continues only when the block does not end with a branch.
        bool hasNext = isConditional || isCall || (lastName != InstName.Ret && !block.EndsWithBranch);
        bool hasBranch = isConditional || lastName == InstName.BUncond;

        if (hasNext && blocksByAddress.TryGetValue(block.EndAddress, out Block nextBlock))
        {
            block.AddSuccessor(nextBlock);
            nextBlock.AddPredecessor(block);
        }

        if (hasBranch &&
            TryGetBranchTarget(block, out ulong targetAddress) &&
            blocksByAddress.TryGetValue(targetAddress, out Block branchBlock))
        {
            block.AddSuccessor(branchBlock);
            branchBlock.AddPredecessor(block);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Decides whether decoding should continue with the block that follows <paramref name="block"/>.
/// </summary>
/// <param name="block">Block that was just decoded</param>
/// <param name="pc">Guest address of the last instruction of the block</param>
/// <param name="branchTargets">Branch targets collected so far</param>
/// <returns>True if the following block should be decoded too</returns>
private static bool HasNextBlock(in Block block, ulong pc, List<ulong> branchTargets)
{
    InstName lastName = block.Instructions[^1].Name;

    if (lastName is InstName.BCond or InstName.Bl or InstName.Blr or InstName.Cbnz or InstName.Cbz or InstName.Tbnz or InstName.Tbz)
    {
        return true;
    }

    if (lastName == InstName.Br)
    {
        return false;
    }

    if (lastName is InstName.BUncond or InstName.Ret)
    {
        // Keep going if an earlier branch already targets the instruction that follows.
        if (branchTargets.Contains(pc + 4UL))
        {
            return true;
        }

        // An unconditional branch also continues when it jumps forward by less than 0x1000 bytes.
        return lastName == InstName.BUncond &&
            TryGetBranchTarget(block, out ulong targetAddress) &&
            targetAddress >= pc &&
            targetAddress < pc + 0x1000;
    }

    return !block.EndsWithBranch;
}
|
||||
|
||||
/// <summary>
/// Decodes a single block starting at <paramref name="address"/>. Decoding stops after the first
/// control flow or exception instruction, or earlier when the block has to be truncated.
/// </summary>
/// <param name="cpuPreset">CPU version and features used for instruction lookup</param>
/// <param name="memoryManager">Memory manager the instructions are read from</param>
/// <param name="address">Guest address of the first instruction of the block</param>
/// <param name="useMask">Register use mask, updated with the registers used by the block</param>
/// <param name="hasHostCall">Set to true when the block contains an instruction that needs a host call</param>
/// <param name="hasMemoryInstruction">Set to true when the block contains a memory or SYS instruction</param>
/// <returns>The decoded block</returns>
private static Block Decode(
    CpuPreset cpuPreset,
    IMemoryManager memoryManager,
    ulong address,
    ref RegisterMask useMask,
    ref bool hasHostCall,
    ref bool hasMemoryInstruction)
{
    ulong startAddress = address;

    List<InstInfo> insts = new();

    uint gprUseMask = useMask.GprMask;
    uint fpSimdUseMask = useMask.FpSimdMask;
    uint pStateUseMask = useMask.PStateMask;

    uint encoding;
    InstName name;
    bool isTruncated = false;

    while (true)
    {
        encoding = memoryManager.Read<uint>(address);

        (name, InstFlags flags, AddressForm addressForm) = InstTable.GetInstNameAndFlags(encoding, cpuPreset.Version, cpuPreset.Features);

        // Privileged instructions are replaced by the undefined instruction.
        if (name.IsPrivileged())
        {
            name = InstName.UdfPermUndef;
            flags = InstFlags.None;
            addressForm = AddressForm.None;
        }

        (uint gprReads, uint fpSimdReads) = RegisterUtils.PopulateReadMasks(name, flags, encoding);
        (uint gprWrites, uint fpSimdWrites) = RegisterUtils.PopulateWriteMasks(name, flags, encoding);

        if (name.IsCall())
        {
            gprWrites |= 1u << RegisterUtils.LrIndex;
        }

        uint candidateGprUseMask = gprUseMask | gprReads | gprWrites;

        // Stop before this instruction if accepting it would leave too few temporary registers,
        // or if the block is already at its size limit. The address is only advanced for
        // accepted instructions, so the block ends at the rejected one.
        if (CalculateAvailableTemps(candidateGprUseMask) < CalculateRequiredGprTemps(candidateGprUseMask) ||
            insts.Count >= MaxInstructionsPerBlock)
        {
            isTruncated = true;

            break;
        }

        address += 4UL;
        gprUseMask = candidateGprUseMask;

        uint pStateReads = (flags.HasFlag(InstFlags.Nzcv) || IsMrsNzcv(encoding))
            ? NzcvFlags
            : (flags.HasFlag(InstFlags.C) ? CFlag : 0u);

        uint pStateWrites = (flags.HasFlag(InstFlags.S) || IsMsrNzcv(encoding)) ? NzcvFlags : 0u;

        if (flags.HasFlag(InstFlags.Memory) || name == InstName.Sys)
        {
            hasMemoryInstruction = true;
        }

        fpSimdUseMask |= fpSimdReads | fpSimdWrites;
        pStateUseMask |= pStateReads | pStateWrites;

        if (name.IsSystemOrCall() && !hasHostCall)
        {
            hasHostCall = name.IsCall() || InstEmitSystem.NeedsCall(encoding);
        }

        RegisterUse registerUse = new(
            gprReads,
            gprWrites,
            fpSimdReads,
            fpSimdWrites,
            pStateReads,
            pStateWrites);

        insts.Add(new(encoding, name, flags, addressForm, registerUse));

        if (name.IsControlFlowOrException())
        {
            break;
        }
    }

    // A block ending with a backwards branch is marked as a loop end and also sets the host call flag.
    bool isLoopEnd = !isTruncated && IsBackwardsBranch(name, encoding);

    if (isLoopEnd)
    {
        hasHostCall = true;
    }

    useMask = new(gprUseMask, fpSimdUseMask, pStateUseMask);

    bool endsWithBranch = !isTruncated && !name.IsException();

    return new(startAddress, address, insts, endsWithBranch, isTruncated, isLoopEnd);
}
|
||||
|
||||
/// <summary>
/// Checks if the encoding equals 0xd53b4200 once its low 5 bits are ignored.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True on match</returns>
private static bool IsMrsNzcv(uint encoding) => (encoding & 0xffffffe0u) == 0xd53b4200u;
|
||||
|
||||
/// <summary>
/// Checks if the encoding equals 0xd51b4200 once its low 5 bits are ignored.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True on match</returns>
private static bool IsMsrNzcv(uint encoding) => (encoding & 0xffffffe0u) == 0xd51b4200u;
|
||||
|
||||
/// <summary>
/// Checks if the instruction is an immediate branch with a negative offset.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True for a handled branch whose offset is negative, false otherwise</returns>
private static bool IsBackwardsBranch(InstName name, uint encoding)
{
    return name switch
    {
        InstName.BUncond => ImmUtils.ExtractSImm26Times4(encoding) < 0,
        InstName.Tbnz or InstName.Tbz => ImmUtils.ExtractSImm14Times4(encoding) < 0,
        InstName.BCond or InstName.Cbnz or InstName.Cbz => ImmUtils.ExtractSImm19Times4(encoding) < 0,
        _ => false,
    };
}
|
||||
|
||||
/// <summary>
/// Computes the number of temporary general purpose registers required for a given use mask.
/// </summary>
/// <param name="gprUseMask">Mask of general purpose registers in use</param>
/// <returns>Count of used registers that are in the reserved set, plus <see cref="RegisterAllocator.MaxTempsInclFixed"/></returns>
private static int CalculateRequiredGprTemps(uint gprUseMask)
{
    int usedReservedCount = BitOperations.PopCount(gprUseMask & RegisterUtils.ReservedRegsMask);

    return usedReservedCount + RegisterAllocator.MaxTempsInclFixed;
}
|
||||
|
||||
/// <summary>
/// Computes the number of general purpose registers that are neither in use nor reserved.
/// </summary>
/// <param name="gprUseMask">Mask of general purpose registers in use</param>
/// <returns>Number of registers left over</returns>
private static int CalculateAvailableTemps(uint gprUseMask)
{
    var unavailableMask = gprUseMask | RegisterUtils.ReservedRegsMask;

    return BitOperations.PopCount(~unavailableMask);
}
|
||||
}
|
||||
}
|
@ -0,0 +1,593 @@
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
|
||||
{
|
||||
static class InstEmitMemory
|
||||
{
|
||||
// Mask applied to base register form encodings in RewriteInstruction; together with XValue it selects
// the exclusive access encodings that get bit 15 (the "ordered flag" per the comment there) set.
private const uint XMask = 0x3f808000u;
|
||||
// Value the masked encoding must equal for the ordering workaround in RewriteInstruction to apply.
private const uint XValue = 0x8000000u;
|
||||
|
||||
/// <summary>
/// Rewrites a SYS instruction so that the address in its Rt register is translated first.
/// Encodings using the zero register are written unchanged.
/// </summary>
/// <param name="asBits">Address space size in bits</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator used for the temporary register</param>
/// <param name="encoding">Instruction encoding</param>
public static void RewriteSysInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
{
    int guestRt = RegisterUtils.ExtractRt(encoding);

    if (guestRt != RegisterUtils.ZrIndex)
    {
        int temp = regAlloc.AllocateTempGprRegister();

        Operand translated = new(temp, RegisterType.Integer, OperandType.I64);
        Operand guestAddress = new(guestRt, RegisterType.Integer, OperandType.I64);

        Assembler asm = new(writer);

        WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, translated, guestAddress);

        // The instruction operates on the translated address held by the temporary register.
        writer.WriteInstruction(RegisterUtils.ReplaceRt(encoding, temp));

        regAlloc.FreeTempGprRegister(temp);
    }
    else
    {
        writer.WriteInstruction(encoding);
    }
}
|
||||
|
||||
/// <summary>
/// Rewrites a memory instruction by dispatching on its address form.
/// Instructions with an unhandled address form are written unchanged.
/// </summary>
/// <param name="asBits">Address space size in bits</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator used for temporary registers</param>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags</param>
/// <param name="addressForm">Address form of the instruction</param>
/// <param name="pc">Guest address of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
public static void RewriteInstruction(
    int asBits,
    MemoryManagerType mmType,
    CodeWriter writer,
    RegisterAllocator regAlloc,
    InstName name,
    InstFlags flags,
    AddressForm addressForm,
    ulong pc,
    uint encoding)
{
    switch (addressForm)
    {
        case AddressForm.BaseRegister:
        case AddressForm.StructNoOffset:
            // Some applications use unordered memory instructions in places where proper
            // ordering is needed, and only work on some CPUs. To work around this, all
            // exclusive access operations (base register form only) are made ordered.
            if (addressForm == AddressForm.BaseRegister && (encoding & XMask) == XValue)
            {
                // Set ordered flag.
                encoding |= 1u << 15;
            }

            RewriteBaseRegisterMemoryInstruction(asBits, mmType, writer, regAlloc, encoding);
            break;

        case AddressForm.BasePlusOffset:
            RewriteBasePlusOffsetMemoryInstruction(asBits, mmType, writer, regAlloc, encoding);
            break;

        case AddressForm.Literal:
            RewriteLiteralMemoryInstruction(asBits, mmType, writer, regAlloc, name, pc, encoding);
            break;

        case AddressForm.OffsetReg:
            RewriteOffsetRegMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            break;

        case AddressForm.PostIndexed:
            RewritePostIndexedMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            break;

        case AddressForm.PreIndexed:
            RewritePreIndexedMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            break;

        case AddressForm.SignedScaled:
            RewriteSignedScaledMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            break;

        case AddressForm.UnsignedScaled:
            RewriteUnsignedScaledMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            break;

        case AddressForm.StructPostIndexedReg:
            RewriteStructPostIndexedRegMemoryInstruction(asBits, mmType, writer, regAlloc, encoding);
            break;

        default:
            writer.WriteInstruction(encoding);
            break;
    }
}
|
||||
|
||||
/// <summary>
/// Rewrites a register offset memory instruction: base plus extended offset is computed into a
/// temporary register, translated, and the instruction is re-encoded to use that register alone.
/// </summary>
/// <param name="asBits">Address space size in bits</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator used for the temporary register</param>
/// <param name="flags">Instruction flags</param>
/// <param name="encoding">Instruction encoding</param>
private static void RewriteOffsetRegMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    // TODO: Some unallocated encoding cases.

    ArmExtensionType extension = (ArmExtensionType)((encoding >> 13) & 7);

    uint accessSize = encoding >> 30;

    if (flags.HasFlag(InstFlags.FpSimd))
    {
        // For FP/SIMD accesses, bit 23 of the encoding contributes bit 2 of the size.
        accessSize |= (encoding >> 21) & 4u;
    }

    // Bit 12 selects whether the offset is shifted left by the access size.
    bool isOffsetScaled = (encoding & (1u << 12)) != 0;
    int shift = isOffsetScaled ? (int)accessSize : 0;

    int temp = regAlloc.AllocateTempGprRegister();

    Operand address = new(temp, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);
    Operand guestOffset = new(RegisterUtils.ExtractRm(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    asm.Add(address, guestBase, guestOffset, extension, shift);

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, address, address);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, temp);

    // Register -> Unsigned offset
    rewritten = (rewritten & ~(0xfffu << 10)) | (1u << 24);

    writer.WriteInstruction(rewritten);

    regAlloc.FreeTempGprRegister(temp);
}
|
||||
|
||||
/// <summary>
/// Rewrites a post-indexed memory instruction: the access uses the translated base address with
/// no offset, and the guest base register is incremented afterwards.
/// </summary>
/// <param name="asBits">Address space size in bits</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator used for the temporary register</param>
/// <param name="flags">Instruction flags</param>
/// <param name="encoding">Instruction encoding</param>
private static void RewritePostIndexedMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    bool isPair = flags.HasFlag(InstFlags.Rt2);
    int increment = isPair ? ExtractSImm7Scaled(flags, encoding) : ExtractSImm9(encoding);

    int temp = regAlloc.AllocateTempGprRegister();

    Operand translated = new(temp, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, translated, guestBase);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, temp);

    if (isPair)
    {
        // Post-index -> Signed offset
        rewritten = (rewritten & ~(0x7fu << 15)) ^ (3u << 23);
    }
    else
    {
        // Post-index -> Unsigned offset
        rewritten = (rewritten & ~(0xfffu << 10)) | (1u << 24);
    }

    writer.WriteInstruction(rewritten);

    // Write back to the guest base register only after the access was emitted.
    WriteAddConstant(ref asm, guestBase, guestBase, increment);

    regAlloc.FreeTempGprRegister(temp);
}
|
||||
|
||||
/// <summary>
/// Rewrites a pre-indexed memory instruction: the guest base register is incremented first, then
/// the access uses the translated updated address with no offset.
/// </summary>
/// <param name="asBits">Address space size in bits</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator used for the temporary register</param>
/// <param name="flags">Instruction flags</param>
/// <param name="encoding">Instruction encoding</param>
private static void RewritePreIndexedMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    bool isPair = flags.HasFlag(InstFlags.Rt2);
    int increment = isPair ? ExtractSImm7Scaled(flags, encoding) : ExtractSImm9(encoding);

    int temp = regAlloc.AllocateTempGprRegister();

    Operand translated = new(temp, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    // Write back to the guest base register before translating, so the updated address is accessed.
    WriteAddConstant(ref asm, guestBase, guestBase, increment);
    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, translated, guestBase);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, temp);

    if (isPair)
    {
        // Pre-index -> Signed offset
        rewritten &= ~((0x7fu << 15) | (1u << 23));
    }
    else
    {
        // Pre-index -> Unsigned offset
        rewritten = (rewritten & ~(0xfffu << 10)) | (1u << 24);
    }

    writer.WriteInstruction(rewritten);

    regAlloc.FreeTempGprRegister(temp);
}
|
||||
|
||||
/// <summary>
/// Rewrites a memory instruction with a signed scaled 7-bit immediate offset (field at bit 15).
/// </summary>
private static void RewriteSignedScaledMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    const uint OffsetFieldMask = 0x7fu << 15;

    int offset = ExtractSImm7Scaled(flags, encoding);

    RewriteMemoryInstruction(asBits, mmType, writer, regAlloc, encoding, offset, OffsetFieldMask);
}
|
||||
|
||||
/// <summary>
/// Rewrites a memory instruction with an unsigned scaled 12-bit immediate offset (field at bit 10).
/// </summary>
private static void RewriteUnsignedScaledMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    const uint OffsetFieldMask = 0xfffu << 10;

    int offset = ExtractUImm12Scaled(flags, encoding);

    RewriteMemoryInstruction(asBits, mmType, writer, regAlloc, encoding, offset, OffsetFieldMask);
}
|
||||
|
||||
/// <summary>
/// Rewrites a memory instruction addressed by a base register alone (no offset, no offset field).
/// </summary>
private static void RewriteBaseRegisterMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
    => RewriteMemoryInstruction(asBits, mmType, writer, regAlloc, encoding, imm: 0, immMask: 0u);
|
||||
|
||||
/// <summary>
/// Rewrites a memory instruction with a signed 9-bit immediate offset (field at bit 12).
/// </summary>
private static void RewriteBasePlusOffsetMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
{
    const uint OffsetFieldMask = 0x1ffu << 12;

    int offset = ExtractSImm9(encoding);

    RewriteMemoryInstruction(asBits, mmType, writer, regAlloc, encoding, offset, OffsetFieldMask);
}
|
||||
|
||||
/// <summary>
/// Rewrites a base plus immediate memory instruction to access the translated address through a
/// temporary register. The immediate is either left in the instruction (when it can be folded)
/// or applied during the address translation and cleared from the instruction.
/// </summary>
/// <param name="asBits">Address space size in bits</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator used for the temporary register</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="imm">Immediate offset of the instruction</param>
/// <param name="immMask">Mask of the immediate offset field on the encoding</param>
private static void RewriteMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding, int imm, uint immMask)
{
    int temp = regAlloc.AllocateTempGprRegister();

    Operand translated = new(temp, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    bool keepOffsetInInstruction = CanFoldOffset(mmType, imm);

    // A folded offset stays on the instruction, so the translation must not add it again.
    int translationOffset = keepOffsetInInstruction ? 0 : imm;

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, translated, guestBase, translationOffset);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, temp);

    if (!keepOffsetInInstruction)
    {
        rewritten &= ~immMask; // Clear offset
    }

    writer.WriteInstruction(rewritten);

    regAlloc.FreeTempGprRegister(temp);
}
|
||||
|
||||
/// <summary>
/// Rewrites a PC-relative (literal) instruction. ADR/ADRP become a move of the computed guest
/// address; literal loads and prefetches translate the computed guest address into a temporary
/// register and access memory through it.
/// </summary>
/// <param name="asBits">Address space size in bits</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator used for the temporary register</param>
/// <param name="name">Instruction name</param>
/// <param name="pc">Guest address of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
private static void RewriteLiteralMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstName name, ulong pc, uint encoding)
{
    Assembler asm = new(writer);

    ulong targetAddress;
    long imm;
    int rtIndex = (int)(encoding & 0x1f);

    // Index 31 only names the zero register for general purpose destinations, where the
    // instruction has no visible result and nothing needs to be emitted. For FP/SIMD literal
    // loads, index 31 is the regular vector register V31, so the load must still be emitted.
    // PRFM (literal) is exempt from the shortcut as well.
    bool hasGprDestination = name != InstName.LdrLitFpsimd && name != InstName.PrfmLit;

    if (hasGprDestination && rtIndex == RegisterUtils.ZrIndex)
    {
        return;
    }

    Operand rt;

    if (name == InstName.LdrLitFpsimd)
    {
        uint opc = encoding >> 30;

        // TODO: Undefined if opc is invalid?

        rt = new(rtIndex, RegisterType.Vector, opc switch
        {
            0 => OperandType.FP32,
            1 => OperandType.FP64,
            _ => OperandType.V128,
        });
    }
    else
    {
        rt = new(rtIndex, RegisterType.Integer, OperandType.I64);
    }

    switch (name)
    {
        case InstName.Adr:
        case InstName.Adrp:
            // Gather the 21-bit immediate (bits 29-30 are the low part, bits 5-23 the high part)
            // and move its sign bit to bit 63 so the arithmetic shifts below sign extend it.
            imm = ((long)(encoding >> 29) & 3) | ((long)(encoding >> 3) & 0x1ffffc);
            imm <<= 43;

            if (name == InstName.Adrp)
            {
                // Page form: the immediate is scaled by 4096 and applied to the page of the PC.
                imm >>= 31;
                targetAddress = (pc & ~0xfffUL) + (ulong)imm;
            }
            else
            {
                imm >>= 43;
                targetAddress = pc + (ulong)imm;
            }

            asm.Mov(rt, targetAddress);
            break;
        case InstName.LdrLitGen:
        case InstName.LdrswLit:
        case InstName.LdrLitFpsimd:
        case InstName.PrfmLit:
            // Sign extend the 19-bit immediate at bits 5-23 and scale it by 4.
            imm = encoding & ~0x1fu;
            imm <<= 40;
            imm >>= 43;
            targetAddress = pc + (ulong)imm;

            int tempRegister = regAlloc.AllocateTempGprRegister();
            Operand rn = new(tempRegister, RegisterType.Integer, OperandType.I64);

            WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, rn, targetAddress);

            switch (name)
            {
                case InstName.LdrLitGen:
                case InstName.LdrLitFpsimd:
                    asm.LdrRiUn(rt, rn, 0);
                    break;
                case InstName.LdrswLit:
                    asm.LdrswRiUn(rt, rn, 0);
                    break;
                case InstName.PrfmLit:
                    asm.PrfmR(rt, rn);
                    break;
            }

            regAlloc.FreeTempGprRegister(tempRegister);
            break;
        default:
            Debug.Fail($"Invalid literal memory instruction '{name}'.");
            break;
    }
}
|
||||
|
||||
/// <summary>
/// Rewrites a post-indexed structure memory instruction so that it accesses the translated address.
/// The guest base address is translated into a temporary register, the instruction is emitted with that
/// register as base and with the post-index bits cleared, and the guest base register is then advanced
/// with a separate add.
/// </summary>
/// <param name="asBits">Address space size in bits, forwarded to the address translation</param>
/// <param name="mmType">Memory manager type, forwarded to the address translation</param>
/// <param name="writer">Code writer that receives the rewritten instruction</param>
/// <param name="regAlloc">Register allocator used to obtain the temporary base register</param>
/// <param name="encoding">Encoding of the instruction being rewritten</param>
private static void RewriteStructPostIndexedRegMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
{
    // TODO: Some unallocated encoding cases.

    int hostBaseIndex = regAlloc.AllocateTempGprRegister();

    Operand hostBase = new(hostBaseIndex, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);

    // Rm must be read before the encoding is patched below, since the patch clears that field.
    int rmIndex = RegisterUtils.ExtractRm(encoding);

    Assembler asm = new(writer);

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostBase, guestBase);

    uint patched = RegisterUtils.ReplaceRn(encoding, hostBaseIndex);

    // Post-index -> No offset
    patched &= ~((0x1fu << 16) | (1u << 23));

    writer.WriteInstruction(patched);

    if (rmIndex != RegisterUtils.ZrIndex)
    {
        // Register post-index: advance the guest base by Rm.
        asm.Add(guestBase, guestBase, new Operand(rmIndex, RegisterType.Integer, OperandType.I64));
    }
    else
    {
        // Immediate post-index: the increment is the amount of memory the instruction transfers.
        int offset = (patched & (1u << 24)) != 0
            ? GetSingleStructureSize(patched)
            : GetMultipleStructuresSize(patched);

        asm.Add(guestBase, guestBase, new Operand(OperandKind.Constant, OperandType.I32, (ulong)offset));
    }

    regAlloc.FreeTempGprRegister(hostBaseIndex);

    // Transfer size in bytes for the "single structure" form (bit 24 set).
    static int GetSingleStructureSize(uint inst)
    {
        int elems = (int)(((inst >> 12) & 2u) | ((inst >> 21) & 1u)) + 1;
        int size = (int)(inst >> 10) & 3;
        int scale = (int)(inst >> 14) & 3;

        // Encodings that should be undefined but are currently not rejected (S = bit 12, L = bit 22):
        //  scale 1: (size & 1) != 0
        //  scale 2: (size & 2) != 0, or (size & 1) != 0 with S != 0
        //  scale 3: L == 0 or S != 0
        int elemScale = scale switch
        {
            2 => (size & 1) != 0 ? 3 : 2,
            3 => size,
            _ => scale,
        };

        return (1 << elemScale) * elems;
    }

    // Transfer size in bytes for the "multiple structures" form (bit 24 clear).
    static int GetMultipleStructuresSize(uint inst)
    {
        // Any other opcode should be undefined; it yields a zero increment.
        (int reps, int elems) = ((inst >> 12) & 0xf) switch
        {
            0b0000 => (1, 4),
            0b0010 => (4, 1),
            0b0100 => (1, 3),
            0b0110 => (3, 1),
            0b0111 => (1, 1),
            0b1000 => (1, 2),
            0b1010 => (2, 1),
            _ => (0, 0),
        };

        // Should also be undefined when Q == 0, size == 3 and there is more than one element per structure.
        bool q = (inst & (1u << 30)) != 0;

        return reps * (q ? 16 : 8) * elems;
    }
}
|
||||
|
||||
/// <summary>
/// Emits code that computes <paramref name="guestAddress"/> plus <paramref name="offset"/> into
/// <paramref name="destination"/> and then translates the result.
/// </summary>
/// <param name="asBits">Address space size in bits, forwarded to the address translation</param>
/// <param name="mmType">Memory manager type, forwarded to the address translation</param>
/// <param name="regAlloc">Register allocator, forwarded to the address translation</param>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="destination">Register that receives the translated address</param>
/// <param name="guestAddress">Register holding the guest base address</param>
/// <param name="offset">Signed byte offset to add to the guest address before translation</param>
private static void WriteAddressTranslation(
    int asBits,
    MemoryManagerType mmType,
    RegisterAllocator regAlloc,
    ref Assembler asm,
    Operand destination,
    Operand guestAddress,
    int offset)
{
    if (offset != 0)
    {
        // They are assumed to be on different registers, otherwise this operation will trash the address.
        Debug.Assert(destination.Value != guestAddress.Value);

        // Compared explicitly on both sides rather than through Math.Abs, which throws for int.MinValue.
        if (offset >= 0x1000 || offset <= -0x1000)
        {
            // Too high to encode as 12-bit immediate, do a separate move.
            asm.Mov(destination, (ulong)offset);
            asm.Add(destination, destination, guestAddress);
        }
        else
        {
            // Encode as 12-bit immediate.
            WriteAddConstant(ref asm, destination, guestAddress, offset);
        }

        // From here on the adjusted address lives in the destination register.
        guestAddress = destination;
    }

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, destination, guestAddress);
}
|
||||
|
||||
/// <summary>
/// Emits code that loads a constant guest address into <paramref name="destination"/> and then
/// translates it using the register based overload.
/// </summary>
/// <param name="asBits">Address space size in bits, forwarded to the address translation</param>
/// <param name="mmType">Memory manager type, forwarded to the address translation</param>
/// <param name="regAlloc">Register allocator, forwarded to the address translation</param>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="destination">Register that receives the translated address</param>
/// <param name="guestAddress">Constant guest address to translate</param>
private static void WriteAddressTranslation(int asBits, MemoryManagerType mmType, RegisterAllocator regAlloc, ref Assembler asm, Operand destination, ulong guestAddress)
{
    // Materialize the constant; the destination register then doubles as the guest address operand.
    asm.Mov(destination, guestAddress);

    Operand guestAddressRegister = destination;

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, destination, guestAddressRegister);
}
|
||||
|
||||
/// <summary>
/// Emits code that turns the guest address in <paramref name="guestAddress"/> into the value of the
/// fixed page table register plus that address, written to <paramref name="destination"/>.
/// </summary>
/// <param name="asBits">Address space size in bits; used to build the address mask for <see cref="MemoryManagerType.HostMapped"/></param>
/// <param name="mmType">Memory manager type; only the host mapped types are supported</param>
/// <param name="regAlloc">Register allocator that provides the fixed page table register</param>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="destination">Register that receives the translated address</param>
/// <param name="guestAddress">Register holding the guest address</param>
/// <exception cref="NotImplementedException">The memory manager type is not a host mapped type</exception>
private static void WriteAddressTranslation(int asBits, MemoryManagerType mmType, RegisterAllocator regAlloc, ref Assembler asm, Operand destination, Operand guestAddress)
{
    Operand pageTableBase = new(regAlloc.FixedPageTableRegister, RegisterType.Integer, OperandType.I64);

    switch (mmType)
    {
        case MemoryManagerType.HostMapped:
            // Keep only the low asBits bits of the address before adding the base.
            asm.And(destination, guestAddress, new Operand(OperandKind.Constant, OperandType.I64, ulong.MaxValue >> (64 - asBits)));
            asm.Add(destination, pageTableBase, destination);
            break;

        case MemoryManagerType.HostMappedUnsafe:
            // No masking, the address is used as is.
            asm.Add(destination, pageTableBase, guestAddress);
            break;

        default:
            throw new NotImplementedException(mmType.ToString());
    }
}
|
||||
|
||||
/// <summary>
/// Emits <c>rd = rn + value</c>, using a subtraction of the negated value when <paramref name="value"/> is negative.
/// </summary>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="rd">Destination register</param>
/// <param name="rn">Source register</param>
/// <param name="value">Signed constant to add</param>
private static void WriteAddConstant(ref Assembler asm, Operand rd, Operand rn, int value)
{
    if (value >= 0)
    {
        asm.Add(rd, rn, new Operand(OperandKind.Constant, OperandType.I32, (ulong)value));

        return;
    }

    // The immediate is passed as a magnitude, so negative constants are emitted as a subtraction.
    asm.Sub(rd, rn, new Operand(OperandKind.Constant, OperandType.I32, (ulong)-value));
}
|
||||
|
||||
/// <summary>
/// Indicates whether the memory manager type is <see cref="MemoryManagerType.HostMappedUnsafe"/>.
/// </summary>
/// <param name="mmType">Memory manager type</param>
/// <param name="offset">Offset of the access; currently not taken into account</param>
/// <returns>True if <paramref name="mmType"/> is <see cref="MemoryManagerType.HostMappedUnsafe"/>, false otherwise</returns>
private static bool CanFoldOffset(MemoryManagerType mmType, int offset) => mmType == MemoryManagerType.HostMappedUnsafe;
|
||||
|
||||
/// <summary>
/// Extracts the signed 7-bit immediate of the instruction and scales it by <c>2 + opc</c> bits,
/// where opc is taken from bits 31:30 for FP/SIMD instructions and from bit 31 otherwise.
/// </summary>
/// <param name="flags">Instruction flags; <see cref="InstFlags.FpSimd"/> selects the opc field width</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Scaled signed immediate</returns>
private static int ExtractSImm7Scaled(InstFlags flags, uint encoding)
{
    int opcShift = flags.HasFlag(InstFlags.FpSimd) ? 30 : 31;
    uint opc = encoding >> opcShift;

    return ExtractSImm7(encoding) << (int)(2 + opc);
}
|
||||
|
||||
/// <summary>
/// Extracts the sign-extended 7-bit immediate stored at bits 21:15 of the encoding.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Immediate value in the range -64 to 63</returns>
private static int ExtractSImm7(uint encoding)
{
    // Move the field to the top of the word, then shift back arithmetically to sign-extend it.
    return ((int)(encoding >> 15) << 25) >> 25;
}
|
||||
|
||||
/// <summary>
/// Extracts the sign-extended 9-bit immediate stored at bits 20:12 of the encoding.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Immediate value in the range -256 to 255</returns>
private static int ExtractSImm9(uint encoding)
{
    // Move the field to the top of the word, then shift back arithmetically to sign-extend it.
    return ((int)(encoding >> 12) << 23) >> 23;
}
|
||||
|
||||
/// <summary>
/// Extracts the unsigned 12-bit immediate of the instruction and scales it by the access size.
/// The scale is bits 31:30 of the encoding; for FP/SIMD instructions bit 23 is added as bit 2 of the scale.
/// </summary>
/// <param name="flags">Instruction flags; <see cref="InstFlags.FpSimd"/> enables the extra scale bit</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Scaled unsigned immediate</returns>
private static int ExtractUImm12Scaled(InstFlags flags, uint encoding)
{
    uint scale = flags.HasFlag(InstFlags.FpSimd)
        ? (encoding >> 30) | ((encoding >> 21) & 4u)
        : encoding >> 30;

    return ExtractUImm12(encoding) << (int)scale;
}
|
||||
|
||||
/// <summary>
/// Extracts the unsigned 12-bit immediate stored at bits 21:10 of the encoding.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Immediate value in the range 0 to 4095</returns>
private static int ExtractUImm12(uint encoding) => (int)((encoding >> 10) & 0xfffu);
|
||||
}
|
||||
}
|
@ -0,0 +1,610 @@
|
||||
using ARMeilleure.Common;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen;
|
||||
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
|
||||
{
|
||||
static class InstEmitSystem
|
||||
{
|
||||
private delegate void SoftwareInterruptHandler(ulong address, int imm);
|
||||
private delegate ulong Get64();
|
||||
private delegate bool GetBool();
|
||||
|
||||
/// <summary>
/// Rewrites a system instruction.
/// BRK, SVC and UDF become calls to the matching managed handler followed by a synchronization point.
/// Accesses to TPIDRRO_EL0 and TPIDR_EL0 become loads/stores on the native context, CNTPCT_EL0 reads become
/// a call, and CTR_EL0 reads are replaced with a constant on Apple operating systems.
/// Any other instruction is written unchanged.
/// </summary>
/// <param name="writer">Code writer that receives the rewritten code</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="tailMerger">Tail merger used by the synchronization point</param>
/// <param name="name">Name of the instruction</param>
/// <param name="pc">Guest address of the instruction</param>
/// <param name="encoding">Encoding of the instruction</param>
/// <param name="spillBaseOffset">Stack offset where registers are spilled around calls</param>
public static void RewriteInstruction(
    CodeWriter writer,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    InstName name,
    ulong pc,
    uint encoding,
    int spillBaseOffset)
{
    if (name == InstName.Brk)
    {
        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetBrkHandlerPtr(), spillBaseOffset, null, pc, encoding);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, spillBaseOffset);
    }
    else if (name == InstName.Svc)
    {
        // The handler receives the 16-bit immediate rather than the whole encoding.
        uint svcId = (ushort)(encoding >> 5);

        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetSvcHandlerPtr(), spillBaseOffset, null, pc, svcId);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, spillBaseOffset);
    }
    else if (name == InstName.UdfPermUndef)
    {
        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetUdfHandlerPtr(), spillBaseOffset, null, pc, encoding);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, spillBaseOffset);
    }
    else if ((encoding & ~0x1f) == 0xd53bd060) // mrs x0, tpidrro_el0
    {
        uint rd = encoding & 0x1f;

        // A read into the zero register has no visible effect, so nothing is emitted for it.
        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            asm.LdrRiUn(Register((int)rd), Register(regAlloc.FixedContextRegister), NativeContextOffsets.TpidrroEl0Offset);
        }
    }
    else if ((encoding & ~0x1f) == 0xd53bd040) // mrs x0, tpidr_el0
    {
        uint rd = encoding & 0x1f;

        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            asm.LdrRiUn(Register((int)rd), Register(regAlloc.FixedContextRegister), NativeContextOffsets.TpidrEl0Offset);
        }
    }
    else if ((encoding & ~0x1f) == 0xd53b0020 && IsAppleOS()) // mrs x0, ctr_el0
    {
        uint rd = encoding & 0x1f;

        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            // TODO: Use host value? But that register can't be accessed on macOS...
            asm.Mov(Register((int)rd, OperandType.I32), 0x8444c004);
        }
    }
    else if ((encoding & ~0x1f) == 0xd53be020) // mrs x0, cntpct_el0
    {
        uint rd = encoding & 0x1f;

        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            WriteCall(ref asm, regAlloc, GetCntpctEl0Ptr(), spillBaseOffset, (int)rd);
        }
    }
    else if ((encoding & ~0x1f) == 0xd51bd040) // msr tpidr_el0, x0
    {
        uint rt = encoding & 0x1f;

        // Unlike the reads above, a write must be emitted even when the source is the zero register:
        // "msr tpidr_el0, xzr" sets the register to zero, and register 31 as the source of a store
        // is the zero register, so the same store instruction covers that case.
        Assembler asm = new(writer);

        asm.StrRiUn(Register((int)rt), Register(regAlloc.FixedContextRegister), NativeContextOffsets.TpidrEl0Offset);
    }
    else
    {
        writer.WriteInstruction(encoding);
    }
}
|
||||
|
||||
/// <summary>
/// Checks if the instruction is one of the system instructions reported as requiring a call:
/// SVC, a CNTPCT_EL0 read, or (on Apple operating systems only) a CTR_EL0 read.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True for the instructions listed above, false otherwise</returns>
public static bool NeedsCall(uint encoding)
{
    // The 16-bit immediate of SVC (bits 20:5) is ignored for the comparison.
    bool isSvc = (encoding & ~(0xffffu << 5)) == 0xd4000001u; // svc #0

    // The destination register (bits 4:0) is ignored for the comparisons below.
    bool isCtrEl0Read = (encoding & ~0x1f) == 0xd53b0020; // mrs x0, ctr_el0
    bool isCntpctEl0Read = (encoding & ~0x1f) == 0xd53be020; // mrs x0, cntpct_el0

    return isSvc || (isCtrEl0Read && IsAppleOS()) || isCntpctEl0Read;
}
|
||||
|
||||
/// <summary>
/// Checks if the current operating system is macOS or iOS.
/// </summary>
/// <returns>True on macOS or iOS, false otherwise</returns>
private static bool IsAppleOS() => OperatingSystem.IsMacOS() || OperatingSystem.IsIOS();
|
||||
|
||||
/// <summary>
/// Checks if the instruction is reported as requiring a context store and load; this is only the case for SVC.
/// </summary>
/// <param name="name">Name of the instruction</param>
/// <returns>True if <paramref name="name"/> is <see cref="InstName.Svc"/>, false otherwise</returns>
public static bool NeedsContextStoreLoad(InstName name) => name == InstName.Svc;
|
||||
|
||||
// Held in a static field so that the delegate stays reachable for as long as the function pointer
// obtained from it may be used; the garbage collector does not track pointers handed to native code.
private static readonly SoftwareInterruptHandler _brkHandler = NativeInterface.Break;

/// <summary>
/// Gets a native function pointer for <see cref="NativeInterface.Break"/>.
/// </summary>
/// <returns>Function pointer that can be called from generated code</returns>
private static IntPtr GetBrkHandlerPtr()
{
    return Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(_brkHandler);
}
|
||||
|
||||
// Held in a static field so that the delegate stays reachable for as long as the function pointer
// obtained from it may be used; the garbage collector does not track pointers handed to native code.
private static readonly SoftwareInterruptHandler _svcHandler = NativeInterface.SupervisorCall;

/// <summary>
/// Gets a native function pointer for <see cref="NativeInterface.SupervisorCall"/>.
/// </summary>
/// <returns>Function pointer that can be called from generated code</returns>
private static IntPtr GetSvcHandlerPtr()
{
    return Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(_svcHandler);
}
|
||||
|
||||
// Held in a static field so that the delegate stays reachable for as long as the function pointer
// obtained from it may be used; the garbage collector does not track pointers handed to native code.
private static readonly SoftwareInterruptHandler _udfHandler = NativeInterface.Undefined;

/// <summary>
/// Gets a native function pointer for <see cref="NativeInterface.Undefined"/>.
/// </summary>
/// <returns>Function pointer that can be called from generated code</returns>
private static IntPtr GetUdfHandlerPtr()
{
    return Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(_udfHandler);
}
|
||||
|
||||
// Held in a static field so that the delegate stays reachable for as long as the function pointer
// obtained from it may be used; the garbage collector does not track pointers handed to native code.
private static readonly Get64 _getCntpctEl0 = NativeInterface.GetCntpctEl0;

/// <summary>
/// Gets a native function pointer for <see cref="NativeInterface.GetCntpctEl0"/>.
/// </summary>
/// <returns>Function pointer that can be called from generated code</returns>
private static IntPtr GetCntpctEl0Ptr()
{
    return Marshal.GetFunctionPointerForDelegate<Get64>(_getCntpctEl0);
}
|
||||
|
||||
// Held in a static field so that the delegate stays reachable for as long as the function pointer
// obtained from it may be used; the garbage collector does not track pointers handed to native code.
private static readonly GetBool _checkSynchronization = NativeInterface.CheckSynchronization;

/// <summary>
/// Gets a native function pointer for <see cref="NativeInterface.CheckSynchronization"/>.
/// </summary>
/// <returns>Function pointer that can be called from generated code</returns>
private static IntPtr CheckSynchronizationPtr()
{
    return Marshal.GetFunctionPointerForDelegate<GetBool>(_checkSynchronization);
}
|
||||
|
||||
/// <summary>
/// Writes a synchronization point using a new assembler created over <paramref name="writer"/>.
/// </summary>
/// <param name="writer">Code writer that receives the code</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="tailMerger">Tail merger used for the conditional return</param>
/// <param name="spillBaseOffset">Stack offset where registers are spilled around the call</param>
public static void WriteSyncPoint(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset)
{
    Assembler syncAsm = new(writer);

    WriteSyncPoint(writer, ref syncAsm, regAlloc, tailMerger, spillBaseOffset);
}
|
||||
|
||||
/// <summary>
/// Writes a synchronization point.
/// The emitted code loads the counter from the native context. When it is zero, registers are spilled,
/// the synchronization check is called, a conditional return on its result is registered with the tail
/// merger, registers are restored and the counter is loaded again. In both cases the counter is then
/// decremented and stored back.
/// </summary>
/// <param name="writer">Code writer that receives the code</param>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="tailMerger">Tail merger used for the conditional return</param>
/// <param name="spillBaseOffset">Stack offset where registers are spilled around the call</param>
private static void WriteSyncPoint(CodeWriter writer, ref Assembler asm, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset)
{
    int counterIndex = regAlloc.AllocateTempGprRegister();
    uint counterMask = 1u << counterIndex;

    Operand counter = Register(counterIndex, OperandType.I32);
    Operand context = Register(regAlloc.FixedContextRegister);

    asm.LdrRiUn(counter, context, NativeContextOffsets.CounterOffset);

    // Emitted with a zero offset, the real target is patched in once it is known.
    int skipBranchIndex = writer.InstructionPointer;
    asm.Cbnz(counter, 0);

    WriteSpill(ref asm, regAlloc, counterMask, spillBaseOffset, counterIndex);

    // The function pointer goes into register 0, unless that is the counter register.
    Operand callTarget = Register(counterIndex == 0 ? 1 : 0);

    asm.Mov(callTarget, (ulong)CheckSynchronizationPtr());
    asm.Blr(callTarget);

    tailMerger.AddConditionalZeroReturn(writer, asm, Register(0, OperandType.I32));

    WriteFill(ref asm, regAlloc, counterMask, spillBaseOffset, counterIndex);

    asm.LdrRiUn(counter, context, NativeContextOffsets.CounterOffset);

    // Point the branch at the decrement below, skipping the call when the counter was not zero.
    uint skipDistance = (uint)(writer.InstructionPointer - skipBranchIndex) & 0x7ffff;
    uint skipBranch = writer.ReadInstructionAt(skipBranchIndex);
    writer.WriteInstructionAt(skipBranchIndex, skipBranch | (skipDistance << 5));

    asm.Sub(counter, counter, new Operand(OperandKind.Constant, OperandType.I32, 1));
    asm.StrRiUn(counter, context, NativeContextOffsets.CounterOffset);

    regAlloc.FreeTempGprRegister(counterIndex);
}
|
||||
|
||||
/// <summary>
/// Rewrites a B, BL, BR or BLR instruction as a call (or tail call) to the target guest function.
/// For B and BL the target is the constant address encoded in the instruction; for BR and BLR it is the
/// value of the Rn register, or the constant zero when Rn is the zero register.
/// </summary>
/// <param name="writer">Code writer that receives the code</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="tailMerger">Tail merger used for the conditional return after the call</param>
/// <param name="writeEpilogue">Callback that writes the function epilogue, used for tail calls</param>
/// <param name="funcTable">Function table used to look up constant targets</param>
/// <param name="dispatchStubPtr">Pointer to the dispatch stub</param>
/// <param name="name">Name of the instruction</param>
/// <param name="pc">Guest address of the instruction</param>
/// <param name="encoding">Encoding of the instruction</param>
/// <param name="spillBaseOffset">Stack offset where registers are spilled around the call</param>
/// <param name="isTail">True to emit a tail call, false to emit a regular call</param>
public static void RewriteCallInstruction(
    CodeWriter writer,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    Action writeEpilogue,
    AddressTable<ulong> funcTable,
    IntPtr dispatchStubPtr,
    InstName name,
    ulong pc,
    uint encoding,
    int spillBaseOffset,
    bool isTail = false)
{
    Assembler asm = new(writer);

    bool isImmediateBranch = name == InstName.BUncond || name == InstName.Bl;
    bool isRegisterBranch = name == InstName.Blr || name == InstName.Br;

    if (!isImmediateBranch && !isRegisterBranch)
    {
        Debug.Fail($"Unknown branch instruction \"{name}\".");

        return;
    }

    Operand target;

    if (isImmediateBranch)
    {
        int imm = ImmUtils.ExtractSImm26Times4(encoding);

        target = new Operand(OperandKind.Constant, OperandType.I64, pc + (ulong)imm);
    }
    else
    {
        int rnIndex = RegisterUtils.ExtractRn(encoding);

        target = rnIndex == RegisterUtils.ZrIndex
            ? new Operand(OperandKind.Constant, OperandType.I64, 0UL)
            : Register(regAlloc.RemapReservedGprRegister(rnIndex));
    }

    WriteCallWithGuestAddress(
        writer,
        ref asm,
        regAlloc,
        tailMerger,
        writeEpilogue,
        funcTable,
        dispatchStubPtr,
        spillBaseOffset,
        pc,
        target,
        isTail);
}
|
||||
|
||||
/// <summary>
/// Writes a call, or a tail call, to the function at a guest address.
/// The guest address is stored into the dispatch address field of the native context, the context pointer
/// is placed in register 0, and the host function pointer is either loaded from the function table entry
/// (constant targets with a table) or set to <paramref name="funcPtr"/>.
/// For regular calls, the value in register 0 after the call is compared with the address of the next guest
/// instruction, and a conditional return is registered with the tail merger for the "not equal" case.
/// </summary>
/// <param name="writer">Code writer that receives the code</param>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="tailMerger">Tail merger used for the conditional return after the call</param>
/// <param name="writeEpilogue">Callback that writes the function epilogue, used for tail calls</param>
/// <param name="funcTable">Function table used to look up constant targets, or null</param>
/// <param name="funcPtr">Function pointer used when the target is not looked up on the table</param>
/// <param name="spillBaseOffset">Stack offset where registers are spilled around the call</param>
/// <param name="pc">Guest address of the call instruction</param>
/// <param name="guestAddress">Target guest address, as a constant or a register</param>
/// <param name="isTail">True to emit a tail call, false to emit a regular call</param>
public unsafe static void WriteCallWithGuestAddress(
    CodeWriter writer,
    ref Assembler asm,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    Action writeEpilogue,
    AddressTable<ulong> funcTable,
    IntPtr funcPtr,
    int spillBaseOffset,
    ulong pc,
    Operand guestAddress,
    bool isTail = false)
{
    bool isConstantTarget = guestAddress.Kind == OperandKind.Constant;

    Operand context = Register(regAlloc.FixedContextRegister);

    if (isConstantTarget)
    {
        // A constant has to go through a register before it can be stored.
        int scratchIndex = regAlloc.AllocateTempGprRegister();
        Operand scratch = Register(scratchIndex);

        asm.Mov(scratch, guestAddress.Value);
        asm.StrRiUn(scratch, context, NativeContextOffsets.DispatchAddressOffset);

        regAlloc.FreeTempGprRegister(scratchIndex);
    }
    else
    {
        asm.StrRiUn(guestAddress, context, NativeContextOffsets.DispatchAddressOffset);
    }

    if (!isTail)
    {
        WriteSpillSkipContext(ref asm, regAlloc, spillBaseOffset);
    }

    // Register 0 carries the context pointer, so the call target uses register 1,
    // or register 2 if the context currently lives on register 1.
    Operand callTarget = Register(regAlloc.FixedContextRegister == 1 ? 2 : 1);

    if (regAlloc.FixedContextRegister != 0)
    {
        asm.Mov(Register(0), context);
    }

    if (isConstantTarget && funcTable != null)
    {
        ulong entryAddress = (ulong)Unsafe.AsPointer(ref funcTable.GetValue(guestAddress.Value));

        // Load the host pointer from the table entry: page address first, then the offset within the page.
        asm.Mov(callTarget, entryAddress & ~0xfffUL);
        asm.LdrRiUn(callTarget, callTarget, (int)(entryAddress & 0xfffUL));
    }
    else
    {
        asm.Mov(callTarget, (ulong)funcPtr);
    }

    if (isTail)
    {
        writeEpilogue();
        asm.Br(callTarget);

        return;
    }

    asm.Blr(callTarget);

    ulong returnAddress = pc + 4UL;

    asm.Mov(callTarget, returnAddress);
    asm.Cmp(Register(0), callTarget);

    tailMerger.AddConditionalReturn(writer, asm, ArmCondition.Ne);

    WriteFillSkipContext(ref asm, regAlloc, spillBaseOffset);
}
|
||||
|
||||
/// <summary>
/// Writes a call to a native function with constant arguments.
/// Registers are spilled before the call and restored after it; when a result register is specified,
/// it is excluded from the spill/restore and receives the value of register 0 after the call.
/// </summary>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="funcPtr">Pointer of the function to call</param>
/// <param name="spillBaseOffset">Stack offset where registers are spilled around the call</param>
/// <param name="resultRegister">Register that receives the call result, or null if the result is not used</param>
/// <param name="callArgs">Constant arguments, passed on registers 0 and up, in order</param>
private static void WriteCall(
    ref Assembler asm,
    RegisterAllocator regAlloc,
    IntPtr funcPtr,
    int spillBaseOffset,
    int? resultRegister,
    params ulong[] callArgs)
{
    uint resultMask = 0u;

    // The first register after the arguments holds the function address,
    // unless it is also where the result has to go.
    int callTargetIndex = callArgs.Length;

    if (resultRegister is int resultIndex)
    {
        resultMask = 1u << resultIndex;

        if (callTargetIndex == resultIndex)
        {
            callTargetIndex++;
        }
    }

    WriteSpill(ref asm, regAlloc, resultMask, spillBaseOffset, callTargetIndex);

    // We only support up to 7 arguments right now.
    // ABI defines the first 8 integer arguments to be passed on registers X0-X7.
    // We need at least one register to put the function address on, so that reduces the number of
    // registers we can use for that by one.

    Debug.Assert(callArgs.Length < 8);

    int argIndex = 0;

    foreach (ulong argument in callArgs)
    {
        asm.Mov(Register(argIndex++), argument);
    }

    Operand callTarget = Register(callTargetIndex);

    asm.Mov(callTarget, (ulong)funcPtr);
    asm.Blr(callTarget);

    if (resultRegister is int destinationIndex && destinationIndex != 0)
    {
        asm.Mov(Register(destinationIndex), Register(0));
    }

    WriteFill(ref asm, regAlloc, resultMask, spillBaseOffset, callTargetIndex);
}
|
||||
|
||||
/// <summary>
/// Writes code that saves registers to the stack; see <see cref="WriteSpillOrFill"/>.
/// </summary>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="exceptMask">Mask of general purpose registers that must not be saved</param>
/// <param name="spillOffset">Stack offset of the save area</param>
/// <param name="tempRegister">Register that can be used as temporary</param>
private static void WriteSpill(ref Assembler asm, RegisterAllocator regAlloc, uint exceptMask, int spillOffset, int tempRegister)
    => WriteSpillOrFill(ref asm, regAlloc, exceptMask, spillOffset, tempRegister, spill: true);
|
||||
|
||||
/// <summary>
/// Writes code that restores registers from the stack; see <see cref="WriteSpillOrFill"/>.
/// </summary>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="exceptMask">Mask of general purpose registers that must not be restored</param>
/// <param name="spillOffset">Stack offset of the save area</param>
/// <param name="tempRegister">Register that can be used as temporary</param>
private static void WriteFill(ref Assembler asm, RegisterAllocator regAlloc, uint exceptMask, int spillOffset, int tempRegister)
    => WriteSpillOrFill(ref asm, regAlloc, exceptMask, spillOffset, tempRegister, spill: false);
|
||||
|
||||
/// <summary>
/// Writes code that saves registers to, or restores registers from, the stack.
/// The save area holds, in order: the general purpose registers in use that are neither callee saved
/// nor on <paramref name="exceptMask"/> (8 bytes each), the NZCV flags if any flag is in use (8 bytes),
/// padding up to a 16 bytes boundary, and all FP/SIMD registers in use (16 bytes each).
/// </summary>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="regAlloc">Register allocator that provides the masks of registers in use</param>
/// <param name="exceptMask">Mask of general purpose registers to leave out</param>
/// <param name="spillOffset">Stack offset of the save area</param>
/// <param name="tempRegister">Register used as temporary to move the flags</param>
/// <param name="spill">True to save the registers, false to restore them</param>
private static void WriteSpillOrFill(
    ref Assembler asm,
    RegisterAllocator regAlloc,
    uint exceptMask,
    int spillOffset,
    int tempRegister,
    bool spill)
{
    Operand stack = Register(RegisterUtils.SpIndex);

    bool hasFlags = regAlloc.AllPStateMask != 0;
    uint pendingGprs = regAlloc.AllGprMask & ~(AbiConstants.GprCalleeSavedRegsMask | exceptMask);

    if (hasFlags && !spill)
    {
        // We must reload the status register before reloading the GPRs,
        // since we might otherwise trash one of them by using it as temp register.
        // The flags slot comes right after the GPRs, which take 8 bytes each.
        Operand flags = Register(tempRegister, OperandType.I32);

        asm.LdrRiUn(flags, stack, spillOffset + BitOperations.PopCount(pendingGprs) * 8);
        asm.MsrNzcv(flags);
    }

    while (pendingGprs != 0)
    {
        int reg = BitOperations.TrailingZeroCount(pendingGprs);

        // Two consecutive registers are moved with a single pair instruction while the offset allows it.
        bool pairWithNext = reg < 31 &&
            (pendingGprs & (2u << reg)) != 0 &&
            spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit;

        Operand first = Register(regAlloc.RemapReservedGprRegister(reg));

        if (pairWithNext)
        {
            Operand second = Register(regAlloc.RemapReservedGprRegister(reg + 1));

            if (spill)
            {
                asm.StpRiUn(first, second, stack, spillOffset);
            }
            else
            {
                asm.LdpRiUn(first, second, stack, spillOffset);
            }

            pendingGprs &= ~(3u << reg);
            spillOffset += 16;
        }
        else
        {
            if (spill)
            {
                asm.StrRiUn(first, stack, spillOffset);
            }
            else
            {
                asm.LdrRiUn(first, stack, spillOffset);
            }

            pendingGprs &= ~(1u << reg);
            spillOffset += 8;
        }
    }

    if (hasFlags)
    {
        if (spill)
        {
            Operand flags = Register(tempRegister, OperandType.I32);

            asm.MrsNzcv(flags);
            asm.StrRiUn(flags, stack, spillOffset);
        }

        // The slot is skipped on restore too, as the flags were already reloaded above.
        spillOffset += 8;
    }

    // Round up to a multiple of 16 bytes before the vector registers.
    if ((spillOffset & 8) != 0)
    {
        spillOffset += 8;
    }

    uint pendingVectors = regAlloc.AllFpSimdMask;

    while (pendingVectors != 0)
    {
        int reg = BitOperations.TrailingZeroCount(pendingVectors);

        bool pairWithNext = reg < 31 &&
            (pendingVectors & (2u << reg)) != 0 &&
            spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit;

        Operand first = Register(reg, OperandType.V128);

        if (pairWithNext)
        {
            Operand second = Register(reg + 1, OperandType.V128);

            if (spill)
            {
                asm.StpRiUn(first, second, stack, spillOffset);
            }
            else
            {
                asm.LdpRiUn(first, second, stack, spillOffset);
            }

            pendingVectors &= ~(3u << reg);
            spillOffset += 32;
        }
        else
        {
            if (spill)
            {
                asm.StrRiUn(first, stack, spillOffset);
            }
            else
            {
                asm.LdrRiUn(first, stack, spillOffset);
            }

            pendingVectors &= ~(1u << reg);
            spillOffset += 16;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Writes code that saves registers to the stack; see <see cref="WriteSpillOrFillSkipContext"/>.
/// </summary>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="spillOffset">Stack offset of the save area</param>
private static void WriteSpillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
    => WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: true);
|
||||
|
||||
/// <summary>
/// Writes code that restores registers from the stack; see <see cref="WriteSpillOrFillSkipContext"/>.
/// </summary>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="spillOffset">Stack offset of the save area</param>
private static void WriteFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
    => WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: false);
|
||||
|
||||
/// <summary>
/// Writes code that saves to, or restores from, the stack the fixed context and page table registers,
/// as long as they are in use and are not callee saved. Each register takes 8 bytes on the save area.
/// </summary>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="regAlloc">Register allocator that provides the fixed registers and the mask of registers in use</param>
/// <param name="spillOffset">Stack offset of the save area</param>
/// <param name="spill">True to save the registers, false to restore them</param>
private static void WriteSpillOrFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset, bool spill)
{
    uint fixedRegistersMask = (1u << regAlloc.FixedContextRegister) | (1u << regAlloc.FixedPageTableRegister);
    uint pendingGprs = regAlloc.AllGprMask & fixedRegistersMask & ~AbiConstants.GprCalleeSavedRegsMask;

    Operand stack = Register(RegisterUtils.SpIndex);

    while (pendingGprs != 0)
    {
        int reg = BitOperations.TrailingZeroCount(pendingGprs);

        // Two consecutive registers are moved with a single pair instruction while the offset allows it.
        bool pairWithNext = reg < 31 &&
            (pendingGprs & (2u << reg)) != 0 &&
            spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit;

        Operand first = Register(regAlloc.RemapReservedGprRegister(reg));

        if (pairWithNext)
        {
            Operand second = Register(regAlloc.RemapReservedGprRegister(reg + 1));

            if (spill)
            {
                asm.StpRiUn(first, second, stack, spillOffset);
            }
            else
            {
                asm.LdpRiUn(first, second, stack, spillOffset);
            }

            pendingGprs &= ~(3u << reg);
            spillOffset += 16;
        }
        else
        {
            if (spill)
            {
                asm.StrRiUn(first, stack, spillOffset);
            }
            else
            {
                asm.LdrRiUn(first, stack, spillOffset);
            }

            pendingGprs &= ~(1u << reg);
            spillOffset += 8;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Creates a register operand with the integer register type and the specified operand type.
/// </summary>
/// <param name="register">Register index</param>
/// <param name="type">Operand type, 64-bit integer by default</param>
/// <returns>The register operand</returns>
private static Operand Register(int register, OperandType type = OperandType.I64) => new(register, RegisterType.Integer, type);
|
||||
}
|
||||
}
|
1605
src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstTable.cs
Normal file
1605
src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstTable.cs
Normal file
File diff suppressed because it is too large
Load Diff
22
src/Ryujinx.Cpu/LightningJit/Cache/CacheEntry.cs
Normal file
22
src/Ryujinx.Cpu/LightningJit/Cache/CacheEntry.cs
Normal file
@ -0,0 +1,22 @@
|
||||
using System;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
/// <summary>
/// Offset and size of an entry on the cache. Entries are ordered by offset.
/// </summary>
readonly struct CacheEntry : IComparable<CacheEntry>
{
    /// <summary>
    /// Offset of the entry.
    /// </summary>
    public int Offset { get; }

    /// <summary>
    /// Size of the entry.
    /// </summary>
    public int Size { get; }

    /// <summary>
    /// Creates a new cache entry.
    /// </summary>
    /// <param name="offset">Offset of the entry</param>
    /// <param name="size">Size of the entry</param>
    public CacheEntry(int offset, int size) => (Offset, Size) = (offset, size);

    /// <summary>
    /// Compares the offset of this entry with the offset of another entry; the size is not considered.
    /// </summary>
    /// <param name="other">Entry to compare with</param>
    /// <returns>Result of comparing both offsets</returns>
    public int CompareTo([AllowNull] CacheEntry other) => Offset.CompareTo(other.Offset);
}
|
||||
}
|
136
src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs
Normal file
136
src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs
Normal file
@ -0,0 +1,136 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
/// <summary>
/// First fit allocator for a fixed capacity region.
/// It only tracks offsets: the free ranges are kept on a list sorted by offset, and adjacent free
/// ranges are merged when memory is freed.
/// </summary>
class CacheMemoryAllocator
{
    /// <summary>
    /// Free range of the region. Ranges are ordered by offset.
    /// </summary>
    private readonly struct MemoryBlock : IComparable<MemoryBlock>
    {
        public int Offset { get; }
        public int Size { get; }

        public MemoryBlock(int offset, int size)
        {
            Offset = offset;
            Size = size;
        }

        public int CompareTo([AllowNull] MemoryBlock other)
        {
            return Offset.CompareTo(other.Offset);
        }
    }

    // Free ranges, sorted by offset. Every method below keeps the list sorted.
    private readonly List<MemoryBlock> _blocks = new();

    /// <summary>
    /// Creates a new allocator where the whole region is free.
    /// </summary>
    /// <param name="capacity">Size of the region</param>
    public CacheMemoryAllocator(int capacity)
    {
        _blocks.Add(new MemoryBlock(0, capacity));
    }

    /// <summary>
    /// Allocates a range from the first free block that is large enough.
    /// </summary>
    /// <param name="size">Size of the range to allocate</param>
    /// <returns>Offset of the allocated range, or -1 if no free block is large enough</returns>
    public int Allocate(int size)
    {
        for (int i = 0; i < _blocks.Count; i++)
        {
            MemoryBlock block = _blocks[i];

            if (block.Size > size)
            {
                // Take the start of the block, the remainder stays free.
                _blocks[i] = new(block.Offset + size, block.Size - size);

                return block.Offset;
            }
            else if (block.Size == size)
            {
                _blocks.RemoveAt(i);

                return block.Offset;
            }
        }

        // We don't have enough free memory to perform the allocation.
        return -1;
    }

    /// <summary>
    /// Marks a specific range as allocated. The range must be fully inside a single free block.
    /// </summary>
    /// <param name="offset">Offset of the range</param>
    /// <param name="size">Size of the range</param>
    /// <exception cref="InvalidOperationException">There is no free block starting at or before <paramref name="offset"/></exception>
    public void ForceAllocation(int offset, int size)
    {
        int index = _blocks.BinarySearch(new(offset, size));

        if (index < 0)
        {
            // No block starts exactly at the offset. The complement of the result is the index of the
            // first block that starts after it, so the block that can contain the range is the previous one.
            index = ~index - 1;
        }

        if (index < 0)
        {
            throw new InvalidOperationException($"No free block contains the range at offset {offset} with size {size}.");
        }

        int endOffset = offset + size;

        MemoryBlock block = _blocks[index];
        int blockEndOffset = block.Offset + block.Size;

        Debug.Assert(block.Offset <= offset && blockEndOffset >= endOffset);

        bool hasFreeHead = offset > block.Offset;
        bool hasFreeTail = endOffset < blockEndOffset;

        if (hasFreeHead && hasFreeTail)
        {
            // The range is in the middle of the block, which is split in two.
            _blocks[index] = new(block.Offset, offset - block.Offset);
            _blocks.Insert(index + 1, new(endOffset, blockEndOffset - endOffset));
        }
        else if (hasFreeHead)
        {
            _blocks[index] = new(block.Offset, offset - block.Offset);
        }
        else if (hasFreeTail)
        {
            _blocks[index] = new(endOffset, blockEndOffset - endOffset);
        }
        else
        {
            _blocks.RemoveAt(index);
        }
    }

    /// <summary>
    /// Returns a range to the free list, merging it with adjacent free blocks.
    /// </summary>
    /// <param name="offset">Offset of the range</param>
    /// <param name="size">Size of the range</param>
    public void Free(int offset, int size)
    {
        Insert(new MemoryBlock(offset, size));
    }

    private void Insert(MemoryBlock block)
    {
        int index = _blocks.BinarySearch(block);

        if (index < 0)
        {
            index = ~index;
        }

        if (index < _blocks.Count)
        {
            MemoryBlock next = _blocks[index];

            int endOffs = block.Offset + block.Size;

            if (next.Offset == endOffs)
            {
                // The next block starts right where this one ends, absorb it.
                block = new MemoryBlock(block.Offset, block.Size + next.Size);
                _blocks.RemoveAt(index);
            }
        }

        if (index > 0)
        {
            MemoryBlock prev = _blocks[index - 1];

            if (prev.Offset + prev.Size == block.Offset)
            {
                // The previous block ends right where this one starts, absorb it.
                block = new MemoryBlock(prev.Offset, block.Size + prev.Size);
                _blocks.RemoveAt(--index);
            }
        }

        _blocks.Insert(index, block);
    }

    /// <summary>
    /// Removes all free blocks. No memory can be allocated afterwards until ranges are freed.
    /// </summary>
    public void Clear()
    {
        _blocks.Clear();
    }
}
|
||||
}
|
197
src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs
Normal file
197
src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs
Normal file
@ -0,0 +1,197 @@
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Memory;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.Versioning;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
static partial class JitCache
|
||||
{
|
||||
private static readonly int _pageSize = (int)MemoryBlock.GetPageSize();
|
||||
private static readonly int _pageMask = _pageSize - 1;
|
||||
|
||||
private const int CodeAlignment = 4; // Bytes.
|
||||
private const int CacheSize = 2047 * 1024 * 1024;
|
||||
|
||||
private static ReservedRegion _jitRegion;
|
||||
private static JitCacheInvalidation _jitCacheInvalidator;
|
||||
|
||||
private static CacheMemoryAllocator _cacheAllocator;
|
||||
|
||||
private static readonly List<CacheEntry> _cacheEntries = new();
|
||||
|
||||
private static readonly object _lock = new();
|
||||
private static bool _initialized;
|
||||
|
||||
[SupportedOSPlatform("windows")]
|
||||
[LibraryImport("kernel32.dll", SetLastError = true)]
|
||||
public static partial IntPtr FlushInstructionCache(IntPtr hProcess, IntPtr lpAddress, UIntPtr dwSize);
|
||||
|
||||
/// <summary>
/// Initializes the JIT cache, if that was not done yet: reserves the cache region, creates the
/// cache invalidator on operating systems other than Windows and macOS, and creates the allocator.
/// Calls made after the first have no effect.
/// </summary>
/// <param name="allocator">Allocator used for the cache region and the cache invalidator</param>
public static void Initialize(IJitMemoryAllocator allocator)
{
    // Checked again under the lock below, this one only avoids taking the lock.
    if (_initialized)
    {
        return;
    }

    lock (_lock)
    {
        if (!_initialized)
        {
            _jitRegion = new ReservedRegion(allocator, CacheSize);

            bool needsInvalidator = !(OperatingSystem.IsWindows() || OperatingSystem.IsMacOS());

            if (needsInvalidator)
            {
                _jitCacheInvalidator = new JitCacheInvalidation(allocator);
            }

            _cacheAllocator = new CacheMemoryAllocator(CacheSize);

            _initialized = true;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Copies code into the JIT cache region and registers it as a cache entry.
/// </summary>
/// <param name="code">Code to copy into the cache</param>
/// <returns>Pointer to the copy of the code inside the JIT cache region</returns>
public unsafe static IntPtr Map(ReadOnlySpan<byte> code)
{
    lock (_lock)
    {
        Debug.Assert(_initialized);

        int funcOffset = Allocate(code.Length);

        IntPtr funcPtr = _jitRegion.Pointer + funcOffset;

        if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
        {
            // On Arm64 macOS the copy is delegated to the native helper instead of reprotecting the memory.
            fixed (byte* codePtr = code)
            {
                JitSupportDarwin.Copy(funcPtr, (IntPtr)codePtr, (ulong)code.Length);
            }
        }
        else
        {
            ReprotectAsWritable(funcOffset, code.Length);
            code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));
            ReprotectAsExecutable(funcOffset, code.Length);

            if (OperatingSystem.IsWindows() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
            {
                // Process.GetCurrentProcess returns a new disposable object on every call,
                // it is released as soon as the handle is no longer needed.
                using Process currentProcess = Process.GetCurrentProcess();

                FlushInstructionCache(currentProcess.Handle, funcPtr, (UIntPtr)code.Length);
            }
            else
            {
                // The invalidator is only created on some operating systems, nothing is done otherwise.
                _jitCacheInvalidator?.Invalidate(funcPtr, (ulong)code.Length);
            }
        }

        Add(funcOffset, code.Length);

        return funcPtr;
    }
}
|
||||
|
||||
/// <summary>
/// Releases the cache entry that starts exactly at <paramref name="pointer"/>.
/// Nothing happens when no entry starts at that address.
/// </summary>
/// <param name="pointer">Pointer previously returned by <c>Map</c></param>
public static void Unmap(IntPtr pointer)
{
    lock (_lock)
    {
        Debug.Assert(_initialized);

        long distanceFromBase = pointer.ToInt64() - _jitRegion.Pointer.ToInt64();
        int funcOffset = (int)distanceFromBase;

        bool found = TryFind(funcOffset, out CacheEntry entry, out int entryIndex);

        // TryFind may return an entry that starts before the offset; only exact matches are freed.
        if (!found || entry.Offset != funcOffset)
        {
            return;
        }

        _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size));
        _cacheEntries.RemoveAt(entryIndex);
    }
}
|
||||
|
||||
/// <summary>
/// Remaps every page touched by the given byte range as read/write/execute.
/// </summary>
/// <param name="offset">Start offset of the range inside the JIT region</param>
/// <param name="size">Size of the range in bytes</param>
private static void ReprotectAsWritable(int offset, int size)
{
    // Widen the range to whole pages: round the start down and the end up.
    int firstPageOffset = offset & ~_pageMask;
    int lastPageEnd = (offset + size + _pageMask) & ~_pageMask;
    ulong mappedSize = (ulong)(lastPageEnd - firstPageOffset);

    _jitRegion.Block.MapAsRwx((ulong)firstPageOffset, mappedSize);
}
|
||||
|
||||
/// <summary>
/// Remaps every page touched by the given byte range as read/execute.
/// </summary>
/// <param name="offset">Start offset of the range inside the JIT region</param>
/// <param name="size">Size of the range in bytes</param>
private static void ReprotectAsExecutable(int offset, int size)
{
    // Widen the range to whole pages: round the start down and the end up.
    int firstPageOffset = offset & ~_pageMask;
    int lastPageEnd = (offset + size + _pageMask) & ~_pageMask;
    ulong mappedSize = (ulong)(lastPageEnd - firstPageOffset);

    _jitRegion.Block.MapAsRx((ulong)firstPageOffset, mappedSize);
}
|
||||
|
||||
/// <summary>
/// Reserves space for <paramref name="codeSize"/> bytes of code (rounded up to the code
/// alignment) and makes sure the JIT region is expanded far enough to back it.
/// </summary>
/// <param name="codeSize">Size of the code in bytes</param>
/// <returns>Offset of the allocation inside the JIT region</returns>
/// <exception cref="OutOfMemoryException">The cache allocator returned a negative offset</exception>
private static int Allocate(int codeSize)
{
    int alignedSize = AlignCodeSize(codeSize);
    int allocOffset = _cacheAllocator.Allocate(alignedSize);

    if (allocOffset >= 0)
    {
        _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)alignedSize);

        return allocOffset;
    }

    throw new OutOfMemoryException("JIT Cache exhausted.");
}
|
||||
|
||||
/// <summary>
/// Rounds <paramref name="codeSize"/> up to the next multiple of <c>CodeAlignment</c>.
/// </summary>
/// <param name="codeSize">Size in bytes</param>
/// <returns>The aligned size</returns>
/// <exception cref="OverflowException">The addition of the alignment padding overflows</exception>
private static int AlignCodeSize(int codeSize)
{
    const int AlignmentMask = CodeAlignment - 1;

    int padded = checked(codeSize + AlignmentMask);

    return padded & ~AlignmentMask;
}
|
||||
|
||||
/// <summary>
/// Inserts a new entry into the entry list at the position reported by a binary search,
/// which keeps the list in the order defined by the entry comparison.
/// </summary>
/// <param name="offset">Offset of the code inside the JIT region</param>
/// <param name="size">Size of the code in bytes</param>
private static void Add(int offset, int size)
{
    CacheEntry newEntry = new(offset, size);

    int position = _cacheEntries.BinarySearch(newEntry);

    // A negative result is the bitwise complement of the insertion point.
    _cacheEntries.Insert(position >= 0 ? position : ~position, newEntry);
}
|
||||
|
||||
/// <summary>
/// Looks up the cache entry at <paramref name="offset"/>, or, when there is no exact match,
/// the entry immediately before the position where such an entry would be inserted.
/// </summary>
/// <remarks>
/// The entry size is not checked here, so the returned entry does not necessarily contain
/// <paramref name="offset"/>; callers must validate that themselves.
/// </remarks>
/// <param name="offset">Offset inside the JIT region</param>
/// <param name="entry">The entry found, or the default value when nothing was found</param>
/// <param name="entryIndex">Index of the entry on the entry list, or 0 when nothing was found</param>
/// <returns>True if an entry was found, false otherwise</returns>
public static bool TryFind(int offset, out CacheEntry entry, out int entryIndex)
{
    lock (_lock)
    {
        int position = _cacheEntries.BinarySearch(new CacheEntry(offset, 0));

        // No exact match: step back to the entry preceding the insertion point (-1 if none).
        int candidate = position >= 0 ? position : ~position - 1;

        if (candidate >= 0)
        {
            entry = _cacheEntries[candidate];
            entryIndex = candidate;

            return true;
        }
    }

    entry = default;
    entryIndex = 0;

    return false;
}
|
||||
}
|
||||
}
|
79
src/Ryujinx.Cpu/LightningJit/Cache/JitCacheInvalidation.cs
Normal file
79
src/Ryujinx.Cpu/LightningJit/Cache/JitCacheInvalidation.cs
Normal file
@ -0,0 +1,79 @@
|
||||
using ARMeilleure.Memory;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
class JitCacheInvalidation
|
||||
{
|
||||
private static readonly int[] _invalidationCode = new int[]
|
||||
{
|
||||
unchecked((int)0xd53b0022), // mrs x2, ctr_el0
|
||||
unchecked((int)0xd3504c44), // ubfx x4, x2, #16, #4
|
||||
unchecked((int)0x52800083), // mov w3, #0x4
|
||||
unchecked((int)0x12000c45), // and w5, w2, #0xf
|
||||
unchecked((int)0x1ac42064), // lsl w4, w3, w4
|
||||
unchecked((int)0x51000482), // sub w2, w4, #0x1
|
||||
unchecked((int)0x8a220002), // bic x2, x0, x2
|
||||
unchecked((int)0x1ac52063), // lsl w3, w3, w5
|
||||
unchecked((int)0xeb01005f), // cmp x2, x1
|
||||
unchecked((int)0x93407c84), // sxtw x4, w4
|
||||
unchecked((int)0x540000a2), // b.cs 3c <do_ic_clear>
|
||||
unchecked((int)0xd50b7b22), // dc cvau, x2
|
||||
unchecked((int)0x8b040042), // add x2, x2, x4
|
||||
unchecked((int)0xeb02003f), // cmp x1, x2
|
||||
unchecked((int)0x54ffffa8), // b.hi 2c <dc_clear_loop>
|
||||
unchecked((int)0xd5033b9f), // dsb ish
|
||||
unchecked((int)0x51000462), // sub w2, w3, #0x1
|
||||
unchecked((int)0x93407c63), // sxtw x3, w3
|
||||
unchecked((int)0x8a220000), // bic x0, x0, x2
|
||||
unchecked((int)0xeb00003f), // cmp x1, x0
|
||||
unchecked((int)0x540000a9), // b.ls 64 <exit>
|
||||
unchecked((int)0xd50b7520), // ic ivau, x0
|
||||
unchecked((int)0x8b030000), // add x0, x0, x3
|
||||
unchecked((int)0xeb00003f), // cmp x1, x0
|
||||
unchecked((int)0x54ffffa8), // b.hi 54 <ic_clear_loop>
|
||||
unchecked((int)0xd5033b9f), // dsb ish
|
||||
unchecked((int)0xd5033fdf), // isb
|
||||
unchecked((int)0xd65f03c0), // ret
|
||||
};
|
||||
|
||||
private delegate void InvalidateCache(ulong start, ulong end);
|
||||
|
||||
private readonly InvalidateCache _invalidateCache;
|
||||
private readonly ReservedRegion _invalidateCacheCodeRegion;
|
||||
|
||||
private readonly bool _needsInvalidation;
|
||||
|
||||
/// <summary>
/// Creates the cache invalidation helper. On Arm64 processes this copies the invalidation
/// routine into its own memory region, maps it as executable and binds a delegate to it;
/// on any other architecture the instance is left as a no-op.
/// </summary>
/// <param name="allocator">Allocator used to reserve memory for the invalidation routine</param>
public JitCacheInvalidation(IJitMemoryAllocator allocator)
{
    // On macOS and Windows, a different path is used to write to the JIT cache, which does the invalidation.
    if (RuntimeInformation.ProcessArchitecture != Architecture.Arm64)
    {
        return;
    }

    // Round the routine size up to the reservation granularity.
    ulong granularityMask = (ulong)ReservedRegion.DefaultGranularity - 1;
    ulong codeSize = (ulong)_invalidationCode.Length * sizeof(int);
    ulong regionSize = (codeSize + granularityMask) & ~granularityMask;

    _invalidateCacheCodeRegion = new ReservedRegion(allocator, regionSize);
    _invalidateCacheCodeRegion.ExpandIfNeeded(regionSize);

    Marshal.Copy(_invalidationCode, 0, _invalidateCacheCodeRegion.Pointer, _invalidationCode.Length);

    _invalidateCacheCodeRegion.Block.MapAsRx(0, regionSize);

    _invalidateCache = Marshal.GetDelegateForFunctionPointer<InvalidateCache>(_invalidateCacheCodeRegion.Pointer);

    _needsInvalidation = true;
}
|
||||
|
||||
/// <summary>
/// Runs the invalidation routine over the given memory range.
/// Does nothing when the routine was not set up by the constructor.
/// </summary>
/// <param name="basePointer">Start of the range</param>
/// <param name="size">Size of the range in bytes</param>
public void Invalidate(IntPtr basePointer, ulong size)
{
    if (!_needsInvalidation)
    {
        return;
    }

    // The routine takes the start and the (exclusive) end address of the range.
    _invalidateCache((ulong)basePointer, (ulong)basePointer + size);
}
|
||||
}
|
||||
}
|
16
src/Ryujinx.Cpu/LightningJit/Cache/JitSupportDarwin.cs
Normal file
16
src/Ryujinx.Cpu/LightningJit/Cache/JitSupportDarwin.cs
Normal file
@ -0,0 +1,16 @@
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.Versioning;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
/// <summary>
/// Native imports used by the JIT caches on Apple platforms.
/// </summary>
[SupportedOSPlatform("macos")]
static partial class JitSupportDarwin
{
    /// <summary>
    /// Imports <c>armeilleure_jit_memcpy</c> from the <c>libarmeilleure-jitsupport</c> native library.
    /// </summary>
    /// <param name="dst">Destination pointer</param>
    /// <param name="src">Source pointer</param>
    /// <param name="n">Number of bytes to copy</param>
    [LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")]
    public static partial void Copy(IntPtr dst, IntPtr src, ulong n);

    /// <summary>
    /// Imports <c>sys_icache_invalidate</c> from <c>libc</c>.
    /// </summary>
    /// <param name="start">Start of the range to invalidate</param>
    /// <param name="len">Length of the range in bytes</param>
    [LibraryImport("libc", EntryPoint = "sys_icache_invalidate", SetLastError = true)]
    public static partial void SysIcacheInvalidate(IntPtr start, IntPtr len);
}
|
||||
}
|
340
src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs
Normal file
340
src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs
Normal file
@ -0,0 +1,340 @@
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Common;
|
||||
using Ryujinx.Memory;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
class NoWxCache : IDisposable
|
||||
{
|
||||
private const int CodeAlignment = 4; // Bytes.
|
||||
private const int SharedCacheSize = 2047 * 1024 * 1024;
|
||||
private const int LocalCacheSize = 128 * 1024 * 1024;
|
||||
|
||||
// How many calls to the same function we allow until we pad the shared cache to force the function to become available there
|
||||
// and allow the guest to take the fast path.
|
||||
private const int MinCallsForPad = 8;
|
||||
|
||||
/// <summary>
/// A reserved memory region paired with the allocator that hands out offsets inside it.
/// </summary>
private class MemoryCache : IDisposable
{
    private readonly ReservedRegion _region;
    private readonly CacheMemoryAllocator _cacheAllocator;

    /// <summary>
    /// Allocator that tracks which parts of the region are in use.
    /// </summary>
    public CacheMemoryAllocator Allocator => _cacheAllocator;

    /// <summary>
    /// Base pointer of the memory block backing the region.
    /// </summary>
    public IntPtr Pointer => _region.Block.Pointer;

    /// <summary>
    /// Creates a new cache of the given size.
    /// </summary>
    /// <param name="allocator">Allocator used to reserve the region</param>
    /// <param name="size">Size of the cache in bytes</param>
    public MemoryCache(IJitMemoryAllocator allocator, ulong size)
    {
        _region = new ReservedRegion(allocator, size);
        _cacheAllocator = new CacheMemoryAllocator((int)size);
    }

    /// <summary>
    /// Allocates space for code, rounding the size up to the code alignment, and expands
    /// the region so that the allocation is backed by memory.
    /// </summary>
    /// <param name="codeSize">Size of the code in bytes</param>
    /// <returns>Offset of the allocation inside the region</returns>
    /// <exception cref="OutOfMemoryException">The allocator returned a negative offset</exception>
    public int Allocate(int codeSize)
    {
        int alignedSize = AlignCodeSize(codeSize);
        int allocOffset = _cacheAllocator.Allocate(alignedSize);

        if (allocOffset >= 0)
        {
            _region.ExpandIfNeeded((ulong)allocOffset + (ulong)alignedSize);

            return allocOffset;
        }

        throw new OutOfMemoryException("JIT Cache exhausted.");
    }

    /// <summary>
    /// Returns a range to the allocator.
    /// </summary>
    /// <param name="offset">Offset of the range</param>
    /// <param name="size">Size of the range in bytes</param>
    public void Free(int offset, int size) => _cacheAllocator.Free(offset, size);

    /// <summary>
    /// Remaps a page aligned range as read/write.
    /// </summary>
    /// <param name="offset">Page aligned offset of the range</param>
    /// <param name="size">Page aligned, non-zero size of the range</param>
    public void ReprotectAsRw(int offset, int size)
    {
        AssertPageAligned(offset, size);

        _region.Block.MapAsRw((ulong)offset, (ulong)size);
    }

    /// <summary>
    /// Remaps a page aligned range as read/execute and invalidates the instruction cache for it.
    /// </summary>
    /// <param name="offset">Page aligned offset of the range</param>
    /// <param name="size">Page aligned, non-zero size of the range</param>
    /// <exception cref="PlatformNotSupportedException">The OS is neither macOS nor iOS</exception>
    public void ReprotectAsRx(int offset, int size)
    {
        AssertPageAligned(offset, size);

        _region.Block.MapAsRx((ulong)offset, (ulong)size);

        // Note that the remap above has already happened by the time the unsupported path throws.
        if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS())
        {
            JitSupportDarwin.SysIcacheInvalidate(_region.Block.Pointer + offset, size);
        }
        else
        {
            throw new PlatformNotSupportedException();
        }
    }

    // Debug-only validation; the call (and its page size query) is removed from release builds.
    [Conditional("DEBUG")]
    private static void AssertPageAligned(int offset, int size)
    {
        Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
        Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
    }

    // Rounds a size up to the next multiple of CodeAlignment, throwing on overflow.
    private static int AlignCodeSize(int codeSize)
    {
        const int AlignmentMask = CodeAlignment - 1;

        return checked(codeSize + AlignmentMask) & ~AlignmentMask;
    }

    /// <summary>
    /// Disposes the region and clears the allocator when <paramref name="disposing"/> is true.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/></param>
    protected virtual void Dispose(bool disposing)
    {
        if (!disposing)
        {
            return;
        }

        _region.Dispose();
        _cacheAllocator.Clear();
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }
}
|
||||
|
||||
private readonly IStackWalker _stackWalker;
|
||||
private readonly Translator _translator;
|
||||
private readonly MemoryCache _sharedCache;
|
||||
private readonly MemoryCache _localCache;
|
||||
private readonly PageAlignedRangeList _pendingMap;
|
||||
private readonly object _lock;
|
||||
|
||||
/// <summary>
/// Describes a function stored on the local cache, together with a counter of how many
/// times it was looked up.
/// </summary>
class ThreadLocalCacheEntry
{
    /// <summary>Offset of the function inside the local cache.</summary>
    public readonly int Offset;

    /// <summary>Size of the function code in bytes.</summary>
    public readonly int Size;

    /// <summary>Pointer to the function code.</summary>
    public readonly IntPtr FuncPtr;

    private int _useCount;

    /// <summary>
    /// Creates a new entry with a use count of zero.
    /// </summary>
    /// <param name="offset">Offset of the function inside the local cache</param>
    /// <param name="size">Size of the function code in bytes</param>
    /// <param name="funcPtr">Pointer to the function code</param>
    public ThreadLocalCacheEntry(int offset, int size, IntPtr funcPtr)
    {
        FuncPtr = funcPtr;
        Size = size;
        Offset = offset;
        _useCount = 0;
    }

    /// <summary>
    /// Increments the use count by one.
    /// </summary>
    /// <returns>The use count after the increment</returns>
    public int IncrementUseCount()
    {
        _useCount += 1;

        return _useCount;
    }
}
|
||||
|
||||
[ThreadStatic]
|
||||
private static Dictionary<ulong, ThreadLocalCacheEntry> _threadLocalCache;
|
||||
|
||||
/// <summary>
/// Creates the cache, reserving both the shared and the local code regions.
/// </summary>
/// <param name="allocator">Allocator used to reserve the code regions</param>
/// <param name="stackWalker">Stack walker used to find which functions are still on a thread call stack</param>
/// <param name="translator">Translator that owns the function table where completed functions are registered</param>
public NoWxCache(IJitMemoryAllocator allocator, IStackWalker stackWalker, Translator translator)
{
    _lock = new object();
    _stackWalker = stackWalker;
    _translator = translator;

    _sharedCache = new MemoryCache(allocator, SharedCacheSize);
    _localCache = new MemoryCache(allocator, LocalCacheSize);

    // Completed pages of the shared cache are reprotected as RX, and the functions
    // that live on them are then registered on the translator.
    _pendingMap = new PageAlignedRangeList(_sharedCache.ReprotectAsRx, RegisterFunction);
}
|
||||
|
||||
/// <summary>
/// Maps translated code so that the calling thread can execute it right away.
/// </summary>
/// <remarks>
/// If the calling thread already has a local copy for <paramref name="guestAddress"/>, that copy is returned.
/// Otherwise the code is written to the shared cache (unless the function is already pending or
/// registered) and additionally to the local cache, from where the returned pointer comes.
/// </remarks>
/// <param name="framePointer">Frame pointer used to walk the call stack of the calling thread</param>
/// <param name="code">Translated machine code</param>
/// <param name="guestAddress">Guest address of the function</param>
/// <param name="guestSize">Size of the function on the guest</param>
/// <returns>Pointer to the executable local copy of the function</returns>
public unsafe IntPtr Map(IntPtr framePointer, ReadOnlySpan<byte> code, ulong guestAddress, ulong guestSize)
{
    if (TryGetThreadLocalFunction(guestAddress, out IntPtr cachedPtr))
    {
        return cachedPtr;
    }

    lock (_lock)
    {
        bool alreadyOnSharedCache = _pendingMap.Has(guestAddress) || _translator.Functions.ContainsKey(guestAddress);

        if (!alreadyOnSharedCache)
        {
            int sharedOffset = _sharedCache.Allocate(code.Length);
            IntPtr sharedPtr = _sharedCache.Pointer + sharedOffset;

            code.CopyTo(new Span<byte>((void*)sharedPtr, code.Length));

            // The function stays pending until all pages it touches can be made executable.
            _pendingMap.Add(sharedOffset, code.Length, guestAddress, new TranslatedFunction(sharedPtr, guestSize));
        }

        ClearThreadLocalCache(framePointer);

        return AddThreadLocalFunction(code, guestAddress);
    }
}
|
||||
|
||||
/// <summary>
/// Maps code on the shared cache using whole pages, and makes it executable immediately.
/// </summary>
/// <param name="code">Machine code to be mapped</param>
/// <returns>Pointer to the executable code on the shared cache</returns>
public unsafe IntPtr MapPageAligned(ReadOnlySpan<byte> code)
{
    int pageSize = (int)MemoryBlock.GetPageSize();

    lock (_lock)
    {
        // Ensure we will get an aligned offset from the allocator.
        _pendingMap.Pad(_sharedCache.Allocator);

        int mappedSize = BitUtils.AlignUp(code.Length, pageSize);
        int codeOffset = _sharedCache.Allocate(mappedSize);

        Debug.Assert((codeOffset & (pageSize - 1)) == 0);

        IntPtr codePtr = _sharedCache.Pointer + codeOffset;

        code.CopyTo(new Span<byte>((void*)codePtr, code.Length));

        _sharedCache.ReprotectAsRx(codeOffset, mappedSize);

        return codePtr;
    }
}
|
||||
|
||||
/// <summary>
/// Looks up a function on the local cache table of the calling thread, creating the table if needed.
/// </summary>
/// <param name="guestAddress">Guest address of the function</param>
/// <param name="funcPtr">Pointer to the local copy of the function, or <see cref="IntPtr.Zero"/> if not found</param>
/// <returns>True if the function was found, false otherwise</returns>
private bool TryGetThreadLocalFunction(ulong guestAddress, out IntPtr funcPtr)
{
    _threadLocalCache ??= new();

    if (!_threadLocalCache.TryGetValue(guestAddress, out ThreadLocalCacheEntry entry))
    {
        funcPtr = IntPtr.Zero;

        return false;
    }

    if (entry.IncrementUseCount() >= MinCallsForPad)
    {
        // Function is being called often, let's make it available in the shared cache so that the guest code
        // can take the fast path and stop calling the emulator to get the function from the thread local cache.
        // To do that we pad all "pending" functions until they complete a page of memory, allowing us to reprotect them as RX.
        lock (_lock)
        {
            _pendingMap.Pad(_sharedCache.Allocator);
        }
    }

    funcPtr = entry.FuncPtr;

    return true;
}
|
||||
|
||||
/// <summary>
/// Removes from the local cache table of the calling thread the functions that are no longer
/// pending on the shared cache and are not part of the current call stack, freeing their memory.
/// </summary>
/// <param name="framePointer">Frame pointer used to walk the call stack of the calling thread</param>
private void ClearThreadLocalCache(IntPtr framePointer)
{
    if (_threadLocalCache == null)
    {
        return;
    }

    IEnumerable<ulong> callStack = _stackWalker.GetCallStack(
        framePointer,
        _localCache.Pointer,
        LocalCacheSize,
        _sharedCache.Pointer,
        SharedCacheSize);

    // We can only delete if the function is not part of the current thread call stack,
    // otherwise we will crash the program when the thread returns to it.
    bool IsOnCallStack(ThreadLocalCacheEntry candidate)
    {
        foreach (ulong funcAddress in callStack)
        {
            if (funcAddress >= (ulong)candidate.FuncPtr && funcAddress < (ulong)candidate.FuncPtr + (ulong)candidate.Size)
            {
                return true;
            }
        }

        return false;
    }

    List<(ulong, ThreadLocalCacheEntry)> removable = new();

    foreach ((ulong address, ThreadLocalCacheEntry entry) in _threadLocalCache)
    {
        // We only want to delete if the function is already on the shared cache,
        // otherwise we will keep translating the same function over and over again.
        if (_pendingMap.Has(address))
        {
            continue;
        }

        if (!IsOnCallStack(entry))
        {
            removable.Add((address, entry));
        }
    }

    int pageSize = (int)MemoryBlock.GetPageSize();

    // Removal is deferred to here because the table can't be modified while it is enumerated.
    foreach ((ulong address, ThreadLocalCacheEntry entry) in removable)
    {
        _threadLocalCache.Remove(address);

        int freedSize = BitUtils.AlignUp(entry.Size, pageSize);

        _localCache.Free(entry.Offset, freedSize);
        _localCache.ReprotectAsRw(entry.Offset, freedSize);
    }
}
|
||||
|
||||
/// <summary>
/// Frees every function that the calling thread has on the local cache.
/// Intended to be called when the thread is exiting.
/// </summary>
/// <remarks>
/// The local cache memory and its allocator are shared by all threads, and every other path
/// that allocates from or frees to it runs with the cache lock held. The lock is also taken here,
/// so that no other thread can allocate a just freed page and have it reprotected as RW while it
/// is being filled or executed.
/// </remarks>
public void ClearEntireThreadLocalCache()
{
    // Thread is exiting, delete everything.
    Dictionary<ulong, ThreadLocalCacheEntry> threadCache = _threadLocalCache;

    if (threadCache == null)
    {
        return;
    }

    int pageSize = (int)MemoryBlock.GetPageSize();

    lock (_lock)
    {
        foreach (ThreadLocalCacheEntry entry in threadCache.Values)
        {
            int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);

            _localCache.Free(entry.Offset, sizeAligned);
            _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
        }
    }

    // The table itself is thread static, so it needs no synchronization.
    threadCache.Clear();
    _threadLocalCache = null;
}
|
||||
|
||||
/// <summary>
/// Copies code to whole pages of the local cache, records it on the table of the calling
/// thread and makes it executable.
/// </summary>
/// <param name="code">Translated machine code</param>
/// <param name="guestAddress">Guest address of the function, used as the table key</param>
/// <returns>Pointer to the executable local copy of the function</returns>
private unsafe IntPtr AddThreadLocalFunction(ReadOnlySpan<byte> code, ulong guestAddress)
{
    int pageSize = (int)MemoryBlock.GetPageSize();

    int mappedSize = BitUtils.AlignUp(code.Length, pageSize);
    int localOffset = _localCache.Allocate(mappedSize);

    Debug.Assert((localOffset & (pageSize - 1)) == 0);

    IntPtr localPtr = _localCache.Pointer + localOffset;

    code.CopyTo(new Span<byte>((void*)localPtr, code.Length));

    _threadLocalCache ??= new();

    // The entry keeps the unaligned code size; it is page aligned again when the entry is freed.
    _threadLocalCache.Add(guestAddress, new ThreadLocalCacheEntry(localOffset, code.Length, localPtr));

    _localCache.ReprotectAsRx(localOffset, mappedSize);

    return localPtr;
}
|
||||
|
||||
/// <summary>
/// Adds a function to the translator function table and then registers it on the translator.
/// Used as the callback invoked for functions whose shared cache pages became executable.
/// </summary>
/// <param name="address">Guest address of the function</param>
/// <param name="func">The translated function</param>
private void RegisterFunction(ulong address, TranslatedFunction func)
{
    TranslatedFunction registered = _translator.Functions.GetOrAdd(address, func.GuestSize, func);

    // No other function is expected to be on the table for this address.
    Debug.Assert(registered == func);

    _translator.RegisterFunction(address, func);
}
|
||||
|
||||
/// <summary>
/// Disposes the local and the shared caches when <paramref name="disposing"/> is true.
/// </summary>
/// <param name="disposing">True when called from <see cref="Dispose()"/></param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    _localCache.Dispose();
    _sharedCache.Dispose();
}

/// <inheritdoc/>
public void Dispose()
{
    Dispose(disposing: true);
    GC.SuppressFinalize(this);
}
|
||||
}
|
||||
}
|
218
src/Ryujinx.Cpu/LightningJit/Cache/PageAlignedRangeList.cs
Normal file
218
src/Ryujinx.Cpu/LightningJit/Cache/PageAlignedRangeList.cs
Normal file
@ -0,0 +1,218 @@
|
||||
using Ryujinx.Common;
|
||||
using Ryujinx.Memory;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
class PageAlignedRangeList
|
||||
{
|
||||
/// <summary>
/// A range of offsets, ordered by its start offset.
/// </summary>
private readonly struct Range : IComparable<Range>
{
    /// <summary>Start offset of the range.</summary>
    public int Offset { get; }

    /// <summary>Size of the range.</summary>
    public int Size { get; }

    /// <summary>
    /// Creates a new range.
    /// </summary>
    /// <param name="offset">Start offset of the range</param>
    /// <param name="size">Size of the range</param>
    public Range(int offset, int size)
    {
        Size = size;
        Offset = offset;
    }

    /// <summary>
    /// Compares the start offsets of two ranges; the sizes are not taken into account.
    /// </summary>
    /// <param name="other">Range to compare with</param>
    /// <returns>The result of comparing this range offset with the other range offset</returns>
    public int CompareTo([AllowNull] Range other) => Offset.CompareTo(other.Offset);
}
|
||||
|
||||
private readonly Action<int, int> _alignedRangeAction;
|
||||
private readonly Action<ulong, TranslatedFunction> _alignedFunctionAction;
|
||||
private readonly List<(Range, ulong, TranslatedFunction)> _pendingFunctions;
|
||||
private readonly List<Range> _ranges;
|
||||
|
||||
/// <summary>
/// Creates an empty list.
/// </summary>
/// <param name="alignedRangeAction">Called with the offset and size of each page aligned range that becomes complete</param>
/// <param name="alignedFunctionAction">Called with the address and function of each pending function that is released</param>
public PageAlignedRangeList(Action<int, int> alignedRangeAction, Action<ulong, TranslatedFunction> alignedFunctionAction)
{
    _ranges = new List<Range>();
    _pendingFunctions = new List<(Range, ulong, TranslatedFunction)>();
    _alignedFunctionAction = alignedFunctionAction;
    _alignedRangeAction = alignedRangeAction;
}
|
||||
|
||||
/// <summary>
/// Checks if there is a pending function with the given guest address.
/// </summary>
/// <param name="address">Guest address of the function</param>
/// <returns>True if a pending function has that address, false otherwise</returns>
public bool Has(ulong address)
{
    for (int i = 0; i < _pendingFunctions.Count; i++)
    {
        if (_pendingFunctions[i].Item2 == address)
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Adds a pending function, then processes any page aligned range that became complete with it.
/// </summary>
/// <param name="offset">Offset of the function code</param>
/// <param name="size">Size of the function code</param>
/// <param name="address">Guest address of the function</param>
/// <param name="function">The translated function</param>
public void Add(int offset, int size, ulong address, TranslatedFunction function)
{
    Range codeRange = new(offset, size);

    Insert(codeRange);
    _pendingFunctions.Add((codeRange, address, function));

    ProcessAlignedRanges();
}
|
||||
|
||||
/// <summary>
/// Completes every remaining range up to page boundaries by forcing the allocation of the
/// gaps before and after it, then reports the resulting page aligned range and releases
/// pending functions. The range list is empty when this returns.
/// </summary>
/// <param name="allocator">Allocator where the padding is forcibly allocated</param>
public void Pad(CacheMemoryAllocator allocator)
{
    int pageSize = (int)MemoryBlock.GetPageSize();

    // Every pass consumes the first remaining range.
    while (_ranges.Count > 0)
    {
        Range first = _ranges[0];

        int rangeEnd = first.Offset + first.Size;

        int paddedStart = BitUtils.AlignDown(first.Offset, pageSize);
        int paddedEnd = BitUtils.AlignUp(rangeEnd, pageSize);

        if (paddedStart < first.Offset)
        {
            allocator.ForceAllocation(paddedStart, first.Offset - paddedStart);
        }

        if (paddedEnd > rangeEnd)
        {
            allocator.ForceAllocation(rangeEnd, paddedEnd - rangeEnd);
        }

        _alignedRangeAction(paddedStart, paddedEnd - paddedStart);
        _ranges.RemoveAt(0);

        // -1 means that there is no range before the one that was just removed.
        ProcessPendingFunctions(-1, paddedEnd);
    }
}
|
||||
|
||||
/// <summary>
/// Finds ranges that fully cover at least one page, reports the covered pages,
/// removes them from the range list and releases pending functions.
/// </summary>
private void ProcessAlignedRanges()
{
    int pageSize = (int)MemoryBlock.GetPageSize();

    for (int i = 0; i < _ranges.Count; i++)
    {
        Range current = _ranges[i];

        // Shrink the range to the whole pages inside it: round the start up and the end down.
        int coveredStart = BitUtils.AlignUp(current.Offset, pageSize);
        int coveredEnd = BitUtils.AlignDown(current.Offset + current.Size, pageSize);
        int coveredSize = coveredEnd - coveredStart;

        if (coveredSize > 0)
        {
            _alignedRangeAction(coveredStart, coveredSize);

            // SplitAt adjusts the index to account for the entries it replaces, inserts or removes.
            SplitAt(ref i, coveredStart, coveredEnd);
            ProcessPendingFunctions(i, coveredEnd);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Releases the pending functions that are fully inside the gap between two entries of the
/// range list, calling the function action for each and removing it from the pending list.
/// </summary>
/// <param name="rangeIndex">Index on the range list used to locate the gap, can be -1 or equal to the list count</param>
/// <param name="alignedEnd">End offset of the range that was just processed</param>
private void ProcessPendingFunctions(int rangeIndex, int alignedEnd)
{
    // Step back when the index is past the end of the list, or points to a range that
    // starts at or after the end of the processed range.
    bool pastTheEnd = rangeIndex > 0 && rangeIndex == _ranges.Count;
    bool startsAfterProcessed =
        rangeIndex >= 0 &&
        rangeIndex < _ranges.Count &&
        _ranges[rangeIndex].Offset >= alignedEnd;

    if (pastTheEnd || startsAfterProcessed)
    {
        rangeIndex--;
    }

    // The gap starts where the previous range ends (or at 0 if there is none),
    // and ends where the following range starts (or has no upper limit if there is none).
    int gapStart = rangeIndex >= 0
        ? _ranges[rangeIndex].Offset + _ranges[rangeIndex].Size
        : 0;

    int gapEnd = rangeIndex < _ranges.Count - 1
        ? _ranges[rangeIndex + 1].Offset
        : int.MaxValue;

    int i = 0;

    while (i < _pendingFunctions.Count)
    {
        (Range range, ulong address, TranslatedFunction function) = _pendingFunctions[i];

        bool insideGap = range.Offset >= gapStart && range.Offset + range.Size <= gapEnd;

        if (!insideGap)
        {
            i++;

            continue;
        }

        _alignedFunctionAction(address, function);

        // The next element moves to the current index, so it must not advance.
        _pendingFunctions.RemoveAt(i);
    }
}
|
||||
|
||||
/// <summary>
/// Inserts a range on the sorted range list, merging it with the following and/or
/// preceding range when they are contiguous with it.
/// </summary>
/// <param name="range">Range to insert</param>
private void Insert(Range range)
{
    int position = _ranges.BinarySearch(range);

    if (position < 0)
    {
        // A negative result is the bitwise complement of the insertion point.
        position = ~position;
    }

    if (position < _ranges.Count)
    {
        Range next = _ranges[position];

        // The next range starts right where the new one ends, absorb it.
        if (next.Offset == range.Offset + range.Size)
        {
            range = new Range(range.Offset, range.Size + next.Size);
            _ranges.RemoveAt(position);
        }
    }

    if (position > 0)
    {
        Range previous = _ranges[position - 1];

        // The previous range ends right where the new one starts, absorb it.
        if (previous.Offset + previous.Size == range.Offset)
        {
            range = new Range(previous.Offset, previous.Size + range.Size);
            _ranges.RemoveAt(--position);
        }
    }

    _ranges.Insert(position, range);
}
|
||||
|
||||
/// <summary>
/// Removes the part between <paramref name="alignedStart"/> and <paramref name="alignedEnd"/>
/// from the range at <paramref name="index"/>, leaving on the list only what remains before
/// and/or after it.
/// </summary>
/// <remarks>
/// The bounds are expected to be the range start rounded up, and the range end rounded down,
/// to a page boundary, which means that they are always inside the range.
/// </remarks>
/// <param name="index">
/// Index of the range to split. Incremented when a part remains before the removed section,
/// decremented when the range is removed entirely
/// </param>
/// <param name="alignedStart">Start offset of the section to remove</param>
/// <param name="alignedEnd">End offset of the section to remove</param>
private void SplitAt(ref int index, int alignedStart, int alignedEnd)
{
    Range range = _ranges[index];

    int rangeEnd = range.Offset + range.Size;

    if (range.Offset < alignedStart)
    {
        // Keep the head of the range, and the tail too if there is one.
        _ranges[index++] = new(range.Offset, alignedStart - range.Offset);

        if (rangeEnd > alignedEnd)
        {
            _ranges.Insert(index, new(alignedEnd, rangeEnd - alignedEnd));
        }
    }
    else if (rangeEnd > alignedEnd)
    {
        // Only the tail remains.
        _ranges[index] = new(alignedEnd, rangeEnd - alignedEnd);
    }
    else
    {
        // Nothing remains. Since the bounds are inside the range, they must match it exactly here.
        // This used to be guarded by the same condition that is asserted, so the assert could never fail.
        Debug.Assert(range.Offset == alignedStart && rangeEnd == alignedEnd);

        _ranges.RemoveAt(index--);
    }
}
|
||||
}
|
||||
}
|
15
src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/AbiConstants.cs
Normal file
15
src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/AbiConstants.cs
Normal file
@ -0,0 +1,15 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
|
||||
{
|
||||
static class AbiConstants
|
||||
{
|
||||
// Some of these registers have specific roles and can't be used as general purpose registers.
|
||||
// X18 - Reserved for platform specific usage.
|
||||
// X29 - Frame pointer.
|
||||
// X30 - Return address.
|
||||
// X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
|
||||
public const uint ReservedRegsMask = (1u << 18) | (1u << 29) | (1u << 30) | (1u << 31);
|
||||
|
||||
public const uint GprCalleeSavedRegsMask = 0x1ff80000; // X19 to X28
|
||||
public const uint FpSimdCalleeSavedRegsMask = 0xff00; // D8 to D15
|
||||
}
|
||||
}
|
30
src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/ArmCondition.cs
Normal file
30
src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/ArmCondition.cs
Normal file
@ -0,0 +1,30 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
|
||||
{
|
||||
/// <summary>
/// Condition codes. Codes are laid out in pairs that differ only on the lowest bit,
/// which is what <see cref="ArmConditionExtensions.Invert"/> relies on.
/// </summary>
enum ArmCondition
{
    Eq = 0,
    Ne = 1,
    GeUn = 2,
    LtUn = 3,
    Mi = 4,
    Pl = 5,
    Vs = 6,
    Vc = 7,
    GtUn = 8,
    LeUn = 9,
    Ge = 10,
    Lt = 11,
    Gt = 12,
    Le = 13,
    Al = 14,
    Nv = 15,
}

/// <summary>
/// Extension methods for <see cref="ArmCondition"/>.
/// </summary>
static class ArmConditionExtensions
{
    /// <summary>
    /// Gets the other condition of the pair that <paramref name="condition"/> belongs to,
    /// by flipping the lowest bit of its value (for example Eq becomes Ne, and Ge becomes Lt).
    /// </summary>
    /// <param name="condition">Condition to invert</param>
    /// <returns>The inverted condition</returns>
    public static ArmCondition Invert(this ArmCondition condition) => condition ^ (ArmCondition)1;
}
|
||||
}
|
@ -0,0 +1,14 @@
|
||||
namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
|
||||
{
|
||||
/// <summary>
/// Register extension types. The four unsigned types take the values 0 to 3,
/// and the four signed types take the values 4 to 7.
/// </summary>
enum ArmExtensionType
{
    Uxtb = 0,
    Uxth = 1,
    Uxtw = 2,
    Uxtx = 3,
    Sxtb = 4,
    Sxth = 5,
    Sxtw = 6,
    Sxtx = 7,
}
|
||||
}
|
11
src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/ArmShiftType.cs
Normal file
11
src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/ArmShiftType.cs
Normal file
@ -0,0 +1,11 @@
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
|
||||
{
|
||||
/// <summary>
/// Shift types that can be applied to a register operand.
/// </summary>
enum ArmShiftType
{
    Lsl = 0,
    Lsr = 1,
    Asr = 2,
    Ror = 3,
}
|
||||
}
|
4777
src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/Assembler.cs
Normal file
4777
src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/Assembler.cs
Normal file
File diff suppressed because it is too large
Load Diff
67
src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/CodeGenCommon.cs
Normal file
67
src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/CodeGenCommon.cs
Normal file
@ -0,0 +1,67 @@
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
|
||||
{
|
||||
static class CodeGenCommon
|
||||
{
|
||||
/// <summary>
/// Tries to encode the value of an operand as a bit mask immediate, taking the operand type into account.
/// </summary>
/// <param name="operand">Operand with the value to encode</param>
/// <param name="immN">Encoded N field</param>
/// <param name="immS">Encoded imms field</param>
/// <param name="immR">Encoded immr field</param>
/// <returns>True if the value can be encoded, false otherwise</returns>
public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
    => TryEncodeBitMask(operand.Type, operand.Value, out immN, out immS, out immR);
|
||||
|
||||
/// <summary>
/// Tries to encode a value as a bit mask immediate. For 32-bit operands, the low 32 bits
/// of the value are replicated on the high 32 bits before encoding.
/// </summary>
/// <param name="type">Type of the operand that holds the value</param>
/// <param name="value">Value to encode</param>
/// <param name="immN">Encoded N field</param>
/// <param name="immS">Encoded imms field</param>
/// <param name="immR">Encoded immr field</param>
/// <returns>True if the value can be encoded, false otherwise</returns>
public static bool TryEncodeBitMask(OperandType type, ulong value, out int immN, out int immS, out int immR)
{
    if (type == OperandType.I32)
    {
        ulong low = value & uint.MaxValue;

        value = low | (low << 32);
    }

    return TryEncodeBitMask(value, out immN, out immS, out immR);
}
|
||||
|
||||
/// <summary>
/// Tries to encode a 64-bit value as a bit mask immediate, that is, a rotated run of ones
/// that repeats at a power of two element size.
/// </summary>
/// <param name="value">Value to encode</param>
/// <param name="immN">Encoded N field, 0 if the value can't be encoded</param>
/// <param name="immS">Encoded imms field, 0 if the value can't be encoded</param>
/// <param name="immR">Encoded immr field, 0 if the value can't be encoded</param>
/// <returns>True if the value can be encoded, false otherwise</returns>
public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
{
    // Outputs on failure.
    immN = 0;
    immS = 0;
    immR = 0;

    // Some special values also can't be encoded:
    // 0 can't be encoded because we need to subtract 1 from the ones count (which would become negative if 0).
    // A value with all bits set can't be encoded because it is reserved according to the spec, because:
    // Any value AND all ones will be equal to itself, so it's effectively a no-op.
    // Any value OR all ones will be equal to all ones, so one can just use MOV.
    // Any value XOR all ones will be equal to its inverse, so one can just use MVN.
    if (value is 0 or ulong.MaxValue)
    {
        return false;
    }

    // Normalize value, rotating it such that the LSB is 1: Ensures we get a complete element that has not
    // been cut-in-half across the word boundary.
    int rotation = BitOperations.TrailingZeroCount(value & (value + 1));
    ulong normalized = ulong.RotateRight(value, rotation);

    // Now that we have a complete element in the LSB with the LSB = 1, determine size and number of ones
    // in element.
    int elementSize = BitOperations.TrailingZeroCount(normalized & (normalized + 1));
    int onesInElement = BitOperations.TrailingZeroCount(~normalized);

    // Check the value is repeating; also ensures element size is a power of two.
    if (ulong.RotateRight(value, elementSize) != value)
    {
        return false;
    }

    immN = (elementSize >> 6) & 1;
    immS = ((-elementSize << 1) | (onesInElement - 1)) & 0x3f;
    immR = (elementSize - rotation) & (elementSize - 1);

    return true;
}
|
||||
}
|
||||
}
|
@ -0,0 +1,252 @@
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
|
||||
{
|
||||
readonly struct RegisterSaveRestore
|
||||
{
|
||||
private const int FpRegister = 29;
|
||||
private const int LrRegister = 30;
|
||||
|
||||
public const int Encodable9BitsOffsetLimit = 0x100;
|
||||
|
||||
private readonly uint _gprMask;
|
||||
private readonly uint _fpSimdMask;
|
||||
private readonly OperandType _fpSimdType;
|
||||
private readonly int _reservedStackSize;
|
||||
private readonly bool _hasCall;
|
||||
|
||||
/// <summary>
/// Creates a new register save/restore descriptor.
/// </summary>
/// <param name="gprMask">Mask of the general purpose registers to save, one bit per register</param>
/// <param name="fpSimdMask">Mask of the FP/SIMD registers to save, one bit per register</param>
/// <param name="fpSimdType">Operand type used to save the FP/SIMD registers, determines the size of each</param>
/// <param name="hasCall">Whether the frame pointer and the return address registers should also be saved</param>
/// <param name="reservedStackSize">Size of the additional stack space to reserve, in bytes</param>
public RegisterSaveRestore(
    uint gprMask,
    uint fpSimdMask = 0,
    OperandType fpSimdType = OperandType.FP64,
    bool hasCall = false,
    int reservedStackSize = 0)
{
    _hasCall = hasCall;
    _reservedStackSize = reservedStackSize;
    _fpSimdType = fpSimdType;
    _fpSimdMask = fpSimdMask;
    _gprMask = gprMask;
}
|
||||
|
||||
/// <summary>
/// Computes the size of the register save area: the 16-byte aligned size of the saved general
/// purpose and FP/SIMD registers, plus 16 bytes when the frame pointer and return address are also saved.
/// </summary>
/// <returns>The size of the register save area in bytes</returns>
public int GetReservedStackOffset()
{
    int gprSaveSize = BitOperations.PopCount(_gprMask) * 8;
    int fpSimdSaveSize = BitOperations.PopCount(_fpSimdMask) * _fpSimdType.GetSizeInBytes();

    int frameRecordSize = _hasCall ? 16 : 0;

    return frameRecordSize + Align16(gprSaveSize + fpSimdSaveSize);
}
|
||||
|
||||
/// <summary>
/// Writes the function prologue: saves the general purpose registers, then the FP/SIMD registers and,
/// when the function has calls, pushes the frame pointer and return address and sets up the frame pointer.
/// </summary>
/// <param name="asm">Assembler where the prologue is written</param>
public void WritePrologue(ref Assembler asm)
{
    uint gprMask = _gprMask;
    uint fpSimdMask = _fpSimdMask;

    int gprCount = BitOperations.PopCount(gprMask);
    int fpSimdCount = BitOperations.PopCount(fpSimdMask);

    int savedRegsSize = Align16(gprCount * 8 + fpSimdCount * _fpSimdType.GetSizeInBytes());
    int calleeSaveRegionSize = savedRegsSize + Align16(_reservedStackSize);
    int offset = 0;

    WritePrologueCalleeSavesPreIndexed(ref asm, ref gprMask, ref offset, calleeSaveRegionSize, OperandType.I64);

    // With an odd number of 8-byte registers saved, skip 8 bytes before the 128-bit registers.
    bool skipPadding = _fpSimdType == OperandType.V128 && (gprCount & 1) != 0;

    if (skipPadding)
    {
        offset += 8;
    }

    WritePrologueCalleeSavesPreIndexed(ref asm, ref fpSimdMask, ref offset, calleeSaveRegionSize, _fpSimdType);

    if (!_hasCall)
    {
        return;
    }

    Operand rsp = Register(Assembler.SpRegister);

    // By default only the frame pointer and return address pair is pushed here.
    int pairOffset = -16;

    if (offset == 0)
    {
        // Nothing was stored above, so the stack pointer was not moved yet and the save region
        // must be allocated here, either with the pre-indexed store itself if the
        // offset is small enough, or with a separate subtraction otherwise.
        if (calleeSaveRegionSize + 16 < Encodable9BitsOffsetLimit)
        {
            pairOffset = -(calleeSaveRegionSize + 16);
        }
        else
        {
            asm.Sub(rsp, rsp, new Operand(OperandKind.Constant, OperandType.I64, (ulong)calleeSaveRegionSize));
        }
    }

    asm.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, pairOffset);
    asm.MovSp(Register(FpRegister), rsp);
}
|
||||
|
||||
private static void WritePrologueCalleeSavesPreIndexed(
|
||||
ref Assembler asm,
|
||||
ref uint mask,
|
||||
ref int offset,
|
||||
int calleeSaveRegionSize,
|
||||
OperandType type)
|
||||
{
|
||||
if ((BitOperations.PopCount(mask) & 1) != 0)
|
||||
{
|
||||
int reg = BitOperations.TrailingZeroCount(mask);
|
||||
|
||||
mask &= ~(1u << reg);
|
||||
|
||||
if (offset != 0)
|
||||
{
|
||||
asm.StrRiUn(Register(reg, type), Register(Assembler.SpRegister), offset);
|
||||
}
|
||||
else if (calleeSaveRegionSize < Encodable9BitsOffsetLimit)
|
||||
{
|
||||
asm.StrRiPre(Register(reg, type), Register(Assembler.SpRegister), -calleeSaveRegionSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
asm.Sub(Register(Assembler.SpRegister), Register(Assembler.SpRegister), new Operand(OperandType.I64, (ulong)calleeSaveRegionSize));
|
||||
asm.StrRiUn(Register(reg, type), Register(Assembler.SpRegister), 0);
|
||||
}
|
||||
|
||||
offset += type.GetSizeInBytes();
|
||||
}
|
||||
|
||||
while (mask != 0)
|
||||
{
|
||||
int reg = BitOperations.TrailingZeroCount(mask);
|
||||
|
||||
mask &= ~(1u << reg);
|
||||
|
||||
int reg2 = BitOperations.TrailingZeroCount(mask);
|
||||
|
||||
mask &= ~(1u << reg2);
|
||||
|
||||
if (offset != 0)
|
||||
{
|
||||
asm.StpRiUn(Register(reg, type), Register(reg2, type), Register(Assembler.SpRegister), offset);
|
||||
}
|
||||
else if (calleeSaveRegionSize < Encodable9BitsOffsetLimit)
|
||||
{
|
||||
asm.StpRiPre(Register(reg, type), Register(reg2, type), Register(Assembler.SpRegister), -calleeSaveRegionSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
asm.Sub(Register(Assembler.SpRegister), Register(Assembler.SpRegister), new Operand(OperandType.I64, (ulong)calleeSaveRegionSize));
|
||||
asm.StpRiUn(Register(reg, type), Register(reg2, type), Register(Assembler.SpRegister), 0);
|
||||
}
|
||||
|
||||
offset += type.GetSizeInBytes() * 2;
|
||||
}
|
||||
}
|
||||
|
||||
public void WriteEpilogue(ref Assembler asm)
|
||||
{
|
||||
uint gprMask = _gprMask;
|
||||
uint fpSimdMask = _fpSimdMask;
|
||||
|
||||
int gprCalleeSavedRegsCount = BitOperations.PopCount(gprMask);
|
||||
int fpSimdCalleeSavedRegsCount = BitOperations.PopCount(fpSimdMask);
|
||||
|
||||
bool misalignedVector = _fpSimdType == OperandType.V128 && (gprCalleeSavedRegsCount & 1) != 0;
|
||||
|
||||
int offset = gprCalleeSavedRegsCount * 8 + fpSimdCalleeSavedRegsCount * _fpSimdType.GetSizeInBytes();
|
||||
|
||||
if (misalignedVector)
|
||||
{
|
||||
offset += 8;
|
||||
}
|
||||
|
||||
int calleeSaveRegionSize = Align16(offset) + Align16(_reservedStackSize);
|
||||
|
||||
if (_hasCall)
|
||||
{
|
||||
Operand rsp = Register(Assembler.SpRegister);
|
||||
|
||||
if (offset != 0 || calleeSaveRegionSize + 16 < Encodable9BitsOffsetLimit)
|
||||
{
|
||||
asm.LdpRiPost(Register(FpRegister), Register(LrRegister), rsp, offset == 0 ? calleeSaveRegionSize + 16 : 16);
|
||||
}
|
||||
else
|
||||
{
|
||||
asm.LdpRiPost(Register(FpRegister), Register(LrRegister), rsp, 16);
|
||||
asm.Add(rsp, rsp, new Operand(OperandKind.Constant, OperandType.I64, (ulong)calleeSaveRegionSize));
|
||||
}
|
||||
}
|
||||
|
||||
WriteEpilogueCalleeSavesPostIndexed(ref asm, ref fpSimdMask, ref offset, calleeSaveRegionSize, _fpSimdType);
|
||||
|
||||
if (misalignedVector)
|
||||
{
|
||||
offset -= 8;
|
||||
}
|
||||
|
||||
WriteEpilogueCalleeSavesPostIndexed(ref asm, ref gprMask, ref offset, calleeSaveRegionSize, OperandType.I64);
|
||||
}
|
||||
|
||||
private static void WriteEpilogueCalleeSavesPostIndexed(
|
||||
ref Assembler asm,
|
||||
ref uint mask,
|
||||
ref int offset,
|
||||
int calleeSaveRegionSize,
|
||||
OperandType type)
|
||||
{
|
||||
while (mask != 0)
|
||||
{
|
||||
int reg = HighestBitSet(mask);
|
||||
|
||||
mask &= ~(1u << reg);
|
||||
|
||||
if (mask != 0)
|
||||
{
|
||||
int reg2 = HighestBitSet(mask);
|
||||
|
||||
mask &= ~(1u << reg2);
|
||||
|
||||
offset -= type.GetSizeInBytes() * 2;
|
||||
|
||||
if (offset != 0)
|
||||
{
|
||||
asm.LdpRiUn(Register(reg2, type), Register(reg, type), Register(Assembler.SpRegister), offset);
|
||||
}
|
||||
else if (calleeSaveRegionSize < Encodable9BitsOffsetLimit)
|
||||
{
|
||||
asm.LdpRiPost(Register(reg2, type), Register(reg, type), Register(Assembler.SpRegister), calleeSaveRegionSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
asm.LdpRiUn(Register(reg2, type), Register(reg, type), Register(Assembler.SpRegister), 0);
|
||||
asm.Add(Register(Assembler.SpRegister), Register(Assembler.SpRegister), new Operand(OperandType.I64, (ulong)calleeSaveRegionSize));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
offset -= type.GetSizeInBytes();
|
||||
|
||||
if (offset != 0)
|
||||
{
|
||||
asm.LdrRiUn(Register(reg, type), Register(Assembler.SpRegister), offset);
|
||||
}
|
||||
else if (calleeSaveRegionSize < Encodable9BitsOffsetLimit)
|
||||
{
|
||||
asm.LdrRiPost(Register(reg, type), Register(Assembler.SpRegister), calleeSaveRegionSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
asm.LdrRiUn(Register(reg, type), Register(Assembler.SpRegister), 0);
|
||||
asm.Add(Register(Assembler.SpRegister), Register(Assembler.SpRegister), new Operand(OperandType.I64, (ulong)calleeSaveRegionSize));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static int HighestBitSet(uint value)
|
||||
{
|
||||
return 31 - BitOperations.LeadingZeroCount(value);
|
||||
}
|
||||
|
||||
private static Operand Register(int register, OperandType type = OperandType.I64)
|
||||
{
|
||||
return new Operand(register, RegisterType.Integer, type);
|
||||
}
|
||||
|
||||
private static int Align16(int value)
|
||||
{
|
||||
return (value + 0xf) & ~0xf;
|
||||
}
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user