- Add MASM assembler files for MSVC x64 projects to support vm_x86 in x64 mode

- Clean up ftol()/snapvector() mess
- Make use of SSE instructions for ftol()/snapvector() if available
- Move ftol/snapvector from pure assembler to inline assembler; this adds x86_64 support and improves support for different calling conventions
- Set FPU control word at program startup to get consistent behaviour on all platforms
This commit is contained in:
Thilo Schulz 2011-06-13 09:56:39 +00:00
parent 471182cba0
commit 03201aff22
22 changed files with 540 additions and 460 deletions

View file

@ -90,6 +90,14 @@ cvar_t *com_basegame;
cvar_t *com_homepath;
cvar_t *com_busyWait;
// Function pointers for float-to-long conversion and vector snapping.
// They are assigned in Com_DetectSSE().
#if defined(idx64)
// x86_64 build: only the VM conversion routine is defined as a pointer here.
void (*Q_VMftol)(void);
#elif defined(id386)
// x86 build: each pointer is set to either the x87 or the SSE implementation.
long (QDECL *Q_ftol)(float f);
void (QDECL *Q_VMftol)(void);
void (QDECL *Q_SnapVector)(vec3_t vec);
#endif
// com_speeds times
int time_game;
int time_frontend; // renderer frontend time
@ -2565,6 +2573,53 @@ static void Com_DetectAltivec(void)
}
}
/*
=================
Com_DetectSSE

Install the ftol/snapvector implementations (SSE or x87 flavour)
according to the features the processor reports.
=================
*/
#if defined(idx64)
static void Com_DetectSSE(void)
{
	// No feature query on this target: the SSE routine is installed unconditionally.
	Q_VMftol = qvmftolsse;
	Com_Printf("Have SSE support\n");
}
#elif defined(id386)
static void Com_DetectSSE(void)
{
	cpuFeatures_t cpuFlags = Sys_GetProcessorFeatures();

	// Without SSE everything falls back to the x87 routines.
	if(!(cpuFlags & CF_SSE))
	{
		Q_ftol = qftolx87;
		Q_VMftol = qvmftolx87;
		Q_SnapVector = qsnapvectorx87;
		Com_Printf("No SSE support on this machine\n");
		return;
	}

	// The SSE snapvector routine is only selected when SSE2 is reported as well.
	Q_SnapVector = (cpuFlags & CF_SSE2) ? qsnapvectorsse : qsnapvectorx87;
	Q_ftol = qftolsse;
	Q_VMftol = qvmftolsse;
	Com_Printf("Have SSE support\n");
}
#else
// Neither x86 nor x86_64: nothing to detect, calls compile to nothing.
#define Com_DetectSSE()
#endif
/*
=================
Com_InitRand
@ -2615,6 +2670,8 @@ void Com_Init( char *commandLine ) {
// Swap_Init ();
Cbuf_Init ();
Com_DetectSSE();
// override anything from the config files with command line args
Com_StartupVariable( NULL );

View file

@ -87,7 +87,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#define OS_STRING "win_mingw64"
#endif
#define ID_INLINE inline
#define ID_INLINE __inline
#define PATH_SEP '\\'
#if defined( __WIN64__ )

View file

@ -419,6 +419,58 @@ extern vec3_t axisDefault[3];
#define IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask)
int Q_isnan(float x);
// Float-to-long conversion (Q_ftol) and vector snapping (Q_SnapVector),
// declared differently depending on the target architecture.
#ifdef idx64
// x86_64: Q_ftol and Q_SnapVector are plain aliases for the SSE routines;
// only Q_VMftol is a function pointer.
extern long qftolsse(float f);
extern void qvmftolsse(void);
extern void qsnapvectorsse(vec3_t vec);
#define Q_ftol qftolsse
#define Q_SnapVector qsnapvectorsse
extern void (*Q_VMftol)(void);
#elif defined(id386)
// x86: both the x87 and the SSE variants are declared, and the Q_* names
// are function pointers.
extern long QDECL qftolx87(float f);
extern long QDECL qftolsse(float f);
extern void QDECL qvmftolx87(void);
extern void QDECL qvmftolsse(void);
extern void QDECL qsnapvectorx87(vec3_t vec);
extern void QDECL qsnapvectorsse(vec3_t vec);
extern long (QDECL *Q_ftol)(float f);
extern void (QDECL *Q_VMftol)(void);
extern void (QDECL *Q_SnapVector)(vec3_t vec);
#else
// Any other architecture: rely on the C library's lrintf() and round().
#define Q_ftol(f) lrintf((f))
// Rounds the three components of the vector that vec points to, in place.
#define Q_SnapVector(vec)\
do\
{\
vec3_t *temp = (vec);\
\
(*temp)[0] = round((*temp)[0]);\
(*temp)[1] = round((*temp)[1]);\
(*temp)[2] = round((*temp)[2]);\
} while(0)
#endif
/*
// If your system does not have lrintf() and round(), you can try this block. Please also open a bug report at bugzilla.icculus.org
// or send a mail to the ioq3 mailing list.
#else
#define Q_ftol(f) ((long) (f))
#define Q_round(f) do { if((f) < 0) (f) -= 0.5f; else (f) += 0.5f; (f) = Q_ftol((f)); } while(0)
#define Q_SnapVector(vec) \
do\
{\
vec3_t *temp = (vec);\
\
Q_round((*temp)[0]);\
Q_round((*temp)[1]);\
Q_round((*temp)[2]);\
} while(0)
#endif
*/
#if idppc
static ID_INLINE float Q_rsqrt( float number ) {
@ -637,8 +689,6 @@ void MakeNormalVectors( const vec3_t forward, vec3_t right, vec3_t up );
void MatrixMultiply(float in1[3][3], float in2[3][3], float out[3][3]);
void AngleVectors( const vec3_t angles, vec3_t forward, vec3_t right, vec3_t up);
void PerpendicularVector( vec3_t dst, const vec3_t src );
int Q_isnan( float x );
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))

View file

@ -67,29 +67,6 @@ static int pc = 0;
#define FTOL_PTR
#ifdef _MSC_VER
#if defined( FTOL_PTR )
int _ftol( float );
static void *ftolPtr = _ftol;
#endif
#else // _MSC_VER
#if defined( FTOL_PTR )
int qftol( void );
int qftol027F( void );
int qftol037F( void );
int qftol0E7F( void );
int qftol0F7F( void );
static void *ftolPtr = qftol0F7F;
#endif // FTOL_PTR
#endif
static int instruction, pass;
static int lastConst = 0;
static int oc0, oc1, pop0, pop1;
@ -111,15 +88,17 @@ typedef enum
static ELastCommand LastCommand;
static inline int iss8(int32_t v)
static int iss8(int32_t v)
{
return (SCHAR_MIN <= v && v <= SCHAR_MAX);
}
static inline int isu8(uint32_t v)
#if 0
static int isu8(uint32_t v)
{
return (v <= UCHAR_MAX);
}
#endif
static int NextConstant4(void)
{
@ -436,30 +415,37 @@ Uses asm to retrieve arguments from registers to work around different calling c
=================
*/
#if defined(_MSC_VER) && defined(idx64)
extern void qsyscall64(void);
extern uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase);
// Microsoft does not support inline assembler on x64 platforms. Meh.
void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg)
{
#else
static void DoSyscall(void)
{
vm_t *savedVM;
int syscallNum;
int programStack;
int *opStackBase;
int opStackOfs;
uint8_t opStackOfs;
intptr_t arg;
#endif
vm_t *savedVM;
#ifdef _MSC_VER
#ifndef idx64
__asm
{
mov dword ptr syscallNum, eax
mov dword ptr programStack, esi
mov dword ptr opStackOfs, ebx
#ifdef idx64
mov qword ptr opStackBase, rdi
mov qword ptr arg, rcx
#else
mov byte ptr opStackOfs, bl
mov dword ptr opStackBase, edi
mov dword ptr arg, ecx
#endif
}
#endif
#else
__asm__ volatile(
""
@ -539,8 +525,13 @@ Call to DoSyscall()
int EmitCallDoSyscall(vm_t *vm)
{
// use edx register to store DoSyscall address
#if defined(_MSC_VER) && defined(idx64)
EmitRexString(0x48, "BA"); // mov edx, qsyscall64
EmitPtr(qsyscall64);
#else
EmitRexString(0x48, "BA"); // mov edx, DoSyscall
EmitPtr(DoSyscall);
#endif
// Push important registers to stack as we can't really make
// any assumptions about calling conventions.
@ -1629,9 +1620,8 @@ void VM_Compile(vm_t *vm, vmHeader_t *header)
EmitString("DB 1C 9F"); // fistp dword ptr [edi + ebx * 4]
#else // FTOL_PTR
// call the library conversion function
EmitString("D9 04 9F"); // fld dword ptr [edi + ebx * 4]
EmitRexString(0x48, "BA"); // mov edx, ftolPtr
EmitPtr(ftolPtr);
EmitRexString(0x48, "BA"); // mov edx, Q_VMftol
EmitPtr(Q_VMftol);
EmitRexString(0x48, "FF D2"); // call edx
EmitCommand(LAST_COMMAND_MOV_STACK_EAX); // mov dword ptr [edi + ebx * 4], eax
#endif
@ -1746,12 +1736,12 @@ This function is called directly by the generated code
int VM_CallCompiled(vm_t *vm, int *args)
{
int stack[OPSTACK_SIZE + 7];
byte stack[OPSTACK_SIZE * 4 + 15];
void *entryPoint;
int programCounter;
int programStack, stackOnEntry;
byte *image;
int *opStack, *opStackOnEntry;
int *opStack;
int opStackOfs;
currentVM = vm;
@ -1784,35 +1774,16 @@ int VM_CallCompiled(vm_t *vm, int *args)
// off we go into generated code...
entryPoint = vm->codeBase + vm->entryOfs;
opStack = opStackOnEntry = PADP(stack, 8);
opStack = PADP(stack, 16);
*opStack = 0xDEADBEEF;
opStackOfs = 0;
#ifdef _MSC_VER
#ifdef idx64
opStackOfs = qvmcall64(&programStack, opStack, vm->instructionPointers, vm->dataBase);
#else
__asm
{
#ifdef idx64
// non-volatile registers according to x64 calling convention
push rsi
push rdi
push rbx
mov esi, dword ptr programStack
mov rdi, qword ptr opStack
mov ebx, dword ptr opStackOfs
mov r8, qword ptr vm->instructionPointers
mov r9, qword ptr vm->dataBase
call entryPoint
mov dword ptr opStackOfs, ebx
mov qword ptr opStack, rdi
mov dword ptr programStack, esi
pop rbx
pop rdi
pop rsi
#else
pushad
mov esi, dword ptr programStack
@ -1826,8 +1797,8 @@ int VM_CallCompiled(vm_t *vm, int *args)
mov dword ptr programStack, esi
popad
#endif
}
#endif
#elif defined(idx64)
__asm__ volatile(
"movq %5, %%rax\r\n"
@ -1855,7 +1826,7 @@ int VM_CallCompiled(vm_t *vm, int *args)
);
#endif
if(opStack != opStackOnEntry || opStackOfs != 1 || *opStack != 0xDEADBEEF)
if(opStackOfs != 1 || *opStack != 0xDEADBEEF)
{
Com_Error(ERR_DROP, "opStack corrupted in compiled code");
}