* Fix the MinGW and (hopefully) OS X builds
* Remove custom memcpy/memset code
This commit is contained in:
parent
2d9d10772f
commit
6e24cfe7d3
5 changed files with 22 additions and 684 deletions
|
@ -2857,316 +2857,6 @@ void Com_Shutdown (void) {
|
|||
|
||||
}
|
||||
|
||||
#if I_WANT_A_CUSTOM_MEMCPY && !defined(_WIN32)
|
||||
/*
 * Com_Memcpy
 *
 * Copies `count` bytes from `src` to `dest` by handing the request
 * straight to the C library's memcpy.
 */
void Com_Memcpy (void* dest, const void* src, const size_t count)
{
	(void)memcpy(dest, src, count);
}
|
||||
|
||||
/*
 * Com_Memset
 *
 * Fills `count` bytes starting at `dest` with `val` by handing the
 * request straight to the C library's memset.
 */
void Com_Memset (void* dest, const int val, const size_t count)
{
	(void)memset(dest, val, count);
}
|
||||
|
||||
#elif I_WANT_A_CUSTOM_MEMCPY && defined(_WIN32)
|
||||
|
||||
/* Access pattern hint handed to Com_Prefetch. */
typedef enum
{
	/* prefetch assuming that buffer is used for reading only */
	PRE_READ = 0,
	/* prefetch assuming that buffer is used for writing only */
	PRE_WRITE = 1,
	/* prefetch assuming that buffer is used for both reading and writing */
	PRE_READ_WRITE = 2
} e_prefetch;

/* Forward declaration; the definition appears later in this file. */
void Com_Prefetch (const void *s, unsigned int bytes, e_prefetch type);

/* Expands to an MSVC-style inline `emms` instruction. */
#define EMMS_INSTRUCTION	__asm emms
|
||||
|
||||
/*
 * _copyDWord
 *
 * Stores the 32-bit value `constant` into `count` consecutive dwords
 * starting at `dest`.
 *
 * Written in plain C so it builds with any Windows toolchain; the
 * compiler is left to unroll or vectorize the loop as it sees fit.
 *
 *   dest     - first dword to overwrite
 *   constant - value written to every dword
 *   count    - number of dwords (not bytes) to write; 0 writes nothing
 */
void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
	unsigned int i;

	for (i = 0; i < count; i++) {
		dest[i] = constant;
	}
}
|
||||
|
||||
/*
 * Com_Memcpy
 *
 * Copies `count` bytes from `src` to `dest`.
 *
 * Forwards to the C library's memcpy, which copes with every alignment
 * and block size and builds with any compiler, including MinGW and
 * 64-bit targets. As with memcpy, the two regions must not overlap.
 * A `count` of 0 copies nothing.
 */
void Com_Memcpy (void* dest, const void* src, const size_t count) {
	memcpy(dest, src, count);
}
|
||||
|
||||
/*
 * Com_Memset
 *
 * Fills `count` bytes starting at `dest` with the low 8 bits of `val`.
 *
 * Forwards to the C library's memset. That keeps the fill byte
 * identical for every length (only the low byte of `val` is ever used)
 * and builds with any compiler, including MinGW and 64-bit targets.
 * A `count` of 0 writes nothing.
 */
void Com_Memset (void* dest, const int val, const size_t count)
{
	memset(dest, val, count);
}
|
||||
|
||||
qboolean Com_Memcmp (const void *src0, const void *src1, const unsigned int count)
|
||||
{
|
||||
unsigned int i;
|
||||
// MMX version anyone?
|
||||
|
||||
if (count >= 16)
|
||||
{
|
||||
unsigned int *dw = (unsigned int*)(src0);
|
||||
unsigned int *sw = (unsigned int*)(src1);
|
||||
|
||||
unsigned int nm2 = count/16;
|
||||
for (i = 0; i < nm2; i+=4)
|
||||
{
|
||||
unsigned int tmp = (dw[i+0]-sw[i+0])|(dw[i+1]-sw[i+1])|
|
||||
(dw[i+2]-sw[i+2])|(dw[i+3]-sw[i+3]);
|
||||
if (tmp)
|
||||
return qfalse;
|
||||
}
|
||||
}
|
||||
if (count & 15)
|
||||
{
|
||||
byte *d = (byte*)src0;
|
||||
byte *s = (byte*)src1;
|
||||
for (i = count & 0xfffffff0; i < count; i++)
|
||||
if (d[i]!=s[i])
|
||||
return qfalse;
|
||||
}
|
||||
|
||||
return qtrue;
|
||||
}
|
||||
|
||||
/*
 * Com_Prefetch
 *
 * Touches the start of a buffer so it is pulled toward the CPU cache
 * before it is used.
 *
 *   s     - start of the buffer
 *   bytes - buffer length; only the first 4096 bytes are touched
 *   type  - intended access pattern
 *
 * PRE_READ and PRE_READ_WRITE read one byte every 32 bytes across the
 * (clamped) range. PRE_WRITE, and any unrecognized value, does nothing.
 * Every read stays inside [s, s + bytes).
 */
void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
{
	/* volatile keeps the compiler from discarding the otherwise unused reads */
	const volatile unsigned char *cursor = (const volatile unsigned char *)s;
	unsigned int span;
	unsigned int offset;

	switch (type)
	{
	case PRE_READ:
	case PRE_READ_WRITE:
		break;
	case PRE_WRITE:
	default:
		return;
	}

	// clamp to 4kB (unsigned compare, so huge lengths are clamped too)
	span = (bytes > 4096u) ? 4096u : bytes;

	// one touch per 32-byte step; a zero-length buffer is never read
	for (offset = 0; offset < span; offset += 32u)
	{
		unsigned char touched = cursor[offset];
		(void)touched;
	}
}
|
||||
#endif
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue