OpenGL2: Add GPU vertex skinning for IQM models

Using GPU vertex skinning is significantly faster than CPU vertex
skinning. Especially since OpenGL2 has to run R_VaoPackNormal() and
R_VaoPackTangent() each vertex each frame which causes CPU vertex
skinning to be significantly slower than OpenGL1 renderer.
This commit is contained in:
Zack Middleton 2018-07-27 17:40:25 -05:00
parent cccd283be8
commit 11337c9fa2
10 changed files with 533 additions and 34 deletions

View file

@ -916,6 +916,170 @@ qboolean R_LoadIQM( model_t *mod, void *buffer, int filesize, const char *mod_na
}
}
// Create VAO surfaces
if ( iqmData->num_surfaces && iqmData->num_joints <= glRefConfig.glslMaxAnimatedBones )
{
srfVaoIQModel_t *vaoSurf;
srfIQModel_t *surf;
iqmData->numVaoSurfaces = iqmData->num_surfaces;
iqmData->vaoSurfaces = ri.Hunk_Alloc(sizeof(*iqmData->vaoSurfaces) * iqmData->numVaoSurfaces, h_low);
vaoSurf = iqmData->vaoSurfaces;
surf = iqmData->surfaces;
for (i = 0; i < iqmData->num_surfaces; i++, vaoSurf++, surf++)
{
uint32_t offset_xyz, offset_st, offset_normal, offset_tangent;
uint32_t offset_blendindexes, offset_blendweights, stride;
uint32_t dataSize, dataOfs;
uint8_t *data;
glIndex_t indexes[SHADER_MAX_INDEXES];
glIndex_t *ptr;
int *tri;
offset_xyz = 0;
offset_st = offset_xyz + sizeof(float) * 3;
offset_normal = offset_st + sizeof(float) * 2;
offset_tangent = offset_normal + sizeof(int16_t) * 4;
if ( iqmData->num_joints )
{
offset_blendindexes = offset_tangent + sizeof(int16_t) * 4;
offset_blendweights = offset_blendindexes + sizeof(byte) * 4;
if ( vertexArrayFormat[IQM_BLENDWEIGHTS] == IQM_FLOAT ) {
stride = offset_blendweights + sizeof(float) * 4;
} else {
stride = offset_blendweights + sizeof(byte) * 4;
}
}
else
{
stride = offset_tangent + sizeof(int16_t) * 4;
}
dataSize = surf->num_vertexes * stride;
data = ri.Malloc(dataSize);
dataOfs = 0;
for ( j = 0; j < surf->num_vertexes; j++ )
{
int vtx = surf->first_vertex + j;
// xyz
memcpy(data + dataOfs, &iqmData->positions[vtx*3], sizeof(float) * 3);
dataOfs += sizeof(float) * 3;
// st
memcpy(data + dataOfs, &iqmData->texcoords[vtx*2], sizeof(float) * 2);
dataOfs += sizeof(float) * 2;
// normal
R_VaoPackNormal((int16_t*)(data + dataOfs), &iqmData->normals[vtx*3]);
dataOfs += sizeof(int16_t) * 4;
// tangent
R_VaoPackTangent((int16_t*)(data + dataOfs), &iqmData->tangents[vtx*4]);
dataOfs += sizeof(int16_t) * 4;
if ( iqmData->num_joints )
{
// blendindexes
memcpy(data + dataOfs, &blendIndexes[vtx*4], sizeof(byte) * 4);
dataOfs += sizeof(byte) * 4;
// blendweights
if ( vertexArrayFormat[IQM_BLENDWEIGHTS] == IQM_FLOAT ) {
memcpy(data + dataOfs, &blendWeights.f[vtx*4], sizeof(float) * 4);
dataOfs += sizeof(float) * 4;
} else {
memcpy(data + dataOfs, &blendWeights.b[vtx*4], sizeof(byte) * 4);
dataOfs += sizeof(byte) * 4;
}
}
}
tri = iqmData->triangles + 3 * surf->first_triangle;
ptr = indexes;
for( j = 0; j < surf->num_triangles; j++ ) {
*ptr++ = (*tri++ - surf->first_vertex);
*ptr++ = (*tri++ - surf->first_vertex);
*ptr++ = (*tri++ - surf->first_vertex);
}
vaoSurf->surfaceType = SF_VAO_IQM;
vaoSurf->iqmData = iqmData;
vaoSurf->iqmSurface = surf;
vaoSurf->numIndexes = surf->num_triangles * 3;
vaoSurf->numVerts = surf->num_vertexes;
vaoSurf->vao = R_CreateVao(va("staticIQMMesh_VAO '%s'", surf->name), data, dataSize, (byte *)indexes, surf->num_triangles * 3 * sizeof(indexes[0]), VAO_USAGE_STATIC);
vaoSurf->vao->attribs[ATTR_INDEX_POSITION].enabled = 1;
vaoSurf->vao->attribs[ATTR_INDEX_POSITION].enabled = 1;
vaoSurf->vao->attribs[ATTR_INDEX_TEXCOORD].enabled = 1;
vaoSurf->vao->attribs[ATTR_INDEX_NORMAL ].enabled = 1;
vaoSurf->vao->attribs[ATTR_INDEX_TANGENT ].enabled = 1;
vaoSurf->vao->attribs[ATTR_INDEX_POSITION].count = 3;
vaoSurf->vao->attribs[ATTR_INDEX_TEXCOORD].count = 2;
vaoSurf->vao->attribs[ATTR_INDEX_NORMAL ].count = 4;
vaoSurf->vao->attribs[ATTR_INDEX_TANGENT ].count = 4;
vaoSurf->vao->attribs[ATTR_INDEX_POSITION].type = GL_FLOAT;
vaoSurf->vao->attribs[ATTR_INDEX_TEXCOORD].type = GL_FLOAT;
vaoSurf->vao->attribs[ATTR_INDEX_NORMAL ].type = GL_SHORT;
vaoSurf->vao->attribs[ATTR_INDEX_TANGENT ].type = GL_SHORT;
vaoSurf->vao->attribs[ATTR_INDEX_POSITION].normalized = GL_FALSE;
vaoSurf->vao->attribs[ATTR_INDEX_TEXCOORD].normalized = GL_FALSE;
vaoSurf->vao->attribs[ATTR_INDEX_NORMAL ].normalized = GL_TRUE;
vaoSurf->vao->attribs[ATTR_INDEX_TANGENT ].normalized = GL_TRUE;
vaoSurf->vao->attribs[ATTR_INDEX_POSITION].offset = offset_xyz;
vaoSurf->vao->attribs[ATTR_INDEX_TEXCOORD].offset = offset_st;
vaoSurf->vao->attribs[ATTR_INDEX_NORMAL ].offset = offset_normal;
vaoSurf->vao->attribs[ATTR_INDEX_TANGENT ].offset = offset_tangent;
vaoSurf->vao->attribs[ATTR_INDEX_POSITION].stride = stride;
vaoSurf->vao->attribs[ATTR_INDEX_TEXCOORD].stride = stride;
vaoSurf->vao->attribs[ATTR_INDEX_NORMAL ].stride = stride;
vaoSurf->vao->attribs[ATTR_INDEX_TANGENT ].stride = stride;
if ( iqmData->num_joints )
{
vaoSurf->vao->attribs[ATTR_INDEX_BONE_INDEXES].enabled = 1;
vaoSurf->vao->attribs[ATTR_INDEX_BONE_WEIGHTS].enabled = 1;
vaoSurf->vao->attribs[ATTR_INDEX_BONE_INDEXES].count = 4;
vaoSurf->vao->attribs[ATTR_INDEX_BONE_WEIGHTS].count = 4;
vaoSurf->vao->attribs[ATTR_INDEX_BONE_INDEXES].type = GL_UNSIGNED_BYTE;
vaoSurf->vao->attribs[ATTR_INDEX_BONE_INDEXES].normalized = GL_FALSE;
if ( vertexArrayFormat[IQM_BLENDWEIGHTS] == IQM_FLOAT ) {
vaoSurf->vao->attribs[ATTR_INDEX_BONE_WEIGHTS].type = GL_FLOAT;
vaoSurf->vao->attribs[ATTR_INDEX_BONE_WEIGHTS].normalized = GL_FALSE;
} else {
vaoSurf->vao->attribs[ATTR_INDEX_BONE_WEIGHTS].type = GL_UNSIGNED_BYTE;
vaoSurf->vao->attribs[ATTR_INDEX_BONE_WEIGHTS].normalized = GL_TRUE;
}
vaoSurf->vao->attribs[ATTR_INDEX_BONE_INDEXES].offset = offset_blendindexes;
vaoSurf->vao->attribs[ATTR_INDEX_BONE_WEIGHTS].offset = offset_blendweights;
vaoSurf->vao->attribs[ATTR_INDEX_BONE_INDEXES].stride = stride;
vaoSurf->vao->attribs[ATTR_INDEX_BONE_WEIGHTS].stride = stride;
}
Vao_SetVertexPointers(vaoSurf->vao);
ri.Free(data);
}
}
return qtrue;
}
@ -1017,6 +1181,7 @@ Add all surfaces of this model
void R_AddIQMSurfaces( trRefEntity_t *ent ) {
iqmData_t *data;
srfIQModel_t *surface;
void *drawSurf;
int i, j;
qboolean personalModel;
int cull;
@ -1097,6 +1262,12 @@ void R_AddIQMSurfaces( trRefEntity_t *ent ) {
shader = surface->shader;
}
if ( data->numVaoSurfaces ) {
drawSurf = &data->vaoSurfaces[i];
} else {
drawSurf = surface;
}
// we will add shadows even if the main object isn't visible in the view
// stencil shadows can't do personal models unless I polyhedron clip
@ -1105,7 +1276,7 @@ void R_AddIQMSurfaces( trRefEntity_t *ent ) {
&& fogNum == 0
&& !(ent->e.renderfx & ( RF_NOSHADOW | RF_DEPTHHACK ) )
&& shader->sort == SS_OPAQUE ) {
R_AddDrawSurf( (void *)surface, tr.shadowShader, 0, 0, 0, 0 );
R_AddDrawSurf( drawSurf, tr.shadowShader, 0, 0, 0, 0 );
}
// projection shadows work fine with personal models
@ -1113,11 +1284,11 @@ void R_AddIQMSurfaces( trRefEntity_t *ent ) {
&& fogNum == 0
&& (ent->e.renderfx & RF_SHADOW_PLANE )
&& shader->sort == SS_OPAQUE ) {
R_AddDrawSurf( (void *)surface, tr.projectionShadowShader, 0, 0, 0, 0 );
R_AddDrawSurf( drawSurf, tr.projectionShadowShader, 0, 0, 0, 0 );
}
if( !personalModel ) {
R_AddDrawSurf( (void *)surface, shader, fogNum, 0, 0, cubemapIndex );
R_AddDrawSurf( drawSurf, shader, fogNum, 0, 0, cubemapIndex );
}
surface++;
@ -1418,6 +1589,73 @@ void RB_IQMSurfaceAnim( surfaceType_t *surface ) {
tess.numVertexes += surf->num_vertexes;
}
/*
=================
RB_IQMSurfaceAnimVao
=================
*/
void RB_IQMSurfaceAnimVao(srfVaoIQModel_t * surface)
{
iqmData_t *data = surface->iqmData;
if (ShaderRequiresCPUDeforms(tess.shader))
{
RB_IQMSurfaceAnim((surfaceType_t*)surface->iqmSurface);
return;
}
if(!surface->vao)
return;
//RB_CheckVao(surface->vao);
RB_EndSurface();
RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex);
R_BindVao(surface->vao);
tess.useInternalVao = qfalse;
tess.numIndexes = surface->numIndexes;
tess.numVertexes = surface->numVerts;
glState.boneAnimation = data->num_poses;
if ( glState.boneAnimation ) {
float jointMats[IQM_MAX_JOINTS * 12];
int frame = data->num_frames ? backEnd.currentEntity->e.frame % data->num_frames : 0;
int oldframe = data->num_frames ? backEnd.currentEntity->e.oldframe % data->num_frames : 0;
float backlerp = backEnd.currentEntity->e.backlerp;
int i;
// compute interpolated joint matrices
ComputePoseMats( surface->iqmData, frame, oldframe, backlerp, jointMats );
// convert row-major order 3x4 matrix to column-major order 4x4 matrix
for ( i = 0; i < data->num_poses; i++ ) {
glState.boneMatrix[i][0] = jointMats[i*12+0];
glState.boneMatrix[i][1] = jointMats[i*12+4];
glState.boneMatrix[i][2] = jointMats[i*12+8];
glState.boneMatrix[i][3] = 0.0f;
glState.boneMatrix[i][4] = jointMats[i*12+1];
glState.boneMatrix[i][5] = jointMats[i*12+5];
glState.boneMatrix[i][6] = jointMats[i*12+9];
glState.boneMatrix[i][7] = 0.0f;
glState.boneMatrix[i][8] = jointMats[i*12+2];
glState.boneMatrix[i][9] = jointMats[i*12+6];
glState.boneMatrix[i][10] = jointMats[i*12+10];
glState.boneMatrix[i][11] = 0.0f;
glState.boneMatrix[i][12] = jointMats[i*12+3];
glState.boneMatrix[i][13] = jointMats[i*12+7];
glState.boneMatrix[i][14] = jointMats[i*12+11];
glState.boneMatrix[i][15] = 1.0f;
}
}
RB_EndSurface();
glState.boneAnimation = 0;
}
int R_IQMLerpTag( orientation_t *tag, iqmData_t *data,
int startFrame, int endFrame,
float frac, const char *tagName ) {