From b3e30dbca04e4230e9071a52d84c9336dab1a3ec Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Tue, 30 Sep 2025 01:02:00 +0200 Subject: [PATCH 01/11] cmake: add OpenMP support --- cmake/DaemonFlags.cmake | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/cmake/DaemonFlags.cmake b/cmake/DaemonFlags.cmake index 6a77e5a421..9aea2aea82 100644 --- a/cmake/DaemonFlags.cmake +++ b/cmake/DaemonFlags.cmake @@ -259,6 +259,10 @@ if (USE_FLOAT_EXCEPTIONS) add_definitions(-DDAEMON_USE_FLOAT_EXCEPTIONS) endif() +if (NOT NACL AND BUILD_CLIENT) + option(USE_OPENMP "Use OpenMP to parallelize some tasks" OFF) +endif() + if (MSVC) set_c_cxx_flag("/MP") @@ -267,6 +271,14 @@ if (MSVC) set_cxx_flag("/std:c++23preview") endif() + if (NOT NACL AND BUILD_CLIENT AND USE_OPENMP) + try_cxx_flag(OPENMP "/openmp") + + if (NOT FLAG_OPENMP) + message(WARNING "Missing OpenMP") + endif() + endif() + if (USE_FAST_MATH) set_c_cxx_flag("/fp:fast") else() @@ -362,6 +374,14 @@ else() endif() endif() + if (NOT NACL AND BUILD_CLIENT AND USE_OPENMP) + try_cxx_flag(FOPENMP "-fopenmp") + + if (NOT FLAG_FOPENMP) + message(WARNING "Missing OpenMP") + endif() + endif() + if (NACL AND USE_NACL_SAIGO AND SAIGO_ARCH STREQUAL "arm") # Saigo produces broken arm builds when optimizing them. 
# See: https://github.com/Unvanquished/Unvanquished/issues/3297 From 5a24cb3acef77be64ada59b9d285fabd02e8b6ef Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Tue, 30 Sep 2025 01:14:00 +0200 Subject: [PATCH 02/11] framework: add Omp facilities --- src.cmake | 2 + src/engine/framework/OmpSystem.cpp | 104 +++++++++++++++++++++++++++++ src/engine/framework/OmpSystem.h | 41 ++++++++++++ 3 files changed, 147 insertions(+) create mode 100644 src/engine/framework/OmpSystem.cpp create mode 100644 src/engine/framework/OmpSystem.h diff --git a/src.cmake b/src.cmake index 7430a577b9..f5fc4b485b 100644 --- a/src.cmake +++ b/src.cmake @@ -188,6 +188,8 @@ set(ENGINELIST ${ENGINE_DIR}/framework/CvarSystem.h ${ENGINE_DIR}/framework/LogSystem.cpp ${ENGINE_DIR}/framework/LogSystem.h + ${ENGINE_DIR}/framework/OmpSystem.cpp + ${ENGINE_DIR}/framework/OmpSystem.h ${ENGINE_DIR}/framework/Resource.cpp ${ENGINE_DIR}/framework/Resource.h ${ENGINE_DIR}/framework/System.cpp diff --git a/src/engine/framework/OmpSystem.cpp b/src/engine/framework/OmpSystem.cpp new file mode 100644 index 0000000000..2c9776509a --- /dev/null +++ b/src/engine/framework/OmpSystem.cpp @@ -0,0 +1,104 @@ +/* +=========================================================================== +Daemon BSD Source Code +Copyright (c) 2025, Daemon Developers +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+    * Neither the name of the Daemon developers nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+===========================================================================
+*/
+
+#include
+
+#include "CvarSystem.h"
+#include "OmpSystem.h"
+
+#if defined(_OPENMP)
+#include "omp.h"
+
+static Cvar::Range<Cvar::Cvar<int>> common_ompThreads(
+	"common.ompThreads", "OpenMP threads", Cvar::NONE, 0, 0, 32 );
+#endif
+
+namespace Omp {
+#if defined(_OPENMP)
+static int ompMaxThreads = 1;
+#endif
+
+static int ompThreads = 1;
+
+// Cache the thread limit reported by omp_get_max_threads().
+static void ReadMaxThreads()
+{
+#if defined(_OPENMP)
+	ompMaxThreads = omp_get_max_threads();
+#endif
+}
+
+// Request ompThreads threads for the parallel regions that follow.
+void EnlistThreads()
+{
+#if defined(_OPENMP)
+	omp_set_num_threads( ompThreads );
+#endif
+}
+
+// Select the count (non-zero cvar wins, else derived from the maximum) and apply it.
+void SetupThreads()
+{
+#if defined(_OPENMP)
+	if ( common_ompThreads.Get() )
+	{
+		ompThreads = common_ompThreads.Get();
+	}
+	else if ( ompMaxThreads <= 4 )
+	{
+		ompThreads = ompMaxThreads;
+	}
+	else if ( ompMaxThreads <= 16 )
+	{
+		ompThreads = ompMaxThreads - ( ompMaxThreads / 4 );
+	}
+	else
+	{
+		ompThreads = 16;
+	}
+#endif
+
+	// Reached on every path so that a changed cvar takes effect.
+	EnlistThreads();
+}
+
+// One-time startup: read the maximum, then select and apply a count.
+void Init()
+{
+	ReadMaxThreads();
+	SetupThreads();
+}
+
+// Selected thread count; stays 1 when built without OpenMP.
+int GetThreads()
+{
+	return ompThreads;
+}
+} // namespace Omp
diff --git a/src/engine/framework/OmpSystem.h b/src/engine/framework/OmpSystem.h
new file mode 100644
index 0000000000..4e93f331f2
--- /dev/null
+++ b/src/engine/framework/OmpSystem.h
@@ -0,0 +1,41 @@
+/*
+===========================================================================
+Daemon BSD Source Code
+Copyright (c) 2025, Daemon Developers
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Daemon developers nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+===========================================================================
+*/
+
+#ifndef COMMON_OMP_SYSTEM_H_
+#define COMMON_OMP_SYSTEM_H_
+
+namespace Omp {
+void EnlistThreads(); // Pass the selected count to omp_set_num_threads().
+void SetupThreads(); // Select the count from common.ompThreads or the detected maximum.
+void Init(); // Read the OpenMP maximum, then select and apply a count.
+int GetThreads(); // Selected count; 1 when built without OpenMP.
+} // namespace Omp
+
+#endif // COMMON_OMP_SYSTEM_H_
From 0685244208d2eb0892dd3fd20526f06475d929cc Mon Sep 17 00:00:00 2001
From: Thomas Debesse
Date: Tue, 30 Sep 2025 01:33:00 +0200
Subject: [PATCH 03/11] engine: initialize Omp

---
 src/engine/framework/System.cpp    | 3 +++
 src/engine/qcommon/common.cpp      | 3 +++
 src/engine/renderer/tr_backend.cpp | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/src/engine/framework/System.cpp b/src/engine/framework/System.cpp
index 2d66f2a791..255e6f8ced 100644
--- a/src/engine/framework/System.cpp
+++ b/src/engine/framework/System.cpp
@@ -34,6 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "ConsoleHistory.h"
 #include "CommandSystem.h"
 #include "LogSystem.h"
+#include "OmpSystem.h"
 #include "System.h"
 #include "CrashDump.h"
 #include "CvarSystem.h"
@@ -829,6 +830,8 @@ static void SetCvarsWithInitFlag(cmdlineArgs_t& cmdlineArgs)
 // Initialize the engine
 static void Init(int argc, char** argv)
 {
+	Omp::Init();
+
 	cmdlineArgs_t cmdlineArgs;
 
 #ifdef _WIN32
diff --git a/src/engine/qcommon/common.cpp b/src/engine/qcommon/common.cpp
index d6f6609598..d96e7ea368 100644
--- a/src/engine/qcommon/common.cpp
+++ b/src/engine/qcommon/common.cpp
@@ -47,6 +47,7 @@ Maryland 20850 USA.
#include "framework/CommandSystem.h" #include "framework/CvarSystem.h" #include "framework/LogSystem.h" +#include "framework/OmpSystem.h" #include "framework/System.h" #include "sys/sys_events.h" #include @@ -783,6 +784,8 @@ static Cvar::Cvar showTraceStats("common.showTraceStats", "are physics tra void Com_Frame() { + Omp::SetupThreads(); + int msec, minMsec; static int lastTime = 0; //int key; diff --git a/src/engine/renderer/tr_backend.cpp b/src/engine/renderer/tr_backend.cpp index 43f74375a0..72a69ea7fb 100644 --- a/src/engine/renderer/tr_backend.cpp +++ b/src/engine/renderer/tr_backend.cpp @@ -22,6 +22,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ // tr_backend.c +#include "framework/OmpSystem.h" #include "tr_local.h" #include "gl_shader.h" #include "Material.h" @@ -3829,6 +3830,8 @@ RB_RenderThread */ void RB_RenderThread() { + Omp::EnlistThreads(); + const void *data; // wait for either a rendering command or a quit command From 0f437d31e945f96d774cab9767d2517f2adb06df Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Tue, 30 Sep 2025 05:29:00 +0200 Subject: [PATCH 04/11] tr_surface: prepare Tess_SurfaceIQM() for parallelism --- src/engine/renderer/tr_surface.cpp | 85 +++++++++++++++++------------- 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp index d12279d07f..a051b7cc22 100644 --- a/src/engine/renderer/tr_surface.cpp +++ b/src/engine/renderer/tr_surface.cpp @@ -1239,8 +1239,7 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { float *modelTangent = model->tangents + 3 * firstVertex; float *modelBitangent = model->bitangents + 3 * firstVertex; float *modelTexcoord = model->texcoords + 2 * firstVertex; - shaderVertex_t *tessVertex = tess.verts + tess.numVertexes; - shaderVertex_t *lastVertex = tessVertex + surf->num_vertexes; + shaderVertex_t *modelTessVertex = tess.verts + tess.numVertexes; // Deform the vertices by the lerped bones. 
if ( model->num_joints > 0 && model->blendWeights && model->blendIndexes ) @@ -1252,33 +1251,36 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { byte *modelBlendIndex = model->blendIndexes + 4 * firstVertex; byte *modelBlendWeight = model->blendWeights + 4 * firstVertex; - for ( ; tessVertex < lastVertex; tessVertex++, - modelPosition += 3, modelNormal += 3, - modelTangent += 3, modelBitangent += 3, - modelTexcoord += 2 ) + for ( size_t i = 0; i < surf->num_vertexes; i++ ) { - vec3_t position = {}; + shaderVertex_t *tessVertex = modelTessVertex + i; + + float *vertexPosition = modelPosition + 3 * i; + float *vertexTexcoord = modelTexcoord + 2 * i; - byte *lastBlendIndex = modelBlendIndex + 4; + byte *blendIndex = modelBlendIndex + 4 * i; + byte *lastBlendIndex = blendIndex + 4; + byte *blendWeight = modelBlendWeight + 4 * i; - for ( ; modelBlendIndex < lastBlendIndex; modelBlendIndex++, - modelBlendWeight++ ) + vec3_t position = {}; + + for ( ; blendIndex < lastBlendIndex; blendIndex++, blendWeight++ ) { - if ( *modelBlendWeight == 0 ) + if ( *blendWeight == 0 ) { continue; } - float weight = *modelBlendWeight * weightFactor; + float weight = *blendWeight * weightFactor; vec3_t tmp; - TransformPoint( &bones[ *modelBlendIndex ], modelPosition, tmp ); + TransformPoint( &bones[ *blendIndex ], vertexPosition, tmp ); VectorMA( position, weight, tmp, position ); } VectorCopy( position, tessVertex->xyz ); - Vector2Copy( modelTexcoord, tessVertex->texCoords ); + Vector2Copy( vertexTexcoord, tessVertex->texCoords ); } } else @@ -1286,36 +1288,42 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { byte *modelBlendIndex = model->blendIndexes + 4 * firstVertex; byte *modelBlendWeight = model->blendWeights + 4 * firstVertex; - for ( ; tessVertex < lastVertex; tessVertex++, - modelPosition += 3, modelNormal += 3, - modelTangent += 3, modelBitangent += 3, - modelTexcoord += 2 ) + for ( size_t i = 0; i < surf->num_vertexes; i++ ) { - vec3_t position = {}, tangent = {}, binormal = {}, 
normal = {}; + shaderVertex_t *tessVertex = modelTessVertex + i; - byte *lastBlendIndex = modelBlendIndex + 4; + float *vertexPosition = modelPosition + 3 * i; + float *vertexNormal = modelNormal + 3 * i; + float *vertexTangent = modelTangent + 3 * i; + float *vertexBitangent = modelBitangent + 3 * i; + float *vertexTexcoord = modelTexcoord + 2 * i; - for ( ; modelBlendIndex < lastBlendIndex; modelBlendIndex++, - modelBlendWeight++ ) + byte *blendIndex = modelBlendIndex + 4 * i; + byte *lastBlendIndex = blendIndex + 4; + byte *blendWeight = modelBlendWeight + 4 * i; + + vec3_t position = {}, tangent = {}, binormal = {}, normal = {}; + + for ( ; blendIndex < lastBlendIndex; blendIndex++, blendWeight++ ) { - if ( *modelBlendWeight == 0 ) + if ( *blendWeight == 0 ) { continue; } - float weight = *modelBlendWeight * weightFactor; + float weight = *blendWeight * weightFactor; vec3_t tmp; - TransformPoint( &bones[ *modelBlendIndex ], modelPosition, tmp ); + TransformPoint( &bones[ *blendIndex ], vertexPosition, tmp ); VectorMA( position, weight, tmp, position ); - TransformNormalVector( &bones[ *modelBlendIndex ], modelNormal, tmp ); + TransformNormalVector( &bones[ *blendIndex ], vertexNormal, tmp ); VectorMA( normal, weight, tmp, normal ); - TransformNormalVector( &bones[ *modelBlendIndex ], modelTangent, tmp ); + TransformNormalVector( &bones[ *blendIndex ], vertexTangent, tmp ); VectorMA( tangent, weight, tmp, tangent ); - TransformNormalVector( &bones[ *modelBlendIndex ], modelBitangent, tmp ); + TransformNormalVector( &bones[ *blendIndex ], vertexBitangent, tmp ); VectorMA( binormal, weight, tmp, binormal ); } @@ -1326,7 +1334,7 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { R_TBNtoQtangentsFast( tangent, binormal, normal, tessVertex->qtangents ); - Vector2Copy( modelTexcoord, tessVertex->texCoords ); + Vector2Copy( vertexTexcoord, tessVertex->texCoords ); } } } @@ -1334,16 +1342,21 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { { float scale = 
model->internalScale * backEnd.currentEntity->e.skeleton.scale; - for ( ; tessVertex < lastVertex; tessVertex++, - modelPosition += 3, modelNormal += 3, - modelTangent += 3, modelBitangent += 3, - modelTexcoord += 2 ) + for ( size_t i = 0; i < surf->num_vertexes; i++ ) { - VectorScale( modelPosition, scale, tessVertex->xyz ); + shaderVertex_t *tessVertex = modelTessVertex + i; + + float *vertexPosition = modelPosition + 3 * i; + float *vertexNormal = modelNormal + 3 * i; + float *vertexTangent = modelTangent + 3 * i; + float *vertexBitangent = modelBitangent + 3 * i; + float *vertexTexcoord = modelTexcoord + 2 * i; + + VectorScale( vertexPosition, scale, tessVertex->xyz ); - R_TBNtoQtangentsFast( modelTangent, modelBitangent, modelNormal, tessVertex->qtangents ); + R_TBNtoQtangentsFast( vertexTangent, vertexBitangent, vertexNormal, tessVertex->qtangents ); - Vector2Copy( modelTexcoord, tessVertex->texCoords ); + Vector2Copy( vertexTexcoord, tessVertex->texCoords ); } } From 85176268ad83161df01bb85e54e38245f4d2a142 Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Tue, 30 Sep 2025 05:39:00 +0200 Subject: [PATCH 05/11] tr_surface: parallelize Tess_SurfaceIQM() --- src/engine/renderer/tr_surface.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp index a051b7cc22..af94e89cbe 100644 --- a/src/engine/renderer/tr_surface.cpp +++ b/src/engine/renderer/tr_surface.cpp @@ -1251,6 +1251,7 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { byte *modelBlendIndex = model->blendIndexes + 4 * firstVertex; byte *modelBlendWeight = model->blendWeights + 4 * firstVertex; + #pragma omp parallel for for ( size_t i = 0; i < surf->num_vertexes; i++ ) { shaderVertex_t *tessVertex = modelTessVertex + i; @@ -1288,6 +1289,7 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { byte *modelBlendIndex = model->blendIndexes + 4 * firstVertex; byte *modelBlendWeight = model->blendWeights + 4 * firstVertex; + #pragma omp 
parallel for for ( size_t i = 0; i < surf->num_vertexes; i++ ) { shaderVertex_t *tessVertex = modelTessVertex + i; @@ -1342,6 +1344,7 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) { { float scale = model->internalScale * backEnd.currentEntity->e.skeleton.scale; + #pragma omp parallel for for ( size_t i = 0; i < surf->num_vertexes; i++ ) { shaderVertex_t *tessVertex = modelTessVertex + i; From 3fe43a32cb92cee24eefede306c4d236032cb4f3 Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Thu, 2 Oct 2025 02:49:00 +0200 Subject: [PATCH 06/11] tr_surface: prepare Tess_SurfaceMD5() for parallelism --- src/engine/renderer/tr_surface.cpp | 63 ++++++++++++++++-------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp index af94e89cbe..7a0347c3c3 100644 --- a/src/engine/renderer/tr_surface.cpp +++ b/src/engine/renderer/tr_surface.cpp @@ -1051,66 +1051,69 @@ static void Tess_SurfaceMD5( md5Surface_t *srf ) tessIndex[ 2 ] = tess.numVertexes + surfaceTriangle->indexes[ 2 ]; } - shaderVertex_t *tessVertex = tess.verts + tess.numVertexes; - shaderVertex_t *lastVertex = tessVertex + srf->numVerts; + shaderVertex_t *modelTessVertex = tess.verts + tess.numVertexes; // Deform the vertices by the lerped bones. 
if ( tess.skipTangents ) { - for ( ; tessVertex < lastVertex; tessVertex++, - surfaceVertex++ ) + for ( size_t i = 0; i < srf->numVerts; i++ ) { - vec3_t position = {}; + shaderVertex_t *tessVertex = modelTessVertex + i; + md5Vertex_t *vertex = surfaceVertex + i; - float *boneWeight = surfaceVertex->boneWeights; - float *lastWeight = boneWeight + surfaceVertex->numWeights; - uint32_t *boneIndex = surfaceVertex->boneIndexes; - vec4_t *surfacePosition = &surfaceVertex->position; + vec4_t *vertexPosition = &vertex->position; + + float *boneWeight = vertex->boneWeights; + float *lastWeight = boneWeight + vertex->numWeights; + uint32_t *boneIndex = vertex->boneIndexes; + + vec3_t position = {}; - for ( ; boneWeight < lastWeight; boneWeight++, - boneIndex++ ) + for ( ; boneWeight < lastWeight; boneWeight++, boneIndex++ ) { vec3_t tmp; - TransformPoint( &bones[ *boneIndex ], *surfacePosition, tmp ); + TransformPoint( &bones[ *boneIndex ], *vertexPosition, tmp ); VectorMA( position, *boneWeight, tmp, position ); } VectorCopy( position, tessVertex->xyz ); - Vector2Copy( surfaceVertex->texCoords, tessVertex->texCoords ); + Vector2Copy( vertex->texCoords, tessVertex->texCoords ); } } else { - for ( ; tessVertex < lastVertex; tessVertex++, - surfaceVertex++ ) + for ( size_t i = 0; i < srf->numVerts; i++ ) { - vec3_t tangent = {}, binormal = {}, normal = {}, position = {}; + shaderVertex_t *tessVertex = modelTessVertex + i; + md5Vertex_t *vertex = surfaceVertex + i; + + vec4_t *vertexPosition = &vertex->position; + vec4_t *vertexNormal = &vertex->normal; + vec4_t *vertexTangent = &vertex->tangent; + vec4_t *vertexBinormal = &vertex->binormal; - float *boneWeight = surfaceVertex->boneWeights; - float *lastWeight = boneWeight + surfaceVertex->numWeights; - uint32_t *boneIndex = surfaceVertex->boneIndexes; - vec4_t *surfacePosition = &surfaceVertex->position; - vec4_t *surfaceNormal = &surfaceVertex->normal; - vec4_t *surfaceTangent = &surfaceVertex->tangent; - vec4_t 
*surfaceBinormal = &surfaceVertex->binormal; + float *boneWeight = vertex->boneWeights; + float *lastWeight = boneWeight + vertex->numWeights; + uint32_t *boneIndex = vertex->boneIndexes; + + vec3_t tangent = {}, binormal = {}, normal = {}, position = {}; - for ( ; boneWeight < lastWeight; boneWeight++, - boneIndex++ ) + for ( ; boneWeight < lastWeight; boneWeight++, boneIndex++ ) { vec3_t tmp; - TransformPoint( &bones[ *boneIndex ], *surfacePosition, tmp ); + TransformPoint( &bones[ *boneIndex ], *vertexPosition, tmp ); VectorMA( position, *boneWeight, tmp, position ); - TransformNormalVector( &bones[ *boneIndex ], *surfaceNormal, tmp ); + TransformNormalVector( &bones[ *boneIndex ], *vertexNormal, tmp ); VectorMA( normal, *boneWeight, tmp, normal ); - TransformNormalVector( &bones[ *boneIndex ], *surfaceTangent, tmp ); + TransformNormalVector( &bones[ *boneIndex ], *vertexTangent, tmp ); VectorMA( tangent, *boneWeight, tmp, tangent ); - TransformNormalVector( &bones[ *boneIndex ], *surfaceBinormal, tmp ); + TransformNormalVector( &bones[ *boneIndex ], *vertexBinormal, tmp ); VectorMA( binormal, *boneWeight, tmp, binormal ); } @@ -1121,7 +1124,7 @@ static void Tess_SurfaceMD5( md5Surface_t *srf ) R_TBNtoQtangentsFast( tangent, binormal, normal, tessVertex->qtangents ); - Vector2Copy( surfaceVertex->texCoords, tessVertex->texCoords ); + Vector2Copy( vertex->texCoords, tessVertex->texCoords ); } } From 90632efc378fccf8a62337c9c7a1ff060558c615 Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Thu, 2 Oct 2025 02:59:00 +0200 Subject: [PATCH 07/11] tr_surface: parallelize Tess_SurfaceMD5() --- src/engine/renderer/tr_surface.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp index 7a0347c3c3..120543dc52 100644 --- a/src/engine/renderer/tr_surface.cpp +++ b/src/engine/renderer/tr_surface.cpp @@ -1056,6 +1056,7 @@ static void Tess_SurfaceMD5( md5Surface_t *srf ) // Deform the vertices by the 
lerped bones. if ( tess.skipTangents ) { + #pragma omp parallel for for ( size_t i = 0; i < srf->numVerts; i++ ) { shaderVertex_t *tessVertex = modelTessVertex + i; @@ -1084,6 +1085,7 @@ static void Tess_SurfaceMD5( md5Surface_t *srf ) } else { + #pragma omp parallel for for ( size_t i = 0; i < srf->numVerts; i++ ) { shaderVertex_t *tessVertex = modelTessVertex + i; From 344f02e65d431a62d0c06a2236adae5e4cec78c5 Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Fri, 3 Oct 2025 03:15:44 +0200 Subject: [PATCH 08/11] tr_init: report OpenMP support in /gfxinfo --- src/engine/renderer/tr_init.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/engine/renderer/tr_init.cpp b/src/engine/renderer/tr_init.cpp index 7d4b90a3a5..e9730d49cd 100644 --- a/src/engine/renderer/tr_init.cpp +++ b/src/engine/renderer/tr_init.cpp @@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // tr_init.c -- functions that are not called every frame #include "tr_local.h" #include "framework/CvarSystem.h" +#include "framework/OmpSystem.h" #include "DetectGLVendors.h" #include "Material.h" #include "GeometryCache.h" @@ -1057,6 +1058,21 @@ ScreenshotCmd screenshotPNGRegistration("screenshotPNG", ssFormat_t::SSF_PNG, "p Log::Notice("Using dual processor acceleration." ); } +#if defined(_OPENMP) + int ompThreads = Omp::GetThreads(); + + if ( ompThreads == 1 ) + { + Log::Notice("%sNot using OpenMP parallelism: only one thread.", Color::ToString( Color::Red ) ); + } + else + { + Log::Notice("%sUsing OpenMP parallelism with %d threads.", Color::ToString( Color::Green ), ompThreads ); + } +#else + Log::Notice("%sNot using OpenMP parallelism: unavailable.", Color::ToString( Color::Red ) ); +#endif + if ( r_finish->integer ) { Log::Notice("Forcing glFinish." 
); From d971703db9b18b464b867dc1646f39d94ace997d Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Wed, 7 Jan 2026 21:44:29 +0100 Subject: [PATCH 09/11] cmake: remove unused CompileFeatures --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 35cf7fa5f9..36b65e869f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -846,7 +846,7 @@ endmacro() function(AddApplication) set(oneValueArgs Target ExecutableName) - set(multiValueArgs ApplicationMain Definitions Flags CompileFeatures Files Libs Tests) + set(multiValueArgs ApplicationMain Definitions Flags Files Libs Tests) cmake_parse_arguments(A "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) # Reuse object files between the real application and the test one From 444d3930821ed4a0dac0220237bff075cc7ddc1f Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Wed, 7 Jan 2026 21:01:42 +0100 Subject: [PATCH 10/11] cmake: make AddApplication() able to pass linker flags --- CMakeLists.txt | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 36b65e869f..14b5f270c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -827,7 +827,8 @@ macro(AddApplicationInternal Target Executable) add_dependencies(${Target} runtime_deps) endif() - set_property(TARGET ${Target} APPEND PROPERTY COMPILE_OPTIONS ${A_Flags}) + set_property(TARGET ${Target} APPEND PROPERTY COMPILE_OPTIONS ${A_CompileFlags}) + set_property(TARGET ${Target} APPEND PROPERTY LINK_OPTIONS ${A_LinkFlags}) set_property(TARGET ${Target} APPEND PROPERTY INCLUDE_DIRECTORIES ${ENGINE_DIR} ${MOUNT_DIR} ${LIB_DIR}) set_property(TARGET ${Target} APPEND PROPERTY COMPILE_DEFINITIONS ${A_Definitions}) set_target_properties(${Target} PROPERTIES OUTPUT_NAME "${Executable}" PREFIX "" FOLDER "engine") @@ -846,13 +847,14 @@ endmacro() function(AddApplication) set(oneValueArgs Target ExecutableName) - set(multiValueArgs ApplicationMain Definitions Flags 
Files Libs Tests) + set(multiValueArgs ApplicationMain Definitions CompileFlags LinkFlags Files Libs Tests) cmake_parse_arguments(A "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) # Reuse object files between the real application and the test one add_library(${A_Target}-objects OBJECT EXCLUDE_FROM_ALL ${A_Files} ${PCH_FILE}) target_link_libraries(${A_Target}-objects engine-lib ${A_Libs} ${LIBS_BASE} ${CPP23SupportLibrary}) - set_property(TARGET ${A_Target}-objects APPEND PROPERTY COMPILE_OPTIONS ${A_Flags}) + set_property(TARGET ${A_Target}-objects APPEND PROPERTY COMPILE_OPTIONS ${A_CompileFlags}) + set_property(TARGET ${A_Target}-objects APPEND PROPERTY LINK_OPTIONS ${A_LinkFlags}) set_property(TARGET ${A_Target}-objects APPEND PROPERTY INCLUDE_DIRECTORIES ${ENGINE_DIR} ${MOUNT_DIR} ${LIB_DIR}) set_property(TARGET ${A_Target}-objects APPEND PROPERTY COMPILE_DEFINITIONS ${A_Definitions}) @@ -893,7 +895,7 @@ if (NOT NACL) ExecutableName dummyapp Definitions USELESS_DEFINITION_TO_AVOID_PCH_ISSUE ApplicationMain ${ENGINE_DIR}/null/NullApplication.cpp - Flags ${WARNINGS} + CompileFlags ${WARNINGS} Files ${COMMON_DIR}/Util.h # must be nonempty Tests ${ENGINETESTLIST} ) @@ -920,7 +922,7 @@ if (BUILD_CLIENT) ExecutableName ${CLIENT_EXECUTABLE_NAME} ApplicationMain ${ENGINE_DIR}/client/ClientApplication.cpp Definitions ${Definitions} - Flags ${WARNINGS} + CompileFlags ${WARNINGS} Files ${WIN_RC} ${BUILDINFOLIST} ${QCOMMONLIST} ${SERVERLIST} ${CLIENTBASELIST} ${CLIENTLIST} Libs ${LIBS_CLIENT} ${LIBS_CLIENTBASE} ${LIBS_ENGINE} Tests ${CLIENTTESTLIST} @@ -938,7 +940,7 @@ if (BUILD_SERVER) ExecutableName daemonded ApplicationMain ${ENGINE_DIR}/server/ServerApplication.cpp Definitions BUILD_ENGINE BUILD_SERVER - Flags ${WARNINGS} + CompileFlags ${WARNINGS} Files ${WIN_RC} ${BUILDINFOLIST} ${QCOMMONLIST} ${SERVERLIST} ${DEDSERVERLIST} Libs ${LIBS_ENGINE} Tests ${ENGINETESTLIST} @@ -951,7 +953,7 @@ if (BUILD_TTY_CLIENT) ExecutableName daemon-tty ApplicationMain 
${ENGINE_DIR}/client/ClientApplication.cpp Definitions BUILD_ENGINE BUILD_TTY_CLIENT - Flags ${WARNINGS} + CompileFlags ${WARNINGS} Files ${WIN_RC} ${BUILDINFOLIST} ${QCOMMONLIST} ${SERVERLIST} ${CLIENTBASELIST} ${TTYCLIENTLIST} Libs ${LIBS_CLIENTBASE} ${LIBS_ENGINE} Tests ${ENGINETESTLIST} From 0caff873bfc3d96e7666b24e5088d3066fdb7825 Mon Sep 17 00:00:00 2001 From: Thomas Debesse Date: Wed, 7 Jan 2026 20:55:46 +0100 Subject: [PATCH 11/11] cmake: only link the graphical client with OpenMP --- CMakeLists.txt | 5 +++-- cmake/DaemonFlags.cmake | 12 ++++++++---- src.cmake | 15 +++++++++++++-- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 14b5f270c7..7a6c43474d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -896,7 +896,7 @@ if (NOT NACL) Definitions USELESS_DEFINITION_TO_AVOID_PCH_ISSUE ApplicationMain ${ENGINE_DIR}/null/NullApplication.cpp CompileFlags ${WARNINGS} - Files ${COMMON_DIR}/Util.h # must be nonempty + Files ${DUMMYAPPLIST} Tests ${ENGINETESTLIST} ) endif() @@ -922,7 +922,8 @@ if (BUILD_CLIENT) ExecutableName ${CLIENT_EXECUTABLE_NAME} ApplicationMain ${ENGINE_DIR}/client/ClientApplication.cpp Definitions ${Definitions} - CompileFlags ${WARNINGS} + CompileFlags ${WARNINGS};${OPENMP_FLAG} + LinkFlags ${OPENMP_FLAG} Files ${WIN_RC} ${BUILDINFOLIST} ${QCOMMONLIST} ${SERVERLIST} ${CLIENTBASELIST} ${CLIENTLIST} Libs ${LIBS_CLIENT} ${LIBS_CLIENTBASE} ${LIBS_ENGINE} Tests ${CLIENTTESTLIST} diff --git a/cmake/DaemonFlags.cmake b/cmake/DaemonFlags.cmake index 9aea2aea82..a4efd83e72 100644 --- a/cmake/DaemonFlags.cmake +++ b/cmake/DaemonFlags.cmake @@ -272,9 +272,11 @@ if (MSVC) endif() if (NOT NACL AND BUILD_CLIENT AND USE_OPENMP) - try_cxx_flag(OPENMP "/openmp") + check_CXX_compiler_flag("/openmp" FLAG_OPENMP) - if (NOT FLAG_OPENMP) + if (FLAG_OPENMP) + set(OPENMP_FLAG "/openmp") + else() message(WARNING "Missing OpenMP") endif() endif() @@ -375,9 +377,11 @@ else() endif() if (NOT NACL AND BUILD_CLIENT AND 
USE_OPENMP) - try_cxx_flag(FOPENMP "-fopenmp") + check_CXX_compiler_flag("-fopenmp" FLAG_FOPENMP) - if (NOT FLAG_FOPENMP) + if (FLAG_FOPENMP) + set(OPENMP_FLAG "-fopenmp") + else() message(WARNING "Missing OpenMP") endif() endif() diff --git a/src.cmake b/src.cmake index f5fc4b485b..115970b241 100644 --- a/src.cmake +++ b/src.cmake @@ -188,8 +188,6 @@ set(ENGINELIST ${ENGINE_DIR}/framework/CvarSystem.h ${ENGINE_DIR}/framework/LogSystem.cpp ${ENGINE_DIR}/framework/LogSystem.h - ${ENGINE_DIR}/framework/OmpSystem.cpp - ${ENGINE_DIR}/framework/OmpSystem.h ${ENGINE_DIR}/framework/Resource.cpp ${ENGINE_DIR}/framework/Resource.h ${ENGINE_DIR}/framework/System.cpp @@ -209,6 +207,11 @@ set(ENGINELIST ${ENGINE_DIR}/RefAPI.h ) +set(OMPLIST + ${ENGINE_DIR}/framework/OmpSystem.cpp + ${ENGINE_DIR}/framework/OmpSystem.h +) + if (WIN32) set(ENGINELIST ${ENGINELIST} ${ENGINE_DIR}/sys/con_passive.cpp @@ -273,6 +276,7 @@ set(CLIENTBASELIST ) set(CLIENTLIST + ${OMPLIST} ${ENGINE_DIR}/audio/ALObjects.cpp ${ENGINE_DIR}/audio/ALObjects.h ${ENGINE_DIR}/audio/Audio.cpp @@ -306,6 +310,7 @@ set(CLIENTTESTLIST ${ENGINETESTLIST} ) set(TTYCLIENTLIST + ${OMPLIST} ${ENGINE_DIR}/null/NullAudio.cpp ${ENGINE_DIR}/null/NullKeyboard.cpp ${ENGINE_DIR}/null/null_input.cpp @@ -313,9 +318,15 @@ set(TTYCLIENTLIST ) set(DEDSERVERLIST + ${OMPLIST} ${ENGINE_DIR}/null/NullKeyboard.cpp ${ENGINE_DIR}/null/null_client.cpp ${ENGINE_DIR}/null/null_input.cpp ) +set(DUMMYAPPLIST + ${OMPLIST} + ${COMMON_DIR}/Util.h +) + set(WIN_RC ${ENGINE_DIR}/sys/windows-resource/icon.rc)