diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bc4e69b0..11ec47ca 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,7 +17,7 @@ variables: # # Pick a pipeline on https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/ # - MESA_PIPELINE_ID: 1622883 + MESA_PIPELINE_ID: 1646089 MESA_PROJECT_PATH: mesa/mesa S3_JWT_FILE: /s3_jwt @@ -59,7 +59,7 @@ include: # IMPORTANT: Use a recent Mesa Git revision # The commit ref must be in sync with the pipeline picked above # It can be found on the pipeline page below the commit message - ref: 7fc6af99ea4ef86e96b098dcd29a52d33fdd915f + ref: cc4492204894c0488e96a93f51b546345ae32f88 file: - '/.gitlab-ci/image-tags.yml' diff --git a/.gitlab-ci/expectations/virt/traces-virgl.yml b/.gitlab-ci/expectations/virt/traces-virgl.yml index c50de53a..7ce54d5a 100644 --- a/.gitlab-ci/expectations/virt/traces-virgl.yml +++ b/.gitlab-ci/expectations/virt/traces-virgl.yml @@ -18,7 +18,7 @@ traces: checksum: 7691e87a06e5c8baf9e0c0ca6381775b 0ad/0ad-v2.trace: gl-virgl: - checksum: 6379222413b517861b70e2550337795e + checksum: 9c91dc2a05e140f8233e7f5f3a4b4f11 gputest/gimark-v2.trace: gl-virgl: label: [crash] @@ -30,7 +30,7 @@ traces: checksum: 837b834310e2e6bd784e3202c6c0bed7 gputest/pixmark-volplosion-v2.trace: gl-virgl: - checksum: d006b8b713db2dc0a968d95dd4ff10b7 + checksum: bcaa3938ca6cdc3a6ac0f4da518ff9a9 gputest/plot3d-v2.trace: gl-virgl: checksum: c4a35b0635bbe3fd6693c33814ff5059 @@ -45,7 +45,7 @@ traces: checksum: fd4fadc9c537e0f10f82c767fc95ff5e humus/DynamicBranching3-v2.trace: gl-virgl: - checksum: b96191174fc9487a8b399fad734cd2e7 + checksum: 401178849862615598c7962f0d497e20 humus/HDR-v2.trace: gl-virgl: checksum: a81ae5282a5d2612f05fc4da2c84e7a0 @@ -81,10 +81,10 @@ traces: checksum: 67161dca3527e5769bc02d84bb9a7bce supertuxkart/supertuxkart-mansion-egl-gles-v2.trace: gl-virgl: - checksum: 1f3b873a320587256d7392ddc8162ae2 + checksum: 0809d2721da25b08f71ab361dac75a7d xonotic/xonotic-keybench-high-v2.trace: gl-virgl: - checksum: 
2d2f319615062eb7cc10dfa8d7a66333 + checksum: dc84733cd92584b50dcb7f6b9e57ccce valve/counterstrike-v2.trace: gl-virgl: checksum: 0d32696d18e2cea024a11c252e059002 @@ -94,10 +94,10 @@ traces: label: [skip, flakes] valve/half-life-2-v2.trace: gl-virgl: - checksum: 4299337c370be2523421d17cab387778 + checksum: f88af44c333473dd37661a3da0708226 valve/portal-2-v2.trace: gl-virgl: - checksum: 8974f30da49259a3e5f9ff268e366787 + checksum: 6a793fde53b7d43e28805cfbd48996fa supertuxkart/supertuxkart-antediluvian-abyss.rdc: gl-virgl: label: [crash] @@ -109,7 +109,7 @@ traces: label: [crash] godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc: gl-virgl: - checksum: 9855a7ccdab2cd66a59964aba43bf7df + checksum: 7ba4f0c52a719d94b805653d31ce22d7 ror/ror-default.trace: gl-virgl: label: [crash] diff --git a/.gitlab-ci/expectations/virt/virgl-gles-fails.txt b/.gitlab-ci/expectations/virt/virgl-gles-fails.txt index 791ccde6..8bde4b6f 100644 --- a/.gitlab-ci/expectations/virt/virgl-gles-fails.txt +++ b/.gitlab-ci/expectations/virt/virgl-gles-fails.txt @@ -3097,6 +3097,8 @@ KHR-GL30.framebuffer_blit.framebuffer_blit_functionality_multisampled_to_singles KHR-GL30.texture_lod_bias.texture_lod_bias_all,Fail KHR-GL31.framebuffer_blit.framebuffer_blit_functionality_multisampled_to_singlesampled_blit,Fail KHR-GL32.framebuffer_blit.framebuffer_blit_functionality_multisampled_to_singlesampled_blit,Fail +KHR-GL32.packed_depth_stencil.blit.depth24_stencil8,Fail +KHR-GL32.packed_depth_stencil.blit.depth32f_stencil8,Fail KHR-GL32.texture_lod_bias.texture_lod_bias_all,Fail spec@!opengl 1.1@depthstencil-default_fb-blit samples=6,Fail spec@!opengl 1.1@depthstencil-default_fb-blit samples=8,Fail diff --git a/meson.build b/meson.build index 431c5f4f..2bf8397d 100644 --- a/meson.build +++ b/meson.build @@ -54,6 +54,9 @@ if cc.get_id() == 'gcc' and cc.version().version_compare('< 4.1') error('When using GCC, version 4.1 or later is required.') endif +with_host_windows = host_machine.system() == 
'windows' +with_host_darwin = host_machine.system() == 'darwin' + warnings = [ '-Werror=enum-int-mismatch', '-Werror=implicit-function-declaration', @@ -79,9 +82,6 @@ add_project_arguments(cc.get_supported_arguments(flags), language : 'c') prog_python = import('python').find_installation('python3') -with_host_windows = host_machine.system() == 'windows' -with_host_darwin = host_machine.system() == 'darwin' - not_found = dependency('', required: false) gbm_dep = not_found thread_dep = dependency('threads') @@ -362,6 +362,13 @@ with_venus = get_option('venus') with_render_server = with_venus with_render_server_worker = get_option('render-server-worker') render_server_install_dir = get_option('prefix') / get_option('libexecdir') +if with_venus and with_host_darwin + add_languages('objc', required: true) + objcc = meson.get_compiler('objc') + add_project_arguments(objcc.get_supported_arguments(warnings), language : 'objc') + add_project_arguments(objcc.get_supported_arguments(flags), language : 'objc') +endif + if with_venus venus_dep = [] if get_option('vulkan-dload') @@ -380,7 +387,11 @@ if with_venus if with_host_darwin venus_dep += declare_dependency( - compile_args : ['-DVK_USE_PLATFORM_METAL_EXT'], + compile_args : ['-DVK_USE_PLATFORM_METAL_EXT', '-DVK_ENABLE_BETA_EXTENSIONS'], + dependencies : [ + dependency('appleframeworks', modules: ['Metal'], required: true), + dependency('appleframeworks', modules: ['Foundation'], required: true), + ], ) endif @@ -437,6 +448,10 @@ configure_file(input : 'config.h.meson', add_project_arguments('-imacros', meson.current_build_dir() / 'config.h', language : 'c') add_project_arguments('-DHAVE_CONFIG_H=1', language : 'c') +if with_venus and with_host_darwin + add_project_arguments('-imacros', meson.current_build_dir() / 'config.h', language : 'objc') + add_project_arguments('-DHAVE_CONFIG_H=1', language : 'objc') +endif inc_configuration = include_directories(['.', 'src']) diff --git a/server/main.c b/server/main.c index 
83f22bc7..3ef5a937 100644 --- a/server/main.c +++ b/server/main.c @@ -23,6 +23,9 @@ * When a worker is a process, * - On Linux, it's a subprocess forked from the server process. It returns * from render_server_main and enters render_context_main. + * - On macOS, the worker is posix_spawn'd with --worker-context-* args. + * render_server_main parses these, skips the server loop, and returns + * with ctx_args.valid set for render_context_main. */ int main(int argc, char **argv) diff --git a/server/render_client.c b/server/render_client.c index 87bab87e..dbb6e0f7 100644 --- a/server/render_client.c +++ b/server/render_client.c @@ -131,7 +131,8 @@ render_client_create_context(struct render_client *client, if (rec->worker) ctx_fd = -1; /* ownership transferred */ #else - rec->worker = render_worker_create(srv->worker_jail, NULL, NULL, 0); + rec->worker = render_worker_create(srv->worker_jail, NULL, + &ctx_args, sizeof(ctx_args)); #endif if (!rec->worker) { render_log("failed to create a context worker"); diff --git a/server/render_worker.c b/server/render_worker.c index 01621cff..b8df351d 100644 --- a/server/render_worker.c +++ b/server/render_worker.c @@ -432,6 +432,83 @@ render_worker_jail_detach_workers(struct render_worker_jail *jail) render_worker_jail_remove_worker(jail, worker); } +#if defined(ENABLE_RENDER_SERVER_WORKER_PROCESS) && defined(__APPLE__) + +#include +#include +#include +#include +#include + +#include "render_context.h" + +/* On macOS, fork() without exec() inherits stale XPC connections + * (Mach port based), breaking Metal shader compilation in worker + * subprocesses. posix_spawn() atomically creates a fresh process + * image without an intermediate forked state. + * + * POSIX_SPAWN_CLOEXEC_DEFAULT closes all fds by default in the child; + * only ctx_fd is explicitly inherited via addinherit_np. 
+ */
+
+static pid_t
+render_worker_spawn(const struct render_context_args *ctx_args)
+{
+   /* Returns the worker pid, or -1 if the spawn could not be set up or failed. */
+   /* for devenv without installing server */
+   char *const server_path = getenv("RENDER_SERVER_EXEC_PATH");
+   const char *exec_path = server_path ? server_path : RENDER_SERVER_EXEC_PATH;
+   const int fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO, ctx_args->ctx_fd };
+   char fd_str[16], id_str[16], flags_str[16];
+   pid_t pid = -1;
+
+   snprintf(fd_str, sizeof(fd_str), "%d", ctx_args->ctx_fd);
+   snprintf(id_str, sizeof(id_str), "%u", ctx_args->ctx_id);
+   snprintf(flags_str, sizeof(flags_str), "%u", ctx_args->init_flags);
+
+   char *const argv[] = {
+      (char *)exec_path,
+      "--worker-context-fd", fd_str,
+      "--worker-context-id", id_str,
+      "--worker-context-init-flags", flags_str,
+      "--worker-context-name", (char *)ctx_args->ctx_name,
+      NULL,
+   };
+
+   posix_spawnattr_t attr;
+   if (posix_spawnattr_init(&attr) != 0)
+      return -1;
+   posix_spawn_file_actions_t file_actions;
+   if (posix_spawn_file_actions_init(&file_actions) != 0) {
+      posix_spawnattr_destroy(&attr);
+      return -1;
+   }
+
+   /* Without CLOEXEC_DEFAULT every server fd would leak into the worker, and
+    * without the inherit actions the worker loses ctx_fd: treat both as fatal.
+    */
+   int ret = posix_spawnattr_setflags(&attr, POSIX_SPAWN_CLOEXEC_DEFAULT);
+   for (size_t i = 0; ret == 0 && i < sizeof(fds) / sizeof(fds[0]); i++)
+      ret = posix_spawn_file_actions_addinherit_np(&file_actions, fds[i]);
+   if (ret != 0) {
+      render_log("failed to set up posix_spawn: %s", strerror(ret));
+      goto out;
+   }
+
+   ret = posix_spawn(&pid, exec_path, &file_actions, &attr, argv, *_NSGetEnviron());
+   if (ret != 0) {
+      render_log("posix_spawn failed: %s", strerror(ret));
+      pid = -1; /* pid is unspecified after a failed posix_spawn */
+   }
+
+out:
+   posix_spawn_file_actions_destroy(&file_actions);
+   posix_spawnattr_destroy(&attr);
+   return pid;
+}
+
+#endif /* ENABLE_RENDER_SERVER_WORKER_PROCESS && __APPLE__ */
+
 struct render_worker *
 render_worker_create(struct render_worker_jail *jail,
                      int (*thread_func)(void *thread_data),
@@ -451,8 +528,17 @@ render_worker_create(struct render_worker_jail *jail,
    bool ok;
 
 #if defined(ENABLE_RENDER_SERVER_WORKER_PROCESS)
+#ifdef __APPLE__
+   {
+      const 
struct render_context_args *ctx_args = + (const struct render_context_args *)worker->thread_data; + worker->pid = render_worker_spawn(ctx_args); + ok = worker->pid >= 0; + } +#else worker->pid = fork(); ok = worker->pid >= 0; +#endif (void)thread_func; #elif defined(ENABLE_RENDER_SERVER_WORKER_THREAD) ok = thrd_create(&worker->thread, thread_func, worker->thread_data) == thrd_success; diff --git a/src/mesa/util/anon_file.c b/src/mesa/util/anon_file.c index 4f8a5fb1..9963dd00 100644 --- a/src/mesa/util/anon_file.c +++ b/src/mesa/util/anon_file.c @@ -29,12 +29,14 @@ #include "anon_file.h" -#include -#include #include +#include +#include #include +#include -#if defined(HAVE_MEMFD_CREATE) || defined(__FreeBSD__) || defined(__OpenBSD__) +#if defined(HAVE_MEMFD_CREATE) || defined(__FreeBSD__) || \ + defined(__OpenBSD__) || defined(__APPLE__) #include #elif defined(__ANDROID__) #include @@ -43,7 +45,8 @@ #include #endif -#if !(defined(__FreeBSD__) || defined(HAVE_MEMFD_CREATE) || defined(HAVE_MKOSTEMP) || defined(__ANDROID__)) +#if !(defined(__FreeBSD__) || defined(HAVE_MEMFD_CREATE) || \ + defined(HAVE_MKOSTEMP) || defined(__ANDROID__) || defined(__APPLE__)) static int set_cloexec_or_close(int fd) { @@ -67,7 +70,8 @@ set_cloexec_or_close(int fd) } #endif -#if !(defined(__FreeBSD__) || defined(HAVE_MEMFD_CREATE) || defined(__ANDROID__)) +#if !(defined(__FreeBSD__) || defined(HAVE_MEMFD_CREATE) || \ + defined(__ANDROID__) || defined(__APPLE__)) static int create_tmpfile_cloexec(char *tmpname) { @@ -124,6 +128,22 @@ os_create_anonymous_file(off_t size, const char *debug_name) fd = syscall(SYS_memfd_create, debug_name, MFD_CLOEXEC | MFD_ALLOW_SEALING); #elif defined(__FreeBSD__) fd = shm_open(SHM_ANON, O_CREAT | O_RDWR | O_CLOEXEC, 0600); +#elif defined(__APPLE__) + const char *tag = (debug_name && debug_name[0]) ? 
debug_name : "mesa"; + const unsigned int nonce = arc4random(); + for (unsigned int i = 0; i < 32; i++) { + char shm_name[64]; + snprintf(shm_name, sizeof(shm_name), "/%s-%d-%x-%x", tag, getpid(), nonce, + i); + fd = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0600); + if (fd >= 0) { + shm_unlink(shm_name); + break; + } + + if (errno != EEXIST) + break; + } #elif defined(__OpenBSD__) char template[] = "/tmp/mesa-XXXXXXXXXX"; fd = shm_mkstemp(template); diff --git a/src/meson.build b/src/meson.build index 03b5b12c..c8ce43ed 100644 --- a/src/meson.build +++ b/src/meson.build @@ -205,6 +205,9 @@ if with_venus virgl_sources += venus_sources virgl_sources += venus_codegen virgl_depends += [venus_dep] + if with_host_darwin + virgl_sources += ['venus/vkr_metal_helpers.m'] + endif endif if with_drm_renderers @@ -250,6 +253,7 @@ libvirgl = static_library( virgl_sources, include_directories: [inc_gallium, inc_configuration, 'venus', 'drm'], dependencies : [virgl_depends, drm_uapi_dep], + objc_args : ['-fno-objc-arc'], ) libvirgl_inc = [ diff --git a/src/proxy/proxy_context.c b/src/proxy/proxy_context.c index dc23ef58..2780a540 100644 --- a/src/proxy/proxy_context.c +++ b/src/proxy/proxy_context.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "server/render_protocol.h" @@ -310,10 +311,14 @@ validate_resource_fd_shm(int fd, uint64_t expected_size) } #endif - const uint64_t size = lseek(fd, 0, SEEK_END); - if (size != expected_size) { - proxy_log("failed to validate shm size(%" PRIu64 ") expected(%" PRIu64 ")", size, - expected_size); + struct stat st; + if (fstat(fd, &st) < 0) { + proxy_log("failed to fstat shm fd"); + return false; + } + if ((uint64_t)st.st_size < expected_size) { + proxy_log("shm size(%" PRIu64 ") smaller than expected(%" PRIu64 ")", + (uint64_t)st.st_size, expected_size); return false; } diff --git a/src/venus/venus-protocol/vulkan_beta.h b/src/venus/venus-protocol/vulkan_beta.h new file mode 100644 index 
00000000..147a3f3c --- /dev/null +++ b/src/venus/venus-protocol/vulkan_beta.h @@ -0,0 +1,375 @@ +#ifndef VULKAN_BETA_H_ +#define VULKAN_BETA_H_ 1 + +/* +** Copyright 2015-2026 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +/* +** This header is generated from the Khronos Vulkan XML API Registry. +** +*/ + + +#ifdef __cplusplus +extern "C" { +#endif + + + +// VK_KHR_portability_subset is a preprocessor guard. Do not pass it to API calls. +#define VK_KHR_portability_subset 1 +#define VK_KHR_PORTABILITY_SUBSET_SPEC_VERSION 1 +#define VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME "VK_KHR_portability_subset" +typedef struct VkPhysicalDevicePortabilitySubsetFeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 constantAlphaColorBlendFactors; + VkBool32 events; + VkBool32 imageViewFormatReinterpretation; + VkBool32 imageViewFormatSwizzle; + VkBool32 imageView2DOn3DImage; + VkBool32 multisampleArrayImage; + VkBool32 mutableComparisonSamplers; + VkBool32 pointPolygons; + VkBool32 samplerMipLodBias; + VkBool32 separateStencilMaskRef; + VkBool32 shaderSampleRateInterpolationFunctions; + VkBool32 tessellationIsolines; + VkBool32 tessellationPointMode; + VkBool32 triangleFans; + VkBool32 vertexAttributeAccessBeyondStride; +} VkPhysicalDevicePortabilitySubsetFeaturesKHR; + +typedef struct VkPhysicalDevicePortabilitySubsetPropertiesKHR { + VkStructureType sType; + void* pNext; + uint32_t minVertexInputBindingStrideAlignment; +} VkPhysicalDevicePortabilitySubsetPropertiesKHR; + + + +// VK_AMDX_shader_enqueue is a preprocessor guard. Do not pass it to API calls. 
+#define VK_AMDX_shader_enqueue 1 +#define VK_AMDX_SHADER_ENQUEUE_SPEC_VERSION 2 +#define VK_AMDX_SHADER_ENQUEUE_EXTENSION_NAME "VK_AMDX_shader_enqueue" +#define VK_SHADER_INDEX_UNUSED_AMDX (~0U) +typedef struct VkPhysicalDeviceShaderEnqueueFeaturesAMDX { + VkStructureType sType; + void* pNext; + VkBool32 shaderEnqueue; + VkBool32 shaderMeshEnqueue; +} VkPhysicalDeviceShaderEnqueueFeaturesAMDX; + +typedef struct VkPhysicalDeviceShaderEnqueuePropertiesAMDX { + VkStructureType sType; + void* pNext; + uint32_t maxExecutionGraphDepth; + uint32_t maxExecutionGraphShaderOutputNodes; + uint32_t maxExecutionGraphShaderPayloadSize; + uint32_t maxExecutionGraphShaderPayloadCount; + uint32_t executionGraphDispatchAddressAlignment; + uint32_t maxExecutionGraphWorkgroupCount[3]; + uint32_t maxExecutionGraphWorkgroups; +} VkPhysicalDeviceShaderEnqueuePropertiesAMDX; + +typedef struct VkExecutionGraphPipelineScratchSizeAMDX { + VkStructureType sType; + void* pNext; + VkDeviceSize minSize; + VkDeviceSize maxSize; + VkDeviceSize sizeGranularity; +} VkExecutionGraphPipelineScratchSizeAMDX; + +typedef struct VkExecutionGraphPipelineCreateInfoAMDX { + VkStructureType sType; + const void* pNext; + VkPipelineCreateFlags flags; + uint32_t stageCount; + const VkPipelineShaderStageCreateInfo* pStages; + const VkPipelineLibraryCreateInfoKHR* pLibraryInfo; + VkPipelineLayout layout; + VkPipeline basePipelineHandle; + int32_t basePipelineIndex; +} VkExecutionGraphPipelineCreateInfoAMDX; + +typedef union VkDeviceOrHostAddressConstAMDX { + VkDeviceAddress deviceAddress; + const void* hostAddress; +} VkDeviceOrHostAddressConstAMDX; + +typedef struct VkDispatchGraphInfoAMDX { + uint32_t nodeIndex; + uint32_t payloadCount; + VkDeviceOrHostAddressConstAMDX payloads; + uint64_t payloadStride; +} VkDispatchGraphInfoAMDX; + +typedef struct VkDispatchGraphCountInfoAMDX { + uint32_t count; + VkDeviceOrHostAddressConstAMDX infos; + uint64_t stride; +} VkDispatchGraphCountInfoAMDX; + +typedef struct 
VkPipelineShaderStageNodeCreateInfoAMDX { + VkStructureType sType; + const void* pNext; + const char* pName; + uint32_t index; +} VkPipelineShaderStageNodeCreateInfoAMDX; + +typedef VkResult (VKAPI_PTR *PFN_vkCreateExecutionGraphPipelinesAMDX)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkExecutionGraphPipelineCreateInfoAMDX* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); +typedef VkResult (VKAPI_PTR *PFN_vkGetExecutionGraphPipelineScratchSizeAMDX)(VkDevice device, VkPipeline executionGraph, VkExecutionGraphPipelineScratchSizeAMDX* pSizeInfo); +typedef VkResult (VKAPI_PTR *PFN_vkGetExecutionGraphPipelineNodeIndexAMDX)(VkDevice device, VkPipeline executionGraph, const VkPipelineShaderStageNodeCreateInfoAMDX* pNodeInfo, uint32_t* pNodeIndex); +typedef void (VKAPI_PTR *PFN_vkCmdInitializeGraphScratchMemoryAMDX)(VkCommandBuffer commandBuffer, VkPipeline executionGraph, VkDeviceAddress scratch, VkDeviceSize scratchSize); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, VkDeviceSize scratchSize, const VkDispatchGraphCountInfoAMDX* pCountInfo); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphIndirectAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, VkDeviceSize scratchSize, const VkDispatchGraphCountInfoAMDX* pCountInfo); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphIndirectCountAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, VkDeviceSize scratchSize, VkDeviceAddress countInfo); + +#ifndef VK_NO_PROTOTYPES +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateExecutionGraphPipelinesAMDX( + VkDevice device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkExecutionGraphPipelineCreateInfoAMDX* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL 
vkGetExecutionGraphPipelineScratchSizeAMDX( + VkDevice device, + VkPipeline executionGraph, + VkExecutionGraphPipelineScratchSizeAMDX* pSizeInfo); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetExecutionGraphPipelineNodeIndexAMDX( + VkDevice device, + VkPipeline executionGraph, + const VkPipelineShaderStageNodeCreateInfoAMDX* pNodeInfo, + uint32_t* pNodeIndex); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdInitializeGraphScratchMemoryAMDX( + VkCommandBuffer commandBuffer, + VkPipeline executionGraph, + VkDeviceAddress scratch, + VkDeviceSize scratchSize); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdDispatchGraphAMDX( + VkCommandBuffer commandBuffer, + VkDeviceAddress scratch, + VkDeviceSize scratchSize, + const VkDispatchGraphCountInfoAMDX* pCountInfo); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdDispatchGraphIndirectAMDX( + VkCommandBuffer commandBuffer, + VkDeviceAddress scratch, + VkDeviceSize scratchSize, + const VkDispatchGraphCountInfoAMDX* pCountInfo); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdDispatchGraphIndirectCountAMDX( + VkCommandBuffer commandBuffer, + VkDeviceAddress scratch, + VkDeviceSize scratchSize, + VkDeviceAddress countInfo); +#endif +#endif + + +// VK_NV_cuda_kernel_launch is a preprocessor guard. Do not pass it to API calls. 
+#define VK_NV_cuda_kernel_launch 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCudaModuleNV) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCudaFunctionNV) +#define VK_NV_CUDA_KERNEL_LAUNCH_SPEC_VERSION 2 +#define VK_NV_CUDA_KERNEL_LAUNCH_EXTENSION_NAME "VK_NV_cuda_kernel_launch" +typedef struct VkCudaModuleCreateInfoNV { + VkStructureType sType; + const void* pNext; + size_t dataSize; + const void* pData; +} VkCudaModuleCreateInfoNV; + +typedef struct VkCudaFunctionCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkCudaModuleNV module; + const char* pName; +} VkCudaFunctionCreateInfoNV; + +typedef struct VkCudaLaunchInfoNV { + VkStructureType sType; + const void* pNext; + VkCudaFunctionNV function; + uint32_t gridDimX; + uint32_t gridDimY; + uint32_t gridDimZ; + uint32_t blockDimX; + uint32_t blockDimY; + uint32_t blockDimZ; + uint32_t sharedMemBytes; + size_t paramCount; + const void* const * pParams; + size_t extraCount; + const void* const * pExtras; +} VkCudaLaunchInfoNV; + +typedef struct VkPhysicalDeviceCudaKernelLaunchFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 cudaKernelLaunchFeatures; +} VkPhysicalDeviceCudaKernelLaunchFeaturesNV; + +typedef struct VkPhysicalDeviceCudaKernelLaunchPropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t computeCapabilityMinor; + uint32_t computeCapabilityMajor; +} VkPhysicalDeviceCudaKernelLaunchPropertiesNV; + +typedef VkResult (VKAPI_PTR *PFN_vkCreateCudaModuleNV)(VkDevice device, const VkCudaModuleCreateInfoNV* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkCudaModuleNV* pModule); +typedef VkResult (VKAPI_PTR *PFN_vkGetCudaModuleCacheNV)(VkDevice device, VkCudaModuleNV module, size_t* pCacheSize, void* pCacheData); +typedef VkResult (VKAPI_PTR *PFN_vkCreateCudaFunctionNV)(VkDevice device, const VkCudaFunctionCreateInfoNV* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkCudaFunctionNV* pFunction); +typedef void (VKAPI_PTR *PFN_vkDestroyCudaModuleNV)(VkDevice device, 
VkCudaModuleNV module, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkDestroyCudaFunctionNV)(VkDevice device, VkCudaFunctionNV function, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkCmdCudaLaunchKernelNV)(VkCommandBuffer commandBuffer, const VkCudaLaunchInfoNV* pLaunchInfo); + +#ifndef VK_NO_PROTOTYPES +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateCudaModuleNV( + VkDevice device, + const VkCudaModuleCreateInfoNV* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCudaModuleNV* pModule); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetCudaModuleCacheNV( + VkDevice device, + VkCudaModuleNV module, + size_t* pCacheSize, + void* pCacheData); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateCudaFunctionNV( + VkDevice device, + const VkCudaFunctionCreateInfoNV* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCudaFunctionNV* pFunction); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkDestroyCudaModuleNV( + VkDevice device, + VkCudaModuleNV module, + const VkAllocationCallbacks* pAllocator); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkDestroyCudaFunctionNV( + VkDevice device, + VkCudaFunctionNV function, + const VkAllocationCallbacks* pAllocator); +#endif + +#ifndef VK_ONLY_EXPORTED_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdCudaLaunchKernelNV( + VkCommandBuffer commandBuffer, + const VkCudaLaunchInfoNV* pLaunchInfo); +#endif +#endif + + +// VK_NV_displacement_micromap is a preprocessor guard. Do not pass it to API calls. 
+#define VK_NV_displacement_micromap 1 +#define VK_NV_DISPLACEMENT_MICROMAP_SPEC_VERSION 2 +#define VK_NV_DISPLACEMENT_MICROMAP_EXTENSION_NAME "VK_NV_displacement_micromap" + +typedef enum VkDisplacementMicromapFormatNV { + VK_DISPLACEMENT_MICROMAP_FORMAT_64_TRIANGLES_64_BYTES_NV = 1, + VK_DISPLACEMENT_MICROMAP_FORMAT_256_TRIANGLES_128_BYTES_NV = 2, + VK_DISPLACEMENT_MICROMAP_FORMAT_1024_TRIANGLES_128_BYTES_NV = 3, + VK_DISPLACEMENT_MICROMAP_FORMAT_MAX_ENUM_NV = 0x7FFFFFFF +} VkDisplacementMicromapFormatNV; +typedef struct VkPhysicalDeviceDisplacementMicromapFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 displacementMicromap; +} VkPhysicalDeviceDisplacementMicromapFeaturesNV; + +typedef struct VkPhysicalDeviceDisplacementMicromapPropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t maxDisplacementMicromapSubdivisionLevel; +} VkPhysicalDeviceDisplacementMicromapPropertiesNV; + +typedef struct VkAccelerationStructureTrianglesDisplacementMicromapNV { + VkStructureType sType; + void* pNext; + VkFormat displacementBiasAndScaleFormat; + VkFormat displacementVectorFormat; + VkDeviceOrHostAddressConstKHR displacementBiasAndScaleBuffer; + VkDeviceSize displacementBiasAndScaleStride; + VkDeviceOrHostAddressConstKHR displacementVectorBuffer; + VkDeviceSize displacementVectorStride; + VkDeviceOrHostAddressConstKHR displacedMicromapPrimitiveFlags; + VkDeviceSize displacedMicromapPrimitiveFlagsStride; + VkIndexType indexType; + VkDeviceOrHostAddressConstKHR indexBuffer; + VkDeviceSize indexStride; + uint32_t baseTriangle; + uint32_t usageCountsCount; + const VkMicromapUsageEXT* pUsageCounts; + const VkMicromapUsageEXT* const* ppUsageCounts; + VkMicromapEXT micromap; +} VkAccelerationStructureTrianglesDisplacementMicromapNV; + + + +// VK_AMDX_dense_geometry_format is a preprocessor guard. Do not pass it to API calls. 
+#define VK_AMDX_dense_geometry_format 1 +#define VK_AMDX_DENSE_GEOMETRY_FORMAT_SPEC_VERSION 1 +#define VK_AMDX_DENSE_GEOMETRY_FORMAT_EXTENSION_NAME "VK_AMDX_dense_geometry_format" +#define VK_COMPRESSED_TRIANGLE_FORMAT_DGF1_BYTE_ALIGNMENT_AMDX 128U +#define VK_COMPRESSED_TRIANGLE_FORMAT_DGF1_BYTE_STRIDE_AMDX 128U + +typedef enum VkCompressedTriangleFormatAMDX { + VK_COMPRESSED_TRIANGLE_FORMAT_DGF1_AMDX = 0, + VK_COMPRESSED_TRIANGLE_FORMAT_MAX_ENUM_AMDX = 0x7FFFFFFF +} VkCompressedTriangleFormatAMDX; +typedef struct VkPhysicalDeviceDenseGeometryFormatFeaturesAMDX { + VkStructureType sType; + void* pNext; + VkBool32 denseGeometryFormat; +} VkPhysicalDeviceDenseGeometryFormatFeaturesAMDX; + +typedef struct VkAccelerationStructureDenseGeometryFormatTrianglesDataAMDX { + VkStructureType sType; + const void* pNext; + VkDeviceOrHostAddressConstKHR compressedData; + VkDeviceSize dataSize; + uint32_t numTriangles; + uint32_t numVertices; + uint32_t maxPrimitiveIndex; + uint32_t maxGeometryIndex; + VkCompressedTriangleFormatAMDX format; +} VkAccelerationStructureDenseGeometryFormatTrianglesDataAMDX; + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/venus/vkr_allocator.c b/src/venus/vkr_allocator.c index ed250833..3461e6ce 100644 --- a/src/venus/vkr_allocator.c +++ b/src/venus/vkr_allocator.c @@ -46,6 +46,7 @@ #define VKR_ALLOCATOR_MAX_DEVICE_COUNT 4 struct vkr_inst_proc_table { + PFN_vkEnumerateInstanceExtensionProperties EnumerateInstanceExtensionProperties; PFN_vkCreateInstance CreateInstance; PFN_vkDestroyInstance DestroyInstance; PFN_vkEnumeratePhysicalDevices EnumeratePhysicalDevices; @@ -187,6 +188,17 @@ vkr_allocator_fini(void) vkr_allocator_initialized = false; } +static void +vkr_allocator_global_proc_table_init(PFN_vkGetInstanceProcAddr get_proc_addr, + struct vkr_inst_proc_table *vk) +{ +#define VN_GIPA(cmd) (PFN_##cmd) get_proc_addr(VK_NULL_HANDLE, #cmd) + vk->EnumerateInstanceExtensionProperties = + VN_GIPA(vkEnumerateInstanceExtensionProperties); + 
vk->CreateInstance = VN_GIPA(vkCreateInstance); +#undef VN_GIPA +} + static void vkr_allocator_inst_proc_table_init(VkInstance inst_handle, PFN_vkGetInstanceProcAddr get_proc_addr, @@ -231,23 +243,40 @@ vkr_allocator_init(void) /* Get vkGetInstanceProcAddr from libvulkan */ PFN_vkGetInstanceProcAddr get_proc_addr = vkr_allocator.vulkan_library.GetInstanceProcAddr; + vkr_allocator_global_proc_table_init(get_proc_addr, vk); + + const char *inst_ext_names[4]; + uint32_t inst_ext_count = 0; + VkInstanceCreateFlags inst_flags = 0; + +#ifdef __APPLE__ + if (vkr_library_has_portability_enumeration( + vk->EnumerateInstanceExtensionProperties)) { + inst_flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; + inst_ext_names[inst_ext_count++] = VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME; + } +#endif /* __APPLE__ */ VkApplicationInfo app_info = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .apiVersion = VK_API_VERSION_1_1, }; + assert(inst_ext_count <= ARRAY_SIZE(inst_ext_names)); VkInstanceCreateInfo inst_info = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .flags = inst_flags, .pApplicationInfo = &app_info, + .enabledExtensionCount = inst_ext_count, + .ppEnabledExtensionNames = inst_ext_names, }; - vk->CreateInstance = - (PFN_vkCreateInstance)get_proc_addr(VK_NULL_HANDLE, "vkCreateInstance"); - res = vk->CreateInstance(&inst_info, NULL, &vkr_allocator.instance); + VkInstance inst_handle; + res = vk->CreateInstance(&inst_info, NULL, &inst_handle); if (res != VK_SUCCESS) goto fail; + vkr_allocator.instance = inst_handle; vkr_allocator_inst_proc_table_init(vkr_allocator.instance, get_proc_addr, vk); vkr_allocator.device_count = VKR_ALLOCATOR_MAX_DEVICE_COUNT; diff --git a/src/venus/vkr_context.c b/src/venus/vkr_context.c index 2704bfd8..2ea58988 100644 --- a/src/venus/vkr_context.c +++ b/src/venus/vkr_context.c @@ -282,19 +282,25 @@ vkr_context_create_resource_from_shm(struct vkr_context *ctx, { assert(!vkr_context_get_resource(ctx, res_id)); - int fd = 
os_create_anonymous_file(blob_size, "vkr-shmem"); + /* Round up to host page size. The VMM maps this resource with + * MAP_FIXED which requires page-aligned sizes. + */ + const size_t page_size = getpagesize(); + const uint64_t alloc_size = (blob_size + page_size - 1) & ~(page_size - 1); + + int fd = os_create_anonymous_file(alloc_size, "vkr-shmem"); if (fd < 0) return false; - void *mmap_ptr = mmap(NULL, blob_size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); + void *mmap_ptr = mmap(NULL, alloc_size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); if (mmap_ptr == MAP_FAILED) { close(fd); return false; } - if (!vkr_context_import_resource_internal(ctx, res_id, blob_size, + if (!vkr_context_import_resource_internal(ctx, res_id, alloc_size, VIRGL_RESOURCE_FD_SHM, -1, mmap_ptr)) { - munmap(mmap_ptr, blob_size); + munmap(mmap_ptr, alloc_size); close(fd); return false; } diff --git a/src/venus/vkr_device.c b/src/venus/vkr_device.c index 91441804..3e943bfd 100644 --- a/src/venus/vkr_device.c +++ b/src/venus/vkr_device.c @@ -11,6 +11,7 @@ #include "vkr_context.h" #include "vkr_descriptor_set.h" #include "vkr_device_memory.h" +#include "vkr_metal_helpers.h" #include "vkr_physical_device.h" #include "vkr_queue.h" @@ -130,6 +131,9 @@ vkr_dispatch_vkCreateDevice(struct vn_dispatch_context *dispatch, /* append extensions for our own use */ const char **exts = NULL; uint32_t ext_count = args->pCreateInfo->enabledExtensionCount; + ext_count += physical_dev->EXT_external_memory_metal; + ext_count += physical_dev->EXT_metal_objects; + ext_count += physical_dev->KHR_portability_subset; ext_count += physical_dev->KHR_external_memory_fd; ext_count += physical_dev->EXT_external_memory_dma_buf; ext_count += physical_dev->KHR_external_fence_fd; @@ -139,10 +143,17 @@ vkr_dispatch_vkCreateDevice(struct vn_dispatch_context *dispatch, args->ret = VK_ERROR_OUT_OF_HOST_MEMORY; return; } - for (uint32_t i = 0; i < args->pCreateInfo->enabledExtensionCount; i++) - exts[i] = 
args->pCreateInfo->ppEnabledExtensionNames[i]; - ext_count = args->pCreateInfo->enabledExtensionCount; + ext_count = 0; + for (uint32_t i = 0; i < args->pCreateInfo->enabledExtensionCount; i++) + exts[ext_count++] = args->pCreateInfo->ppEnabledExtensionNames[i]; + + if (physical_dev->EXT_external_memory_metal) + exts[ext_count++] = "VK_EXT_external_memory_metal"; + if (physical_dev->EXT_metal_objects) + exts[ext_count++] = "VK_EXT_metal_objects"; + if (physical_dev->KHR_portability_subset) + exts[ext_count++] = "VK_KHR_portability_subset"; if (physical_dev->KHR_external_memory_fd) exts[ext_count++] = "VK_KHR_external_memory_fd"; if (physical_dev->EXT_external_memory_dma_buf) @@ -177,6 +188,10 @@ vkr_dispatch_vkCreateDevice(struct vn_dispatch_context *dispatch, args->pCreateInfo->ppEnabledExtensionNames, args->pCreateInfo->enabledExtensionCount); + if (physical_dev->EXT_external_memory_metal) + dev->mtl_device = + vkr_metal_get_device(dev->base.handle.device, vk->GetDeviceProcAddr); + free(exts); args->ret = vkr_device_create_queues(ctx, dev, args->pCreateInfo->queueCreateInfoCount, diff --git a/src/venus/vkr_device.h b/src/venus/vkr_device.h index 189df6f5..f7de555c 100644 --- a/src/venus/vkr_device.h +++ b/src/venus/vkr_device.h @@ -26,6 +26,8 @@ struct vkr_device { mtx_t object_mutex; struct list_head objects; + + void *mtl_device; }; VKR_DEFINE_OBJECT_CAST(device, VK_OBJECT_TYPE_DEVICE, VkDevice) diff --git a/src/venus/vkr_device_memory.c b/src/venus/vkr_device_memory.c index d0f29419..97338b63 100644 --- a/src/venus/vkr_device_memory.c +++ b/src/venus/vkr_device_memory.c @@ -10,6 +10,7 @@ #include "venus-protocol/vn_protocol_renderer_transport.h" #include "vkr_device_memory_gen.h" +#include "vkr_metal_helpers.h" #include "vkr_physical_device.h" static bool @@ -297,6 +298,33 @@ vkr_dispatch_vkAllocateMemory(struct vn_dispatch_context *dispatch, int udmabuf_fd = -1; void *gbm_bo = NULL; VkExportMemoryAllocateInfo local_export_info; + + /* macOS: use shared memory 
+ Metal buffer for HOST_VISIBLE cross-process sharing. */ + struct vkr_mtl_shm *mtl_shm = NULL; +#ifdef __APPLE__ + VkImportMemoryMetalHandleInfoEXT local_metal_import = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_METAL_HANDLE_INFO_EXT, + }; + + if ((property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && + physical_dev->EXT_external_memory_metal && !res_info) { + assert(!res_info); + mtl_shm = vkr_mtl_shm_alloc(dev->mtl_device, alloc_info->allocationSize); + if (!mtl_shm) { + args->ret = VK_ERROR_OUT_OF_HOST_MEMORY; + return; + } + + /* Chain Metal import into alloc_info */ + local_metal_import.pNext = alloc_info->pNext; + local_metal_import.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_EXT; + local_metal_import.handle = mtl_shm->mtl_buffer; + alloc_info->pNext = &local_metal_import; + alloc_info->allocationSize = mtl_shm->shm_size; + + valid_fd_types = 1 << VIRGL_RESOURCE_FD_SHM; + } else +#endif /* __APPLE__ */ if ((property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && !res_info) { /* An implementation can support dma_buf import along with opaque fd export/import. 
* If the client driver is using external memory and requesting dma_buf, without @@ -388,6 +416,7 @@ vkr_dispatch_vkAllocateMemory(struct vn_dispatch_context *dispatch, close(local_import_info.fd); if (gbm_bo) vkr_gbm_bo_destroy(gbm_bo); + vkr_mtl_shm_free(mtl_shm); return; } @@ -397,6 +426,7 @@ vkr_dispatch_vkAllocateMemory(struct vn_dispatch_context *dispatch, mem->valid_fd_types = valid_fd_types; mem->udmabuf_fd = udmabuf_fd; mem->gbm_bo = gbm_bo; + mem->mtl_shm = mtl_shm; mem->allocation_size = alloc_info->allocationSize; mem->memory_type_index = mem_type_index; } @@ -505,6 +535,7 @@ vkr_context_init_device_memory_dispatch(struct vkr_context *ctx) void vkr_device_memory_release(struct vkr_device_memory *mem) { + vkr_mtl_shm_free(mem->mtl_shm); if (mem->gbm_bo) vkr_gbm_bo_destroy(mem->gbm_bo); if (mem->udmabuf_fd >= 0) @@ -542,6 +573,16 @@ vkr_device_memory_export_blob(struct vkr_device_memory *mem, : VIRGL_RENDERER_MAP_CACHE_WC; } + if (mem->mtl_shm && mem->mtl_shm->shm_fd >= 0) { + mem->exported = true; + *out_blob = (struct virgl_context_blob){ + .type = VIRGL_RESOURCE_FD_SHM, + .u.fd = os_dupfd_cloexec(mem->mtl_shm->shm_fd), + .map_info = map_info, + }; + return out_blob->u.fd >= 0; + } + const bool can_export_dma_buf = mem->valid_fd_types & (1 << VIRGL_RESOURCE_FD_DMABUF); const bool can_export_opaque = mem->valid_fd_types & (1 << VIRGL_RESOURCE_FD_OPAQUE); enum virgl_resource_fd_type fd_type; diff --git a/src/venus/vkr_device_memory.h b/src/venus/vkr_device_memory.h index 6e4e12e7..32599664 100644 --- a/src/venus/vkr_device_memory.h +++ b/src/venus/vkr_device_memory.h @@ -9,6 +9,7 @@ #include "vkr_common.h" struct gbm_bo; +struct vkr_mtl_shm; struct vkr_device_memory { struct vkr_object base; @@ -26,6 +27,9 @@ struct vkr_device_memory { /* udmabuf backing non-external mappable memory */ int udmabuf_fd; + /* Metal buffer backed by POSIX shared memory */ + struct vkr_mtl_shm *mtl_shm; + uint64_t allocation_size; uint32_t memory_type_index; diff --git 
a/src/venus/vkr_instance.c b/src/venus/vkr_instance.c index a5002a35..27bf2dca 100644 --- a/src/venus/vkr_instance.c +++ b/src/venus/vkr_instance.c @@ -177,6 +177,14 @@ vkr_dispatch_vkCreateInstance(struct vn_dispatch_context *dispatch, create_info->pNext = &messenger_create_info; } +#ifdef __APPLE__ + if (vkr_library_has_portability_enumeration( + vk->EnumerateInstanceExtensionProperties)) { + create_info->flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; + ext_names[ext_count++] = VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME; + } +#endif /* __APPLE__ */ + assert(layer_count <= ARRAY_SIZE(layer_names)); create_info->enabledLayerCount = layer_count; create_info->ppEnabledLayerNames = layer_names; diff --git a/src/venus/vkr_library.c b/src/venus/vkr_library.c index 5cd39962..dc1f42c4 100644 --- a/src/venus/vkr_library.c +++ b/src/venus/vkr_library.c @@ -4,10 +4,11 @@ */ #include "vkr_common.h" -#include "vkr_library.h" #include +#include "vkr_library.h" + void vkr_library_preload_icd(void) { @@ -41,9 +42,17 @@ vkr_library_load(struct vulkan_library *lib) if (lib->handle) return true; +#ifdef __APPLE__ + lib->handle = dlopen("libvulkan.dylib", RTLD_NOW | RTLD_LOCAL); + if (lib->handle == NULL) + lib->handle = dlopen("libvulkan.1.dylib", RTLD_NOW | RTLD_LOCAL); + if (lib->handle == NULL) + lib->handle = dlopen("libMoltenVK.dylib", RTLD_NOW | RTLD_LOCAL); +#else lib->handle = dlopen("libvulkan.so.1", RTLD_NOW | RTLD_LOCAL); if (lib->handle == NULL) lib->handle = dlopen("libvulkan.so", RTLD_NOW | RTLD_LOCAL); +#endif if (lib->handle == NULL) { vkr_log("failed to open libvulkan: %s", dlerror()); return false; @@ -89,3 +98,36 @@ vkr_library_unload(struct vulkan_library *lib) } #endif /* ENABLE_VULKAN_DLOAD */ + +bool +vkr_library_has_portability_enumeration( + PFN_vkEnumerateInstanceExtensionProperties enum_inst_ext_props) +{ + uint32_t property_count = 0; + VkExtensionProperties *properties; + bool has_portability_enumeration = false; + + VkResult ret = 
enum_inst_ext_props(NULL, &property_count, NULL); + if (ret != VK_SUCCESS) + return false; + + properties = calloc(property_count, sizeof(*properties)); + if (!properties) + return false; + + ret = enum_inst_ext_props(NULL, &property_count, properties); + if (ret != VK_SUCCESS) { + free(properties); + return false; + } + + for (uint32_t i = 0; i < property_count; i++) { + if (!strcmp(properties[i].extensionName, + VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME)) { + has_portability_enumeration = true; + break; + } + } + free(properties); + return has_portability_enumeration; +} diff --git a/src/venus/vkr_library.h b/src/venus/vkr_library.h index 718f8918..48e1e09d 100644 --- a/src/venus/vkr_library.h +++ b/src/venus/vkr_library.h @@ -44,4 +44,8 @@ vkr_library_unload(struct vulkan_library *lib) #endif /* ENABLE_VULKAN_DLOAD */ +bool +vkr_library_has_portability_enumeration( + PFN_vkEnumerateInstanceExtensionProperties enum_inst_ext_props); + #endif /* VKR_LIBRARY_H */ diff --git a/src/venus/vkr_metal_helpers.h b/src/venus/vkr_metal_helpers.h new file mode 100644 index 00000000..4f5208bd --- /dev/null +++ b/src/venus/vkr_metal_helpers.h @@ -0,0 +1,72 @@ +/* + * Copyright 2026 Lucas Amaral + * SPDX-License-Identifier: MIT + */ + +#ifndef VKR_METAL_HELPERS_H +#define VKR_METAL_HELPERS_H + +#include + +/* + * Metal shared memory (opaque, allocated/freed in vkr_metal_helpers.m) + */ +struct vkr_mtl_shm { + int shm_fd; + void *shm_ptr; + size_t shm_size; + void *mtl_buffer; +}; + +#ifdef __APPLE__ + +#include + +/* + * Metal helper functions (implemented in vkr_metal_helpers.m) + */ + +/* Return the MTLDevice backing a VkDevice. + * Uses vkExportMetalObjectsEXT to query the device, or NULL on failure. + */ +void * +vkr_metal_get_device(VkDevice vk_device, PFN_vkGetDeviceProcAddr GetDeviceProcAddr); + +/* Allocate Metal shared memory: create anonymous SHM file, mmap it, + * wrap as MTLBuffer. Returns a populated vkr_mtl_shm, or NULL on failure. 
+ * Caller must free with vkr_mtl_shm_free(). + */ +struct vkr_mtl_shm * +vkr_mtl_shm_alloc(void *mtl_device, uint64_t size); + +/* Release all resources held by a vkr_mtl_shm and free the struct. */ +void +vkr_mtl_shm_free(struct vkr_mtl_shm *shm); + +#else /* !__APPLE__ */ + +static inline void * +vkr_metal_get_device(VkDevice vk_device, PFN_vkGetDeviceProcAddr GetDeviceProcAddr) +{ + (void)vk_device; + (void)GetDeviceProcAddr; + return NULL; +} + +static inline struct vkr_mtl_shm * +vkr_mtl_shm_alloc(void *mtl_device, uint64_t size) +{ + (void)mtl_device; + (void)size; + return NULL; +} + +static inline void +vkr_mtl_shm_free(struct vkr_mtl_shm *shm) +{ + (void)shm; +} + +#endif /* __APPLE__ */ + +#endif /* VKR_METAL_HELPERS_H */ diff --git a/src/venus/vkr_metal_helpers.m b/src/venus/vkr_metal_helpers.m new file mode 100644 index 00000000..f0373ed1 --- /dev/null +++ b/src/venus/vkr_metal_helpers.m @@ -0,0 +1,99 @@ +/* + * Copyright 2026 Lucas Amaral + * SPDX-License-Identifier: MIT + */ + +#ifdef __APPLE__ + +#include "vkr_common.h" + +#import +#include +#include +#include + +#include "util/anon_file.h" +#include "venus-protocol/vulkan_metal.h" + +#include "vkr_metal_helpers.h" + +void * +vkr_metal_get_device(VkDevice vk_device, PFN_vkGetDeviceProcAddr GetDeviceProcAddr) +{ + PFN_vkExportMetalObjectsEXT pfn = (PFN_vkExportMetalObjectsEXT)GetDeviceProcAddr( + vk_device, "vkExportMetalObjectsEXT"); + if (!pfn) + return NULL; + + VkExportMetalDeviceInfoEXT device_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_METAL_DEVICE_INFO_EXT, + }; + VkExportMetalObjectsInfoEXT export_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_METAL_OBJECTS_INFO_EXT, + .pNext = &device_info, + }; + pfn(vk_device, &export_info); + return (void *)device_info.mtlDevice; +} + +struct vkr_mtl_shm * +vkr_mtl_shm_alloc(void *mtl_device, uint64_t size) +{ + if (!mtl_device) + return NULL; + + const size_t page_size = getpagesize(); + const size_t aligned_size = (size + page_size - 1) & ~(page_size - 
1); + + int shm_fd = os_create_anonymous_file(aligned_size, "vkr-metal-mem"); + if (shm_fd < 0) + return NULL; + + void *shm_ptr = + mmap(NULL, aligned_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); + if (shm_ptr == MAP_FAILED) { + close(shm_fd); + return NULL; + } + + id device = (id)mtl_device; + id buffer = [device newBufferWithBytesNoCopy:shm_ptr + length:aligned_size + options:MTLResourceStorageModeShared + deallocator:nil]; + if (!buffer) { + munmap(shm_ptr, aligned_size); + close(shm_fd); + return NULL; + } + + struct vkr_mtl_shm *shm = calloc(1, sizeof(*shm)); + if (!shm) { + CFRelease(buffer); + munmap(shm_ptr, aligned_size); + close(shm_fd); + return NULL; + } + + shm->shm_fd = shm_fd; + shm->shm_ptr = shm_ptr; + shm->shm_size = aligned_size; + shm->mtl_buffer = (void *)buffer; + return shm; +} + +void +vkr_mtl_shm_free(struct vkr_mtl_shm *shm) +{ + if (!shm) + return; + if (shm->mtl_buffer) + CFRelease(shm->mtl_buffer); + if (shm->shm_ptr) + munmap(shm->shm_ptr, shm->shm_size); + if (shm->shm_fd >= 0) + close(shm->shm_fd); + free(shm); +} + +#endif /* __APPLE__ */ diff --git a/src/venus/vkr_physical_device.c b/src/venus/vkr_physical_device.c index 043d5845..65fdf4c4 100644 --- a/src/venus/vkr_physical_device.c +++ b/src/venus/vkr_physical_device.c @@ -282,6 +282,12 @@ vkr_physical_device_init_extensions(struct vkr_physical_device *physical_dev) physical_dev->EXT_external_memory_dma_buf = true; else if (!strcmp(props->extensionName, "VK_KHR_external_fence_fd")) physical_dev->KHR_external_fence_fd = true; + else if (!strcmp(props->extensionName, "VK_EXT_external_memory_metal")) + physical_dev->EXT_external_memory_metal = true; + else if (!strcmp(props->extensionName, "VK_EXT_metal_objects")) + physical_dev->EXT_metal_objects = true; + else if (!strcmp(props->extensionName, "VK_KHR_portability_subset")) + physical_dev->KHR_portability_subset = true; const uint32_t spec_ver = vkr_extension_get_spec_version(props->extensionName); if (spec_ver) { @@ 
-305,6 +311,35 @@ vkr_physical_device_init_extensions(struct vkr_physical_device *physical_dev) physical_dev->KHR_external_fence_fd = false; } + /* On macOS, VK_KHR_external_memory_fd is emulated via Metal shared memory. + * MoltenVK doesn't natively support it, but virglrenderer implements + * fd-based memory export using Metal buffers backed by POSIX SHM. + * + * Inject it into the advertised list so the guest Venus driver accepts + * the physical device (it's a hard requirement in vn_physical_device.c). + * The guest never enables it in vkCreateDevice — Mesa's Venus driver + * uses the advertised flag for capability detection only. + * + * Do NOT mark physical_dev->KHR_external_memory_fd = true here — that + * flag tracks native MoltenVK support and gates the host-side + * vkCreateDevice extension list (MoltenVK rejects it with -7). + * + * TODO: Remove after mesa!40478 has had sufficient distro uptake. + */ + if (physical_dev->EXT_external_memory_metal && !physical_dev->KHR_external_memory_fd) { + VkExtensionProperties *new_exts = + realloc(exts, sizeof(*exts) * (advertised_count + 1)); + if (new_exts) { + exts = new_exts; + strcpy(new_exts[advertised_count].extensionName, + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME); + new_exts[advertised_count].specVersion = 0; + advertised_count++; + } else { + vkr_log("failed to inject VK_KHR_external_memory_fd"); + } + } + physical_dev->extensions = realloc(exts, sizeof(*exts) * advertised_count); physical_dev->extension_count = advertised_count; } diff --git a/src/venus/vkr_physical_device.h b/src/venus/vkr_physical_device.h index b0b66918..139b11ad 100644 --- a/src/venus/vkr_physical_device.h +++ b/src/venus/vkr_physical_device.h @@ -23,10 +23,14 @@ struct vkr_physical_device { bool KHR_external_memory_fd; bool EXT_external_memory_dma_buf; + bool KHR_portability_subset; bool KHR_external_fence_fd; bool KHR_external_semaphore_fd; + bool EXT_external_memory_metal; + bool EXT_metal_objects; + VkPhysicalDeviceMemoryProperties 
memory_properties; VkPhysicalDeviceIDProperties id_properties; bool is_dma_buf_fd_export_supported; diff --git a/src/virglrenderer.c b/src/virglrenderer.c index 79717fcc..2e64d82e 100644 --- a/src/virglrenderer.c +++ b/src/virglrenderer.c @@ -1298,13 +1298,14 @@ int virgl_renderer_resource_map_fixed(uint32_t res_handle, void *addr) void *map = NULL; struct virgl_context *ctx = NULL; struct virgl_resource *res = virgl_resource_lookup(res_handle); - enum virgl_resource_fd_type fd_type = res->fd_type; - enum virgl_resource_fd_type export_fd_type = res->fd_type; - int fd = res->fd; if (!res) return -EINVAL; + enum virgl_resource_fd_type fd_type = res->fd_type; + enum virgl_resource_fd_type export_fd_type = res->fd_type; + int fd = res->fd; + if (fd_type == VIRGL_RESOURCE_OPAQUE_HANDLE) { ctx = virgl_context_lookup(res->opaque_handle_context_id); if (!ctx) diff --git a/src/vrend/vrend_renderer.c b/src/vrend/vrend_renderer.c index 4bf88e09..94c75b10 100644 --- a/src/vrend/vrend_renderer.c +++ b/src/vrend/vrend_renderer.c @@ -982,6 +982,13 @@ bool vrend_format_is_bgra(enum virgl_formats format) { format == VIRGL_FORMAT_B8G8R8A8_SRGB); } +static bool +vrend_resource_resource_is_imported(const struct vrend_resource *res) +{ + return has_bit(res->storage_bits, VREND_STORAGE_EGL_IMAGE) || + has_bit(res->storage_bits, VREND_STORAGE_GL_MEMOBJ); +} + static GLuint vrend_resource_get_internal_format_override(const struct vrend_resource *res) { /* Some shared resources imported to guest mesa as EGL images occupy 24bpp instead of more common 32bpp. 
@@ -1010,25 +1017,17 @@ static GLuint vrend_resource_get_internal_format_override(const struct vrend_res static bool vrend_resource_supports_view(const struct vrend_resource *res, UNUSED enum virgl_formats view_format) { - /* Texture views on eglimage-backed bgr* resources are not supported and - * lead to unexpected format interpretation since internally allocated - * bgr* resources use GL_RGBA8 internal format, while eglimage-backed - * resources use BGRA8, but GL lacks an equivalent internalformat enum. + /* Texture views on imported bgr* resources are not supported and lead to + * unexpected format interpretation since internally allocated bgr* resources + * use GL_RGBA8 internal format, while imported resources use BGRA8, but GL + * lacks an equivalent internalformat enum. * * For views that don't require colorspace conversion, we can add swizzles * instead. For views that do require colorspace conversion, manual srgb * decode/encode is required. */ return !(vrend_format_is_bgra(res->base.format) && - has_bit(res->storage_bits, VREND_STORAGE_EGL_IMAGE)) && - (vrend_resource_get_internal_format_override(res) == GL_NONE); -} - -static inline bool -vrend_resource_needs_redblue_swizzle(struct vrend_resource *res, - enum virgl_formats view_format) -{ - return !vrend_resource_supports_view(res, view_format) && - vrend_format_is_bgra(res->base.format) ^ vrend_format_is_bgra(view_format); + vrend_resource_resource_is_imported(res)) && + (vrend_resource_get_internal_format_override(res) == GL_NONE); } static inline bool @@ -2752,7 +2751,8 @@ int vrend_create_sampler_view(struct vrend_context *ctx, for (enum pipe_swizzle i = 0; i < 4; ++i) view->gl_swizzle[i] = to_gl_swizzle(swizzle[i]); - if (res->is_imported && vrend_format_is_bgra(view->texture->base.format)) { + if (!vrend_resource_supports_view(view->texture, view->format) && + vrend_format_is_bgra(view->format)) { /* Swap R/B channel for vulkan imported texture. 
*/ GLenum tmp = view->gl_swizzle[0]; view->gl_swizzle[0] = view->gl_swizzle[2]; @@ -3130,7 +3130,8 @@ static void vrend_hw_emit_framebuffer_state(struct vrend_sub_context *sub_ctx) * be necessary, e.g. for rgb* views on bgr* resources. Ensure this * happens by adding a shader swizzle to the final write of such surfaces. */ - if (vrend_resource_needs_redblue_swizzle(surf->texture, surf->format)) + if (!vrend_resource_supports_view(surf->texture, surf->format) && + vrend_format_is_bgra(surf->format)) sub_ctx->swizzle_output_rgb_to_bgr |= 1 << i; /* glTextureView() on eglimage-backed bgr* textures for is not supported. @@ -4706,7 +4707,8 @@ static void vrend_clear_prepare(struct vrend_sub_context *sub_ctx, if (surf && vrend_format_is_emulated_alpha(surf->format)) { glClearColor(colorf[3], 0.0, 0.0, 0.0); } else if (surf && - vrend_resource_needs_redblue_swizzle(surf->texture, surf->format)) { + (!vrend_resource_supports_view(surf->texture, surf->format) && + vrend_format_is_bgra(surf->format))) { VREND_DEBUG(dbg_bgra, sub_ctx->parent, "swizzling glClearColor() since rendering surface is an externally-stored BGR* resource\n"); glClearColor(colorf[2], colorf[1], colorf[0], colorf[3]); } else { @@ -7770,6 +7772,9 @@ int vrend_renderer_init(const struct vrend_if_cbs *cbs, uint32_t flags) return 0; cleanup_and_fail: + if (flags & VREND_USE_THREAD_SYNC) + vrend_free_sync_thread(); + vrend_renderer_fini(); fail: return EINVAL; @@ -10829,6 +10834,14 @@ static GLuint vrend_make_view(struct vrend_resource *res, enum virgl_formats for return view_id; } +static inline bool +vrend_blit_resource_needs_redblue_swizzle(struct vrend_resource *res, + enum virgl_formats view_format) +{ + return !vrend_resource_supports_view(res, view_format) && + vrend_format_is_bgra(res->base.format) ^ vrend_format_is_bgra(view_format); +} + static bool vrend_blit_needs_redblue_swizzle(struct vrend_resource *src_res, struct vrend_resource *dst_res, const struct pipe_blit_info *info) @@ -10836,8 
+10849,8 @@ static bool vrend_blit_needs_redblue_swizzle(struct vrend_resource *src_res, /* EGL-backed bgr* resources are always stored with BGR* internal format, * despite Virgl's use of the GL_RGBA8 internal format, so special care must * be taken when determining the swizzling. */ - bool src_needs_swizzle = vrend_resource_needs_redblue_swizzle(src_res, info->src.format); - bool dst_needs_swizzle = vrend_resource_needs_redblue_swizzle(dst_res, info->dst.format); + bool src_needs_swizzle = vrend_blit_resource_needs_redblue_swizzle(src_res, info->src.format); + bool dst_needs_swizzle = vrend_blit_resource_needs_redblue_swizzle(dst_res, info->dst.format); return src_needs_swizzle ^ dst_needs_swizzle; } @@ -12967,7 +12980,8 @@ void *vrend_renderer_get_cursor_contents(struct pipe_resource *pres, } for (h = 0; h < res->base.height0; h++) { - uint32_t doff = (res->base.height0 - h - 1) * res->base.width0 * blsize; + uint32_t dh = res->y_0_top ? (res->base.height0 - h - 1) : (h); + uint32_t doff = dh * res->base.width0 * blsize; uint32_t soff = h * res->base.width0 * blsize; memcpy(data2 + doff, data + soff, res->base.width0 * blsize); @@ -13483,7 +13497,6 @@ vrend_renderer_pipe_resource_set_type(struct vrend_context *ctx, glTexParameteri(gr->target, GL_TEXTURE_TILING_EXT, GL_LINEAR_TILING_EXT); glTexStorageMem2DEXT(gr->target, 1, internalformat, width, height, mem_object, 0); glBindTexture(gr->target, 0); - gr->is_imported = true; } res->pipe_resource = &gr->base; } diff --git a/src/vrend/vrend_renderer.h b/src/vrend/vrend_renderer.h index adb1213a..d732e7f1 100644 --- a/src/vrend/vrend_renderer.h +++ b/src/vrend/vrend_renderer.h @@ -114,7 +114,6 @@ struct vrend_resource { uint32_t blob_id; struct list_head head; - bool is_imported; }; #define VIRGL_TEXTURE_NEED_SWIZZLE (1 << 0) diff --git a/src/vrend/vrend_shader.c b/src/vrend/vrend_shader.c index 6a26fdfe..d4e815b6 100644 --- a/src/vrend/vrend_shader.c +++ b/src/vrend/vrend_shader.c @@ -7570,7 +7570,8 @@ static void 
emit_ios_tcs(const struct dump_ctx *ctx, ctx->separable_program); } else emit_ios_generic(ctx, glsl_strbufs, generic_ios, texcoord_ios, io_out, "", &ctx->outputs[i], "out", "[]"); - } else if (ctx->outputs[i].invariant || ctx->outputs[i].precise) { + } else if ((ctx->outputs[i].invariant || ctx->outputs[i].precise) && + !ctx->outputs[i].glsl_gl_block) { emit_hdrf(glsl_strbufs, "%s%s;\n", ctx->outputs[i].precise ? "precise " : (ctx->outputs[i].invariant ? "invariant " : ""),