From cd33b1c161309733f04a1e5ca3806dfa7d665f18 Mon Sep 17 00:00:00 2001
From: scott
Date: Sun, 5 Apr 2026 14:21:32 -0400
Subject: [PATCH] Fix MIOpen MLIR kernel compilation crash during benchmark search

Two changes:
- ulimits nofile=65536: MIOpen exhaustive search compiles many MLIR kernels
  in parallel, each opening temp files in /tmp. Default container limit
  (1024) is too low and ld.lld fails with 'too many open files'.
- MIOPEN_DEBUG_CONV_IMPLICIT_GEMM=0: disables the MLIR-based ImplicitGEMM
  solvers that generate the failing kernels, leaving Direct/Winograd/GEMM.

Co-Authored-By: Claude Sonnet 4.6
---
 docker-compose.yml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index cee00aa..50b7492 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -17,6 +17,10 @@ services:
     shm_size: 8g
     security_opt:
       - seccomp=unconfined
+    ulimits:
+      nofile:
+        soft: 65536
+        hard: 65536
     volumes:
       - ./config.yaml:/app/config.yaml
       - ./voices:/app/voices
@@ -26,11 +30,12 @@ services:
       - HF_HUB_ENABLE_HF_TRANSFER=1
       # Required for RX 6700 XT (gfx1031) - not natively supported in ROCm 7.2.
       - HSA_OVERRIDE_GFX_VERSION=10.3.0
-      # Disable MIOpen's SQLite cache. Without this, cudnn.benchmark triggers an
-      # exhaustive kernel search and then crashes trying to write results to SQLite.
-      # PyTorch's own in-memory benchmark cache still works so warmup results are
-      # reused for all subsequent requests within the same container run.
+      # Disable MIOpen's SQLite cache — avoids crashes writing benchmark results.
+      # PyTorch's in-memory benchmark cache still applies within a container run.
       - MIOPEN_DISABLE_CACHE=1
+      # Disable MLIR-based ImplicitGEMM solvers. These compile MLIR kernels on the
+      # fly and hit 'too many open files' during the exhaustive benchmark search.
+      - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM=0
       # - HF_TOKEN=your_token_here
 
 volumes: