diff --git a/engine.py b/engine.py
index dd42a88..1cafcb2 100644
--- a/engine.py
+++ b/engine.py
@@ -45,6 +45,15 @@ def load_model() -> bool:
     _is_turbo = False
     _sample_rate = 24000
+
+    # Enable MIOpen algorithm benchmarking. Without this, PyTorch picks
+    # convolution algorithms heuristically and passes ptr=0/size=0 workspace
+    # to MIOpen, forcing a slow fallback on every conv op. With benchmark=True,
+    # PyTorch evaluates algorithms with proper workspace on first use and caches
+    # the best result (persisted via MIOPEN_USER_DB_PATH volume mount).
+    if torch.cuda.is_available():
+        torch.backends.cudnn.benchmark = True
+
     _patch_timing(chatterbox_model)
     logger.info("Model loaded successfully")
     return True
 