From 967ed41239253e95dd5128291d2931c38808fb48 Mon Sep 17 00:00:00 2001 From: scott Date: Sun, 5 Apr 2026 20:30:49 -0400 Subject: [PATCH] =?UTF-8?q?Revert=20FP16=20autocast=20=E2=80=94=20increase?= =?UTF-8?q?s=20TTFA=20on=20first=20request?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit autocast triggers fp16 kernel selection at first call for each tensor shape. Since the warmup uses short text, real requests re-trigger selection and are net slower. Keeping FP32 + conditionals cache. Co-Authored-By: Claude Sonnet 4.6 --- engine.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/engine.py b/engine.py index 55b457b..766ed87 100644 --- a/engine.py +++ b/engine.py @@ -116,8 +116,7 @@ def synthesize( kwargs["cfg_weight"] = cfg_weight with torch.inference_mode(): - with torch.amp.autocast(device_type="cuda", dtype=torch.float16): - wav = chatterbox_model.generate(text=text, **kwargs) + wav = chatterbox_model.generate(text=text, **kwargs) if torch.cuda.is_available(): torch.cuda.synchronize()