From f20699aed3ad7c54541bbb2e8afd9316d762437b Mon Sep 17 00:00:00 2001
From: scott
Date: Sun, 5 Apr 2026 13:34:21 -0400
Subject: [PATCH] Add fp16 autocast to synthesis for faster GPU throughput

The 6700 XT has significantly higher fp16 throughput than fp32.
autocast("cuda") uses fp16 for matmuls and convolutions (HiFiGAN, S3
tokenizer, flow matching) while keeping fp32 for precision-sensitive
ops like softmax and layer norm.

Co-Authored-By: Claude Sonnet 4.6
---
 engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/engine.py b/engine.py
index 1cafcb2..b1f6cbb 100644
--- a/engine.py
+++ b/engine.py
@@ -122,7 +122,7 @@ def synthesize(
     kwargs["exaggeration"] = exaggeration
     kwargs["cfg_weight"] = cfg_weight
 
-    with torch.inference_mode():
+    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.float16):
         wav = chatterbox_model.generate(text=text, **kwargs)
 
     if torch.cuda.is_available():