From 52f7eb740bbef9ba87fc74d615f0fd6509110072 Mon Sep 17 00:00:00 2001 From: "Kirill R." Date: Sun, 23 Feb 2025 19:30:25 +0300 Subject: [PATCH] Add Result.text_index to be able to map segments to paragraphs (#111) * Add Result.text_index to be able to map segments to paragraphs * Fix the type annotation of speed (Number -> float) re: #105 --- kokoro/pipeline.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kokoro/pipeline.py b/kokoro/pipeline.py index 7eb1a96..d7969a2 100644 --- a/kokoro/pipeline.py +++ b/kokoro/pipeline.py @@ -316,6 +316,7 @@ class KPipeline: phonemes: str tokens: Optional[List[en.MToken]] = None output: Optional[KModel.Output] = None + text_index: Optional[int] = None @property def audio(self) -> Optional[torch.FloatTensor]: @@ -342,7 +343,7 @@ class KPipeline: self, text: Union[str, List[str]], voice: Optional[str] = None, - speed: Number = 1, + speed: float = 1, split_pattern: Optional[str] = r'\n+', model: Optional[KModel] = None ) -> Generator['KPipeline.Result', None, None]: @@ -356,7 +357,7 @@ class KPipeline: text = re.split(split_pattern, text.strip()) if split_pattern else [text] # Process each segment - for graphemes in text: + for graphemes_index, graphemes in enumerate(text): if not graphemes.strip(): # Skip empty segments continue @@ -373,7 +374,7 @@ class KPipeline: output = KPipeline.infer(model, ps, pack, speed) if model else None if output is not None and output.pred_dur is not None: KPipeline.join_timestamps(tks, output.pred_dur) - yield self.Result(graphemes=gs, phonemes=ps, tokens=tks, output=output) + yield self.Result(graphemes=gs, phonemes=ps, tokens=tks, output=output, text_index=graphemes_index) # Non-English processing with chunking else: @@ -419,5 +420,5 @@ class KPipeline: ps = ps[:510] output = KPipeline.infer(model, ps, pack, speed) if model else None - yield self.Result(graphemes=chunk, phonemes=ps, output=output) + yield self.Result(graphemes=chunk, phonemes=ps, output=output, text_index=graphemes_index)