Add Result.text_index to be able to map segments to paragraphs (#111)

* Add Result.text_index to be able to map segments to paragraphs

* Fix `speed` parameter type annotation (`Number` → `float`), re: #105
This commit is contained in:
Kirill R.
2025-02-23 19:30:25 +03:00
committed by GitHub
parent 2dd9df6779
commit 52f7eb740b

View File

@@ -316,6 +316,7 @@ class KPipeline:
phonemes: str phonemes: str
tokens: Optional[List[en.MToken]] = None tokens: Optional[List[en.MToken]] = None
output: Optional[KModel.Output] = None output: Optional[KModel.Output] = None
text_index: Optional[int] = None
@property @property
def audio(self) -> Optional[torch.FloatTensor]: def audio(self) -> Optional[torch.FloatTensor]:
@@ -342,7 +343,7 @@ class KPipeline:
self, self,
text: Union[str, List[str]], text: Union[str, List[str]],
voice: Optional[str] = None, voice: Optional[str] = None,
speed: Number = 1, speed: float = 1,
split_pattern: Optional[str] = r'\n+', split_pattern: Optional[str] = r'\n+',
model: Optional[KModel] = None model: Optional[KModel] = None
) -> Generator['KPipeline.Result', None, None]: ) -> Generator['KPipeline.Result', None, None]:
@@ -356,7 +357,7 @@ class KPipeline:
text = re.split(split_pattern, text.strip()) if split_pattern else [text] text = re.split(split_pattern, text.strip()) if split_pattern else [text]
# Process each segment # Process each segment
for graphemes in text: for graphemes_index, graphemes in enumerate(text):
if not graphemes.strip(): # Skip empty segments if not graphemes.strip(): # Skip empty segments
continue continue
@@ -373,7 +374,7 @@ class KPipeline:
output = KPipeline.infer(model, ps, pack, speed) if model else None output = KPipeline.infer(model, ps, pack, speed) if model else None
if output is not None and output.pred_dur is not None: if output is not None and output.pred_dur is not None:
KPipeline.join_timestamps(tks, output.pred_dur) KPipeline.join_timestamps(tks, output.pred_dur)
yield self.Result(graphemes=gs, phonemes=ps, tokens=tks, output=output) yield self.Result(graphemes=gs, phonemes=ps, tokens=tks, output=output, text_index=graphemes_index)
# Non-English processing with chunking # Non-English processing with chunking
else: else:
@@ -419,5 +420,5 @@ class KPipeline:
ps = ps[:510] ps = ps[:510]
output = KPipeline.infer(model, ps, pack, speed) if model else None output = KPipeline.infer(model, ps, pack, speed) if model else None
yield self.Result(graphemes=chunk, phonemes=ps, output=output) yield self.Result(graphemes=chunk, phonemes=ps, output=output, text_index=graphemes_index)