paddlespeech.t2s.datasets.am_batch_fn module

class paddlespeech.t2s.datasets.am_batch_fn.ErnieSATCollateFn(mlm_prob: float = 0.8, mean_phn_span: int = 8, seg_emb: bool = False, text_masking: bool = False)[source]

Bases: object

Functor class of common_collate_fn()

Methods

__call__(exmaples)

Call self as a function.

class paddlespeech.t2s.datasets.am_batch_fn.StarGANv2VCCollateFn(latent_dim: int = 16, max_mel_length: int = 192)[source]

Bases: object

Functor class of common_collate_fn()

Methods

__call__(exmaples)

Call self as a function.

random_clip
starganv2_vc_batch_fn

random_clip(mel: array)[source]

starganv2_vc_batch_fn(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.build_erniesat_collate_fn(mlm_prob: float = 0.8, mean_phn_span: int = 8, seg_emb: bool = False, text_masking: bool = False)[source]

paddlespeech.t2s.datasets.am_batch_fn.build_starganv2_vc_collate_fn(latent_dim: int = 16, max_mel_length: int = 192)[source]

paddlespeech.t2s.datasets.am_batch_fn.diffsinger_multi_spk_batch_fn(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.diffsinger_single_spk_batch_fn(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.erniesat_batch_fn(examples, mlm_prob: float = 0.8, mean_phn_span: int = 8, seg_emb: bool = False, text_masking: bool = False)[source]

paddlespeech.t2s.datasets.am_batch_fn.fastspeech2_multi_spk_batch_fn(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.fastspeech2_multi_spk_batch_fn_static(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.fastspeech2_single_spk_batch_fn(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.fastspeech2_single_spk_batch_fn_static(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.jets_multi_spk_batch_fn(examples)[source]

Returns:

Dict[str, Any]:

text (Tensor): Text index tensor (B, T_text).
text_lengths (Tensor): Text length tensor (B,).
feats (Tensor): Feature tensor (B, T_feats, aux_channels).
feats_lengths (Tensor): Feature length tensor (B,).
durations (Tensor): Duration tensor (B, T_text).
durations_lengths (Tensor): Durations length tensor (B,).
pitch (Tensor): Pitch tensor (B, pitch_length).
energy (Tensor): Energy tensor (B, energy_length).
speech (Tensor): Speech waveform tensor (B, T_wav).
spk_id (Optional[Tensor]): Speaker index tensor (B,) or (B, 1).
spk_emb (Optional[Tensor]): Speaker embedding tensor (B, spk_embed_dim).

paddlespeech.t2s.datasets.am_batch_fn.jets_single_spk_batch_fn(examples)[source]

Returns:

Dict[str, Any]:

text (Tensor): Text index tensor (B, T_text).
text_lengths (Tensor): Text length tensor (B,).
feats (Tensor): Feature tensor (B, T_feats, aux_channels).
feats_lengths (Tensor): Feature length tensor (B,).
durations (Tensor): Duration tensor (B, T_text).
durations_lengths (Tensor): Durations length tensor (B,).
pitch (Tensor): Pitch tensor (B, pitch_length).
energy (Tensor): Energy tensor (B, energy_length).
speech (Tensor): Speech waveform tensor (B, T_wav).

paddlespeech.t2s.datasets.am_batch_fn.speedyspeech_multi_spk_batch_fn(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.speedyspeech_multi_spk_batch_fn_static(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.speedyspeech_single_spk_batch_fn(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.speedyspeech_single_spk_batch_fn_static(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.tacotron2_multi_spk_batch_fn(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.tacotron2_single_spk_batch_fn(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.transformer_single_spk_batch_fn(examples)[source]

paddlespeech.t2s.datasets.am_batch_fn.vits_multi_spk_batch_fn(examples)[source]

Returns:

Dict[str, Any]:

text (Tensor): Text index tensor (B, T_text).
text_lengths (Tensor): Text length tensor (B,).
feats (Tensor): Feature tensor (B, T_feats, aux_channels).
feats_lengths (Tensor): Feature length tensor (B,).
speech (Tensor): Speech waveform tensor (B, T_wav).
spk_id (Optional[Tensor]): Speaker index tensor (B,) or (B, 1).
spk_emb (Optional[Tensor]): Speaker embedding tensor (B, spk_embed_dim).

paddlespeech.t2s.datasets.am_batch_fn.vits_single_spk_batch_fn(examples)[source]

Returns:

Dict[str, Any]:

text (Tensor): Text index tensor (B, T_text).
text_lengths (Tensor): Text length tensor (B,).
feats (Tensor): Feature tensor (B, T_feats, aux_channels).
feats_lengths (Tensor): Feature length tensor (B,).
speech (Tensor): Speech waveform tensor (B, T_wav).