Source code for paddlespeech.vector.io.embedding_norm

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict

import paddle


[docs]class InputNormalization: spk_dict_mean: Dict[int, paddle.Tensor] spk_dict_std: Dict[int, paddle.Tensor] spk_dict_count: Dict[int, int] def __init__( self, mean_norm=True, std_norm=True, norm_type="global", ): """Do feature or embedding mean and std norm Args: mean_norm (bool, optional): mean norm flag. Defaults to True. std_norm (bool, optional): std norm flag. Defaults to True. norm_type (str, optional): norm type. Defaults to "global". """ super().__init__() self.training = True self.mean_norm = mean_norm self.std_norm = std_norm self.norm_type = norm_type self.glob_mean = paddle.to_tensor([0], dtype="float32") self.glob_std = paddle.to_tensor([0], dtype="float32") self.spk_dict_mean = {} self.spk_dict_std = {} self.spk_dict_count = {} self.weight = 1.0 self.count = 0 self.eps = 1e-10 def __call__(self, x, lengths, spk_ids=paddle.to_tensor([], dtype="float32")): """Returns the tensor with the surrounding context. Args: x (paddle.Tensor): A batch of tensors. lengths (paddle.Tensor): A batch of tensors containing the relative length of each sentence (e.g, [0.7, 0.9, 1.0]). It is used to avoid computing stats on zero-padded steps. spk_ids (paddle.Tensor, optional): tensor containing the ids of each speaker (e.g, [0 10 6]). It is used to perform per-speaker normalization when norm_type='speaker'. Defaults to paddle.to_tensor([], dtype="float32"). Returns: paddle.Tensor: The normalized feature or embedding """ N_batches = x.shape[0] current_means = [] current_stds = [] for snt_id in range(N_batches): # Avoiding padded time steps # actual size is the actual time data length actual_size = paddle.round(lengths[snt_id] * x.shape[1]).astype("int32") # computing actual time data statistics # we extract the snt_id embedding from the x # and the target paddle.Tensor will reduce an 0-axis # so we need unsqueeze operation to recover the all axis current_mean, current_std = self._compute_current_stats( x[snt_id, 0:actual_size, ...].unsqueeze(0)) current_means.append(current_mean) current_stds.append(current_std) if self.norm_type == "global": current_mean = paddle.mean(paddle.stack(current_means), axis=0) current_std = paddle.mean(paddle.stack(current_stds), axis=0) if self.norm_type == "global": if self.training: if self.count == 0: self.glob_mean = current_mean self.glob_std = current_std else: self.weight = 1 / (self.count + 1) self.glob_mean = ( 1 - self.weight ) * self.glob_mean + self.weight * current_mean self.glob_std = ( 1 - self.weight ) * self.glob_std + self.weight * current_std self.glob_mean.detach() self.glob_std.detach() self.count = self.count + 1 x = (x - self.glob_mean) / (self.glob_std) return x def _compute_current_stats(self, x): """Returns the tensor with the surrounding context. Args: x (paddle.Tensor): A batch of tensors. Returns: the statistics of the data """ # Compute current mean if self.mean_norm: current_mean = paddle.mean(x, axis=0).detach() else: current_mean = paddle.to_tensor([0.0], dtype="float32") # Compute current std if self.std_norm: current_std = paddle.std(x, axis=0).detach() else: current_std = paddle.to_tensor([1.0], dtype="float32") # Improving numerical stability of std current_std = paddle.maximum(current_std, self.eps * paddle.ones_like(current_std)) return current_mean, current_std def _statistics_dict(self): """Fills the dictionary containing the normalization statistics. """ state = {} state["count"] = self.count state["glob_mean"] = self.glob_mean state["glob_std"] = self.glob_std state["spk_dict_mean"] = self.spk_dict_mean state["spk_dict_std"] = self.spk_dict_std state["spk_dict_count"] = self.spk_dict_count return state def _load_statistics_dict(self, state): """Loads the dictionary containing the statistics. Arguments --------- state : dict A dictionary containing the normalization statistics. """ self.count = state["count"] if isinstance(state["glob_mean"], int): self.glob_mean = state["glob_mean"] self.glob_std = state["glob_std"] else: self.glob_mean = state["glob_mean"] # .to(self.device_inp) self.glob_std = state["glob_std"] # .to(self.device_inp) # Loading the spk_dict_mean in the right device self.spk_dict_mean = {} for spk in state["spk_dict_mean"]: self.spk_dict_mean[spk] = state["spk_dict_mean"][spk] # Loading the spk_dict_std in the right device self.spk_dict_std = {} for spk in state["spk_dict_std"]: self.spk_dict_std[spk] = state["spk_dict_std"][spk] self.spk_dict_count = state["spk_dict_count"] return state
[docs] def to(self, device): """Puts the needed tensors in the right device. """ self = super(InputNormalization, self).to(device) self.glob_mean = self.glob_mean.to(device) self.glob_std = self.glob_std.to(device) for spk in self.spk_dict_mean: self.spk_dict_mean[spk] = self.spk_dict_mean[spk].to(device) self.spk_dict_std[spk] = self.spk_dict_std[spk].to(device) return self
[docs] def save(self, path): """Save statistic dictionary. Args: path (str): A path where to save the dictionary. """ stats = self._statistics_dict() paddle.save(stats, path)
def _load(self, path, end_of_epoch=False, device=None): """Load statistic dictionary. Arguments --------- path : str The path of the statistic dictionary device : str, None Passed to paddle.load(..., map_location=device) """ del end_of_epoch # Unused here. stats = paddle.load(path, map_location=device) self._load_statistics_dict(stats)