dan
/
fastspeech_squeezewave

import torchimport numpy as npfrom scipy.signal import get_windowimport librosa.util as librosa_util

def window_sumsquare(window, n_frames, hop_length=200, win_length=800,                     n_fft=800, dtype=np.float32, norm=None):    """
    # from librosa 0.6    Compute the sum-square envelope of a window function at a given hop length.
    This is used to estimate modulation effects induced by windowing    observations in short-time fourier transforms.
    Parameters    ----------    window : string, tuple, number, callable, or list-like        Window specification, as in `get_window`
    n_frames : int > 0        The number of analysis frames
    hop_length : int > 0        The number of samples to advance between frames
    win_length : [optional]        The length of the window function.  By default, this matches `n_fft`.
    n_fft : int > 0        The length of each analysis frame.
    dtype : np.dtype        The data type of the output
    Returns    -------    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`        The sum-squared envelope of the window function    """
    if win_length is None:        win_length = n_fft
    n = n_fft + hop_length * (n_frames - 1)    x = np.zeros(n, dtype=dtype)
    # Compute the squared window at the desired length    win_sq = get_window(window, win_length, fftbins=True)    win_sq = librosa_util.normalize(win_sq, norm=norm)**2    win_sq = librosa_util.pad_center(win_sq, n_fft)
    # Fill the envelope    for i in range(n_frames):        sample = i * hop_length        x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))]    return x

def griffin_lim(magnitudes, stft_fn, n_iters=30):    """
    PARAMS    ------    magnitudes: spectrogram magnitudes    stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods    """

    angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))    angles = angles.astype(np.float32)    angles = torch.autograd.Variable(torch.from_numpy(angles))    signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
    for i in range(n_iters):        _, angles = stft_fn.transform(signal)        signal = stft_fn.inverse(magnitudes, angles).squeeze(1)    return signal

def dynamic_range_compression(x, C=1, clip_val=1e-5):    """
    PARAMS    ------    C: compression factor    """
    return torch.log(torch.clamp(x, min=clip_val) * C)

def dynamic_range_decompression(x, C=1):    """
    PARAMS    ------    C: compression factor used to compress    """
    return torch.exp(x) / C