Fork of https://github.com/alokprasad/fastspeech_squeezewave to also fix denoising in squeezewave
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

93 lines
2.5 KiB

  1. import torch
  2. import numpy as np
  3. from scipy.signal import get_window
  4. import librosa.util as librosa_util
  5. def window_sumsquare(window, n_frames, hop_length=200, win_length=800,
  6. n_fft=800, dtype=np.float32, norm=None):
  7. """
  8. # from librosa 0.6
  9. Compute the sum-square envelope of a window function at a given hop length.
  10. This is used to estimate modulation effects induced by windowing
  11. observations in short-time fourier transforms.
  12. Parameters
  13. ----------
  14. window : string, tuple, number, callable, or list-like
  15. Window specification, as in `get_window`
  16. n_frames : int > 0
  17. The number of analysis frames
  18. hop_length : int > 0
  19. The number of samples to advance between frames
  20. win_length : [optional]
  21. The length of the window function. By default, this matches `n_fft`.
  22. n_fft : int > 0
  23. The length of each analysis frame.
  24. dtype : np.dtype
  25. The data type of the output
  26. Returns
  27. -------
  28. wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
  29. The sum-squared envelope of the window function
  30. """
  31. if win_length is None:
  32. win_length = n_fft
  33. n = n_fft + hop_length * (n_frames - 1)
  34. x = np.zeros(n, dtype=dtype)
  35. # Compute the squared window at the desired length
  36. win_sq = get_window(window, win_length, fftbins=True)
  37. win_sq = librosa_util.normalize(win_sq, norm=norm)**2
  38. win_sq = librosa_util.pad_center(win_sq, n_fft)
  39. # Fill the envelope
  40. for i in range(n_frames):
  41. sample = i * hop_length
  42. x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))]
  43. return x
  44. def griffin_lim(magnitudes, stft_fn, n_iters=30):
  45. """
  46. PARAMS
  47. ------
  48. magnitudes: spectrogram magnitudes
  49. stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods
  50. """
  51. angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
  52. angles = angles.astype(np.float32)
  53. angles = torch.autograd.Variable(torch.from_numpy(angles))
  54. signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
  55. for i in range(n_iters):
  56. _, angles = stft_fn.transform(signal)
  57. signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
  58. return signal
  59. def dynamic_range_compression(x, C=1, clip_val=1e-5):
  60. """
  61. PARAMS
  62. ------
  63. C: compression factor
  64. """
  65. return torch.log(torch.clamp(x, min=clip_val) * C)
  66. def dynamic_range_decompression(x, C=1):
  67. """
  68. PARAMS
  69. ------
  70. C: compression factor used to compress
  71. """
  72. return torch.exp(x) / C