Fork of https://github.com/alokprasad/fastspeech_squeezewave to also fix denoising in squeezewave
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

94 lines
2.6 KiB

  1. import torch
  2. import numpy as np
  3. from scipy.signal import get_window
  4. import librosa.util as librosa_util
  5. def window_sumsquare(window, n_frames, hop_length=200, win_length=800,
  6. n_fft=800, dtype=np.float32, norm=None):
  7. """
  8. # from librosa 0.6
  9. Compute the sum-square envelope of a window function at a given hop length.
  10. This is used to estimate modulation effects induced by windowing
  11. observations in short-time fourier transforms.
  12. Parameters
  13. ----------
  14. window : string, tuple, number, callable, or list-like
  15. Window specification, as in `get_window`
  16. n_frames : int > 0
  17. The number of analysis frames
  18. hop_length : int > 0
  19. The number of samples to advance between frames
  20. win_length : [optional]
  21. The length of the window function. By default, this matches `n_fft`.
  22. n_fft : int > 0
  23. The length of each analysis frame.
  24. dtype : np.dtype
  25. The data type of the output
  26. Returns
  27. -------
  28. wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
  29. The sum-squared envelope of the window function
  30. """
  31. if win_length is None:
  32. win_length = n_fft
  33. n = n_fft + hop_length * (n_frames - 1)
  34. x = np.zeros(n, dtype=dtype)
  35. # Compute the squared window at the desired length
  36. win_sq = get_window(window, win_length, fftbins=True)
  37. win_sq = librosa_util.normalize(win_sq, norm=norm)**2
  38. win_sq = librosa_util.pad_center(win_sq, n_fft)
  39. # Fill the envelope
  40. for i in range(n_frames):
  41. sample = i * hop_length
  42. x[sample:min(n, sample + n_fft)
  43. ] += win_sq[:max(0, min(n_fft, n - sample))]
  44. return x
  45. def griffin_lim(magnitudes, stft_fn, n_iters=30):
  46. """
  47. PARAMS
  48. ------
  49. magnitudes: spectrogram magnitudes
  50. stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods
  51. """
  52. angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
  53. angles = angles.astype(np.float32)
  54. angles = torch.autograd.Variable(torch.from_numpy(angles))
  55. signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
  56. for i in range(n_iters):
  57. _, angles = stft_fn.transform(signal)
  58. signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
  59. return signal
  60. def dynamic_range_compression(x, C=1, clip_val=1e-5):
  61. """
  62. PARAMS
  63. ------
  64. C: compression factor
  65. """
  66. return torch.log(torch.clamp(x, min=clip_val) * C)
  67. def dynamic_range_decompression(x, C=1):
  68. """
  69. PARAMS
  70. ------
  71. C: compression factor used to compress
  72. """
  73. return torch.exp(x) / C