Fork of https://github.com/alokprasad/fastspeech_squeezewave to also fix denoising in squeezewave
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

57 lines
2.6 KiB

  1. # *****************************************************************************
  2. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are met:
  6. # * Redistributions of source code must retain the above copyright
  7. # notice, this list of conditions and the following disclaimer.
  8. # * Redistributions in binary form must reproduce the above copyright
  9. # notice, this list of conditions and the following disclaimer in the
  10. # documentation and/or other materials provided with the distribution.
  11. # * Neither the name of the NVIDIA CORPORATION nor the
  12. # names of its contributors may be used to endorse or promote products
  13. # derived from this software without specific prior written permission.
  14. #
  15. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. # ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  19. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  20. # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  21. # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  22. # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  24. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. #
  26. # *****************************************************************************
  27. import os
  28. from scipy.io.wavfile import write
  29. import torch
  30. from waveglow.mel2samp import files_to_list, MAX_WAV_VALUE
  31. # from denoiser import Denoiser
  32. def inference(mel, waveglow, audio_path, sigma=1.0, sampling_rate=22050):
  33. with torch.no_grad():
  34. audio = waveglow.infer(mel, sigma=sigma)
  35. audio = audio * MAX_WAV_VALUE
  36. audio = audio.squeeze()
  37. audio = audio.cpu().numpy()
  38. audio = audio.astype('int16')
  39. write(audio_path, sampling_rate, audio)
  40. def test_speed(mel, waveglow, sigma=1.0, sampling_rate=22050):
  41. with torch.no_grad():
  42. audio = waveglow.infer(mel, sigma=sigma)
  43. audio = audio * MAX_WAV_VALUE
  44. def get_wav(mel, waveglow, sigma=1.0, sampling_rate=22050):
  45. with torch.no_grad():
  46. audio = waveglow.infer(mel, sigma=sigma)
  47. audio = audio * MAX_WAV_VALUE
  48. audio = audio.squeeze()
  49. audio = audio.cpu()
  50. return audio