Fork of https://github.com/alokprasad/fastspeech_squeezewave to also fix denoising in squeezewave
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

61 lines
1.6 KiB

import os
import shutil

import numpy as np
import torch

from utils import load_data, get_Tacotron2, get_WaveGlow
from utils import process_text, load_data
from data import ljspeech
import hparams as hp
import waveglow
import audio as Audio
  11. def preprocess_ljspeech(filename):
  12. in_dir = filename
  13. out_dir = hp.mel_ground_truth
  14. if not os.path.exists(out_dir):
  15. os.makedirs(out_dir, exist_ok=True)
  16. metadata = ljspeech.build_from_path(in_dir, out_dir)
  17. write_metadata(metadata, out_dir)
  18. shutil.move(os.path.join(hp.mel_ground_truth, "train.txt"),
  19. os.path.join("data", "train.txt"))
  20. def write_metadata(metadata, out_dir):
  21. with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
  22. for m in metadata:
  23. f.write(m + '\n')
  24. def main():
  25. path = os.path.join("data", "LJSpeech-1.1")
  26. preprocess_ljspeech(path)
  27. text_path = os.path.join("data", "train.txt")
  28. texts = process_text(text_path)
  29. if not os.path.exists(hp.alignment_path):
  30. os.mkdir(hp.alignment_path)
  31. else:
  32. return
  33. tacotron2 = get_Tacotron2()
  34. num = 0
  35. for ind, text in enumerate(texts[num:]):
  36. print(ind)
  37. character = text[0:len(text)-1]
  38. mel_gt_name = os.path.join(
  39. hp.mel_ground_truth, "ljspeech-mel-%05d.npy" % (ind+num+1))
  40. mel_gt_target = np.load(mel_gt_name)
  41. _, _, D = load_data(character, mel_gt_target, tacotron2)
  42. np.save(os.path.join(hp.alignment_path, str(
  43. ind+num) + ".npy"), D, allow_pickle=False)
  44. if __name__ == "__main__":
  45. main()