Fork of https://github.com/alokprasad/fastspeech_squeezewave to also fix denoising in squeezewave
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

183 lines
5.0 KiB

  1. import torch
  2. import torch.nn as nn
  3. import torch.nn.functional as F
  4. import numpy as np
  5. import matplotlib
  6. import matplotlib.pyplot as plt
  7. import os
  8. import tacotron2 as Tacotron2
  9. import text
  10. import hparams
  11. def process_text(train_text_path):
  12. with open(train_text_path, "r", encoding="utf-8") as f:
  13. txt = []
  14. for line in f.readlines():
  15. txt.append(line)
  16. return txt
  17. def get_param_num(model):
  18. num_param = sum(param.numel() for param in model.parameters())
  19. return num_param
  20. def plot_data(data, figsize=(12, 4)):
  21. _, axes = plt.subplots(1, len(data), figsize=figsize)
  22. for i in range(len(data)):
  23. axes[i].imshow(data[i], aspect='auto',
  24. origin='bottom', interpolation='none')
  25. if not os.path.exists("img"):
  26. os.mkdir("img")
  27. plt.savefig(os.path.join("img", "model_test.jpg"))
  28. def get_mask_from_lengths(lengths, max_len=None):
  29. if max_len == None:
  30. max_len = torch.max(lengths).item()
  31. ids = torch.arange(0, max_len, out=torch.cuda.LongTensor(max_len))
  32. mask = (ids < lengths.unsqueeze(1)).byte()
  33. return mask
  34. def get_WaveGlow():
  35. waveglow_path = os.path.join("waveglow", "pretrained_model")
  36. waveglow_path = os.path.join(waveglow_path, "waveglow_256channels.pt")
  37. wave_glow = torch.load(waveglow_path)['model']
  38. wave_glow = wave_glow.remove_weightnorm(wave_glow)
  39. wave_glow.cuda().eval()
  40. for m in wave_glow.modules():
  41. if 'Conv' in str(type(m)):
  42. setattr(m, 'padding_mode', 'zeros')
  43. return wave_glow
  44. def get_Tacotron2():
  45. checkpoint_path = "tacotron2_statedict.pt"
  46. checkpoint_path = os.path.join(os.path.join(
  47. "Tacotron2", "pretrained_model"), checkpoint_path)
  48. model = Tacotron2.model.Tacotron2(
  49. Tacotron2.hparams.create_hparams()).cuda()
  50. model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
  51. _ = model.cuda().eval()
  52. return model
  53. def get_D(alignment):
  54. D = np.array([0 for _ in range(np.shape(alignment)[1])])
  55. for i in range(np.shape(alignment)[0]):
  56. max_index = alignment[i].tolist().index(alignment[i].max())
  57. D[max_index] = D[max_index] + 1
  58. return D
  59. def pad_1D(inputs, PAD=0):
  60. def pad_data(x, length, PAD):
  61. x_padded = np.pad(x, (0, length - x.shape[0]),
  62. mode='constant',
  63. constant_values=PAD)
  64. return x_padded
  65. max_len = max((len(x) for x in inputs))
  66. padded = np.stack([pad_data(x, max_len, PAD) for x in inputs])
  67. return padded
  68. def pad_2D(inputs, maxlen=None):
  69. def pad(x, max_len):
  70. PAD = 0
  71. if np.shape(x)[0] > max_len:
  72. raise ValueError("not max_len")
  73. s = np.shape(x)[1]
  74. x_padded = np.pad(x, (0, max_len - np.shape(x)[0]),
  75. mode='constant',
  76. constant_values=PAD)
  77. return x_padded[:, :s]
  78. if maxlen:
  79. output = np.stack([pad(x, maxlen) for x in inputs])
  80. else:
  81. max_len = max(np.shape(x)[0] for x in inputs)
  82. output = np.stack([pad(x, max_len) for x in inputs])
  83. return output
  84. def pad(input_ele, mel_max_length=None):
  85. if mel_max_length:
  86. out_list = list()
  87. max_len = mel_max_length
  88. for i, batch in enumerate(input_ele):
  89. one_batch_padded = F.pad(
  90. batch, (0, 0, 0, max_len-batch.size(0)), "constant", 0.0)
  91. out_list.append(one_batch_padded)
  92. out_padded = torch.stack(out_list)
  93. return out_padded
  94. else:
  95. out_list = list()
  96. max_len = max([input_ele[i].size(0)for i in range(len(input_ele))])
  97. for i, batch in enumerate(input_ele):
  98. one_batch_padded = F.pad(
  99. batch, (0, 0, 0, max_len-batch.size(0)), "constant", 0.0)
  100. out_list.append(one_batch_padded)
  101. out_padded = torch.stack(out_list)
  102. return out_padded
  103. def load_data(txt, mel, model):
  104. character = text.text_to_sequence(txt, hparams.text_cleaners)
  105. character = torch.from_numpy(np.stack([np.array(character)])).long().cuda()
  106. text_length = torch.Tensor([character.size(1)]).long().cuda()
  107. mel = torch.from_numpy(np.stack([mel.T])).float().cuda()
  108. max_len = mel.size(2)
  109. output_length = torch.Tensor([max_len]).long().cuda()
  110. inputs = character, text_length, mel, max_len, output_length
  111. with torch.no_grad():
  112. [_, mel_tacotron2, _, alignment], cemb = model.forward(inputs)
  113. alignment = alignment[0].cpu().numpy()
  114. cemb = cemb[0].cpu().numpy()
  115. D = get_D(alignment)
  116. D = np.array(D)
  117. mel_tacotron2 = mel_tacotron2[0].cpu().numpy()
  118. return mel_tacotron2, cemb, D
  119. def load_data_from_tacotron2(txt, model):
  120. character = text.text_to_sequence(txt, hparams.text_cleaners)
  121. character = torch.from_numpy(np.stack([np.array(character)])).long().cuda()
  122. with torch.no_grad():
  123. [_, mel, _, alignment], cemb = model.inference(character)
  124. alignment = alignment[0].cpu().numpy()
  125. cemb = cemb[0].cpu().numpy()
  126. D = get_D(alignment)
  127. D = np.array(D)
  128. mel = mel[0].cpu().numpy()
  129. return mel, cemb, D