Browse Source

Changes for SqueezeWave and non-CUDA (CPU-only) inference

master
alokprasad 4 years ago
parent
commit
ecfe196598
1 changed files with 23 additions and 4 deletions
  1. +23
    -4
      FastSpeech/synthesis.py

+ 23
- 4
FastSpeech/synthesis.py View File

@@ -13,6 +13,9 @@ import utils
 import audio as Audio
 import glow
 import waveglow
+import time
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -21,7 +24,7 @@ def get_FastSpeech(num):
     checkpoint_path = "checkpoint_" + str(num) + ".pth.tar"
     model = nn.DataParallel(FastSpeech()).to(device)
     model.load_state_dict(torch.load(os.path.join(
-        hp.checkpoint_path, checkpoint_path))['model'])
+        hp.checkpoint_path, checkpoint_path), map_location=device)['model'])
     model.eval()
     return model
@@ -35,11 +38,15 @@ def synthesis(model, text, alpha=1.0):
     src_pos = np.stack([src_pos])
     with torch.no_grad():
         sequence = torch.autograd.Variable(
-            torch.from_numpy(text)).cuda().long()
+            torch.from_numpy(text)).long()
         src_pos = torch.autograd.Variable(
-            torch.from_numpy(src_pos)).cuda().long()
+            torch.from_numpy(src_pos)).long()
         mel, mel_postnet = model.module.forward(sequence, src_pos, alpha=alpha)
+        # script for generating a TorchScript module
+        #traced_script_module = torch.jit.trace(model,(sequence,src_pos))
+        #traced_script_module.save("traced_fastspeech_model.pt")
         return mel[0].cpu().transpose(0, 1), \
             mel_postnet[0].cpu().transpose(0, 1), \
@@ -54,11 +61,15 @@ if __name__ == "__main__":
     model = get_FastSpeech(num)
     words = "Let’s go out to the airport. The plane landed ten minutes ago."
+    start = time.time()
     mel, mel_postnet, mel_torch, mel_postnet_torch = synthesis(
         model, words, alpha=alpha)
     if not os.path.exists("results"):
         os.mkdir("results")
+    # do not use any vocoder here; the generated mel file will be passed to the SqueezeWave vocoder.
+    """
     Audio.tools.inv_mel_spec(mel_postnet, os.path.join(
         "results", words + "_" + str(num) + "_griffin_lim.wav"))
@@ -70,5 +81,13 @@ if __name__ == "__main__":
     mel_tac2, _, _ = utils.load_data_from_tacotron2(words, tacotron2)
     waveglow.inference.inference(torch.stack([torch.from_numpy(
         mel_tac2).cuda()]), wave_glow, os.path.join("results", "tacotron2.wav"))
     utils.plot_data([mel.numpy(), mel_postnet.numpy(), mel_tac2])
+    """
+    #melspec = torch.squeeze(mel_postnet_torch, 0)
+    torch.save(mel_postnet_torch, "../SqueezeWave/mel_spectrograms/test.pt")
+    end = time.time()
+    print("MEL Calculation:")
+    print(end-start)

Loading…
Cancel
Save