Browse Source

dos2unix

master
alokprasad 4 years ago
parent
commit
219e9e9c70
1 changed files with 74 additions and 74 deletions
  1. +74
    -74
      FastSpeech/synthesis.py

+ 74
- 74
FastSpeech/synthesis.py View File

@@ -1,74 +1,74 @@
import torch
import torch.nn as nn
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import time
import os
from fastspeech import FastSpeech
from text import text_to_sequence
import hparams as hp
import utils
import audio as Audio
import glow
import waveglow
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def get_FastSpeech(num):
    """Load a trained FastSpeech checkpoint and return it in eval mode.

    Args:
        num: Training step number identifying the checkpoint file
            ("checkpoint_<num>.pth.tar" under hp.checkpoint_path).

    Returns:
        An nn.DataParallel-wrapped FastSpeech model on `device`, in eval mode.
    """
    checkpoint_path = "checkpoint_" + str(num) + ".pth.tar"
    model = nn.DataParallel(FastSpeech()).to(device)
    # map_location lets a GPU-saved checkpoint load on CPU-only machines;
    # without it torch.load fails when CUDA is unavailable.
    model.load_state_dict(torch.load(
        os.path.join(hp.checkpoint_path, checkpoint_path),
        map_location=device)['model'])
    model.eval()
    return model
def synthesis(model, text, alpha=1.0):
    """Synthesize mel spectrograms from text with FastSpeech.

    Args:
        model: DataParallel-wrapped FastSpeech model (see get_FastSpeech).
        text: Input sentence to synthesize.
        alpha: Duration-control factor passed to the length regulator.

    Returns:
        Tuple (mel, mel_postnet, mel_torch, mel_postnet_torch): the first two
        are single-utterance CPU tensors transposed for Griffin-Lim; the last
        two are the batched tensors transposed for the WaveGlow vocoder.
    """
    sequence = np.array(text_to_sequence(text, hp.text_cleaners))
    sequence = np.stack([sequence])
    # 1-based position indices for every input token.
    src_pos = np.stack([np.arange(1, sequence.shape[1] + 1)])

    with torch.no_grad():
        # .to(device) instead of .cuda(): works on CPU-only machines and
        # matches the module-level device selection. torch.autograd.Variable
        # is deprecated; plain tensors carry autograd state since PyTorch 0.4.
        sequence = torch.from_numpy(sequence).long().to(device)
        src_pos = torch.from_numpy(src_pos).long().to(device)
        mel, mel_postnet = model.module.forward(sequence, src_pos, alpha=alpha)
        return mel[0].cpu().transpose(0, 1), \
            mel_postnet[0].cpu().transpose(0, 1), \
            mel.transpose(1, 2), \
            mel_postnet.transpose(1, 2)
if __name__ == "__main__":
# Test
num = 112000
alpha = 1.0
model = get_FastSpeech(num)
words = "Let’s go out to the airport. The plane landed ten minutes ago."
mel, mel_postnet, mel_torch, mel_postnet_torch = synthesis(
model, words, alpha=alpha)
if not os.path.exists("results"):
os.mkdir("results")
Audio.tools.inv_mel_spec(mel_postnet, os.path.join(
"results", words + "_" + str(num) + "_griffin_lim.wav"))
wave_glow = utils.get_WaveGlow()
waveglow.inference.inference(mel_postnet_torch, wave_glow, os.path.join(
"results", words + "_" + str(num) + "_waveglow.wav"))
tacotron2 = utils.get_Tacotron2()
mel_tac2, _, _ = utils.load_data_from_tacotron2(words, tacotron2)
waveglow.inference.inference(torch.stack([torch.from_numpy(
mel_tac2).cuda()]), wave_glow, os.path.join("results", "tacotron2.wav"))
utils.plot_data([mel.numpy(), mel_postnet.numpy(), mel_tac2])
import torch
import torch.nn as nn
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import time
import os
from fastspeech import FastSpeech
from text import text_to_sequence
import hparams as hp
import utils
import audio as Audio
import glow
import waveglow
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def get_FastSpeech(num):
    """Load a trained FastSpeech checkpoint and return it in eval mode.

    Args:
        num: Training step number identifying the checkpoint file
            ("checkpoint_<num>.pth.tar" under hp.checkpoint_path).

    Returns:
        An nn.DataParallel-wrapped FastSpeech model on `device`, in eval mode.
    """
    checkpoint_path = "checkpoint_" + str(num) + ".pth.tar"
    model = nn.DataParallel(FastSpeech()).to(device)
    # map_location lets a GPU-saved checkpoint load on CPU-only machines;
    # without it torch.load fails when CUDA is unavailable.
    model.load_state_dict(torch.load(
        os.path.join(hp.checkpoint_path, checkpoint_path),
        map_location=device)['model'])
    model.eval()
    return model
def synthesis(model, text, alpha=1.0):
    """Synthesize mel spectrograms from text with FastSpeech.

    Args:
        model: DataParallel-wrapped FastSpeech model (see get_FastSpeech).
        text: Input sentence to synthesize.
        alpha: Duration-control factor passed to the length regulator.

    Returns:
        Tuple (mel, mel_postnet, mel_torch, mel_postnet_torch): the first two
        are single-utterance CPU tensors transposed for Griffin-Lim; the last
        two are the batched tensors transposed for the WaveGlow vocoder.
    """
    sequence = np.array(text_to_sequence(text, hp.text_cleaners))
    sequence = np.stack([sequence])
    # 1-based position indices for every input token.
    src_pos = np.stack([np.arange(1, sequence.shape[1] + 1)])

    with torch.no_grad():
        # .to(device) instead of .cuda(): works on CPU-only machines and
        # matches the module-level device selection. torch.autograd.Variable
        # is deprecated; plain tensors carry autograd state since PyTorch 0.4.
        sequence = torch.from_numpy(sequence).long().to(device)
        src_pos = torch.from_numpy(src_pos).long().to(device)
        mel, mel_postnet = model.module.forward(sequence, src_pos, alpha=alpha)
        return mel[0].cpu().transpose(0, 1), \
            mel_postnet[0].cpu().transpose(0, 1), \
            mel.transpose(1, 2), \
            mel_postnet.transpose(1, 2)
if __name__ == "__main__":
# Test
num = 112000
alpha = 1.0
model = get_FastSpeech(num)
words = "Let’s go out to the airport. The plane landed ten minutes ago."
mel, mel_postnet, mel_torch, mel_postnet_torch = synthesis(
model, words, alpha=alpha)
if not os.path.exists("results"):
os.mkdir("results")
Audio.tools.inv_mel_spec(mel_postnet, os.path.join(
"results", words + "_" + str(num) + "_griffin_lim.wav"))
wave_glow = utils.get_WaveGlow()
waveglow.inference.inference(mel_postnet_torch, wave_glow, os.path.join(
"results", words + "_" + str(num) + "_waveglow.wav"))
tacotron2 = utils.get_Tacotron2()
mel_tac2, _, _ = utils.load_data_from_tacotron2(words, tacotron2)
waveglow.inference.inference(torch.stack([torch.from_numpy(
mel_tac2).cuda()]), wave_glow, os.path.join("results", "tacotron2.wav"))
utils.plot_data([mel.numpy(), mel_postnet.numpy(), mel_tac2])

Loading…
Cancel
Save