import os
import sys
import json
import time
from os.path import exists, join, basename, splitext

import numpy as np
import torch
import librosa

# The Tacotron2 and SqueezeWave repositories are expected as local clones
# next to this script; their directory names are derived from the clone URLs.
git_repo_url = 'https://github.com/NVIDIA/tacotron2.git'
project_name = splitext(basename(git_repo_url))[0]    # 'tacotron2'
git_repo_url2 = 'https://github.com/alokprasad/fastspeech_squeezewave.git'
project_name2 = splitext(basename(git_repo_url2))[0]  # 'fastspeech_squeezewave'
sys.path.append(join(project_name2, "SqueezeWave/"))
sys.path.append(project_name)

# These modules resolve inside the cloned repositories.
from hparams import create_hparams
from model import Tacotron2
from text import text_to_sequence
from denoiser import Denoiser
from glow import SqueezeWave
# Build an uppercase word -> ARPAbet pronunciation lookup from the dictionary
# file. The file is read in reverse so that entries nearer the top win.
thisdict = {}
for line in reversed(open('merged.dict_1.1.txt', "r").read().splitlines()):
    word, pron = line.split(" ", 1)
    thisdict[word] = pron.strip()
def ARPA(text):
    """Replace each word with its {ARPAbet} pronunciation where the dictionary has one."""
    out = ''
    for word_ in text.split(" "):
        word = word_
        end_chars = ''
        # Peel trailing punctuation off the word so it can be looked up,
        # then re-append it after the substitution.
        while len(word) > 1 and word[-1] in "!?,.;":
            end_chars = word[-1] + end_chars
            word = word[:-1]
        try:
            word_arpa = thisdict[word.upper()]
        except KeyError:
            word_arpa = ''
        if len(word_arpa) != 0:
            word = "{" + str(word_arpa) + "}"
        out = (out + " " + word + end_chars).strip()
    if out[-1] != ";":
        out = out + ";"  # ensure the line ends with a ';' terminator
    return out
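# Example (pronunciations here are illustrative; actual output depends on
# merged.dict_1.1.txt):
#   ARPA("Hello world.")  ->  "{HH AH0 L OW1} {W ER1 L D}.;"
# Words missing from the dictionary pass through unchanged.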
#torch.set_grad_enabled(False)

# Inference parameters
hparams = create_hparams()
hparams.sampling_rate = 22050
hparams.max_decoder_steps = 3000  # decoder steps before generation is cut off; too many and you may run out of memory
hparams.gate_threshold = 0.30     # model must be 30% sure the clip is over before ending generation

# Load the pretrained Tacotron2 checkpoint (on the CPU, as written)
tacotron2_pretrained_model = 'tacotron.pt'
model = Tacotron2(hparams)
checkpoint = torch.load(tacotron2_pretrained_model, map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['state_dict'])
_ = model.eval()
print("This Tacotron model has been trained for", checkpoint['iteration'], "iterations.")
# Load the SqueezeWave vocoder (kept in a variable named waveglow because
# SqueezeWave exposes the same interface as WaveGlow)
waveglow_pretrained_model = 'squeezewave_dict.pt'
with open(join(project_name2, 'SqueezeWave/configs/config_a128_c256.json')) as f:
    config = json.load(f)
waveglow = SqueezeWave(**config['squeezewave_config'])
waveglow.load_state_dict(torch.load(waveglow_pretrained_model, map_location=torch.device('cpu')), strict=False)
waveglow = waveglow.remove_weightnorm(waveglow)  # fold weight normalization into the weights for inference
waveglow.eval()
for k in waveglow.convinv:
    k.float()
denoiser = Denoiser(waveglow)
print("SqueezeWave model loaded")
# All right, I've been thinking. , When life gives you lemons? , Don't make lemonade. , Make life take the lemons back! , Get mad! , 'I don't want your damn lemons! What am I supposed to do with these?' , Demand to see life's manager! , Make life rue the day it thought it could give Cave Johnson lemons! , Do you know who I am? , I'm the man who's going to burn your house down! , With the lemons! , I'm going to get my engineers to invent a combustible lemon that burns your house down!
text = """
Peter Piper picked a peck of pickled peppers, A peck of pickled peppers Peter Piper picked; If Peter Piper picked a peck of pickled peppers, where’s the peck of pickled peppers Peter Piper picked?
She sells sea shells by the seashore, The shells she sells are sea shells, I’m sure. So if she sells sea shells on the seashore, Then I’m sure she sells seashore shells.
"""
sigma = 0.75             # vocoder sampling temperature (standard deviation of the latent noise)
denoise_strength = 0.01
raw_input = False        # disables automatic ARPAbet conversion; useful for inputting your own ARPAbet pronunciations or just for testing
counter = 0
for i in text.split("\n"):
    start_time = time.time()
    if len(i) < 1: continue
    print(i)
    if raw_input:
        if i[-1] != ";": i = i + ";"
    else:
        i = ARPA(i)
    print(i)
    with torch.no_grad():  # save memory by not tracking gradients
        sequence = np.array(text_to_sequence(i, ['english_cleaners']))[None, :]
        sequence = torch.from_numpy(sequence).long()  # torch.autograd.Variable is deprecated; a plain tensor suffices
        mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
        audio = waveglow.infer(mel_outputs_postnet, sigma=sigma)
        audio_denoised = denoiser(audio, strength=denoise_strength)[:, 0]
        print("Denoised")
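        # Note: librosa.output.write_wav was removed in librosa 0.8. On newer
        # installs, an equivalent write (assuming the soundfile package) would be:
        #   import soundfile as sf
        #   sf.write('Inf_%d_denoised.wav' % counter, audio_denoised.cpu().numpy().flatten(), hparams.sampling_rate)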
        # librosa.output.write_wav('Inf_' + str(counter) + '.wav', np.swapaxes(audio.cpu().numpy(), 0, 1), hparams.sampling_rate)
        librosa.output.write_wav('Inf_' + str(counter) + '_denoised.wav', np.swapaxes(audio_denoised.cpu().numpy(), 0, 1), hparams.sampling_rate)
    counter += 1
    print("--- %s seconds ---" % (time.time() - start_time))