Fork of https://github.com/alokprasad/fastspeech_squeezewave to also fix denoising in squeezewave
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

61 lines
1.6 KiB

import torch
import numpy as np
import shutil
import os
from utils import load_data, get_Tacotron2, get_WaveGlow
from utils import process_text, load_data
from data import ljspeech
import hparams as hp
import waveglow
import audio as Audio
def preprocess_ljspeech(filename):
    """Build the ground-truth data for a dataset and publish ``train.txt``.

    Runs ``ljspeech.build_from_path`` with ``filename`` as the input
    directory and ``hp.mel_ground_truth`` as the output directory, writes the
    returned metadata to ``train.txt`` via ``write_metadata``, and finally
    moves that file to ``data/train.txt``.

    Args:
        filename: Path of the dataset directory handed to
            ``ljspeech.build_from_path`` (despite the name, it is used as a
            directory path, not a single file).
    """
    in_dir = filename
    out_dir = hp.mel_ground_truth

    # ``exist_ok=True`` already tolerates an existing directory, so no
    # separate (race-prone) existence check is needed beforehand.
    os.makedirs(out_dir, exist_ok=True)

    metadata = ljspeech.build_from_path(in_dir, out_dir)
    write_metadata(metadata, out_dir)

    # The metadata file is generated next to the output data and then
    # relocated into the top-level ``data`` directory.
    shutil.move(os.path.join(out_dir, "train.txt"),
                os.path.join("data", "train.txt"))
def write_metadata(metadata, out_dir):
    """Write every metadata entry on its own line to ``<out_dir>/train.txt``.

    The file is opened in write mode with UTF-8 encoding, so any existing
    ``train.txt`` in ``out_dir`` is overwritten.

    Args:
        metadata: Iterable of strings; each one is written followed by a
            newline character.
        out_dir: Directory in which ``train.txt`` is created.
    """
    target_path = os.path.join(out_dir, 'train.txt')
    with open(target_path, mode='w', encoding='utf-8') as handle:
        handle.writelines(entry + '\n' for entry in metadata)
def main():
    """Preprocess the dataset, then save one alignment array per text entry.

    Steps:
      1. Run ``preprocess_ljspeech`` on ``data/LJSpeech-1.1``.
      2. Load the entries of ``data/train.txt`` with ``process_text``.
      3. If ``hp.alignment_path`` already exists, stop; otherwise create it.
      4. For every entry, load the matching ground-truth ``.npy`` file from
         ``hp.mel_ground_truth``, pass it with the text and the Tacotron2
         model to ``load_data``, and save the third returned value as
         ``<index>.npy`` inside ``hp.alignment_path``.
    """
    dataset_dir = os.path.join("data", "LJSpeech-1.1")
    preprocess_ljspeech(dataset_dir)

    texts = process_text(os.path.join("data", "train.txt"))

    # An existing alignment directory ends the run here; nothing below is
    # executed in that case.
    if os.path.exists(hp.alignment_path):
        return
    os.mkdir(hp.alignment_path)

    tacotron2 = get_Tacotron2()

    # Offset of the first entry to process; file numbering is based on it.
    num = 0
    for ind, text in enumerate(texts[num:]):
        print(ind)

        entry_index = ind + num
        # Drop the last character of the entry
        # (presumably a trailing newline - TODO confirm against process_text).
        character = text[:-1]

        # Ground-truth files are numbered starting at 1, hence the +1.
        mel_gt_file = "ljspeech-mel-%05d.npy" % (entry_index + 1)
        mel_gt_target = np.load(os.path.join(hp.mel_ground_truth, mel_gt_file))

        # Only the third element returned by load_data is kept.
        _, _, D = load_data(character, mel_gt_target, tacotron2)

        alignment_file = os.path.join(
            hp.alignment_path, str(entry_index) + ".npy")
        np.save(alignment_file, D, allow_pickle=False)
if __name__ == "__main__":
    # Run the pipeline only when this file is executed as a script,
    # not when it is imported as a module.
    main()