"""Preprocess LJSpeech and save per-utterance alignment arrays from Tacotron2."""

import os
import shutil

import numpy as np
import torch

from utils import get_Tacotron2, get_WaveGlow, load_data, process_text
from data import ljspeech
import hparams as hp
import waveglow
import audio as Audio

# NOTE(review): torch, get_WaveGlow, waveglow and Audio are not referenced in
# this file; they are kept in case other code relies on their import-time
# effects -- confirm before removing.


def preprocess_ljspeech(filename):
    """Build the ground-truth features for the dataset rooted at *filename*.

    Runs ``ljspeech.build_from_path`` with ``hp.mel_ground_truth`` as the
    output directory, writes the returned metadata to ``train.txt`` in that
    directory, and then moves that file to ``data/train.txt``.

    Args:
        filename: Path of the dataset directory handed to
            ``ljspeech.build_from_path``.
    """
    in_dir = filename
    out_dir = hp.mel_ground_truth

    # exist_ok=True already tolerates an existing directory, so no separate
    # existence check is needed.
    os.makedirs(out_dir, exist_ok=True)

    metadata = ljspeech.build_from_path(in_dir, out_dir)
    write_metadata(metadata, out_dir)

    shutil.move(os.path.join(hp.mel_ground_truth, "train.txt"),
                os.path.join("data", "train.txt"))


def write_metadata(metadata, out_dir):
    """Write each metadata entry on its own line to ``<out_dir>/train.txt``.

    Args:
        metadata: Iterable of strings; each is written followed by a newline.
        out_dir: Directory in which ``train.txt`` is created (overwritten if
            it already exists).
    """
    with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
        f.writelines(m + '\n' for m in metadata)


def main():
    """Preprocess the dataset, then compute and save one alignment per text.

    The alignment step is skipped entirely when ``hp.alignment_path`` already
    exists. For every entry of ``data/train.txt`` the matching ground-truth
    array is loaded from ``hp.mel_ground_truth`` and passed to ``load_data``
    together with the Tacotron2 model; the third returned value is saved as
    ``<index>.npy`` inside ``hp.alignment_path``.
    """
    path = os.path.join("data", "LJSpeech-1.1")
    preprocess_ljspeech(path)

    text_path = os.path.join("data", "train.txt")
    texts = process_text(text_path)

    # An existing alignment directory is treated as "already done".
    # NOTE(review): a directory left behind by an interrupted run also
    # triggers this early exit -- delete it manually to regenerate.
    if os.path.exists(hp.alignment_path):
        return
    # makedirs (rather than mkdir) also creates missing parent directories.
    os.makedirs(hp.alignment_path)

    tacotron2 = get_Tacotron2()

    # Index of the first text entry to process.
    num = 0
    for ind, text in enumerate(texts[num:]):
        print(ind)

        # Drop the last character of the entry
        # (presumably a trailing newline -- TODO confirm against process_text).
        character = text[:-1]

        # Ground-truth files are numbered from 1, alignment files from 0.
        mel_gt_name = os.path.join(
            hp.mel_ground_truth, "ljspeech-mel-%05d.npy" % (ind + num + 1))
        mel_gt_target = np.load(mel_gt_name)

        _, _, D = load_data(character, mel_gt_target, tacotron2)

        np.save(os.path.join(hp.alignment_path, str(ind + num) + ".npy"),
                D, allow_pickle=False)


if __name__ == "__main__":
    main()