import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib
# Select a non-interactive backend so figures can be saved without a display
# (plot_data below only writes to disk).
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import os

import tacotron2 as Tacotron2
import text
import hparams


def process_text(train_text_path):
    with open(train_text_path, "r", encoding="utf-8") as f:
        txt = f.readlines()

    return txt


def get_param_num(model):
    num_param = sum(param.numel() for param in model.parameters())
    return num_param


def plot_data(data, figsize=(12, 4)):
    # squeeze=False keeps `axes` 2-D even when there is only one panel.
    _, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)
    for i in range(len(data)):
        axes[0][i].imshow(data[i], aspect='auto',
                          origin='lower', interpolation='none')

    if not os.path.exists("img"):
        os.mkdir("img")
    plt.savefig(os.path.join("img", "model_test.jpg"))
    plt.close()


def get_mask_from_lengths(lengths, max_len=None):
    if max_len is None:
        max_len = torch.max(lengths).item()

    # Build the mask on the same device as `lengths`: position j of row i is
    # 1 while j < lengths[i], and 0 over the padding.
    ids = torch.arange(0, max_len, dtype=torch.long, device=lengths.device)
    mask = (ids < lengths.unsqueeze(1)).byte()

    return mask


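# A minimal usage sketch (illustrative values):
#
#   >>> get_mask_from_lengths(torch.tensor([2, 4]), max_len=4)
#   tensor([[1, 1, 0, 0],
#           [1, 1, 1, 1]], dtype=torch.uint8)

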
def get_WaveGlow():
    waveglow_path = os.path.join(
        "waveglow", "pretrained_model", "waveglow_256channels.pt")
    wave_glow = torch.load(waveglow_path)['model']
    wave_glow = wave_glow.remove_weightnorm(wave_glow)
    wave_glow.cuda().eval()
    # Checkpoints saved with older PyTorch versions predate the
    # `padding_mode` attribute on Conv modules; patch it in so the model
    # runs under newer releases.
    for m in wave_glow.modules():
        if 'Conv' in str(type(m)):
            setattr(m, 'padding_mode', 'zeros')

    return wave_glow


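# A usage sketch, assuming the NVIDIA WaveGlow model class, whose `infer`
# method takes a (1, n_mels, frames) mel tensor and a `sigma` noise scale:
#
#   waveglow = get_WaveGlow()
#   with torch.no_grad():
#       audio = waveglow.infer(mel, sigma=0.666)

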
def get_Tacotron2():
    checkpoint_path = os.path.join(
        "Tacotron2", "pretrained_model", "tacotron2_statedict.pt")

    model = Tacotron2.model.Tacotron2(
        Tacotron2.hparams.create_hparams()).cuda()
    model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
    model.eval()

    return model


def get_D(alignment):
    # Turn a (decoder_steps, encoder_steps) attention alignment into a
    # duration vector: every decoder frame votes for its most-attended
    # encoder position, so D[j] is the number of frames spent on position j.
    D = np.zeros(np.shape(alignment)[1], dtype=np.int64)

    for i in range(np.shape(alignment)[0]):
        max_index = np.argmax(alignment[i])
        D[max_index] = D[max_index] + 1

    return D


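# A small illustration (hypothetical attention weights): two frames attend
# mostly to position 0 and one frame to position 1, giving durations [2, 1]:
#
#   >>> get_D(np.array([[0.9, 0.1],
#   ...                 [0.8, 0.2],
#   ...                 [0.3, 0.7]]))
#   array([2, 1])

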
def pad_1D(inputs, PAD=0):

    def pad_data(x, length, PAD):
        x_padded = np.pad(x, (0, length - x.shape[0]),
                          mode='constant',
                          constant_values=PAD)
        return x_padded

    max_len = max(len(x) for x in inputs)
    padded = np.stack([pad_data(x, max_len, PAD) for x in inputs])

    return padded


def pad_2D(inputs, maxlen=None):

    def pad(x, max_len):
        PAD = 0
        if np.shape(x)[0] > max_len:
            raise ValueError("input is longer than max_len")

        # Pad along the time axis (axis 0) only; the feature axis keeps
        # its original size.
        x_padded = np.pad(x, ((0, max_len - np.shape(x)[0]), (0, 0)),
                          mode='constant',
                          constant_values=PAD)
        return x_padded

    if maxlen:
        output = np.stack([pad(x, maxlen) for x in inputs])
    else:
        max_len = max(np.shape(x)[0] for x in inputs)
        output = np.stack([pad(x, max_len) for x in inputs])

    return output


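# Quick sketches of the padding helpers (illustrative values):
#
#   >>> pad_1D([np.array([1, 2, 3]), np.array([4, 5])])
#   array([[1, 2, 3],
#          [4, 5, 0]])
#
#   >>> pad_2D([np.ones((2, 3)), np.ones((4, 3))]).shape
#   (2, 4, 3)

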
def pad(input_ele, mel_max_length=None):
    # Pad a list of tensors along their first (time) dimension to a common
    # length and stack them into one batch tensor.
    if mel_max_length:
        max_len = mel_max_length
    else:
        max_len = max(t.size(0) for t in input_ele)

    out_list = list()
    for batch in input_ele:
        one_batch_padded = F.pad(
            batch, (0, 0, 0, max_len - batch.size(0)), "constant", 0.0)
        out_list.append(one_batch_padded)
    out_padded = torch.stack(out_list)

    return out_padded


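# For example (illustrative shapes), stacking two mel tensors of different
# lengths into one (batch, time, n_mels) tensor:
#
#   >>> pad([torch.ones(3, 80), torch.ones(5, 80)]).shape
#   torch.Size([2, 5, 80])

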
def load_data(txt, mel, model):
    # Teacher-forced Tacotron2 pass over one (text, mel) pair, returning the
    # teacher's mel output, character embeddings, and per-character durations.
    character = text.text_to_sequence(txt, hparams.text_cleaners)
    character = torch.from_numpy(np.stack([np.array(character)])).long().cuda()

    text_length = torch.Tensor([character.size(1)]).long().cuda()
    mel = torch.from_numpy(np.stack([mel.T])).float().cuda()
    max_len = mel.size(2)
    output_length = torch.Tensor([max_len]).long().cuda()

    inputs = character, text_length, mel, max_len, output_length

    with torch.no_grad():
        [_, mel_tacotron2, _, alignment], cemb = model.forward(inputs)

    alignment = alignment[0].cpu().numpy()
    cemb = cemb[0].cpu().numpy()

    # get_D already returns an ndarray; no further conversion is needed.
    D = get_D(alignment)

    mel_tacotron2 = mel_tacotron2[0].cpu().numpy()

    return mel_tacotron2, cemb, D


def load_data_from_tacotron2(txt, model):
    # Same as load_data, but lets Tacotron2 run free-running inference
    # instead of teacher forcing.
    character = text.text_to_sequence(txt, hparams.text_cleaners)
    character = torch.from_numpy(np.stack([np.array(character)])).long().cuda()

    with torch.no_grad():
        [_, mel, _, alignment], cemb = model.inference(character)

    alignment = alignment[0].cpu().numpy()
    cemb = cemb[0].cpu().numpy()

    D = get_D(alignment)

    mel = mel[0].cpu().numpy()

    return mel, cemb, D
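

# End-to-end sketch (text is illustrative; shapes follow the code above):
#
#   tacotron2 = get_Tacotron2()
#   mel, cemb, D = load_data_from_tacotron2("Hello world.", tacotron2)
#   # mel:  predicted mel-spectrogram for the utterance
#   # cemb: encoder-side character embeddings from the teacher model
#   # D:    per-character durations; D.sum() equals the number of mel frames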