import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib
matplotlib.use("Agg")  # non-interactive backend so figures can be saved without a display
import matplotlib.pyplot as plt
import os

import tacotron2 as Tacotron2  # local Tacotron 2 package; must expose the model and hparams submodules
import text
import hparams


def process_text(train_text_path):
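    """Read a UTF-8 text file and return its lines as a list of strings."""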
    with open(train_text_path, "r", encoding="utf-8") as f:
        txt = []
        for line in f.readlines():
            txt.append(line)
        return txt


def get_param_num(model):
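    """Return the total number of parameters in a model."""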
    num_param = sum(param.numel() for param in model.parameters())
    return num_param


def plot_data(data, figsize=(12, 4)):
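    """Plot each array in `data` side by side and save the figure to img/model_test.jpg."""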
    _, axes = plt.subplots(1, len(data), figsize=figsize)
    for i in range(len(data)):
        # origin='lower' puts row 0 at the bottom; 'bottom' is not accepted by current matplotlib
        axes[i].imshow(data[i], aspect='auto',
                       origin='lower', interpolation='none')

    if not os.path.exists("img"):
        os.mkdir("img")
    plt.savefig(os.path.join("img", "model_test.jpg"))


def get_mask_from_lengths(lengths, max_len=None):
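    """Build a (batch, max_len) mask that is 1 for valid positions and 0 for padding."""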
    if max_len is None:
        max_len = torch.max(lengths).item()

    # build the position ids directly on the GPU via the out tensor
    ids = torch.arange(0, max_len, out=torch.cuda.LongTensor(max_len))
    mask = (ids < lengths.unsqueeze(1)).byte()

    return mask


def get_WaveGlow():
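    """Load the pretrained WaveGlow vocoder, strip weight norm, and return it in eval mode on GPU."""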
    waveglow_path = os.path.join("waveglow", "pretrained_model")
    waveglow_path = os.path.join(waveglow_path, "waveglow_256channels.pt")
    wave_glow = torch.load(waveglow_path)['model']
    wave_glow = wave_glow.remove_weightnorm(wave_glow)
    wave_glow.cuda().eval()
    # checkpoints saved with older PyTorch lack the padding_mode attribute newer Conv layers expect
    for m in wave_glow.modules():
        if 'Conv' in str(type(m)):
            setattr(m, 'padding_mode', 'zeros')

    return wave_glow


def get_Tacotron2():
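    """Load the pretrained Tacotron 2 checkpoint and return the model in eval mode on GPU."""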
    checkpoint_path = "tacotron2_statedict.pt"
    checkpoint_path = os.path.join(os.path.join(
        "Tacotron2", "pretrained_model"), checkpoint_path)

    model = Tacotron2.model.Tacotron2(
        Tacotron2.hparams.create_hparams()).cuda()
    model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
    _ = model.cuda().eval()

    return model


def get_D(alignment):
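    """Derive per-character durations from a Tacotron 2 alignment: for each encoder step,
    count how many decoder frames attend to it most strongly."""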
    D = np.array([0 for _ in range(np.shape(alignment)[1])])

    for i in range(np.shape(alignment)[0]):
        # assign each decoder frame to the encoder step it attends to most strongly
        max_index = alignment[i].tolist().index(alignment[i].max())
        D[max_index] = D[max_index] + 1

    return D


def pad_1D(inputs, PAD=0):
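    """Pad each 1-D array in `inputs` with PAD up to the longest length and stack them."""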
    def pad_data(x, length, PAD):
        x_padded = np.pad(x, (0, length - x.shape[0]),
                          mode='constant',
                          constant_values=PAD)
        return x_padded

    max_len = max((len(x) for x in inputs))
    padded = np.stack([pad_data(x, max_len, PAD) for x in inputs])

    return padded


def pad_2D(inputs, maxlen=None):
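    """Pad each 2-D array along axis 0 to `maxlen` (or to the longest input) and stack them."""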
    def pad(x, max_len):
        PAD = 0
        if np.shape(x)[0] > max_len:
            raise ValueError("input is longer than max_len")

        # np.pad with a scalar (before, after) pads every axis, so the second
        # axis is restored to its original size s by the slice below
        s = np.shape(x)[1]
        x_padded = np.pad(x, (0, max_len - np.shape(x)[0]),
                          mode='constant',
                          constant_values=PAD)
        return x_padded[:, :s]

    if maxlen:
        output = np.stack([pad(x, maxlen) for x in inputs])
    else:
        max_len = max(np.shape(x)[0] for x in inputs)
        output = np.stack([pad(x, max_len) for x in inputs])

    return output


def pad(input_ele, mel_max_length=None):
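    """Pad a list of 2-D tensors along the first (time) dimension to mel_max_length
    (or to the longest tensor) and stack them into one batch tensor."""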
    if mel_max_length:
        out_list = list()
        max_len = mel_max_length
        for i, batch in enumerate(input_ele):
            one_batch_padded = F.pad(
                batch, (0, 0, 0, max_len-batch.size(0)), "constant", 0.0)
            out_list.append(one_batch_padded)
        out_padded = torch.stack(out_list)
        return out_padded
    else:
        out_list = list()
        max_len = max([input_ele[i].size(0) for i in range(len(input_ele))])

        for i, batch in enumerate(input_ele):
            one_batch_padded = F.pad(
                batch, (0, 0, 0, max_len-batch.size(0)), "constant", 0.0)
            out_list.append(one_batch_padded)
        out_padded = torch.stack(out_list)
        return out_padded


def load_data(txt, mel, model):
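    """Run Tacotron 2 with teacher forcing on one (text, mel) pair and return the predicted
    mel, the character embedding output (cemb), and per-character durations D."""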
    character = text.text_to_sequence(txt, hparams.text_cleaners)
    # wrap the single utterance as a batch of size 1
    character = torch.from_numpy(np.stack([np.array(character)])).long().cuda()

    text_length = torch.Tensor([character.size(1)]).long().cuda()
    mel = torch.from_numpy(np.stack([mel.T])).float().cuda()
    max_len = mel.size(2)
    output_length = torch.Tensor([max_len]).long().cuda()

    inputs = character, text_length, mel, max_len, output_length

    with torch.no_grad():
        [_, mel_tacotron2, _, alignment], cemb = model.forward(inputs)

    alignment = alignment[0].cpu().numpy()
    cemb = cemb[0].cpu().numpy()

    D = get_D(alignment)
    D = np.array(D)

    mel_tacotron2 = mel_tacotron2[0].cpu().numpy()

    return mel_tacotron2, cemb, D


def load_data_from_tacotron2(txt, model):
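    """Run Tacotron 2 inference on raw text and return the predicted mel, the character
    embedding output (cemb), and per-character durations D from the alignment."""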
    character = text.text_to_sequence(txt, hparams.text_cleaners)
    character = torch.from_numpy(np.stack([np.array(character)])).long().cuda()

    with torch.no_grad():
        [_, mel, _, alignment], cemb = model.inference(character)

    alignment = alignment[0].cpu().numpy()
    cemb = cemb[0].cpu().numpy()

    D = get_D(alignment)
    D = np.array(D)

    mel = mel[0].cpu().numpy()

    return mel, cemb, D
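

if __name__ == "__main__":
    # Minimal smoke-test sketch (an assumption, not part of the original module): it requires
    # a CUDA device and the pretrained Tacotron 2 checkpoint expected by get_Tacotron2();
    # the sample sentence is illustrative only.
    tacotron2 = get_Tacotron2()
    mel, cemb, D = load_data_from_tacotron2("Hello world.", tacotron2)
    print(mel.shape, cemb.shape, D.sum())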