dan
/
tacotron2


								{

								 "cells": [

								  {

								   "cell_type": "markdown",

								   "metadata": {},

								   "source": [

								    "## Tacotron 2 inference code \n",

								    "Edit the variables **checkpoint_path**, **waveglow_path** and **text** to match yours, then run the entire notebook to plot the mel outputs and alignments and to synthesize audio from the generated mel-spectrogram using WaveGlow."

								   ]

								  },

								  {

								   "cell_type": "markdown",

								   "metadata": {},

								   "source": [

								    "#### Import libraries and set up matplotlib"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 1,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stderr",

								     "output_type": "stream",

								     "text": [

								      "/home/dcg-adlr-rafaelvalle-source.cosmos597/repos/nvidia/tacotron2/plotting_utils.py:2: UserWarning: matplotlib.pyplot as already been imported, this call will have no effect.\n",

								      "  matplotlib.use(\"Agg\")\n"

								     ]

								    }

								   ],

								   "source": [

								    "import matplotlib\n",

								    "matplotlib.use(\"Agg\")\n",

								    "import matplotlib.pylab as plt\n",

								    "%matplotlib inline\n",

								    "import IPython.display as ipd\n",

								    "\n",

								    "import sys\n",

								    "sys.path.append('waveglow/')\n",

								    "import numpy as np\n",

								    "import torch\n",

								    "\n",

								    "from hparams import create_hparams\n",

								    "from model import Tacotron2\n",

								    "from layers import TacotronSTFT\n",

								    "from audio_processing import griffin_lim\n",

								    "from train import load_model\n",

								    "from text import text_to_sequence\n"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 2,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "def plot_data(data, figsize=(16, 4)):\n",

								    "    \"\"\"Draw every array in `data` as an image, side by side in one row.\n",

								    "\n",

								    "    data: sequence of arrays that `imshow` can render.\n",

								    "    figsize: figure size forwarded to `plt.subplots`.\n",

								    "    \"\"\"\n",

								    "    # squeeze=False keeps `axes` two-dimensional, so a single-element\n",

								    "    # `data` can be indexed the same way as a longer one.\n",

								    "    fig, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)\n",

								    "    for ax, image in zip(axes[0], data):\n",

								    "        # 'lower' is the documented origin value; 'bottom' is rejected by\n",

								    "        # current matplotlib releases.\n",

								    "        ax.imshow(image, aspect='auto', origin='lower',\n",

								    "                  interpolation='none')"

								   ]

								  },

								  {

								   "cell_type": "markdown",

								   "metadata": {},

								   "source": [

								    "#### Set up hparams"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "hparams = create_hparams(\"distributed_run=False,mask_padding=False\")\n",

								    "\n",

								    "# Audio settings assigned on the hparams object after it is created.\n",

								    "for name, value in (('sampling_rate', 22050),\n",

								    "                    ('filter_length', 1024),\n",

								    "                    ('hop_length', 256),\n",

								    "                    ('win_length', 1024)):\n",

								    "    setattr(hparams, name, value)"

								   ]

								  },

								  {

								   "cell_type": "markdown",

								   "metadata": {},

								   "source": [

								    "#### Load model from checkpoint"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "checkpoint_path = \"tacotron2_statedict\"\n",

								    "\n",

								    "model = load_model(hparams)\n",

								    "# If load_model returned a wrapper exposing the network as `.module`,\n",

								    "# unwrap it; otherwise keep the model as is. Unlike a bare `except`, this\n",

								    "# does not hide unrelated errors.\n",

								    "model = getattr(model, 'module', model)\n",

								    "\n",

								    "# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.\n",

								    "state_dict = torch.load(checkpoint_path)['state_dict']\n",

								    "# Drop the 'module.' text from parameter names so they match the unwrapped model.\n",

								    "model.load_state_dict({key.replace('module.', ''): value\n",

								    "                       for key, value in state_dict.items()})\n",

								    "_ = model.eval()"

								   ]

								  },

								  {

								   "cell_type": "markdown",

								   "metadata": {},

								   "source": [

								    "#### Load WaveGlow for mel2audio synthesis"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "waveglow_path = 'waveglow_old.pt'\n",

								    "# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.\n",

								    "waveglow = torch.load(waveglow_path)['model']\n",

								    "# Put the vocoder on the GPU, where the text input is placed, and switch it\n",

								    "# to inference mode.\n",

								    "waveglow = waveglow.cuda().eval()"

								   ]

								  },

								  {

								   "cell_type": "markdown",

								   "metadata": {},

								   "source": [

								    "#### Prepare text input"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "text = \"Waveglow is really awesome!\"\n",

								    "# Encode the text and add a leading axis of size 1 with [None, :].\n",

								    "token_ids = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]\n",

								    "# torch.autograd.Variable is a deprecated no-op wrapper, so the tensor is\n",

								    "# used directly.\n",

								    "sequence = torch.from_numpy(token_ids).cuda().long()"

								   ]

								  },

								  {

								   "cell_type": "markdown",

								   "metadata": {},

								   "source": [

								    "#### Decode text input and plot results"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "scrolled": true

								   },

								   "outputs": [],

								   "source": [

								    "# Inference only: disable autograd tracking while decoding.\n",

								    "with torch.no_grad():\n",

								    "    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)\n",

								    "\n",

								    "# Plot the first item of each output; the alignment is transposed for display.\n",

								    "plot_data((mel_outputs.cpu().numpy()[0],\n",

								    "           mel_outputs_postnet.cpu().numpy()[0],\n",

								    "           alignments.cpu().numpy()[0].T))"

								   ]

								  },

								  {

								   "cell_type": "markdown",

								   "metadata": {},

								   "source": [

								    "#### Synthesize audio from spectrogram using WaveGlow"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "# Run the vocoder on the post-net mel output without autograd tracking.\n",

								    "with torch.no_grad():\n",

								    "    audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)\n",

								    "\n",

								    "# Take the first waveform as a NumPy array and render an audio player.\n",

								    "waveform = audio[0].data.cpu().numpy()\n",

								    "ipd.Audio(waveform, rate=hparams.sampling_rate)"

								   ]

								  }

								 ],

								 "metadata": {

								  "kernelspec": {

								   "display_name": "Python 3",

								   "language": "python",

								   "name": "python3"

								  },

								  "language_info": {

								   "codemirror_mode": {

								    "name": "ipython",

								    "version": 3

								   },

								   "file_extension": ".py",

								   "mimetype": "text/x-python",

								   "name": "python",

								   "nbconvert_exporter": "python",

								   "pygments_lexer": "ipython3",

								   "version": "3.6.6"

								  }

								 },

								 "nbformat": 4,

								 "nbformat_minor": 2

								}