# dan / fastspeech_squeezewave

# We retain the copyright notice by NVIDIA from the original code. However, we# we reserve our rights on the modifications based on the original code.## *****************************************************************************#  Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.##  Redistribution and use in source and binary forms, with or without#  modification, are permitted provided that the following conditions are met:#      * Redistributions of source code must retain the above copyright#        notice, this list of conditions and the following disclaimer.#      * Redistributions in binary form must reproduce the above copyright#        notice, this list of conditions and the following disclaimer in the#        documentation and/or other materials provided with the distribution.#      * Neither the name of the NVIDIA CORPORATION nor the#        names of its contributors may be used to endorse or promote products#        derived from this software without specific prior written permission.##  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE#  DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.## *****************************************************************************import argparseimport jsonimport osimport torch
#=====START: ADDED FOR DISTRIBUTED======from distributed import init_distributed, apply_gradient_allreduce, reduce_tensorfrom torch.utils.data.distributed import DistributedSampler#=====END:   ADDED FOR DISTRIBUTED======
from torch.utils.data import DataLoaderfrom glow import SqueezeWave, SqueezeWaveLossfrom mel2samp import Mel2Samp
def load_checkpoint(
    checkpoint_path, model, optimizer, n_flows, n_early_every,
    n_early_size, n_mel_channels, n_audio_channel, WN_config):
    """Restore model weights and optimizer state from a checkpoint file.

    The file is read with ``torch.load`` onto the CPU and is expected to hold
    a dict with the keys ``'iteration'``, ``'optimizer'`` and ``'model'``.
    The ``'model'`` entry is a whole model object; its ``state_dict()`` is
    copied into ``model`` with ``strict=False``.

    Args:
        checkpoint_path: Path of the checkpoint; must be an existing file.
        model: Model that receives the stored weights.
        optimizer: Optimizer whose state is restored from the checkpoint.
        n_flows, n_early_every, n_early_size, n_mel_channels,
        n_audio_channel, WN_config: Not used here; accepted so the caller can
            unpack the model configuration dict into this call.

    Returns:
        The tuple ``(model, optimizer, iteration)`` where ``iteration`` is
        the value stored in the checkpoint.

    Raises:
        AssertionError: If ``checkpoint_path`` is not an existing file.
    """
    assert os.path.isfile(checkpoint_path)

    # NOTE(review): torch.load unpickles the file, and the checkpoint stores
    # a full model object -- only load checkpoints from trusted sources.
    saved = torch.load(checkpoint_path, map_location='cpu')
    iteration = saved['iteration']
    optimizer.load_state_dict(saved['optimizer'])

    stored_weights = saved['model'].state_dict()
    model.load_state_dict(stored_weights, strict=False)

    message = "Loaded checkpoint '{}' (iteration {})" .format(
        checkpoint_path, iteration)
    print(message)
    return model, optimizer, iteration
def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    """Serialize the model, optimizer state and progress to ``filepath``.

    A fresh ``SqueezeWave`` is built from the module-level
    ``squeezewave_config``, moved to the GPU and filled with the weights of
    ``model``. That copy (the whole module object, not only its state dict)
    is what gets written, together with the iteration counter, the optimizer
    state dict and the learning rate.

    Args:
        model: Model whose current weights are saved.
        optimizer: Optimizer whose ``state_dict()`` is saved.
        learning_rate: Stored under the ``'learning_rate'`` key.
        iteration: Stored under the ``'iteration'`` key.
        filepath: Destination passed to ``torch.save``.
    """
    print("Saving model and optimizer state at iteration {} to {}".format(
          iteration, filepath))

    snapshot = SqueezeWave(**squeezewave_config).cuda()
    snapshot.load_state_dict(model.state_dict())

    payload = {
        'model': snapshot,
        'iteration': iteration,
        'optimizer': optimizer.state_dict(),
        'learning_rate': learning_rate,
    }
    torch.save(payload, filepath)
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard):
    """Train a SqueezeWave model, optionally across several GPUs.

    Reads the module-level ``squeezewave_config``, ``data_config`` and
    ``dist_config`` dicts, so those must be set before this is called.

    Args:
        num_gpus: Number of participating GPUs; values above 1 enable the
            distributed code paths.
        rank: Rank of this process. Only rank 0 creates the output
            directory, writes TensorBoard logs and saves checkpoints.
        group_name: Group name forwarded to ``init_distributed``.
        output_directory: Directory for checkpoints and the ``logs``
            TensorBoard sub-directory.
        epochs: Upper bound (exclusive) of the epoch counter.
        learning_rate: Learning rate for Adam; also stored in checkpoints.
        sigma: Forwarded to ``SqueezeWaveLoss``.
        iters_per_checkpoint: A checkpoint is written whenever the iteration
            counter is a multiple of this value.
        batch_size: Batch size of the training ``DataLoader``.
        seed: Seed for the CPU and CUDA random number generators.
        fp16_run: If true, wrap model and optimizer with apex amp (``O1``).
        checkpoint_path: Checkpoint to resume from, or ``""`` to start from
            iteration 0.
        with_tensorboard: If true, rank 0 logs the training loss through
            tensorboardX.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = SqueezeWaveLoss(sigma)
    model = SqueezeWave(**squeezewave_config).cuda()
    print(model)
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    pytorch_total_params_train = sum(
        p.numel() for p in model.parameters() if p.requires_grad)
    print("param", pytorch_total_params)
    print("param trainable", pytorch_total_params_train)

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        # Imported lazily so apex is only required for mixed-precision runs.
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer, **squeezewave_config)
        iteration += 1  # next iteration is iteration + 1

    n_audio_channel = squeezewave_config["n_audio_channel"]
    trainset = Mel2Samp(n_audio_channel, **data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset, num_workers=0, shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    # Stays None unless this process is the one that writes TensorBoard logs.
    logger = None
    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    try:
        # ================ MAIN TRAINING LOOP! ===================
        for epoch in range(epoch_offset, epochs):
            print("Epoch: {}".format(epoch))
            if train_sampler is not None:
                # Without this the DistributedSampler reuses the same
                # shuffle order in every epoch.
                train_sampler.set_epoch(epoch)

            for i, batch in enumerate(train_loader):
                model.zero_grad()

                mel, audio = batch
                mel = mel.cuda()
                audio = audio.cuda()
                outputs = model((mel, audio))

                loss = criterion(outputs)
                if num_gpus > 1:
                    reduced_loss = reduce_tensor(loss.data, num_gpus).item()
                else:
                    reduced_loss = loss.item()

                if fp16_run:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                optimizer.step()

                print("{}:\t{:.9f}\t".format(iteration, reduced_loss))
                if logger is not None:
                    logger.add_scalar('training_loss', reduced_loss,
                                      i + len(train_loader) * epoch)
                if iteration % iters_per_checkpoint == 0 and rank == 0:
                    # Separate name so the ``checkpoint_path`` argument is
                    # not overwritten by the save destination.
                    save_path = "{}/SqueezeWave_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate,
                                    iteration, save_path)

                iteration += 1
    finally:
        # Flush and release the event file even if training is interrupted.
        if logger is not None:
            logger.close()
if __name__ == "__main__":
    # Command-line entry point: parse arguments, load the JSON configuration
    # and start training on this process.
    parser = argparse.ArgumentParser()
    # The config file is opened unconditionally below, so it is mandatory.
    parser.add_argument('-c', '--config', type=str, required=True,
                        help='JSON file for configuration')
    parser.add_argument('-r', '--rank', type=int, default=0,
                        help='rank of process for distributed')
    parser.add_argument('-g', '--group_name', type=str, default='',
                        help='name of group for distributed')
    args = parser.parse_args()

    # Parse configs.  These assignments happen at module level, so
    # data_config, dist_config and squeezewave_config become the module
    # globals that train() and save_checkpoint() read.
    with open(args.config) as f:
        config = json.load(f)
    train_config = config["train_config"]
    data_config = config["data_config"]
    dist_config = config["dist_config"]
    squeezewave_config = config["squeezewave_config"]

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        if args.group_name == '':
            print("WARNING: Multiple GPUs detected but no distributed group set")
            print("Only running 1 GPU.  Use distributed.py for multiple GPUs")
            num_gpus = 1

    if num_gpus == 1 and args.rank != 0:
        raise Exception("Doing single GPU training on rank > 0")

    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    train(num_gpus, args.rank, args.group_name, **train_config)