import os
import subprocess
import sys
import time

import torch


# Multi-GPU launcher: re-runs the command given on this script's command line
# once per visible CUDA device, appending --n_gpus, --group_name and --rank to
# its arguments. Rank 0 inherits this process's stdout; every other rank has
# its stdout written to logs/<job_id>_GPU_<rank>.log.

# Everything after this script's own name is the worker command (script path
# followed by its arguments); it is passed to the Python interpreter below.
argslist = list(sys.argv)[1:]
num_gpus = torch.cuda.device_count()
argslist.append('--n_gpus={}'.format(num_gpus))
workers = []
# Timestamp shared by all workers of this launch; used both in the group name
# and in the per-rank log file names.
job_id = time.strftime("%Y_%m_%d-%H%M%S")
argslist.append("--group_name=group_{}".format(job_id))

# Log files opened for ranks > 0, tracked so they can be closed once the
# workers have finished.
log_files = []
try:
    for i in range(num_gpus):
        # Per-worker copy: the shared argument list never carries a --rank.
        worker_args = argslist + ['--rank={}'.format(i)]
        if i == 0:
            stdout = None
        else:
            # Create the log directory on demand; open() would otherwise raise
            # FileNotFoundError when "logs/" does not exist yet.
            os.makedirs("logs", exist_ok=True)
            stdout = open("logs/{}_GPU_{}.log".format(job_id, i), "w")
            log_files.append(stdout)
        print(worker_args)
        p = subprocess.Popen([str(sys.executable)] + worker_args,
                             stdout=stdout)
        workers.append(p)

    # Block until every worker has exited, keeping each return code.
    exit_codes = [p.wait() for p in workers]
finally:
    # The children hold their own copies of the descriptors, so closing the
    # launcher's handles is safe even if a worker is still running.
    for log_file in log_files:
        log_file.close()

# Surface a worker failure through the launcher's own exit status instead of
# always exiting 0. The first non-zero return code is propagated (negative
# values mean the worker was killed by a signal).
failed = [code for code in exit_codes if code != 0]
if failed:
    sys.exit(failed[0])