Browse Source

DDP Multi-GPU --resume bug fix (#1810)

5.0
Glenn Jocher GitHub 3 years ago
parent
commit
7180b22e2d
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 2 additions and 1 deletions
  1. +2
    -1
      train.py

+ 2
- 1
train.py View File

@@ -472,9 +472,10 @@ if __name__ == '__main__':
if opt.resume: # resume an interrupted run
ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
apriori = opt.global_rank, opt.local_rank
with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace
opt.cfg, opt.weights, opt.resume = '', ckpt, True
opt.cfg, opt.weights, opt.resume, opt.global_rank, opt.local_rank = '', ckpt, True, *apriori # reinstate
logger.info('Resuming training from %s' % ckpt)
else:
# opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')

Loading…
Cancel
Save