@@ -34,6 +34,12 @@ from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_di
 
 logger = logging.getLogger(__name__)
 
+try:
+    import wandb
+except ImportError:
+    wandb = None
+    logger.info("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)")
+
 
 def train(hyp, opt, device, tb_writer=None, wandb=None):
     logger.info(f'Hyperparameters {hyp}')
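This hunk hoists the W&B import to module scope behind a try/except guard, so `wandb` is simply None when the package is absent and every call site can branch on it. A minimal, self-contained sketch of the pattern (the `log_metrics` helper is illustrative, not part of the diff):

```python
import logging

logger = logging.getLogger(__name__)

try:
    import wandb  # optional dependency
except ImportError:
    wandb = None  # sentinel: W&B logging is disabled
    logger.info("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)")


def log_metrics(metrics):
    # Illustrative call site: guard on the sentinel so the code behaves
    # identically whether or not wandb is installed.
    if wandb:
        wandb.log(metrics)
```

Inside `train()` the `wandb=None` parameter shadows this module-level name; `__main__` now passes the module-level `wandb` through explicitly (see the last hunks below).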
@@ -54,6 +60,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         yaml.dump(vars(opt), f, sort_keys=False)
 
     # Configure
+    plots = not opt.evolve  # create plots
     cuda = device.type != 'cpu'
     init_seeds(2 + rank)
     with open(opt.data) as f:
@@ -122,6 +129,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
 
     # Logging
     if wandb and wandb.run is None:
+        opt.hyp = hyp  # add hyperparameters
         wandb_run = wandb.init(config=opt, resume="allow",
                                project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
                                name=save_dir.stem,
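The added `opt.hyp = hyp` attaches the hyperparameter dict to `opt` before it is passed as `config=opt`, so hyperparameters are recorded in the W&B run config alongside the CLI options. A hedged sketch of what the call records (the argument names are real `wandb.init` parameters; the values here are placeholders):

```python
import argparse

try:
    import wandb
except ImportError:
    wandb = None

# Placeholder stand-ins for the real objects in train.py.
opt = argparse.Namespace(project='runs/train', epochs=300, batch_size=16)
opt.hyp = {'lr0': 0.01, 'momentum': 0.937}  # the added line: hyp rides along inside opt

if wandb and wandb.run is None:  # initialize at most once per process
    wandb.init(config=opt,       # vars(opt), now including opt.hyp, becomes the run config
               resume="allow",   # reattach to an existing run id instead of raising
               project='YOLOv5',
               name='exp')
```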
@@ -164,7 +172,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
         logger.info('Using SyncBatchNorm()')
 
-    # Exponential moving average
+    # EMA
     ema = ModelEMA(model) if rank in [-1, 0] else None
 
     # DDP mode
@@ -191,10 +199,12 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         c = torch.tensor(labels[:, 0])  # classes
         # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
         # model._initialize_biases(cf.to(device))
-        plot_labels(labels, save_dir=save_dir)
-        if tb_writer:
-            # tb_writer.add_hparams(hyp, {})  # causes duplicate https://github.com/ultralytics/yolov5/pull/384
-            tb_writer.add_histogram('classes', c, 0)
+        if plots:
+            plot_labels(labels, save_dir=save_dir)
+            if tb_writer:
+                tb_writer.add_histogram('classes', c, 0)
+            if wandb:
+                wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.png')]})
 
         # Anchors
         if not opt.noautoanchor:
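Label plotting is now gated on the `plots` flag introduced above (`plots = not opt.evolve`), so evolution runs skip the plotting work entirely, and the obsolete `add_hparams` comment is dropped. When enabled, the class statistics go to TensorBoard as a histogram and to W&B as the saved figure files. A minimal sketch, assuming `plot_labels()` has already written `*labels*.png` into `save_dir` and that `wandb.init()` ran earlier:

```python
from pathlib import Path

import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter

try:
    import wandb
except ImportError:
    wandb = None

save_dir = Path('runs/train/exp')
labels = np.zeros((100, 5), dtype=np.float32)  # placeholder: first column holds class ids
plots, tb_writer = True, SummaryWriter(str(save_dir))

if plots:
    c = torch.tensor(labels[:, 0])  # classes
    if tb_writer:
        tb_writer.add_histogram('classes', c, 0)  # one histogram at global step 0
    if wandb and wandb.run:  # assumes wandb.init() was called, as in the Logging hunk
        wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.png')]})
```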
@@ -298,14 +308,17 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
                 pbar.set_description(s)
 
                 # Plot
-                if ni < 3:
-                    f = str(save_dir / f'train_batch{ni}.jpg')  # filename
-                    result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
-                    # if tb_writer and result is not None:
-                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
-                    #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
+                if plots and ni < 3:
+                    f = save_dir / f'train_batch{ni}.jpg'  # filename
+                    plot_images(images=imgs, targets=targets, paths=paths, fname=f)
+                    # if tb_writer:
+                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
+                    #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
+                elif plots and ni == 3 and wandb:
+                    wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]})
+
             # end batch ------------------------------------------------------------------------------------------------
         # end epoch ----------------------------------------------------------------------------------------------------
 
         # Scheduler
         lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
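`ni` here is the integrated batch count since the start of training (train.py computes `ni = i + nb * epoch`), so the first three batches are written to disk as mosaics and, on the fourth, the accumulated `train*.jpg` files are uploaded to W&B exactly once. A toy trace of that schedule (`nb` is an illustrative value):

```python
nb = 8  # batches per epoch (illustrative value)

for epoch in range(2):
    for i in range(nb):
        ni = i + nb * epoch  # integrated batch count, as in train.py
        if ni < 3:
            print(f'write train_batch{ni}.jpg')  # one mosaic per early batch
        elif ni == 3:
            print('upload accumulated train*.jpg mosaics to W&B (runs exactly once)')
```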
@@ -325,7 +338,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
                                                  single_cls=opt.single_cls,
                                                  dataloader=testloader,
                                                  save_dir=save_dir,
-                                                 plots=epoch == 0 or final_epoch,  # plot first and last
+                                                 plots=plots and final_epoch,
                                                  log_imgs=opt.log_imgs if wandb else 0)
 
         # Write
@@ -380,11 +393,16 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
                     strip_optimizer(f2)  # strip optimizer
                     os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None  # upload
         # Finish
-        if not opt.evolve:
+        if plots:
             plot_results(save_dir=save_dir)  # save as results.png
+            if wandb:
+                wandb.log({"Results": [wandb.Image(str(save_dir / x), caption=x) for x in
+                                       ['results.png', 'precision-recall_curve.png']]})
         logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
-    else:
-        dist.destroy_process_group()
 
+    dist.destroy_process_group() if rank not in [-1, 0] else None
+    wandb.run.finish() if wandb and wandb.run else None
     torch.cuda.empty_cache()
     return results
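The new teardown one-liners close out every run: non-primary DDP ranks destroy the process group (replacing the old `else:` branch), the W&B run is finished explicitly, and the CUDA cache is released. The same logic written out as statements, with an `is_initialized()` guard that is this sketch's assumption rather than part of the diff:

```python
import torch
import torch.distributed as dist

try:
    import wandb
except ImportError:
    wandb = None


def teardown(rank=-1):
    # Mirror of the one-liners above, spelled out as statements.
    if rank not in [-1, 0] and dist.is_initialized():  # extra guard; the diff checks only the rank
        dist.destroy_process_group()                   # non-primary DDP ranks leave the group
    if wandb and wandb.run:
        wandb.run.finish()                             # flush buffers and mark the run finished
    torch.cuda.empty_cache()                           # release cached GPU memory
```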
@@ -413,7 +431,7 @@ if __name__ == '__main__':
     parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
     parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
     parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
-    parser.add_argument('--log-imgs', type=int, default=10, help='number of images for W&B logging, max 100')
+    parser.add_argument('--log-imgs', type=int, default=16, help='number of images for W&B logging, max 100')
     parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
     parser.add_argument('--project', default='runs/train', help='save to project/name')
     parser.add_argument('--name', default='exp', help='save to project/name')
@@ -442,7 +460,7 @@ if __name__ == '__main__':
     assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
     opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
     opt.name = 'evolve' if opt.evolve else opt.name
-    opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run
+    opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve)  # increment run
 
     # DDP mode
     device = select_device(opt.device, batch_size=opt.batch_size)
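`exist_ok=opt.exist_ok | opt.evolve` stops the run directory from incrementing during hyperparameter evolution: together with the `opt.name = 'evolve'` line above, all mutations write into a single `runs/train/evolve` directory. A simplified sketch of the increment semantics (the real `increment_path` lives in utils.general; this is an illustration, not that implementation):

```python
from pathlib import Path


def increment_path_sketch(path, exist_ok=False):
    # Return `path` unchanged when it is free or reuse is allowed; otherwise
    # append the next free numeric suffix: runs/train/exp -> exp2 -> exp3 ...
    path = Path(path)
    if exist_ok or not path.exists():
        return path
    n = 2
    while Path(f'{path}{n}').exists():
        n += 1
    return Path(f'{path}{n}')
```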
@@ -465,20 +483,10 @@ if __name__ == '__main__':
     # Train
     logger.info(opt)
     if not opt.evolve:
-        tb_writer, wandb = None, None  # init loggers
+        tb_writer = None  # init loggers
         if opt.global_rank in [-1, 0]:
-            # Tensorboard
             logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/')
-            tb_writer = SummaryWriter(opt.save_dir)  # runs/train/exp
-
-            # W&B
-            try:
-                import wandb
-
-                assert os.environ.get('WANDB_DISABLED') != 'true'
-            except (ImportError, AssertionError):
-                logger.info("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)")
+            tb_writer = SummaryWriter(opt.save_dir)  # Tensorboard
 
         train(hyp, opt, device, tb_writer, wandb)
 
     # Evolve hyperparameters (optional)
@@ -553,7 +561,7 @@ if __name__ == '__main__':
                 hyp[k] = round(hyp[k], 5)  # significant digits
 
             # Train mutation
-            results = train(hyp.copy(), opt, device)
+            results = train(hyp.copy(), opt, device, wandb=wandb)
 
             # Write mutation results
             print_mutation(hyp.copy(), results, yaml_file, opt.bucket)