浏览代码

W&B logging add hyperparameters (#1399)

* W&B logging add hyperparameters

* hyp bug fix and image logging updates

* if plots and wandb:

* cleanup

* wandb/ gitignore add

* cleanup 2

* cleanup 3

* move wandb import to top of file

* wandb evolve

* update import

* wandb.run.finish()

* default anchors: 3
5.0
Glenn Jocher GitHub 3 年前
父节点
当前提交
9c91aeae10
找不到此签名对应的密钥。GPG 密钥 ID: 4AEE18F83AFDEB23
共有 5 个文件被更改，包括 60 次插入和 48 次删除
  1. +2
    -0
      .gitignore
  2. +1
    -1
      data/hyp.scratch.yaml
  3. +16
    -14
      test.py
  4. +37
    -29
      train.py
  5. +4
    -4
      utils/plots.py

+ 2
- 0
.gitignore 查看文件

@@ -79,9 +79,11 @@ sdist/
var/
wheels/
*.egg-info/
wandb/
.installed.cfg
*.egg


# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.

+ 1
- 1
data/hyp.scratch.yaml 查看文件

@@ -17,7 +17,7 @@ obj: 1.0 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
# anchors: 0 # anchors per output grid (0 to ignore)
# anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)

+ 16
- 14
test.py 查看文件

@@ -75,7 +75,7 @@ def test(data,
niou = iouv.numel()

# Logging
log_imgs = min(log_imgs, 100) # ceil
log_imgs, wandb = min(log_imgs, 100), None # ceil
try:
import wandb # Weights & Biases
except ImportError:
@@ -132,6 +132,7 @@ def test(data,
continue

# Append to text file
path = Path(paths[si])
if save_txt:
gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh
x = pred.clone()
@@ -139,18 +140,18 @@ def test(data,
for *xyxy, conf, cls in x:
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
with open(str(save_dir / 'labels' / Path(paths[si]).stem) + '.txt', 'a') as f:
with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
f.write(('%g ' * len(line)).rstrip() % line + '\n')

# W&B logging
if len(wandb_images) < log_imgs:
if plots and len(wandb_images) < log_imgs:
box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
"class_id": int(cls),
"box_caption": "%s %.3f" % (names[cls], conf),
"scores": {"class_score": conf},
"domain": "pixel"} for *xyxy, conf, cls in pred.clone().tolist()]
"domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
boxes = {"predictions": {"box_data": box_data, "class_labels": names}}
wandb_images.append(wandb.Image(img[si], boxes=boxes))
wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name))

# Clip boxes to image bounds
clip_coords(pred, (height, width))
@@ -158,13 +159,13 @@ def test(data,
# Append to pycocotools JSON dictionary
if save_json:
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
image_id = Path(paths[si]).stem
image_id = int(path.stem) if path.stem.isnumeric() else path.stem
box = pred[:, :4].clone() # xyxy
scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
box = xyxy2xywh(box) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
for p, b in zip(pred.tolist(), box.tolist()):
jdict.append({'image_id': int(image_id) if image_id.isnumeric() else image_id,
jdict.append({'image_id': image_id,
'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})
@@ -203,15 +204,11 @@ def test(data,
stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

# Plot images
if plots and batch_i < 1:
if plots and batch_i < 3:
f = save_dir / f'test_batch{batch_i}_labels.jpg' # filename
plot_images(img, targets, paths, str(f), names) # labels
plot_images(img, targets, paths, f, names) # labels
f = save_dir / f'test_batch{batch_i}_pred.jpg'
plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions

# W&B logging
if wandb_images:
wandb.log({"outputs": wandb_images})
plot_images(img, output_to_target(output, width, height), paths, f, names) # predictions

# Compute statistics
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
@@ -223,6 +220,11 @@ def test(data,
else:
nt = torch.zeros(1)

# W&B logging
if plots and wandb:
wandb.log({"Images": wandb_images})
wandb.log({"Validation": [wandb.Image(str(x), caption=x.name) for x in sorted(save_dir.glob('test*.jpg'))]})

# Print results
pf = '%20s' + '%12.3g' * 6 # print format
print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

+ 37
- 29
train.py 查看文件

@@ -34,6 +34,12 @@ from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_di

logger = logging.getLogger(__name__)

try:
import wandb
except ImportError:
wandb = None
logger.info("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)")


def train(hyp, opt, device, tb_writer=None, wandb=None):
logger.info(f'Hyperparameters {hyp}')
@@ -54,6 +60,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
yaml.dump(vars(opt), f, sort_keys=False)

# Configure
plots = not opt.evolve # create plots
cuda = device.type != 'cpu'
init_seeds(2 + rank)
with open(opt.data) as f:
@@ -122,6 +129,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):

# Logging
if wandb and wandb.run is None:
opt.hyp = hyp # add hyperparameters
wandb_run = wandb.init(config=opt, resume="allow",
project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
name=save_dir.stem,
@@ -164,7 +172,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
logger.info('Using SyncBatchNorm()')

# Exponential moving average
# EMA
ema = ModelEMA(model) if rank in [-1, 0] else None

# DDP mode
@@ -191,10 +199,12 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
c = torch.tensor(labels[:, 0]) # classes
# cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
# model._initialize_biases(cf.to(device))
plot_labels(labels, save_dir=save_dir)
if tb_writer:
# tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384
tb_writer.add_histogram('classes', c, 0)
if plots:
plot_labels(labels, save_dir=save_dir)
if tb_writer:
tb_writer.add_histogram('classes', c, 0)
if wandb:
wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.png')]})

# Anchors
if not opt.noautoanchor:
@@ -298,14 +308,17 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
pbar.set_description(s)

# Plot
if ni < 3:
f = str(save_dir / f'train_batch{ni}.jpg') # filename
result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
# if tb_writer and result is not None:
# tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
# tb_writer.add_graph(model, imgs) # add model to tensorboard
if plots and ni < 3:
f = save_dir / f'train_batch{ni}.jpg' # filename
plot_images(images=imgs, targets=targets, paths=paths, fname=f)
# if tb_writer:
# tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
# tb_writer.add_graph(model, imgs) # add model to tensorboard
elif plots and ni == 3 and wandb:
wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]})

# end batch ------------------------------------------------------------------------------------------------
# end epoch ----------------------------------------------------------------------------------------------------

# Scheduler
lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard
@@ -325,7 +338,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
single_cls=opt.single_cls,
dataloader=testloader,
save_dir=save_dir,
plots=epoch == 0 or final_epoch, # plot first and last
plots=plots and final_epoch,
log_imgs=opt.log_imgs if wandb else 0)

# Write
@@ -380,11 +393,16 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
strip_optimizer(f2) # strip optimizer
os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload
# Finish
if not opt.evolve:
if plots:
plot_results(save_dir=save_dir) # save as results.png
if wandb:
wandb.log({"Results": [wandb.Image(str(save_dir / x), caption=x) for x in
['results.png', 'precision-recall_curve.png']]})
logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
else:
dist.destroy_process_group()

dist.destroy_process_group() if rank not in [-1, 0] else None
wandb.run.finish() if wandb and wandb.run else None
torch.cuda.empty_cache()
return results

@@ -413,7 +431,7 @@ if __name__ == '__main__':
parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
parser.add_argument('--log-imgs', type=int, default=10, help='number of images for W&B logging, max 100')
parser.add_argument('--log-imgs', type=int, default=16, help='number of images for W&B logging, max 100')
parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
parser.add_argument('--project', default='runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
@@ -442,7 +460,7 @@ if __name__ == '__main__':
assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test)
opt.name = 'evolve' if opt.evolve else opt.name
opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok) # increment run
opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve) # increment run

# DDP mode
device = select_device(opt.device, batch_size=opt.batch_size)
@@ -465,20 +483,10 @@ if __name__ == '__main__':
# Train
logger.info(opt)
if not opt.evolve:
tb_writer, wandb = None, None # init loggers
tb_writer = None # init loggers
if opt.global_rank in [-1, 0]:
# Tensorboard
logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/')
tb_writer = SummaryWriter(opt.save_dir) # runs/train/exp

# W&B
try:
import wandb

assert os.environ.get('WANDB_DISABLED') != 'true'
except (ImportError, AssertionError):
logger.info("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)")

tb_writer = SummaryWriter(opt.save_dir) # Tensorboard
train(hyp, opt, device, tb_writer, wandb)

# Evolve hyperparameters (optional)
@@ -553,7 +561,7 @@ if __name__ == '__main__':
hyp[k] = round(hyp[k], 5) # significant digits

# Train mutation
results = train(hyp.copy(), opt, device)
results = train(hyp.copy(), opt, device, wandb=wandb)

# Write mutation results
print_mutation(hyp.copy(), results, yaml_file, opt.bucket)

+ 4
- 4
utils/plots.py 查看文件

@@ -158,13 +158,13 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max
cls = int(classes[j])
color = colors[cls % len(colors)]
cls = names[cls] if names else cls
if labels or conf[j] > 0.3: # 0.3 conf thresh
if labels or conf[j] > 0.25: # 0.25 conf thresh
label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j])
plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)

# Draw image filename labels
if paths is not None:
label = os.path.basename(paths[i])[:40] # trim to 40 char
if paths:
label = Path(paths[i]).name[:40] # trim to 40 char
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf,
lineType=cv2.LINE_AA)
@@ -172,7 +172,7 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max
# Image border
cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)

if fname is not None:
if fname:
r = min(1280. / max(h, w) / ns, 1.0) # ratio to limit image size
mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA)
# cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save

正在加载...
取消
保存