From 187f7c2ed16b9eb9754b28e7b0aa397f908155aa Mon Sep 17 00:00:00 2001
From: Junghoon Kim
Date: Mon, 2 Nov 2020 08:08:36 +0900
Subject: [PATCH] Change optimizer parameters group method (#1239)

* Change optimizer parameters group method

* Add torch nn

* Change isinstance method(torch.Tensor to nn.Parameter)

* parameter freeze fix, PEP8 reformat

* freeze bug fix

Co-authored-by: Glenn Jocher
---
 train.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/train.py b/train.py
index 2df54ca..a24f212 100644
--- a/train.py
+++ b/train.py
@@ -10,6 +10,7 @@ from warnings import warn
 import math
 import numpy as np
 import torch.distributed as dist
+import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 import torch.optim.lr_scheduler as lr_scheduler
@@ -80,12 +81,12 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         model = Model(opt.cfg, ch=3, nc=nc).to(device)  # create

     # Freeze
-    freeze = ['', ]  # parameter names to freeze (full or partial)
-    if any(freeze):
-        for k, v in model.named_parameters():
-            if any(x in k for x in freeze):
-                print('freezing %s' % k)
-                v.requires_grad = False
+    freeze = []  # parameter names to freeze (full or partial)
+    for k, v in model.named_parameters():
+        v.requires_grad = True  # train all layers
+        if any(x in k for x in freeze):
+            print('freezing %s' % k)
+            v.requires_grad = False

     # Optimizer
     nbs = 64  # nominal batch size
@@ -93,14 +94,13 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay

     pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
-    for k, v in model.named_parameters():
-        v.requires_grad = True
-        if '.bias' in k:
-            pg2.append(v)  # biases
-        elif '.weight' in k and '.bn' not in k:
-            pg1.append(v)  # apply weight decay
-        else:
-            pg0.append(v)  # all else
+    for k, v in model.named_modules():
+        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
+            pg2.append(v.bias)  # biases
+        if isinstance(v, nn.BatchNorm2d):
+            pg0.append(v.weight)  # no decay
+        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
+            pg1.append(v.weight)  # apply decay

     if opt.adam:
         optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
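
Note (not part of the patch): the sketch below illustrates the grouping logic the diff introduces, in isolation. It walks model.named_modules() so BatchNorm weights land in a no-decay group, other weights get weight decay, and biases form their own group. The toy model, learning rate, momentum, and weight-decay values are placeholders chosen for the example, not values taken from train.py.

# Minimal standalone sketch of the module-based parameter grouping (assumed toy model and hyperparameters)
import torch.nn as nn
import torch.optim as optim

model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.ReLU(), nn.Conv2d(16, 32, 3))

pg0, pg1, pg2 = [], [], []  # BN weights (no decay), other weights (decay), biases
for k, v in model.named_modules():
    if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
        pg2.append(v.bias)  # biases
    if isinstance(v, nn.BatchNorm2d):
        pg0.append(v.weight)  # BatchNorm weights, no decay
    elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
        pg1.append(v.weight)  # conv/linear weights, apply decay

optimizer = optim.SGD(pg0, lr=0.01, momentum=0.937, nesterov=True)
optimizer.add_param_group({'params': pg1, 'weight_decay': 5e-4})  # decay only these weights
optimizer.add_param_group({'params': pg2})  # biases inherit defaults (no decay)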