"""Popular Learning Rate Schedulers"""
|
|
from __future__ import division
|
|
import math
|
|
import torch
|
|
|
|
from bisect import bisect_right
|
|
|
|
__all__ = ['LRScheduler', 'WarmupMultiStepLR', 'WarmupPolyLR']
|
|
|
|
|
|


class LRScheduler(object):
    r"""Learning Rate Scheduler

    Parameters
    ----------
    mode : str
        Mode of the learning rate scheduler.
        Currently supports 'constant', 'step', 'linear', 'poly' and 'cosine'.
    base_lr : float
        Base learning rate, i.e. the starting learning rate.
    target_lr : float
        Target learning rate, i.e. the ending learning rate.
        With 'constant' mode, target_lr is ignored.
    niters : int
        Number of iterations to be scheduled.
    nepochs : int
        Number of epochs to be scheduled.
    iters_per_epoch : int
        Number of iterations in each epoch.
    offset : int
        Number of iterations before this scheduler.
    power : float
        Power parameter of the poly scheduler.
    step_iter : list
        A list of iterations at which to decay the learning rate.
    step_epoch : list
        A list of epochs at which to decay the learning rate.
    step_factor : float
        Learning rate decay factor.
    warmup_epochs : int
        Number of warm-up epochs; during warm-up the learning rate is scaled
        linearly from 0 to the scheduled value.
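
    Examples
    --------
    A minimal per-iteration usage sketch (``optimizer`` stands in for any
    ``torch.optim`` optimizer and is not constructed here)::

        scheduler = LRScheduler(mode='poly', base_lr=0.01, niters=1000)
        for it in range(1000):
            scheduler(optimizer, it)  # writes the scheduled lr into optimizer.param_groups
            # ... run one training iteration ...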
    """

    def __init__(self, mode, base_lr=0.01, target_lr=0, niters=0, nepochs=0, iters_per_epoch=0,
                 offset=0, power=0.9, step_iter=None, step_epoch=None, step_factor=0.1, warmup_epochs=0):
        super(LRScheduler, self).__init__()
        assert (mode in ['constant', 'step', 'linear', 'poly', 'cosine'])

        if mode == 'step':
            assert (step_iter is not None or step_epoch is not None)
        self.niters = niters
        self.step = step_iter
        epoch_iters = nepochs * iters_per_epoch
        if epoch_iters > 0:
            self.niters = epoch_iters
            if step_epoch is not None:
                self.step = [s * iters_per_epoch for s in step_epoch]

        self.step_factor = step_factor
        self.base_lr = base_lr
        self.target_lr = base_lr if mode == 'constant' else target_lr
        self.offset = offset
        self.power = power
        self.warmup_iters = warmup_epochs * iters_per_epoch
        self.mode = mode

    def __call__(self, optimizer, num_update):
        self.update(num_update)
        assert self.learning_rate >= 0
        self._adjust_learning_rate(optimizer, self.learning_rate)

    def update(self, num_update):
        N = self.niters - 1
        T = num_update - self.offset
        T = min(max(0, T), N)

        if self.mode == 'constant':
            factor = 0
        elif self.mode == 'linear':
            factor = 1 - T / N
        elif self.mode == 'poly':
            factor = pow(1 - T / N, self.power)
        elif self.mode == 'cosine':
            factor = (1 + math.cos(math.pi * T / N)) / 2
        elif self.mode == 'step':
            if self.step is not None:
                count = sum([1 for s in self.step if s <= T])
                factor = pow(self.step_factor, count)
            else:
                factor = 1
        else:
            raise NotImplementedError

        # warm up lr schedule
        if self.warmup_iters > 0 and T < self.warmup_iters:
            factor = factor * 1.0 * T / self.warmup_iters

        if self.mode == 'step':
            self.learning_rate = self.base_lr * factor
        else:
            self.learning_rate = self.target_lr + (self.base_lr - self.target_lr) * factor

    def _adjust_learning_rate(self, optimizer, lr):
        optimizer.param_groups[0]['lr'] = lr
        # enlarge the lr at the head
        for i in range(1, len(optimizer.param_groups)):
            optimizer.param_groups[i]['lr'] = lr * 10


# Ideally MultiStepLR and the warmup schedule would be separated,
# but the current LRScheduler design doesn't allow it.
# reference: https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/solver/lr_scheduler.py
class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3,
                 warmup_iters=500, warmup_method="linear", last_epoch=-1):
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones))
        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted, got {}".format(warmup_method))

        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        # super().__init__ calls self.step(), which calls get_lr(), so the
        # attributes above must be set before this call.
        super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        warmup_factor = 1
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == 'constant':
                warmup_factor = self.warmup_factor
            elif self.warmup_method == 'linear':
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        return [base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
                for base_lr in self.base_lrs]
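
# A minimal usage sketch for WarmupMultiStepLR (milestones and warmup_iters are
# counted in scheduler.step() calls, so step once per iteration when milestones
# are given in iterations; the numbers below are illustrative, not prescribed):
#
#   scheduler = WarmupMultiStepLR(optimizer, milestones=[30000, 40000], gamma=0.1,
#                                 warmup_iters=500, warmup_method='linear')
#   for it in range(50000):
#       # ... train one iteration ...
#       scheduler.step()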


class WarmupPolyLR(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, target_lr=0, max_iters=0, power=0.9, warmup_factor=1.0 / 3,
                 warmup_iters=500, warmup_method='linear', last_epoch=-1):
        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted, "
                "got {}".format(warmup_method))

        self.target_lr = target_lr
        self.max_iters = max_iters
        self.power = power
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method

        super(WarmupPolyLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        N = self.max_iters - self.warmup_iters
        T = self.last_epoch - self.warmup_iters
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == 'constant':
                warmup_factor = self.warmup_factor
            elif self.warmup_method == 'linear':
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
            else:
                raise ValueError("Unknown warmup type.")
            return [self.target_lr + (base_lr - self.target_lr) * warmup_factor for base_lr in self.base_lrs]
        factor = pow(1 - T / N, self.power)
        return [self.target_lr + (base_lr - self.target_lr) * factor for base_lr in self.base_lrs]


if __name__ == '__main__':
    import torch.nn as nn

    model = nn.Conv2d(16, 16, 3, 1, 1)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    # WarmupPolyLR takes max_iters (not niters), so pass the total iteration count here.
    lr_scheduler = WarmupPolyLR(optimizer, max_iters=1000)