@@ -22,6 +22,8 @@ try:
     import thop  # for FLOPs computation
 except ImportError:
     thop = None
 
+logging.basicConfig(format="%(message)s", level=logging.INFO)
+LOGGER = logging.getLogger(__name__)
 
 
@@ -103,11 +105,10 @@ def profile(x, ops, n=100, device=None):
     # m2 = nn.SiLU()
     # profile(x, [m1, m2], n=100)  # profile speed over 100 iterations
 
-    device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+    device = device or select_device()
     x = x.to(device)
     x.requires_grad = True
-    print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '')
-    print(f"\n{'Params':>12s}{'GFLOPs':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}")
+    print(f"{'Params':>12s}{'GFLOPs':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}")
     for m in ops if isinstance(ops, list) else [ops]:
         m = m.to(device) if hasattr(m, 'to') else m  # device
         m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m  # type
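
For context, a minimal usage sketch of profile() after this change, following the example in the
function's own comments. The input shape and the utils.torch_utils import path are assumptions for
illustration, not part of the patch:

    import torch
    import torch.nn as nn
    from utils.torch_utils import profile  # assumed module path

    x = torch.randn(16, 3, 640, 640)     # assumed example input batch
    m1 = lambda x: x * torch.sigmoid(x)  # SiLU as a plain expression
    m2 = nn.SiLU()                       # equivalent module, as in the comment above
    profile(x, [m1, m2], n=100)          # device is now resolved via select_device()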