Stochastic Weight Averaging

Stochastic Weight Averaging (SWA) is a training procedure that averages the weights of several models proposed by SGD in the final phase of training, using a learning rate schedule that encourages SGD to keep exploring the same basin of attraction rather than settling at a single point.

SGD tends to converge near the boundary of a wide, flat region of the training loss surface. Because the test loss surface is shifted relative to the training loss, these boundary points often generalize worse than the center of the region; averaging several SGD iterates moves the solution toward that center.

The learning rate schedule is a cyclical linear decay from $\alpha_1$ to $\alpha_2$ with cycle length $c$, where the value at iteration $i$ is:

\begin{align} \alpha(i) &= (1 - t(i))\,\alpha_1 + t(i)\,\alpha_2 \\ t(i) &= \frac{1}{c}\big(\operatorname{mod}(i - 1, c) + 1\big) \end{align}
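The schedule above can be sketched as a small helper; `swa_lr` is a hypothetical name, and the concrete values of $\alpha_1$, $\alpha_2$, and $c$ below are illustrative, not from the source.

```python
def swa_lr(i, alpha1, alpha2, c):
    """Cyclical linear decay from alpha1 to alpha2 with cycle length c.

    t steps through 1/c, 2/c, ..., 1 within each cycle, so the learning
    rate decays linearly and resets to near alpha1 at the start of the
    next cycle (iterations are 1-indexed as in the formula).
    """
    t = ((i - 1) % c + 1) / c
    return (1 - t) * alpha1 + t * alpha2


# Example: alpha1=0.05, alpha2=0.01, c=5
# Within a cycle the rate decays toward alpha2, then jumps back up.
rates = [swa_lr(i, 0.05, 0.01, 5) for i in range(1, 11)]
```

Note that the rate reaches exactly $\alpha_2$ at the end of each cycle (when $\operatorname{mod}(i-1, c) + 1 = c$), which is precisely when the weight average is updated.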

The aggregated weight average is updated with the current weights $w$ at the end of each cycle, where $n_{\text{models}}$ is the number of models averaged so far:

$$ w_{\text{SWA}} \leftarrow \frac{w_{\text{SWA}} \cdot n_{\text{models}} + w}{n_{\text{models}} + 1} $$
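This update is an incremental (running) mean, so $w_{\text{SWA}}$ always equals the plain average of all models collected so far without storing them. A minimal sketch over flat weight vectors, with the hypothetical name `swa_update`:

```python
def swa_update(w_swa, w, n_models):
    """Incorporate weights w into the running average w_swa.

    w_swa currently averages n_models models; the result averages
    n_models + 1. Weights are represented as flat lists of floats
    for illustration.
    """
    return [(ws * n_models + wi) / (n_models + 1)
            for ws, wi in zip(w_swa, w)]


# Averaging the vectors [2.0], [4.0], [6.0] one at a time:
w_swa = [0.0]
for n, w in enumerate([[2.0], [4.0], [6.0]]):
    w_swa = swa_update(w_swa, w, n)
# w_swa is now [4.0], the mean of the three vectors
```

In practice the same update is applied elementwise to every parameter tensor of the network.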