PyTorch MNIST

Standard MNIST

Let’s solve the MNIST problem using PyTorch.

# MNIST solver

import torch

# Import dataset utilities
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Load MNIST data
mnist_train = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

# Inspect data
print(mnist_train)
print(mnist_test)

# Print the shape of the first image in the training set
print(mnist_train[0][0].shape)
Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: ToTensor()
Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: ToTensor()
torch.Size([1, 28, 28])

The dataset is fairly large: the training set consists of 60,000 images of 28x28 pixels, i.e. a tensor of shape 60,000x1x28x28 (the 1 is the single grayscale channel we saw in the shape printed above).
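
To get a feel for the data, we can look at a single example. Here is a quick sketch using matplotlib (matplotlib is an extra assumption here; it is not used elsewhere in this post):

import matplotlib.pyplot as plt

# Each entry is an (image, label) pair; the image is a 1x28x28 tensor
image, label = mnist_train[0]
plt.imshow(image.squeeze(), cmap='gray')  # drop the channel dimension for plotting
plt.title(f'Label: {label}')
plt.show()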

Stochastic Gradient Descent

Stochastic Gradient Descent (SGD) is a variant of Gradient Descent in which the loss is computed on a single example rather than on the whole dataset, and the weights are updated after every example. Because each update is much cheaper than a full pass over the data, this is a very common approach in Deep Learning. The name “stochastic” comes from the randomness introduced by picking one example at a time.
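
As a rough sketch of what this means in PyTorch (hypothetical tensors standing in for real data, not the actual training code below): the model sees one example and the weights are updated immediately.

import torch
import torch.nn as nn

model = nn.Linear(784, 10)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

x = torch.randn(1, 784)      # a single flattened example (random stand-in for an image)
y = torch.tensor([3])        # its label
loss = loss_fn(model(x), y)  # loss computed on one example only
optimizer.zero_grad()
loss.backward()
optimizer.step()             # weights updated after every single example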

Mini-batch Gradient Descent

Mini-batch Gradient Descent is a compromise between SGD and Batch Gradient Descent: the loss is computed on a small batch of examples (typically between 8 and 256) instead of a single one. This is more computationally efficient than SGD because the work for a batch can be done with vectorized operations, especially on GPUs.
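
Continuing the sketch above, the only change for mini-batch gradient descent is that a whole batch goes through the model in one vectorized call:

xb = torch.randn(64, 784)          # a mini-batch of 64 flattened examples
yb = torch.randint(0, 10, (64,))   # 64 random stand-in labels
loss = loss_fn(model(xb), yb)      # CrossEntropyLoss averages over the batch
optimizer.zero_grad()
loss.backward()
optimizer.step()                   # one update per batch, not per example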

Data Loader

PyTorch provides a DataLoader that serves the data in batches (and optionally shuffles it). This is especially useful when the dataset is too large to fit in memory at once, and it gives us mini-batches for free.

# MNIST solver

import torch

# Import dataset utilities
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Load MNIST data
mnist_train = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

# Use Data Loader
train_loader = DataLoader(mnist_train, batch_size=100, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=100, shuffle=False)

# Iterate through data
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
Image batch dimensions: torch.Size([100, 1, 28, 28])
Image label dimensions: torch.Size([100])

Let’s start training

# MNIST solver

import torch

# Import dataset utilities
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Load MNIST data
mnist_train = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

# Use Data Loader
train_loader = DataLoader(mnist_train, batch_size=100, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=100, shuffle=False)

# Train
import torch.nn as nn

# Define model
class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(784, 10)

    def forward(self, xb):
        # Flatten the image tensors using reshape
        xb = xb.reshape(-1, 784)
        out = self.linear(xb)
        return out
    
# Instantiate the model
model = MnistModel()

# Define loss function
loss_fn = nn.CrossEntropyLoss()

# Define optimizer
learning_rate = 1e-2
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Define accuracy function
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

# Train
num_epochs = 1
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Generate predictions
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        # Perform gradient descent
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Report the loss of the last batch once per epoch
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

# Evaluate
with torch.no_grad():
    accum_acc = 0
    accum_loss = 0
    for images, labels in test_loader:
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        accum_loss += loss.item()
        acc = accuracy(outputs, labels)
        accum_acc += acc

    print('Test loss: {:.4f}, Test accuracy: {:.4f}'.format(accum_loss/len(test_loader), accum_acc/len(test_loader)))
    
Epoch [1/1], Loss: 0.7701
Test loss: 0.7179, Test accuracy: 0.8514

Let’s add a hidden layer

# MNIST solver

import torch

# Import dataset utilities
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Load MNIST data
mnist_train = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

# Use Data Loader
train_loader = DataLoader(mnist_train, batch_size=100, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=100, shuffle=False)

# Train
import torch.nn as nn

# Define model
class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(784, 100)  # input -> hidden layer
        self.hidden = nn.Linear(100, 10)   # hidden layer -> output

    def forward(self, xb):
        # Flatten the image tensors using reshape
        xb = xb.reshape(-1, 784)
        out = self.linear(xb)
        out = self.hidden(out)
        return out
    
# Instantiate the model
model = MnistModel()

# Define loss function
loss_fn = nn.CrossEntropyLoss()

# Define optimizer
learning_rate = 1e-2
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Define accuracy function
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

# Train
for epoch in range(20):
    for images, labels in train_loader:
        # Generate predictions
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        # Perform gradient descent
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Report the loss of the last batch once per epoch
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, 20, loss.item()))

# Evaluate
with torch.no_grad():
    accum_acc = 0
    accum_loss = 0
    for images, labels in test_loader:
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        accum_loss += loss.item()
        acc = accuracy(outputs, labels)
        accum_acc += acc

    print('Test loss: {:.4f}, Test accuracy: {:.4f}'.format(accum_loss/len(test_loader), accum_acc/len(test_loader)))
    
Epoch [1/20], Loss: 0.7054
Epoch [2/20], Loss: 0.4153
Epoch [3/20], Loss: 0.4130
Epoch [4/20], Loss: 0.5600
Epoch [5/20], Loss: 0.2385
Epoch [6/20], Loss: 0.4459
Epoch [7/20], Loss: 0.2949
Epoch [8/20], Loss: 0.2567
Epoch [9/20], Loss: 0.3537
Epoch [10/20], Loss: 0.3268
Epoch [11/20], Loss: 0.2535
Epoch [12/20], Loss: 0.2902
Epoch [13/20], Loss: 0.2985
Epoch [14/20], Loss: 0.3891
Epoch [15/20], Loss: 0.2007
Epoch [16/20], Loss: 0.4770
Epoch [17/20], Loss: 0.2669
Epoch [18/20], Loss: 0.2373
Epoch [19/20], Loss: 0.2236
Epoch [20/20], Loss: 0.3353
Test loss: 0.3888, Test accuracy: 0.9206
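
One thing worth noticing: the model above stacks two nn.Linear layers with nothing in between, and a composition of linear maps is itself a linear map, so the hidden layer adds little expressive power. The usual fix is a nonlinearity such as ReLU between the layers; here is a sketch of the modified model (not the run recorded above):

import torch.nn as nn

class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(784, 100)  # input -> hidden
        self.relu = nn.ReLU()              # nonlinearity between the layers
        self.hidden = nn.Linear(100, 10)   # hidden -> output

    def forward(self, xb):
        # Flatten the image tensors using reshape
        xb = xb.reshape(-1, 784)
        out = self.relu(self.linear(xb))
        out = self.hidden(out)
        return out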

Exercise 1

The state of the art (SOTA) for MNIST is around 99.8% accuracy. Can you get there?
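
One possible starting point (a sketch, not a guaranteed route to 99.8%) is a small convolutional network, which generally beats fully connected models on images:

import torch.nn as nn

class MnistCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),   # 28x28 -> 14x14
            nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),   # 14x14 -> 7x7
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 10),
        )

    def forward(self, xb):
        return self.net(xb)

Training it with the same loop as above (plus more epochs and perhaps a different optimizer such as Adam) should get well past the linear models.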

!pip install rggrader
from rggrader import submit

# @title #### Student Identity
student_id = "your student id" # @param {type:"string"}
name = "your name" # @param {type:"string"}
assignment_id = "13_pytorch-mnist"
question_id = "00_mnist-data"
my_accuracy = ""  # fill in the accuracy you achieved, e.g. "0.9206"
submit(student_id, name, assignment_id, str(my_accuracy), question_id)

Fashion MNIST

import torch

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Load Fashion MNIST data
fmnist_train = datasets.FashionMNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
fmnist_test = datasets.FashionMNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

# Inspect data
print(fmnist_train)
print(fmnist_test)

# Print the shape of the first image in the training set
print(fmnist_train[0][0].shape)
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 26421880/26421880 [09:53<00:00, 44521.39it/s] 
Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 29515/29515 [00:00<00:00, 159889.65it/s]
Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4422102/4422102 [01:29<00:00, 49617.97it/s] 
Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5148/5148 [00:00<00:00, 488281.07it/s]
Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: ToTensor()
Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: ToTensor()
torch.Size([1, 28, 28])
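
Unlike the digit labels of MNIST, Fashion MNIST labels are clothing categories. torchvision exposes the class names, so we can check what a label means (using fmnist_train from the cell above):

print(fmnist_train.classes)                       # ['T-shirt/top', 'Trouser', ...]
print(fmnist_train.classes[fmnist_train[0][1]])   # class name of the first training example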

Exercise 2

Try training on the Fashion MNIST dataset, and see if you can reach 90%+ accuracy.

assignment_id = "13_pytorch-mnist"
question_id = "01_fashion-mnist-data"
my_accuracy = ""  # fill in the accuracy you achieved
submit(student_id, name, assignment_id, str(my_accuracy), question_id)