!pip install torch torchvision scipy matplotlib numpy
Training
Let’s try our hand at training a CNN.
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define a CNN model for MNIST
class CNNModel(nn.Module):
    """Small two-convolution CNN that classifies single-channel images into 10 classes.

    The size of ``fc1`` (64 * 5 * 5 inputs) fixes the expected input to
    1 x 28 x 28 images: 28 -> conv 3x3 -> 26 -> pool 2 -> 13 -> conv 3x3 -> 11
    -> pool 2 -> 5.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, 10) for input ``x``."""
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        # Flatten everything except the batch dimension before the linear layers.
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        # No softmax here: the logits are returned as-is.
        x = self.fc2(x)
        return x
# Define data transformations: convert images to tensors, then normalize
# the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# Load the data. Note: this is Fashion-MNIST, not the digit MNIST dataset;
# the variable names keep the historical ``mnist_`` prefix.
mnist_train = datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)
mnist_test = datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)

# Use Data Loader: shuffle only the training split.
train_loader = DataLoader(mnist_train, batch_size=100, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=100, shuffle=False)

# Instantiate the CNN model
cnn_model = CNNModel()

# Define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = optim.SGD(cnn_model.parameters(), lr=learning_rate)
# Define accuracy function
def accuracy(outputs, labels):
    """Return the fraction of correct predictions in a batch as a 0-dim tensor.

    Args:
        outputs: Per-class scores; the predicted class is the argmax over dim 1.
        labels: Ground-truth class indices, one per row of ``outputs``.

    Returns:
        A scalar ``torch.Tensor`` equal to (number of correct predictions) / (batch size).
    """
    # torch.max over dim=1 returns (values, indices); only the indices are needed.
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))
# Training loop
total_epochs = 5
for epoch in range(total_epochs):
    # Make sure the model is in training mode (matters if this cell is re-run
    # after the evaluation below has switched it to eval mode).
    cnn_model.train()
    for images, labels in train_loader:
        outputs = cnn_model(images)
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # The loss printed here is the loss of the last mini-batch of the epoch.
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, loss.item()))

# Evaluation
cnn_model.eval()
with torch.no_grad():
    accum_loss = 0.0
    accum_acc = 0.0
    for images, labels in test_loader:
        outputs = cnn_model(images)

        # Accumulate per-batch loss and accuracy so the reported numbers cover
        # the whole test set rather than only the final batch.
        accum_loss += loss_fn(outputs, labels).item()
        accum_acc += accuracy(outputs, labels).item()

    # Averaging per-batch means over the number of batches.
    num_batches = len(test_loader)
    print('Test loss: {:.4f}, Test accuracy: {:.4f}'.format(accum_loss/num_batches, accum_acc/num_batches))
Epoch [1/5], Loss: 0.7850
Epoch [2/5], Loss: 0.4941
Epoch [3/5], Loss: 0.4238
Epoch [4/5], Loss: 0.4913
Epoch [5/5], Loss: 0.4813
Test loss: 0.4732, Test accuracy: 0.8098
Data Augmentation
Can we train a CNN model on a relatively small dataset? What happens if the dataset is small?
It is possible to train on a small dataset, and the resulting model can even be quite accurate. However, there is one major problem: if the input image differs from the training images, for example because it is upside down, the model will fail. This is known as overfitting. Overfitting occurs when a model learns to perform well on the training data but fails to generalize to unseen data.
To overcome this issue, we can use data augmentation. What is Data augmentation ?
Basically, we artificially increase the size and diversity of the training dataset. We can do this with:

- Rotation: Data augmentation can involve rotating the digit images by various angles. This helps the model learn to recognize digits even if they are slightly tilted or rotated when written by different people.
- Scaling and Shearing: You can apply transformations that stretch or compress the digit images in both the x and y directions. This allows the model to handle variations in digit size and aspect ratio.
- Translation: Shifting the digit images within the image frame helps the model learn to recognize digits in different positions on the input image.
- Noise: Adding random noise to the images simulates variations in writing style and drawing imperfections.
Let’s assume we want to make sure that our CNN model, trained on the MNIST dataset, can recognize digits written by various individuals with different writing styles. Here’s what we could do:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
# Base transform: only convert the images to tensors.
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)

# Example data augmentation transformations
data_augmentation = transforms.Compose([
    transforms.RandomRotation(degrees=(-10, 10), fill=0),  # Fill with black for rotation
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.RandomResizedCrop(size=(28, 28), scale=(0.8, 1.2)),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
    transforms.RandomErasing(p=0.5),
])
# Create a custom dataset class to store augmented data
class AugmentedDataset(torch.utils.data.Dataset):
    """Dataset wrapper that applies an augmentation to each image on access.

    Augmentation happens lazily in ``__getitem__``, so nothing is precomputed
    or stored; the wrapper has the same length as the wrapped dataset.

    Args:
        original_dataset: Indexable dataset whose items are ``(image, label)`` pairs.
        data_augmentation: Callable applied to the image of each item.
    """

    def __init__(self, original_dataset, data_augmentation):
        self.original_dataset = original_dataset
        self.data_augmentation = data_augmentation

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.original_dataset)

    def __getitem__(self, idx):
        """Return ``(augmented_image, label)`` for the item at ``idx``."""
        image, label = self.original_dataset[idx]
        augmented_image = self.data_augmentation(image)
        return augmented_image, label
from itertools import islice

# Create an augmented dataset
augmented_dataset = AugmentedDataset(train_dataset, data_augmentation)

# Choose a digit class (e.g., digit 7)
digit_class = 7

# Only this many examples are displayed, so only this many are collected and
# augmented instead of scanning and augmenting the entire training set.
num_examples = 5

# Filter the dataset to get images of the chosen class
digit_images = list(islice(
    (image for image, label in train_dataset if label == digit_class),
    num_examples,
))

# Apply data augmentation to the images and convert to PIL Images
augmented_images_pil = [transforms.ToPILImage()(data_augmentation(image)) for image in digit_images]

# Convert PIL Images to NumPy arrays before visualization
original_images_np = [image.squeeze().numpy() for image in digit_images]
augmented_images_np = [np.array(image) for image in augmented_images_pil]

# Visualize original and augmented images: originals on the top row,
# augmented versions on the bottom row.
plt.figure(figsize=(12, 6))

for i in range(num_examples):
    plt.subplot(2, 5, i + 1)
    plt.imshow(original_images_np[i], cmap='gray')
    plt.title("Original")

for i in range(num_examples):
    plt.subplot(2, 5, i + 6)
    plt.imshow(augmented_images_np[i], cmap='gray')
    plt.title("Augmented")

plt.show()
How do we combine them? We can use `ConcatDataset`:
from torch.utils.data import ConcatDataset
# Size of the original training dataset (train_dataset) before combining
num_images = len(train_dataset)
print("Number of images in the dataset (before):", num_images)

# Combine the original and augmented datasets
combined_dataset = ConcatDataset([train_dataset, augmented_dataset])

# Create a DataLoader for the combined dataset
combined_train_loader = DataLoader(combined_dataset, batch_size=100, shuffle=True)

# Size of the combined dataset (original + augmented)
num_images = len(combined_dataset)
print("Number of images in the dataset (after):", num_images)
Number of images in the dataset (before): 60000
Number of images in the dataset (after): 120000
Next we can train them as usual. Pretty neat, eh ?
Exercise CNN Training
!pip install rggrader
# @title #### Student Identity
student_id = "student_id" # @param {type:"string"}
name = "your_name" # @param {type:"string"}
# @title #### 00. CNN Model using SVHN Dataset
from rggrader import submit

# TODO: Train a model on your own, using the SVHN (Street View House Numbers) dataset :: https://huggingface.co/datasets/svhn

# You may add any code here to derive your variables
# Please change this
accuracy = 0

print(f"The accuracy is {accuracy}")

# Submit Method
assignment_id = "03_cnn"
question_id = "01_training_svhn"
# The accuracy is submitted as a string; the last argument is left empty.
submit(student_id, name, assignment_id, str(accuracy), question_id, "")