!pip install torch torchvision matplotlib
CIFAR10 comparison for regular Neural Network vs CNN
CIFAR-10 dataset
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.
The dataset is divided into five training batches and one test batch, each with 10000 images. The test batch contains exactly 1000 randomly-selected images from each class. The training batches contain the remaining images in random order, but some training batches may contain more images from one class than another. Between them, the training batches contain exactly 5000 images from each class.
The classes are completely mutually exclusive. There is no overlap between automobiles and trucks. “Automobile” includes sedans, SUVs, things of that sort. “Truck” includes only big trucks. Neither includes pickup trucks.
Source: CIFAR 10 Dataset - cs.toronto.edu
Let’s explore the dataset:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
# Define data transformations (you can customize these)
# Normalize uses mean 0.5 and std 0.5 per channel; imshow() undoes this with
# `img / 2 + 0.5` before plotting.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Load the training dataset (downloaded into ./data on first use)
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=10, shuffle=True, num_workers=2)

# Define class labels; the position in this tuple is used as the class id
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Function to display an image
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: Image tensor in (C, H, W) layout that was normalized with
            mean 0.5 and std 0.5, so `img / 2 + 0.5` restores the
            pre-normalization values.
    """
    img = img / 2 + 0.5  # Unnormalize the image
    npimg = img.numpy()
    # matplotlib expects (H, W, C), so move the channel axis to the end
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# Function to display a grid of images from a specified class with labels
def show_images_from_class_with_labels(class_id, num_images=10):
    """Show a row of random training images from one class next to its label.

    Reads the module-level `trainset` and `classes`.

    Args:
        class_id: Index of the class to display (position in `classes`).
        num_images: How many distinct images to sample from that class.

    Raises:
        ValueError: Raised by `np.random.choice` when `num_images` is larger
            than the number of images available for the class, because the
            sampling is done without replacement.
    """
    # Find indices of images from the specified class
    class_indices = [i for i, label in enumerate(trainset.targets) if label == class_id]

    # Randomly select num_images indices from the class
    selected_indices = np.random.choice(class_indices, num_images, replace=False)

    # Collect the images that will be shown side by side
    images_grid = []

    for idx in selected_indices:
        image, _ = trainset[idx]
        # Convert image tensor to a NumPy array and reshape it from (C, H, W) to (H, W, C)
        image = np.transpose(image.numpy(), (1, 2, 0))

        # Normalize the image data to be in the [0, 1] range
        image = (image - image.min()) / (image.max() - image.min())

        images_grid.append(image)

    # Show the grid of images and one label for the class
    fig, axs = plt.subplots(1, 2, figsize=(12, 1))  # Create a 1x2 grid of subplots

    axs[0].axis('off')  # Turn off axis for labels
    axs[0].text(0.5, 0.5, f'Class: {classes[class_id]}', ha='center', va='center', fontsize=12)

    axs[1].axis('off')  # Turn off axis for images
    axs[1].imshow(np.concatenate(images_grid, axis=1))  # Concatenate images horizontally

    plt.show()
# Display 10 random images from each class with labels
for class_id, class_name in enumerate(classes):
    show_images_from_class_with_labels(class_id)
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz
100%|██████████| 170498071/170498071 [43:03<00:00, 66006.41it/s]
Extracting ./data/cifar-10-python.tar.gz to ./data
Regular NN version:
Let’s try again using the NN version:
import ssl  # only for localhost

# Replace the default HTTPS context with an unverified one.
# NOTE(review): this turns off certificate verification process-wide; the
# original marks it "only for localhost" -- presumably to let the dataset
# download succeed on a local machine. Do not keep it in shared environments.
ssl._create_default_https_context = ssl._create_unverified_context  # only for localhost

# CIFAR-10 solver (fully connected network)
import torch

# Load CIFAR-10 data
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define data transformations
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Load CIFAR-10 train and test splits
cifar10_train = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
cifar10_test = datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

# Use Data Loader (only the training data is shuffled)
train_loader = DataLoader(cifar10_train, batch_size=100, shuffle=True)
test_loader = DataLoader(cifar10_test, batch_size=100, shuffle=False)
# Train
import torch.nn as nn
# Define model
class FNNModel(nn.Module):
    """Fully connected classifier for CIFAR-10 with a single hidden layer.

    The input batch is flattened to 3 * 32 * 32 features per image, passed
    through a 128-unit hidden layer with ReLU, and projected to 10 class
    scores (logits; no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3 * 32 * 32, 128)  # Adjust input size for CIFAR-10 (3 color channels, 32x32 pixels)
        self.fc2 = nn.Linear(128, 10)  # 10 classes for CIFAR-10

    def forward(self, x):
        """Return class scores of shape (batch, 10) for an image batch `x`."""
        x = x.view(x.size(0), -1)  # Flatten the input, keeping the batch dimension
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
# Instantiate the model
model = FNNModel()

# Define loss function
loss_fn = nn.CrossEntropyLoss()

# Define optimizer
learning_rate = 1e-2
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Define accuracy function
def accuracy(outputs, labels):
    """Return the fraction of correct predictions in a batch as a scalar tensor.

    Args:
        outputs: Per-class scores, one row per sample; the predicted class
            is the index of the largest score along dim 1.
        labels: Tensor of true class indices, one per sample.
    """
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))
# Train
total_epochs = 10

model.train()
# NOTE(review): the loop runs 5 epochs while the progress message reports
# `total_epochs` (10) as the denominator. Kept as-is so the behaviour matches
# the recorded output ("Epoch [5/10]"); align the two if you retrain.
for epoch in range(5):
    for images, labels in train_loader:
        # Generate predictions
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        # Perform gradient descent
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Reported once per epoch; `loss` is the loss of the last batch only
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, loss.item()))

# Evaluate
model.eval()
with torch.no_grad():
    accum_acc = 0
    for images, labels in test_loader:
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        acc = accuracy(outputs, labels)
        accum_acc += acc

    # `loss` is the last test batch's loss; accuracy is the mean of the per-batch accuracies
    print('Test loss: {:.4f}, Test accuracy: {:.4f}'.format(loss.item(), accum_acc/len(test_loader)))
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/10], Loss: 1.8312
Epoch [2/10], Loss: 1.7067
Epoch [3/10], Loss: 1.6943
Epoch [4/10], Loss: 1.5868
Epoch [5/10], Loss: 1.5829
Test loss: 1.5123, Test accuracy: 0.4600
Only 46% accuracy.
CNN Version
Let’s also try the CNN version:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define a CNN model for CIFAR-10
class CNNModel(nn.Module):
    """Small convolutional classifier for CIFAR-10.

    Two 3x3 convolutions, each followed by ReLU and 2x2 max pooling, feed a
    128-unit hidden layer and a 10-way output layer (logits; no softmax is
    applied here). For 32x32 inputs the spatial size goes
    32 -> 30 -> 15 -> 13 -> 6, which is why `fc1` expects 64 * 6 * 6 features.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)  # Adjust input channels for CIFAR-10 (3 color channels)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64 * 6 * 6, 128)  # Adjust input size for CIFAR-10 (32x32 images)
        self.fc2 = nn.Linear(128, 10)  # 10 classes for CIFAR-10

    def forward(self, x):
        """Return class scores of shape (batch, 10) for an image batch `x`."""
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        x = x.view(x.size(0), -1)  # Flatten feature maps, keeping the batch dimension
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
# Define data transformations
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Load CIFAR-10 data
cifar10_train = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
cifar10_test = datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

# Use Data Loader (only the training data is shuffled)
train_loader = DataLoader(cifar10_train, batch_size=100, shuffle=True)
test_loader = DataLoader(cifar10_test, batch_size=100, shuffle=False)

# Instantiate the CNN model
cnn_model = CNNModel()

# Loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = optim.SGD(cnn_model.parameters(), lr=learning_rate)
# Define accuracy function
def accuracy(outputs, labels):
    """Return the fraction of correct predictions in a batch as a scalar tensor.

    Args:
        outputs: Per-class scores, one row per sample; the predicted class
            is the index of the largest score along dim 1.
        labels: Tensor of true class indices, one per sample.
    """
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))
total_epochs = 10

# Training loop
cnn_model.train()
for epoch in range(total_epochs):
    for images, labels in train_loader:
        outputs = cnn_model(images)
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Reported once per epoch; `loss` is the loss of the last batch only
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, loss.item()))

# Evaluation
cnn_model.eval()
with torch.no_grad():
    accum_acc = 0
    for images, labels in test_loader:
        outputs = cnn_model(images)

        loss = loss_fn(outputs, labels)
        acc = accuracy(outputs, labels)
        accum_acc += acc

    # `loss` is the last test batch's loss; accuracy is the mean of the per-batch accuracies
    print('Test loss: {:.4f}, Test accuracy: {:.4f}'.format(loss.item(), accum_acc/len(test_loader)))
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/10], Loss: 2.1188
Epoch [2/10], Loss: 1.9551
Epoch [3/10], Loss: 1.6204
Epoch [4/10], Loss: 1.5244
Epoch [5/10], Loss: 1.4974
Epoch [6/10], Loss: 1.3608
Epoch [7/10], Loss: 1.4381
Epoch [8/10], Loss: 1.3200
Epoch [9/10], Loss: 1.2991
Epoch [10/10], Loss: 1.1985
Test loss: 1.2555, Test accuracy: 0.5520
Wow, 55% accuracy!
Result
We can see the gap widens now, 46% vs 55%, compared to 91% vs 97% on the MNIST dataset.
You can increase the number of epochs to find out when one of the models reaches 90%. Which one do you think it will be?
I hope I have gotten my point across: a CNN is far superior to a regular NN when we are working with images.
Exercise CNN
!pip install rggrader
# @title #### Student Identity
student_id = "student_id" # @param {type:"string"}
name = "your_name" # @param {type:"string"}

# @title #### 00. CIFAR10 Dataset Accuracy
from rggrader import submit

# TODO: Improve the accuracy of the CNN model using the CIFAR10 dataset above. Write your code here.

# You may add any code here to derive your variables
# Please change this
# NOTE: this rebinds the name `accuracy`, so a function of the same name
# defined earlier in the notebook is no longer callable after this line.
accuracy = 5

print(f"The accuracy is {accuracy}")

# Submit Method
assignment_id = "03_cnn"
question_id = "00_cifar10_accuracy"
submit(student_id, name, assignment_id, str(accuracy), question_id, "")