hw1_BlaiseIradukunda.convo.ipynb
(Colab notebook, Python 3 kernel, GPU runtime. Open in Colab: https://colab.research.google.com/gist/Blezzoh/b76779a0544f8a2cdbe26b7b22280153/hw1_blaiseiradukunda-convo.ipynb)
```python
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

train_transform_augmented = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize([0, 0, 0], [1, 1, 1])  # identity normalization: pixels stay in [0, 1]
])

test_transform_augmented = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0, 0, 0], [1, 1, 1])
])

trainset_augmented = torchvision.datasets.CIFAR10(root='./data', train=True,
                                                  download=True, transform=train_transform_augmented)
# build the augmented loaders from the augmented datasets
trainloader_augmented = torch.utils.data.DataLoader(trainset_augmented, batch_size=batch_size,
                                                    shuffle=True, num_workers=2)

testset_augmented = torchvision.datasets.CIFAR10(root='./data', train=False,
                                                 download=True, transform=test_transform_augmented)
testloader_augmented = torch.utils.data.DataLoader(testset_augmented, batch_size=batch_size,
                                                   shuffle=False, num_workers=2)
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


class ScratchNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
num_epochs = 10
learning_rate = 0.001

# Image preprocessing modules
# (note: the loaders above were already built with the first transform)
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)

# Residual block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

# ResNet
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

print(device, len(trainloader))
```

Output:
```
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz
Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
cuda 12500
```
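The `imshow` helper above is defined but never called; a quick sanity check of the data pipeline (a sketch added here, not part of the original notebook) can display one batch with it:

```python
# Show one training batch as a grid, with its labels (assumes the cell above ran).
dataiter = iter(trainloader)
images, labels = next(dataiter)
imshow(torchvision.utils.make_grid(images))  # imshow unnormalizes and plots the grid
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
```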
```python
# running a convolution network from scratch


net = ScratchNet()
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    net = nn.DataParallel(net)
else:
    print("Using a single device:", device)

net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
num_epochs = 10
loss_array = []
for epoch in range(num_epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)  # works on CPU or GPU
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i == len(trainloader) - 1:
            # mean loss over the 2500 batches since the last reset
            num_l = format(running_loss / 2500, ".3f")
            loss_array.append(float(num_l))
            print(loss_array, running_loss / 2500)
        if i % 2500 == 2499:  # print every 2500 mini-batches
            print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 2500:.3f}')
            running_loss = 0.0

PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)

print('Finished Training')
```
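One portability caveat with the checkpoint above (an added note, not from the original run): when `net` has been wrapped in `nn.DataParallel`, every key in its `state_dict` gains a `module.` prefix, so the evaluation cell below, which loads the file into a bare `ScratchNet`, would fail with missing-key errors. A hedged sketch of a portable save:

```python
# Save the underlying module so the checkpoint loads into an unwrapped ScratchNet.
to_save = net.module if isinstance(net, nn.DataParallel) else net
torch.save(to_save.state_dict(), PATH)
```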
```python
import matplotlib.pyplot as plt

net = ScratchNet()
net.load_state_dict(torch.load(PATH))
net.eval()  # switch to evaluation mode

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
plt.plot(loss_array)
plt.title('loss/epoch')
plt.xlabel('epochs')
plt.ylabel('loss')
```
ScratchNet results (SGD, lr 0.001, momentum 0.9, 10 epochs):

Accuracy for class: plane is 72.5 %
Accuracy for class: car is 76.3 %
Accuracy for class: bird is 41.0 %
Accuracy for class: cat is 50.0 %
Accuracy for class: deer is 57.2 %
Accuracy for class: dog is 43.8 %
Accuracy for class: frog is 71.9 %
Accuracy for class: horse is 70.1 %
Accuracy for class: ship is 69.0 %
Accuracy for class: truck is 68.4 %

Accuracy of the network on the 10000 test images: 62 %
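The evaluation block above is repeated almost verbatim for every model in this notebook; a small helper (a sketch, assuming the `classes` tuple and loaders defined earlier) would factor it out:

```python
def evaluate_per_class(net, loader, classes, device=torch.device('cpu')):
    """Print per-class and overall accuracy of net on loader."""
    net.eval()
    correct_pred = {c: 0 for c in classes}
    total_pred = {c: 0 for c in classes}
    correct = total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            _, predictions = torch.max(net(images), 1)
            correct += (predictions == labels).sum().item()
            total += labels.size(0)
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1
    for classname, correct_count in correct_pred.items():
        print(f'Accuracy for class: {classname:5s} is '
              f'{100 * correct_count / total_pred[classname]:.1f} %')
    print(f'Overall accuracy: {100 * correct / total:.1f} %')
```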
```python
import time

# lenet
class Flatten(torch.nn.Module):
    def forward(self, x):
        return x.view(x.shape[0], -1)

class Reshape(torch.nn.Module):  # unused here: reshapes to single-channel 32x32 inputs
    def forward(self, x):
        return x.view(-1, 1, 32, 32)

def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight)

def evaluate_accuracy(data_iter, net, device=torch.device('cpu')):
    """Evaluate accuracy of a model on the given data set."""
    acc_sum, n = torch.tensor([0], dtype=torch.float32, device=device), 0
    for X, y in data_iter:
        # If device is the GPU, copy the data to the GPU.
        X, y = X.to(device), y.to(device)
        net.eval()
        with torch.no_grad():
            y = y.long()
            acc_sum += torch.sum((torch.argmax(net(X), dim=1) == y))
            n += y.shape[0]
    return acc_sum.item() / n

# lenet = torch.nn.Sequential(
#     Reshape(),
#     nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, padding=2),
#     nn.Sigmoid(),
#     nn.AvgPool2d(kernel_size=2, stride=2),
#     nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
#     nn.Sigmoid(),
#     nn.AvgPool2d(kernel_size=2, stride=2),
#     Flatten(),
#     nn.Linear(in_features=576, out_features=120),
#     nn.Sigmoid(),
#     nn.Linear(120, 84),
#     nn.Sigmoid(),
#     nn.Linear(84, 10)
# )

lenet = torch.nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, padding=0, stride=1),
    nn.Tanh(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=(5, 5), padding=0, stride=1),
    nn.Tanh(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(in_channels=16, out_channels=120, kernel_size=(5, 5), padding=0, stride=1),  # 16x5x5 -> 120x1x1
    nn.Tanh(),
    Flatten(),
    nn.Linear(120, 84),
    nn.Tanh(),
    nn.Linear(84, 10)
)

criterion = nn.CrossEntropyLoss()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


lenet.apply(init_weights)

lenet = lenet.to(device)

def train_ch5(network, train_iter, test_iter, criterion, num_epochs, batch_size, device, lr=None):
    """Train and evaluate a model with CPU or GPU."""
    print('training on', device)
    network.to(device)
    optimizer = optim.SGD(network.parameters(), lr=lr)
    for epoch in range(num_epochs):
        train_l_sum = torch.tensor([0.0], dtype=torch.float32, device=device)
        train_acc_sum = torch.tensor([0.0], dtype=torch.float32, device=device)
        n, start = 0, time.time()
        for data, label in train_iter:
            network.train()
            optimizer.zero_grad()
            data, label = data.to(device), label.to(device)
            out = network(data)
            loss = criterion(out, label)
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                label = label.long()
                train_l_sum += loss.float()
                train_acc_sum += (torch.sum((torch.argmax(out, dim=1) == label))).float()
                n += label.shape[0]
        # note: the reported loss averages per-batch losses over samples, not batches
        test_acc = evaluate_accuracy(test_iter, network, device)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

lr, num_epochs = 0.9, 10


train_ch5(lenet, trainloader, testloader, criterion, num_epochs, batch_size, device, lr)
print(len(testloader), len(trainloader))
```
LeNet, learning rate 0.9, 10 epochs, test set 2500 batches, training set 12500 batches (batch size 4):

epoch 1, loss 9.3629, train acc 0.105, test acc 0.100, time 42.2 sec
epoch 2, loss 9.8082, train acc 0.104, test acc 0.100, time 41.8 sec
epoch 3, loss 9.8746, train acc 0.103, test acc 0.100, time 42.6 sec
epoch 4, loss 10.0857, train acc 0.101, test acc 0.100, time 42.3 sec
epoch 5, loss 10.1143, train acc 0.102, test acc 0.100, time 42.1 sec
epoch 6, loss 10.0937, train acc 0.103, test acc 0.100, time 41.8 sec
epoch 7, loss 10.1260, train acc 0.103, test acc 0.100, time 42.0 sec
epoch 8, loss 10.1510, train acc 0.100, test acc 0.100, time 41.9 sec
epoch 9, loss 10.1925, train acc 0.098, test acc 0.100, time 41.9 sec
epoch 10, loss 10.1775, train acc 0.100, test acc 0.100, time 42.2 sec
```python
lr, num_epochs = 0.09, 10

# note: this continues from the weights left by the previous run;
# re-applying init_weights would give a fresh start
train_ch5(lenet, trainloader, testloader, criterion, num_epochs, batch_size, device, lr)
print(len(testloader), len(trainloader))
```
LeNet, learning rate 0.09, 10 epochs, test set 2500 batches, training set 12500 batches (batch size 4):

epoch 1, loss 0.8374, train acc 0.099, test acc 0.100, time 42.5 sec
epoch 2, loss 0.8296, train acc 0.101, test acc 0.100, time 42.6 sec
epoch 3, loss 0.8302, train acc 0.102, test acc 0.100, time 42.3 sec
epoch 4, loss 0.8323, train acc 0.099, test acc 0.100, time 43.4 sec
epoch 5, loss 0.8293, train acc 0.100, test acc 0.100, time 44.1 sec
epoch 6, loss 0.8344, train acc 0.099, test acc 0.100, time 42.3 sec
epoch 7, loss 0.8315, train acc 0.101, test acc 0.100, time 42.2 sec
epoch 8, loss 0.8313, train acc 0.099, test acc 0.100, time 42.3 sec
epoch 9, loss 0.8307, train acc 0.101, test acc 0.100, time 42.6 sec
epoch 10, loss 0.8310, train acc 0.101, test acc 0.100, time 42.1 sec

Conclusion: LeNet stays at chance accuracy (about 10 %) at both learning rates; changing the learning rate alone does not make it learn.
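A follow-up the notebook does not run (so this is an assumption, not a result): both 0.9 and 0.09 are aggressive learning rates for this Tanh network, and the second run also continues from already-diverged weights. The natural next experiment is the same topology with ReLU activations, a fresh Xavier initialization, and a much smaller learning rate; a sketch using the helpers defined above:

```python
# Hypothetical variant: LeNet topology with ReLU instead of Tanh,
# retrained from scratch at a smaller learning rate.
lenet_relu = nn.Sequential(
    nn.Conv2d(3, 6, kernel_size=5), nn.ReLU(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.ReLU(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(16, 120, kernel_size=5), nn.ReLU(),  # 16x5x5 -> 120x1x1
    Flatten(),
    nn.Linear(120, 84), nn.ReLU(),
    nn.Linear(84, 10),
)
lenet_relu.apply(init_weights)
train_ch5(lenet_relu, trainloader, testloader, criterion, num_epochs, batch_size, device, lr=0.01)
```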
```python
# device, num_epochs, learning_rate, the data transform, conv3x3,
# ResidualBlock, and ResNet are all defined in the first cell above

model = ResNet(ResidualBlock, [2, 2, 2]).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# For updating learning rate
def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Train the model
total_step = len(trainloader)
curr_lr = learning_rate
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(trainloader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 1000 == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
PATH_ADAM = 'resnet.ckpt'
torch.save(model.state_dict(), PATH_ADAM)
```

Output:
```
Epoch [1/10], Step [1000/12500] Loss: 2.1732
Epoch [1/10], Step [2000/12500] Loss: 1.5022
Epoch [1/10], Step [3000/12500] Loss: 1.3424
Epoch [1/10], Step [4000/12500] Loss: 1.6357
Epoch [1/10], Step [5000/12500] Loss: 1.8339
Epoch [1/10], Step [6000/12500] Loss: 1.8645
Epoch [1/10], Step [7000/12500] Loss: 1.2705
Epoch [1/10], Step [8000/12500] Loss: 0.3739
Epoch [1/10], Step [9000/12500] Loss: 1.3352
Epoch [1/10], Step [10000/12500] Loss: 1.2586
Epoch [1/10], Step [11000/12500] Loss: 2.1507
Epoch [1/10], Step [12000/12500] Loss: 1.5038
Epoch [2/10], Step [1000/12500] Loss: 1.5137
Epoch [2/10], Step [2000/12500] Loss: 0.9084
Epoch [2/10], Step [3000/12500] Loss: 1.5097
Epoch [2/10], Step [4000/12500] Loss: 1.2039
Epoch [2/10], Step [5000/12500] Loss: 0.4190
Epoch [2/10], Step [6000/12500] Loss: 1.5147
Epoch [2/10], Step [7000/12500] Loss: 0.1477
Epoch [2/10], Step [8000/12500] Loss: 0.4043
Epoch [2/10], Step [9000/12500] Loss: 0.4189
Epoch [2/10], Step [10000/12500] Loss: 0.5096
Epoch [2/10], Step [11000/12500] Loss: 1.5689
Epoch [2/10], Step [12000/12500] Loss: 1.4065
Epoch [3/10], Step [1000/12500] Loss: 0.9672
Epoch [3/10], Step [2000/12500] Loss: 0.9948
Epoch [3/10], Step [3000/12500] Loss: 0.4711
Epoch [3/10], Step [4000/12500] Loss: 1.3512
Epoch [3/10], Step [5000/12500] Loss: 0.4913
Epoch [3/10], Step [6000/12500] Loss: 0.3216
Epoch [3/10], Step [7000/12500] Loss: 0.4052
Epoch [3/10], Step [8000/12500] Loss: 1.1706
Epoch [3/10], Step [9000/12500] Loss: 0.4825
Epoch [3/10], Step [10000/12500] Loss: 1.0973
Epoch [3/10], Step [11000/12500] Loss: 0.2907
Epoch [3/10], Step [12000/12500] Loss: 0.9265
Epoch [4/10], Step [1000/12500] Loss: 0.8592
Epoch [4/10], Step [2000/12500] Loss: 1.2742
Epoch [4/10], Step [3000/12500] Loss: 0.5266
Epoch [4/10], Step [4000/12500] Loss: 0.1456
Epoch [4/10], Step [5000/12500] Loss: 1.4422
Epoch [4/10], Step [6000/12500] Loss: 1.3843
Epoch [4/10], Step [7000/12500] Loss: 1.0108
Epoch [4/10], Step [8000/12500] Loss: 0.4158
Epoch [4/10], Step [9000/12500] Loss: 1.3131
Epoch [4/10], Step [10000/12500] Loss: 0.2142
Epoch [4/10], Step [11000/12500] Loss: 1.2804
Epoch [4/10], Step [12000/12500] Loss: 0.3383
Epoch [5/10], Step [1000/12500] Loss: 0.0636
Epoch [5/10], Step [2000/12500] Loss: 2.3171
Epoch [5/10], Step [3000/12500] Loss: 1.5220
Epoch [5/10], Step [4000/12500] Loss: 0.4020
Epoch [5/10], Step [5000/12500] Loss: 0.3562
Epoch [5/10], Step [6000/12500] Loss: 0.5989
Epoch [5/10], Step [7000/12500] Loss: 0.4212
Epoch [5/10], Step [8000/12500] Loss: 1.1807
Epoch [5/10], Step [9000/12500] Loss: 0.2010
Epoch [5/10], Step [10000/12500] Loss: 0.5611
Epoch [5/10], Step [11000/12500] Loss: 0.6691
Epoch [5/10], Step [12000/12500] Loss: 1.2104
Epoch [6/10], Step [1000/12500] Loss: 1.1366
Epoch [6/10], Step [2000/12500] Loss: 0.2460
Epoch [6/10], Step [3000/12500] Loss: 0.5435
Epoch [6/10], Step [4000/12500] Loss: 0.4072
Epoch [6/10], Step [5000/12500] Loss: 0.2886
Epoch [6/10], Step [6000/12500] Loss: 0.7182
Epoch [6/10], Step [7000/12500] Loss: 0.0643
Epoch [6/10], Step [8000/12500] Loss: 0.9365
Epoch [6/10], Step [9000/12500] Loss: 0.1049
Epoch [6/10], Step [10000/12500] Loss: 0.1087
Epoch [6/10], Step [11000/12500] Loss: 0.6126
Epoch [6/10], Step [12000/12500] Loss: 0.6975
Epoch [7/10], Step [1000/12500] Loss: 0.3737
Epoch [7/10], Step [2000/12500] Loss: 1.2417
Epoch [7/10], Step [3000/12500] Loss: 0.0258
Epoch [7/10], Step [4000/12500] Loss: 1.1269
Epoch [7/10], Step [5000/12500] Loss: 0.2910
Epoch [7/10], Step [6000/12500] Loss: 0.0475
Epoch [7/10], Step [7000/12500] Loss: 0.6163
Epoch [7/10], Step [8000/12500] Loss: 0.2173
Epoch [7/10], Step [9000/12500] Loss: 0.2072
Epoch [7/10], Step [10000/12500] Loss: 0.0700
Epoch [7/10], Step [11000/12500] Loss: 0.0784
Epoch [7/10], Step [12000/12500] Loss: 0.0942
Epoch [8/10], Step [1000/12500] Loss: 0.5770
Epoch [8/10], Step [2000/12500] Loss: 0.8305
Epoch [8/10], Step [3000/12500] Loss: 0.1170
Epoch [8/10], Step [4000/12500] Loss: 0.0587
Epoch [8/10], Step [5000/12500] Loss: 0.0339
Epoch [8/10], Step [6000/12500] Loss: 0.0366
Epoch [8/10], Step [7000/12500] Loss: 0.1115
Epoch [8/10], Step [8000/12500] Loss: 0.4153
Epoch [8/10], Step [9000/12500] Loss: 0.4663
Epoch [8/10], Step [10000/12500] Loss: 0.9037
Epoch [8/10], Step [11000/12500] Loss: 0.3840
Epoch [8/10], Step [12000/12500] Loss: 0.2206
Epoch [9/10], Step [1000/12500] Loss: 0.7461
Epoch [9/10], Step [2000/12500] Loss: 0.2539
Epoch [9/10], Step [3000/12500] Loss: 0.1930
Epoch [9/10], Step [4000/12500] Loss: 0.4961
Epoch [9/10], Step [5000/12500] Loss: 0.3581
Epoch [9/10], Step [6000/12500] Loss: 0.0161
Epoch [9/10], Step [7000/12500] Loss: 1.0494
Epoch [9/10], Step [8000/12500] Loss: 0.1826
Epoch [9/10], Step [9000/12500] Loss: 0.1787
Epoch [9/10], Step [10000/12500] Loss: 1.0742
Epoch [9/10], Step [11000/12500] Loss: 0.1517
Epoch [9/10], Step [12000/12500] Loss: 0.1589
Epoch [10/10], Step [1000/12500] Loss: 0.3740
Epoch [10/10], Step [2000/12500] Loss: 0.4085
Epoch [10/10], Step [3000/12500] Loss: 0.9218
Epoch [10/10], Step [4000/12500] Loss: 0.2560
Epoch [10/10], Step [5000/12500] Loss: 0.0220
Epoch [10/10], Step [6000/12500] Loss: 0.2226
Epoch [10/10], Step [7000/12500] Loss: 0.1159
Epoch [10/10], Step [8000/12500] Loss: 0.5933
Epoch [10/10], Step [9000/12500] Loss: 0.0703
Epoch [10/10], Step [10000/12500] Loss: 0.3460
Epoch [10/10], Step [11000/12500] Loss: 0.0148
Epoch [10/10], Step [12000/12500] Loss: 0.3395
Accuracy of the model on the test images: 81.77 %
```
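The manual `update_lr` helper re-implements what `torch.optim.lr_scheduler` already ships; an equivalent sketch (an alternative, not what this cell ran) matching the divide-by-3-every-20-epochs rule:

```python
from torch.optim.lr_scheduler import StepLR

scheduler = StepLR(optimizer, step_size=20, gamma=1 / 3)  # lr /= 3 every 20 epochs
# then, once per epoch after the inner batch loop:
# scheduler.step()
```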
```python
import matplotlib.pyplot as plt

net = ResNet(ResidualBlock, [2, 2, 2])
net.load_state_dict(torch.load('resnet.ckpt'))
net.eval()  # without this, the BatchNorm layers keep using per-batch statistics

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
```
ResNet with Adam optimizer, lr=0.001, 10 epochs:

Accuracy for class: plane is 78.9 %
Accuracy for class: car is 86.5 %
Accuracy for class: bird is 75.0 %
Accuracy for class: cat is 55.5 %
Accuracy for class: deer is 75.5 %
Accuracy for class: dog is 78.2 %
Accuracy for class: frog is 82.3 %
Accuracy for class: horse is 88.5 %
Accuracy for class: ship is 91.2 %
Accuracy for class: truck is 89.6 %

Accuracy of the network on the 10000 test images: 80 %
```python
model = ResNet(ResidualBlock, [2, 2, 2]).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# For updating learning rate
def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Train the model
total_step = len(trainloader)
curr_lr = learning_rate
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(trainloader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 1000 == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
path_sgd = 'resnet.sdg.ckpt'
torch.save(model.state_dict(), path_sgd)
```
```python
net = ResNet(ResidualBlock, [2, 2, 2])
net.load_state_dict(torch.load(path_sgd))
net.eval()  # BatchNorm in eval mode

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
```
Same lr (0.001), ResNet, 10 epochs, using plain Stochastic Gradient Descent:

Accuracy for class: plane is 73.2 %
Accuracy for class: car is 85.7 %
Accuracy for class: bird is 60.8 %
Accuracy for class: cat is 47.6 %
Accuracy for class: deer is 65.6 %
Accuracy for class: dog is 63.4 %
Accuracy for class: frog is 82.0 %
Accuracy for class: horse is 68.7 %
Accuracy for class: ship is 85.2 %
Accuracy for class: truck is 83.2 %

Accuracy of the network on the 10000 test images: 71 %
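Worth noting (an added observation, not tested in this notebook): the run above pairs lr=0.001 with no momentum, while the ScratchNet run earlier used the same learning rate with momentum 0.9; adding momentum here would likely close part of the gap to Adam. A one-line hypothetical variant:

```python
# Hypothetical variant: SGD with momentum, otherwise identical to the run above.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
```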
```python
model = ResNet(ResidualBlock, [2, 2, 2]).to(device)
optimizer = torch.optim.ASGD(model.parameters(), lr=learning_rate)

# For updating learning rate
def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Train the model
total_step = len(trainloader)
curr_lr = learning_rate
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(trainloader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 1000 == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
path_asgd = 'resnet.asgd.ckpt'
torch.save(model.state_dict(), path_asgd)
```
```python
net = ResNet(ResidualBlock, [2, 2, 2])
net.load_state_dict(torch.load(path_asgd))
net.eval()  # BatchNorm in eval mode

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
```
Same lr (0.001), ResNet, 10 epochs, using Averaged Stochastic Gradient Descent (torch.optim.ASGD):

Accuracy for class: plane is 77.8 %
Accuracy for class: car is 86.7 %
Accuracy for class: bird is 58.4 %
Accuracy for class: cat is 54.8 %
Accuracy for class: deer is 69.0 %
Accuracy for class: dog is 58.5 %
Accuracy for class: frog is 77.4 %
Accuracy for class: horse is 79.2 %
Accuracy for class: ship is 83.2 %
Accuracy for class: truck is 86.2 %

Accuracy of the network on the 10000 test images: 73 %
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "model = ResNet(ResidualBlock, [2, 2, 2]).to(device)\n", | |
| "optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)\n", | |
| "\n", | |
| "# For updating learning rate\n", | |
| "def update_lr(optimizer, lr): \n", | |
| " for param_group in optimizer.param_groups:\n", | |
| " param_group['lr'] = lr\n", | |
| "\n", | |
| "# Train the model\n", | |
| "total_step = len(trainloader)\n", | |
| "curr_lr = learning_rate\n", | |
| "for epoch in range(num_epochs):\n", | |
| " for i, (images, labels) in enumerate(trainloader):\n", | |
| " images = images.to(device)\n", | |
| " labels = labels.to(device)\n", | |
| "\n", | |
| " # Forward pass\n", | |
| " outputs = model(images)\n", | |
| " loss = criterion(outputs, labels)\n", | |
| "\n", | |
| " # Backward and optimize\n", | |
| " optimizer.zero_grad()\n", | |
| " loss.backward()\n", | |
| " optimizer.step()\n", | |
| "\n", | |
| " if (i+1) % 1000 == 0:\n", | |
| " print (\"Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}\"\n", | |
| " .format(epoch+1, num_epochs, i+1, total_step, loss.item()))\n", | |
| "\n", | |
| " # Decay learning rate\n", | |
| " if (epoch+1) % 20 == 0:\n", | |
| " curr_lr /= 3\n", | |
| " update_lr(optimizer, curr_lr)\n", | |
| "\n", | |
| "# Test the model\n", | |
| "model.eval()\n", | |
| "with torch.no_grad():\n", | |
| " correct = 0\n", | |
| " total = 0\n", | |
| " for images, labels in testloader:\n", | |
| " images = images.to(device)\n", | |
| " labels = labels.to(device)\n", | |
| " outputs = model(images)\n", | |
| " _, predicted = torch.max(outputs.data, 1)\n", | |
| " total += labels.size(0)\n", | |
| " correct += (predicted == labels).sum().item()\n", | |
| "\n", | |
| " print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))\n", | |
| "\n", | |
| "# Save the model checkpoint\n", | |
| "path_RMSprop = 'resnet.RMSprop.ckpt'\n", | |
| "torch.save(model.state_dict(), path_RMSprop)\n", | |
| "\n", | |
| "# ---------------#\n", | |
| "\n", | |
| "net = ResNet(ResidualBlock, [2, 2, 2])\n", | |
| "net.load_state_dict(torch.load(path_RMSprop))\n", | |
| "\n", | |
| "correct = 0\n", | |
| "total = 0\n", | |
| "# since we're not training, we don't need to calculate the gradients for our outputs\n", | |
| "with torch.no_grad():\n", | |
| " for data in testloader:\n", | |
| " images, labels = data\n", | |
| " # calculate outputs by running images through the network\n", | |
| " outputs = net(images)\n", | |
| " # the class with the highest energy is what we choose as prediction\n", | |
| " _, predicted = torch.max(outputs.data, 1)\n", | |
| " total += labels.size(0)\n", | |
| " correct += (predicted == labels).sum().item()\n", | |
| "\n", | |
| "# prepare to count predictions for each class\n", | |
| "correct_pred = {classname: 0 for classname in classes}\n", | |
| "total_pred = {classname: 0 for classname in classes}\n", | |
| "\n", | |
| "# again no gradients needed\n", | |
| "with torch.no_grad():\n", | |
| " for data in testloader:\n", | |
| " images, labels = data\n", | |
| " outputs = net(images)\n", | |
| " _, predictions = torch.max(outputs, 1)\n", | |
| " # collect the correct predictions for each class\n", | |
| " for label, prediction in zip(labels, predictions):\n", | |
| " if label == prediction:\n", | |
| " correct_pred[classes[label]] += 1\n", | |
| " total_pred[classes[label]] += 1\n", | |
| "\n", | |
| "\n", | |
| "# print accuracy for each class\n", | |
| "for classname, correct_count in correct_pred.items():\n", | |
| " accuracy = 100 * float(correct_count) / total_pred[classname]\n", | |
| " print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')\n", | |
| "print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')" | |
| ], | |
| "metadata": { | |
| "id": "x-37crNBj_u5" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "Root Mean Squared Propagation, Resnet, lr = 0.001\n", | |
| "\n", | |
| "Accuracy of the model on the test images: 82.08 %\n", | |
| "\n", | |
| "Accuracy for class: plane is 84.3 %\n", | |
| "\n", | |
| "Accuracy for class: car is 87.6 %\n", | |
| "\n", | |
| "Accuracy for class: bird is 69.4 %\n", | |
| "\n", | |
| "Accuracy for class: cat is 54.5 %\n", | |
| "\n", | |
| "Accuracy for class: deer is 76.0 %\n", | |
| "\n", | |
| "Accuracy for class: dog is 79.3 %\n", | |
| "\n", | |
| "Accuracy for class: frog is 87.8 %\n", | |
| "\n", | |
| "Accuracy for class: horse is 85.7 %\n", | |
| "\n", | |
| "Accuracy for class: ship is 88.9 %\n", | |
| "\n", | |
| "Accuracy for class: truck is 93.7 %\n", | |
| "\n", | |
| "Accuracy of the network on the 10000 test images: 80 %" | |
| ], | |
| "metadata": { | |
| "id": "LwLNevLhux1D" | |
| } | |
| }, | |
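| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "For reference, the standard RMSprop update (PyTorch defaults: $\\alpha = 0.99$, $\\epsilon = 10^{-8}$): it keeps a running average of squared gradients and scales each step by it,\n", | |
| "\n", | |
| "$$v_t = \\alpha v_{t-1} + (1-\\alpha)\\,g_t^2, \\qquad \\theta_{t+1} = \\theta_t - \\frac{\\eta}{\\sqrt{v_t} + \\epsilon}\\,g_t$$" | |
| ], | |
| "metadata": {} | |
| }, | |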
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "model = ResNet(ResidualBlock, [2, 2, 2]).to(device)\n", | |
| "optimizer = torch.optim.Rprop(model.parameters(), lr=learning_rate)\n", | |
| "\n", | |
| "# For updating learning rate\n", | |
| "def update_lr(optimizer, lr): \n", | |
| " for param_group in optimizer.param_groups:\n", | |
| " param_group['lr'] = lr\n", | |
| "\n", | |
| "# Train the model\n", | |
| "total_step = len(trainloader)\n", | |
| "curr_lr = learning_rate\n", | |
| "for epoch in range(num_epochs):\n", | |
| " for i, (images, labels) in enumerate(trainloader):\n", | |
| " images = images.to(device)\n", | |
| " labels = labels.to(device)\n", | |
| "\n", | |
| " # Forward pass\n", | |
| " outputs = model(images)\n", | |
| " loss = criterion(outputs, labels)\n", | |
| "\n", | |
| " # Backward and optimize\n", | |
| " optimizer.zero_grad()\n", | |
| " loss.backward()\n", | |
| " optimizer.step()\n", | |
| "\n", | |
| " if (i+1) % 1000 == 0:\n", | |
| " print (\"Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}\"\n", | |
| " .format(epoch+1, num_epochs, i+1, total_step, loss.item()))\n", | |
| "\n", | |
| " # Decay learning rate\n", | |
| " if (epoch+1) % 20 == 0:\n", | |
| " curr_lr /= 3\n", | |
| " update_lr(optimizer, curr_lr)\n", | |
| "\n", | |
| "# Test the model\n", | |
| "model.eval()\n", | |
| "with torch.no_grad():\n", | |
| " correct = 0\n", | |
| " total = 0\n", | |
| " for images, labels in testloader:\n", | |
| " images = images.to(device)\n", | |
| " labels = labels.to(device)\n", | |
| " outputs = model(images)\n", | |
| " _, predicted = torch.max(outputs.data, 1)\n", | |
| " total += labels.size(0)\n", | |
| " correct += (predicted == labels).sum().item()\n", | |
| "\n", | |
| " print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))\n", | |
| "\n", | |
| "# Save the model checkpoint\n", | |
| "path_Rprop = 'resnet.Rprop.ckpt'\n", | |
| "torch.save(model.state_dict(), path_Rprop)\n", | |
| "\n", | |
| "\n", | |
| "net = ResNet(ResidualBlock, [2, 2, 2])\n", | |
| "net.load_state_dict(torch.load(path_Rprop))\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "correct = 0\n", | |
| "total = 0\n", | |
| "# since we're not training, we don't need to calculate the gradients for our outputs\n", | |
| "with torch.no_grad():\n", | |
| " for data in testloader:\n", | |
| " images, labels = data\n", | |
| " # calculate outputs by running images through the network\n", | |
| " outputs = net(images)\n", | |
| " # the class with the highest energy is what we choose as prediction\n", | |
| " _, predicted = torch.max(outputs.data, 1)\n", | |
| " total += labels.size(0)\n", | |
| " correct += (predicted == labels).sum().item()\n", | |
| "\n", | |
| "# prepare to count predictions for each class\n", | |
| "correct_pred = {classname: 0 for classname in classes}\n", | |
| "total_pred = {classname: 0 for classname in classes}\n", | |
| "\n", | |
| "# again no gradients needed\n", | |
| "with torch.no_grad():\n", | |
| " for data in testloader:\n", | |
| " images, labels = data\n", | |
| " outputs = net(images)\n", | |
| " _, predictions = torch.max(outputs, 1)\n", | |
| " # collect the correct predictions for each class\n", | |
| " for label, prediction in zip(labels, predictions):\n", | |
| " if label == prediction:\n", | |
| " correct_pred[classes[label]] += 1\n", | |
| " total_pred[classes[label]] += 1\n", | |
| "\n", | |
| "\n", | |
| "# print accuracy for each class\n", | |
| "for classname, correct_count in correct_pred.items():\n", | |
| " accuracy = 100 * float(correct_count) / total_pred[classname]\n", | |
| " print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')\n", | |
| "print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')" | |
| ], | |
| "metadata": { | |
| "id": "0vK4hs1llmkK" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "Resilient backpropagation, Resnet, lr = 0.001\n", | |
| "\n", | |
| "Accuracy of the model on the test images: 46.02 %\n", | |
| "\n", | |
| "Accuracy for class: plane is 46.6 %\n", | |
| "\n", | |
| "Accuracy for class: car is 47.5 %\n", | |
| "\n", | |
| "Accuracy for class: bird is 24.5 %\n", | |
| "\n", | |
| "Accuracy for class: cat is 16.2 %\n", | |
| "\n", | |
| "Accuracy for class: deer is 17.9 %\n", | |
| "\n", | |
| "Accuracy for class: dog is 52.6 %\n", | |
| "\n", | |
| "Accuracy for class: frog is 44.8 %\n", | |
| "\n", | |
| "Accuracy for class: horse is 51.9 %\n", | |
| "\n", | |
| "Accuracy for class: ship is 53.9 %\n", | |
| "\n", | |
| "Accuracy for class: truck is 50.5 %\n", | |
| "\n", | |
| "Accuracy of the network on the 10000 test images: 40 %" | |
| ], | |
| "metadata": { | |
| "id": "xQqf1SLNyGvi" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "model = ResNet(ResidualBlock, [2, 2, 2]).to(device)\n", | |
| "\n", | |
| "\n", | |
| "# Loss and optimizer\n", | |
| "criterion = nn.CrossEntropyLoss()\n", | |
| "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n", | |
| "\n", | |
| "# For updating learning rate\n", | |
| "def update_lr(optimizer, lr): \n", | |
| " for param_group in optimizer.param_groups:\n", | |
| " param_group['lr'] = lr\n", | |
| "\n", | |
| "# Train the model\n", | |
| "total_step = len(trainloader_augmented)\n", | |
| "curr_lr = learning_rate\n", | |
| "for epoch in range(num_epochs):\n", | |
| " for i, (images, labels) in enumerate(trainloader_augmented):\n", | |
| " images = images.to(device)\n", | |
| " labels = labels.to(device)\n", | |
| "\n", | |
| " # Forward pass\n", | |
| " outputs = model(images)\n", | |
| " loss = criterion(outputs, labels)\n", | |
| "\n", | |
| " # Backward and optimize\n", | |
| " optimizer.zero_grad()\n", | |
| " loss.backward()\n", | |
| " optimizer.step()\n", | |
| "\n", | |
| " if (i+1) % 1000 == 0:\n", | |
| " print (\"Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}\"\n", | |
| " .format(epoch+1, num_epochs, i+1, total_step, loss.item()))\n", | |
| "\n", | |
| " # Decay learning rate\n", | |
| " if (epoch+1) % 20 == 0:\n", | |
| " curr_lr /= 3\n", | |
| " update_lr(optimizer, curr_lr)\n", | |
| "\n", | |
| "# Test the model\n", | |
| "model.eval()\n", | |
| "with torch.no_grad():\n", | |
| " correct = 0\n", | |
| " total = 0\n", | |
| " for images, labels in testloader:\n", | |
| " images = images.to(device)\n", | |
| " labels = labels.to(device)\n", | |
| " outputs = model(images)\n", | |
| " _, predicted = torch.max(outputs.data, 1)\n", | |
| " total += labels.size(0)\n", | |
| " correct += (predicted == labels).sum().item()\n", | |
| "\n", | |
| " print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))\n", | |
| "\n", | |
| "# Save the model checkpoint\n", | |
| "PATH_ADAM_AUGEMENTED = 'resnet.ckpt'\n", | |
| "torch.save(model.state_dict(), PATH_ADAM_AUGEMENTED)\n", | |
| "\n" | |
| ], | |
| "metadata": { | |
| "id": "Wc9LrbZQTESF", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "outputId": "cb7cce7b-2852-4ae5-9d9e-5963b9d2a38e" | |
| }, | |
| "execution_count": 5, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Epoch [1/10], Step [1000/12500] Loss: 1.6488\n", | |
| "Epoch [1/10], Step [2000/12500] Loss: 1.5001\n", | |
| "Epoch [1/10], Step [3000/12500] Loss: 1.2900\n", | |
| "Epoch [1/10], Step [4000/12500] Loss: 2.4702\n", | |
| "Epoch [1/10], Step [5000/12500] Loss: 2.2237\n", | |
| "Epoch [1/10], Step [6000/12500] Loss: 0.8622\n", | |
| "Epoch [1/10], Step [7000/12500] Loss: 2.3030\n", | |
| "Epoch [1/10], Step [8000/12500] Loss: 1.0766\n", | |
| "Epoch [1/10], Step [9000/12500] Loss: 1.0635\n", | |
| "Epoch [1/10], Step [10000/12500] Loss: 0.7565\n", | |
| "Epoch [1/10], Step [11000/12500] Loss: 1.1626\n", | |
| "Epoch [1/10], Step [12000/12500] Loss: 1.1515\n", | |
| "Epoch [2/10], Step [1000/12500] Loss: 1.0389\n", | |
| "Epoch [2/10], Step [2000/12500] Loss: 1.2625\n", | |
| "Epoch [2/10], Step [3000/12500] Loss: 2.1380\n", | |
| "Epoch [2/10], Step [4000/12500] Loss: 1.2180\n", | |
| "Epoch [2/10], Step [5000/12500] Loss: 1.1856\n", | |
| "Epoch [2/10], Step [6000/12500] Loss: 0.9694\n", | |
| "Epoch [2/10], Step [7000/12500] Loss: 1.0996\n", | |
| "Epoch [2/10], Step [8000/12500] Loss: 1.1586\n", | |
| "Epoch [2/10], Step [9000/12500] Loss: 1.5745\n", | |
| "Epoch [2/10], Step [10000/12500] Loss: 0.9466\n", | |
| "Epoch [2/10], Step [11000/12500] Loss: 0.4195\n", | |
| "Epoch [2/10], Step [12000/12500] Loss: 1.2954\n", | |
| "Epoch [3/10], Step [1000/12500] Loss: 0.4085\n", | |
| "Epoch [3/10], Step [2000/12500] Loss: 1.4312\n", | |
| "Epoch [3/10], Step [3000/12500] Loss: 1.4415\n", | |
| "Epoch [3/10], Step [4000/12500] Loss: 0.6472\n", | |
| "Epoch [3/10], Step [5000/12500] Loss: 0.2433\n", | |
| "Epoch [3/10], Step [6000/12500] Loss: 1.1598\n", | |
| "Epoch [3/10], Step [7000/12500] Loss: 1.3151\n", | |
| "Epoch [3/10], Step [8000/12500] Loss: 0.4340\n", | |
| "Epoch [3/10], Step [9000/12500] Loss: 0.4305\n", | |
| "Epoch [3/10], Step [10000/12500] Loss: 1.0205\n", | |
| "Epoch [3/10], Step [11000/12500] Loss: 1.3396\n", | |
| "Epoch [3/10], Step [12000/12500] Loss: 0.3702\n", | |
| "Epoch [4/10], Step [1000/12500] Loss: 0.2822\n", | |
| "Epoch [4/10], Step [2000/12500] Loss: 0.5113\n", | |
| "Epoch [4/10], Step [3000/12500] Loss: 0.5215\n", | |
| "Epoch [4/10], Step [4000/12500] Loss: 1.1429\n", | |
| "Epoch [4/10], Step [5000/12500] Loss: 0.3803\n", | |
| "Epoch [4/10], Step [6000/12500] Loss: 0.3917\n", | |
| "Epoch [4/10], Step [7000/12500] Loss: 1.2291\n", | |
| "Epoch [4/10], Step [8000/12500] Loss: 0.0800\n", | |
| "Epoch [4/10], Step [9000/12500] Loss: 1.0293\n", | |
| "Epoch [4/10], Step [10000/12500] Loss: 0.1283\n", | |
| "Epoch [4/10], Step [11000/12500] Loss: 0.0295\n", | |
| "Epoch [4/10], Step [12000/12500] Loss: 0.1379\n", | |
| "Epoch [5/10], Step [1000/12500] Loss: 1.8303\n", | |
| "Epoch [5/10], Step [2000/12500] Loss: 0.5930\n", | |
| "Epoch [5/10], Step [3000/12500] Loss: 0.9088\n", | |
| "Epoch [5/10], Step [4000/12500] Loss: 0.2855\n", | |
| "Epoch [5/10], Step [5000/12500] Loss: 0.0507\n", | |
| "Epoch [5/10], Step [6000/12500] Loss: 1.1488\n", | |
| "Epoch [5/10], Step [7000/12500] Loss: 0.3918\n", | |
| "Epoch [5/10], Step [8000/12500] Loss: 1.0577\n", | |
| "Epoch [5/10], Step [9000/12500] Loss: 0.2474\n", | |
| "Epoch [5/10], Step [10000/12500] Loss: 0.3767\n", | |
| "Epoch [5/10], Step [11000/12500] Loss: 0.0803\n", | |
| "Epoch [5/10], Step [12000/12500] Loss: 0.6229\n", | |
| "Epoch [6/10], Step [1000/12500] Loss: 1.8214\n", | |
| "Epoch [6/10], Step [2000/12500] Loss: 0.2744\n", | |
| "Epoch [6/10], Step [3000/12500] Loss: 1.1929\n", | |
| "Epoch [6/10], Step [4000/12500] Loss: 0.1360\n", | |
| "Epoch [6/10], Step [5000/12500] Loss: 0.6343\n", | |
| "Epoch [6/10], Step [6000/12500] Loss: 1.9563\n", | |
| "Epoch [6/10], Step [7000/12500] Loss: 0.1162\n", | |
| "Epoch [6/10], Step [8000/12500] Loss: 0.1928\n", | |
| "Epoch [6/10], Step [9000/12500] Loss: 0.0924\n", | |
| "Epoch [6/10], Step [10000/12500] Loss: 0.1161\n", | |
| "Epoch [6/10], Step [11000/12500] Loss: 0.8213\n", | |
| "Epoch [6/10], Step [12000/12500] Loss: 0.6539\n", | |
| "Epoch [7/10], Step [1000/12500] Loss: 1.0220\n", | |
| "Epoch [7/10], Step [2000/12500] Loss: 1.1749\n", | |
| "Epoch [7/10], Step [3000/12500] Loss: 0.6238\n", | |
| "Epoch [7/10], Step [4000/12500] Loss: 0.5763\n", | |
| "Epoch [7/10], Step [5000/12500] Loss: 0.1381\n", | |
| "Epoch [7/10], Step [6000/12500] Loss: 0.3322\n", | |
| "Epoch [7/10], Step [7000/12500] Loss: 0.3666\n", | |
| "Epoch [7/10], Step [8000/12500] Loss: 0.0246\n", | |
| "Epoch [7/10], Step [9000/12500] Loss: 2.1057\n", | |
| "Epoch [7/10], Step [10000/12500] Loss: 0.4271\n", | |
| "Epoch [7/10], Step [11000/12500] Loss: 0.1509\n", | |
| "Epoch [7/10], Step [12000/12500] Loss: 0.5697\n", | |
| "Epoch [8/10], Step [1000/12500] Loss: 0.9959\n", | |
| "Epoch [8/10], Step [2000/12500] Loss: 0.4746\n", | |
| "Epoch [8/10], Step [3000/12500] Loss: 1.4254\n", | |
| "Epoch [8/10], Step [4000/12500] Loss: 0.1075\n", | |
| "Epoch [8/10], Step [5000/12500] Loss: 0.2413\n", | |
| "Epoch [8/10], Step [6000/12500] Loss: 0.8106\n", | |
| "Epoch [8/10], Step [7000/12500] Loss: 0.6026\n", | |
| "Epoch [8/10], Step [8000/12500] Loss: 2.4376\n", | |
| "Epoch [8/10], Step [9000/12500] Loss: 0.0220\n", | |
| "Epoch [8/10], Step [10000/12500] Loss: 1.1986\n", | |
| "Epoch [8/10], Step [11000/12500] Loss: 0.6351\n", | |
| "Epoch [8/10], Step [12000/12500] Loss: 0.3078\n", | |
| "Epoch [9/10], Step [1000/12500] Loss: 0.1422\n", | |
| "Epoch [9/10], Step [2000/12500] Loss: 0.1972\n", | |
| "Epoch [9/10], Step [3000/12500] Loss: 0.0723\n", | |
| "Epoch [9/10], Step [4000/12500] Loss: 0.0459\n", | |
| "Epoch [9/10], Step [5000/12500] Loss: 0.3117\n", | |
| "Epoch [9/10], Step [6000/12500] Loss: 0.0568\n", | |
| "Epoch [9/10], Step [7000/12500] Loss: 0.1940\n", | |
| "Epoch [9/10], Step [8000/12500] Loss: 0.4410\n", | |
| "Epoch [9/10], Step [9000/12500] Loss: 0.0609\n", | |
| "Epoch [9/10], Step [10000/12500] Loss: 1.0699\n", | |
| "Epoch [9/10], Step [11000/12500] Loss: 0.0101\n", | |
| "Epoch [9/10], Step [12000/12500] Loss: 0.2711\n", | |
| "Epoch [10/10], Step [1000/12500] Loss: 0.3460\n", | |
| "Epoch [10/10], Step [2000/12500] Loss: 0.2487\n", | |
| "Epoch [10/10], Step [3000/12500] Loss: 1.0143\n", | |
| "Epoch [10/10], Step [4000/12500] Loss: 0.1073\n", | |
| "Epoch [10/10], Step [5000/12500] Loss: 0.2611\n", | |
| "Epoch [10/10], Step [6000/12500] Loss: 0.4431\n", | |
| "Epoch [10/10], Step [7000/12500] Loss: 0.5115\n", | |
| "Epoch [10/10], Step [8000/12500] Loss: 0.3097\n", | |
| "Epoch [10/10], Step [9000/12500] Loss: 0.0407\n", | |
| "Epoch [10/10], Step [10000/12500] Loss: 0.1351\n", | |
| "Epoch [10/10], Step [11000/12500] Loss: 0.5124\n", | |
| "Epoch [10/10], Step [12000/12500] Loss: 1.2812\n", | |
| "Accuracy of the model on the test images: 82.46 %\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "net = ResNet(ResidualBlock, [2, 2, 2])\n", | |
| "net.load_state_dict(torch.load(PATH_ADAM_AUGEMENTED))\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "correct = 0\n", | |
| "total = 0\n", | |
| "# since we're not training, we don't need to calculate the gradients for our outputs\n", | |
| "with torch.no_grad():\n", | |
| " for data in testloader_augmented:\n", | |
| " images, labels = data\n", | |
| " # calculate outputs by running images through the network\n", | |
| " outputs = net(images)\n", | |
| " # the class with the highest energy is what we choose as prediction\n", | |
| " _, predicted = torch.max(outputs.data, 1)\n", | |
| " total += labels.size(0)\n", | |
| " correct += (predicted == labels).sum().item()\n", | |
| "\n", | |
| "# prepare to count predictions for each class\n", | |
| "correct_pred = {classname: 0 for classname in classes}\n", | |
| "total_pred = {classname: 0 for classname in classes}\n", | |
| "\n", | |
| "# again no gradients needed\n", | |
| "with torch.no_grad():\n", | |
| " for data in testloader_augmented:\n", | |
| " images, labels = data\n", | |
| " outputs = net(images)\n", | |
| " _, predictions = torch.max(outputs, 1)\n", | |
| " # collect the correct predictions for each class\n", | |
| " for label, prediction in zip(labels, predictions):\n", | |
| " if label == prediction:\n", | |
| " correct_pred[classes[label]] += 1\n", | |
| " total_pred[classes[label]] += 1\n", | |
| "\n", | |
| "\n", | |
| " # print accuracy for each class\n", | |
| " for classname, correct_count in correct_pred.items():\n", | |
| " accuracy = 100 * float(correct_count) / total_pred[classname]\n", | |
| " print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')\n", | |
| " print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')" | |
| ], | |
| "metadata": { | |
| "id": "9EjEroZfNGZk" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "Accuracy for the model using augemented data vs non-augemented(Resnet, Adam optimizer)\n", | |
| "\n", | |
| "Accuracy for class: plane is 82.3 % vs 78.9 %\n", | |
| "\n", | |
| "Accuracy for class: car is 91.5 % vs 86.5 %\n", | |
| "\n", | |
| "Accuracy for class: bird is 64.0 % vs 75.0 %\n", | |
| "\n", | |
| "Accuracy for class: cat is 57.5 % vs 55.5 %\n", | |
| "\n", | |
| "Accuracy for class: deer is 87.8 % vs 75.5 %\n", | |
| "\n", | |
| "Accuracy for class: dog is 73.5 % vs 78.2 %\n", | |
| "\n", | |
| "Accuracy for class: frog is 89.6 % vs 82.3 %\n", | |
| "\n", | |
| "Accuracy for class: horse is 82.5 % vs 88.5 %\n", | |
| "\n", | |
| "Accuracy for class: ship is 88.0 % vs 91.2 %\n", | |
| "\n", | |
| "Accuracy for class: truck is 89.9 % vs 89.6 %\n", | |
| "\n", | |
| "Accuracy of the network on the 10000 test images: 80 %(same everywhere)\n", | |
| "\n" | |
| ], | |
| "metadata": { | |
| "id": "_XqgowhHMHEa" | |
| } | |
| }, | |
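| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "For context, a sketch of how trainloader_augmented might be built. This is an assumption for illustration: the actual transforms are defined earlier in the notebook, and batch_size=4 is inferred from the 12500 steps per epoch in the training log above:" | |
| ], | |
| "metadata": {} | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Hypothetical augmentation pipeline: random crops and horizontal flips\n", | |
| "# are the usual CIFAR-10 choices; the notebook's real transforms may differ.\n", | |
| "import torch\n", | |
| "import torchvision\n", | |
| "import torchvision.transforms as transforms\n", | |
| "\n", | |
| "transform_augmented = transforms.Compose([\n", | |
| "    transforms.RandomCrop(32, padding=4),\n", | |
| "    transforms.RandomHorizontalFlip(),\n", | |
| "    transforms.ToTensor(),\n", | |
| "    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n", | |
| "])\n", | |
| "trainset = torchvision.datasets.CIFAR10(root='./data', train=True,\n", | |
| "                                        download=True, transform=transform_augmented)\n", | |
| "trainloader_augmented = torch.utils.data.DataLoader(\n", | |
| "    trainset, batch_size=4, shuffle=True, num_workers=2)" | |
| ], | |
| "metadata": {}, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |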
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "PATH_ADAM_AUGEMENTED = 'resnet.ckpt'\n", | |
| "model = ResNet(ResidualBlock, [2, 2, 2]).to(device)\n", | |
| "model.load_state_dict(torch.load(PATH_ADAM_AUGEMENTED))\n", | |
| "\n", | |
| "# Loss and optimizer\n", | |
| "criterion = nn.CrossEntropyLoss()\n", | |
| "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n", | |
| "\n", | |
| "# For updating learning rate\n", | |
| "def update_lr(optimizer, lr): \n", | |
| " for param_group in optimizer.param_groups:\n", | |
| " param_group['lr'] = lr\n", | |
| "\n", | |
| "# Train the model\n", | |
| "total_step = len(trainloader_augmented)\n", | |
| "curr_lr = learning_rate\n", | |
| "for epoch in range(num_epochs):\n", | |
| " for i, (images, labels) in enumerate(trainloader_augmented):\n", | |
| " images = images.to(device)\n", | |
| " labels = labels.to(device)\n", | |
| "\n", | |
| " # Forward pass\n", | |
| " outputs = model(images)\n", | |
| " loss = criterion(outputs, labels)\n", | |
| "\n", | |
| " # Backward and optimize\n", | |
| " optimizer.zero_grad()\n", | |
| " loss.backward()\n", | |
| " optimizer.step()\n", | |
| "\n", | |
| " if (i+1) % 1000 == 0:\n", | |
| " print (\"Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}\"\n", | |
| " .format(epoch+1, num_epochs, i+1, total_step, loss.item()))\n", | |
| "\n", | |
| " # Decay learning rate\n", | |
| " if (epoch+1) % 20 == 0:\n", | |
| " curr_lr /= 3\n", | |
| " update_lr(optimizer, curr_lr)\n", | |
| "\n", | |
| "# Test the model\n", | |
| "model.eval()\n", | |
| "with torch.no_grad():\n", | |
| " correct = 0\n", | |
| " total = 0\n", | |
| " for images, labels in testloader:\n", | |
| " images = images.to(device)\n", | |
| " labels = labels.to(device)\n", | |
| " outputs = model(images)\n", | |
| " _, predicted = torch.max(outputs.data, 1)\n", | |
| " total += labels.size(0)\n", | |
| " correct += (predicted == labels).sum().item()\n", | |
| "\n", | |
| " print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))\n", | |
| "\n", | |
| "# Save the model checkpoint\n", | |
| "PATH_ADAM_AUGEMENTED_TUNED = 'resnet_finetuned.ckpt'\n", | |
| "torch.save(model.state_dict(), PATH_ADAM_AUGEMENTED_TUNED)\n", | |
| "\n", | |
| "net = ResNet(ResidualBlock, [2, 2, 2])\n", | |
| "net.load_state_dict(torch.load(PATH_ADAM_AUGEMENTED_TUNED))\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "correct = 0\n", | |
| "total = 0\n", | |
| "# since we're not training, we don't need to calculate the gradients for our outputs\n", | |
| "with torch.no_grad():\n", | |
| " for data in testloader_augmented:\n", | |
| " images, labels = data\n", | |
| " # calculate outputs by running images through the network\n", | |
| " outputs = net(images)\n", | |
| " # the class with the highest energy is what we choose as prediction\n", | |
| " _, predicted = torch.max(outputs.data, 1)\n", | |
| " total += labels.size(0)\n", | |
| " correct += (predicted == labels).sum().item()\n", | |
| "\n", | |
| "# prepare to count predictions for each class\n", | |
| "correct_pred = {classname: 0 for classname in classes}\n", | |
| "total_pred = {classname: 0 for classname in classes}\n", | |
| "\n", | |
| "# again no gradients needed\n", | |
| "with torch.no_grad():\n", | |
| " for data in testloader_augmented:\n", | |
| " images, labels = data\n", | |
| " outputs = net(images)\n", | |
| " _, predictions = torch.max(outputs, 1)\n", | |
| " # collect the correct predictions for each class\n", | |
| " for label, prediction in zip(labels, predictions):\n", | |
| " if label == prediction:\n", | |
| " correct_pred[classes[label]] += 1\n", | |
| " total_pred[classes[label]] += 1\n", | |
| "\n", | |
| "\n", | |
| " # print accuracy for each class\n", | |
| " for classname, correct_count in correct_pred.items():\n", | |
| " accuracy = 100 * float(correct_count) / total_pred[classname]\n", | |
| " print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')\n", | |
| " print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')" | |
| ], | |
| "metadata": { | |
| "id": "kVX68we3Qk1y" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "The training is done on an already loaded model, I might have understood fine tunning wrong but I was expecting better results. \n", | |
| "\n", | |
| "Accuracy for class: plane is 86.4 %\n", | |
| "\n", | |
| "Accuracy for class: car is 92.1 %\n", | |
| "\n", | |
| "Accuracy for class: bird is 68.7 %\n", | |
| "\n", | |
| "Accuracy for class: cat is 64.3 %\n", | |
| "\n", | |
| "Accuracy for class: deer is 78.9 %\n", | |
| "\n", | |
| "Accuracy for class: dog is 69.5 %\n", | |
| "\n", | |
| "Accuracy for class: frog is 73.2 %\n", | |
| "\n", | |
| "Accuracy for class: horse is 91.7 %\n", | |
| "\n", | |
| "Accuracy for class: ship is 89.3 %\n", | |
| "\n", | |
| "Accuracy for class: truck is 85.6 %\n", | |
| "\n", | |
| "Accuracy of the network on the 10000 test images: 79 %" | |
| ], | |
| "metadata": { | |
| "id": "JcLYHTwnq0rf" | |
| } | |
| }, | |
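| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "A minimal sketch of a more conventional fine-tuning recipe (an illustrative assumption, not what this notebook ran): freeze the backbone and retrain only the final classifier at a reduced learning rate. The layer name fc and the choice of checkpoint are assumptions." | |
| ], | |
| "metadata": {} | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Hypothetical fine-tuning sketch: freeze all layers except the classifier.\n", | |
| "model = ResNet(ResidualBlock, [2, 2, 2]).to(device)\n", | |
| "model.load_state_dict(torch.load(PATH_ADAM_AUGEMENTED))\n", | |
| "\n", | |
| "for param in model.parameters():\n", | |
| "    param.requires_grad = False  # freeze the whole network\n", | |
| "for param in model.fc.parameters():  # assumes the final layer is named fc\n", | |
| "    param.requires_grad = True  # unfreeze only the classifier\n", | |
| "\n", | |
| "# Optimize just the classifier parameters at a smaller learning rate.\n", | |
| "optimizer = torch.optim.Adam(model.fc.parameters(), lr=learning_rate / 10)" | |
| ], | |
| "metadata": {}, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |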
| ] | |
| } |