Created
May 26, 2019 16:41
-
-
Save mde-2590/52bdd7015ff5194ea192e509b92da2d6 to your computer and use it in GitHub Desktop.
Created on Cognitive Class Labs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import matplotlib.pylab as plt" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "filename = \"https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DA0101EN/auto.csv\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "headers = [\"symboling\",\"normalized-losses\",\"make\",\"fuel-type\",\"aspiration\", \"num-of-doors\",\"body-style\",\n", | |
| " \"drive-wheels\",\"engine-location\",\"wheel-base\", \"length\",\"width\",\"height\",\"curb-weight\",\"engine-type\",\n", | |
| " \"num-of-cylinders\", \"engine-size\",\"fuel-system\",\"bore\",\"stroke\",\"compression-ratio\",\"horsepower\",\n", | |
| " \"peak-rpm\",\"city-mpg\",\"highway-mpg\",\"price\"]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.read_csv(filename, names = headers)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>symboling</th>\n", | |
| " <th>normalized-losses</th>\n", | |
| " <th>make</th>\n", | |
| " <th>fuel-type</th>\n", | |
| " <th>aspiration</th>\n", | |
| " <th>num-of-doors</th>\n", | |
| " <th>body-style</th>\n", | |
| " <th>drive-wheels</th>\n", | |
| " <th>engine-location</th>\n", | |
| " <th>wheel-base</th>\n", | |
| " <th>...</th>\n", | |
| " <th>engine-size</th>\n", | |
| " <th>fuel-system</th>\n", | |
| " <th>bore</th>\n", | |
| " <th>stroke</th>\n", | |
| " <th>compression-ratio</th>\n", | |
| " <th>horsepower</th>\n", | |
| " <th>peak-rpm</th>\n", | |
| " <th>city-mpg</th>\n", | |
| " <th>highway-mpg</th>\n", | |
| " <th>price</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>3</td>\n", | |
| " <td>?</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>130</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27</td>\n", | |
| " <td>13495</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>3</td>\n", | |
| " <td>?</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>130</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27</td>\n", | |
| " <td>16500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>?</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>hatchback</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>94.5</td>\n", | |
| " <td>...</td>\n", | |
| " <td>152</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>154</td>\n", | |
| " <td>5000</td>\n", | |
| " <td>19</td>\n", | |
| " <td>26</td>\n", | |
| " <td>16500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>fwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.8</td>\n", | |
| " <td>...</td>\n", | |
| " <td>109</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>10.0</td>\n", | |
| " <td>102</td>\n", | |
| " <td>5500</td>\n", | |
| " <td>24</td>\n", | |
| " <td>30</td>\n", | |
| " <td>13950</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>4wd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.4</td>\n", | |
| " <td>...</td>\n", | |
| " <td>136</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>115</td>\n", | |
| " <td>5500</td>\n", | |
| " <td>18</td>\n", | |
| " <td>22</td>\n", | |
| " <td>17450</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 26 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " symboling normalized-losses make fuel-type aspiration num-of-doors \\\n", | |
| "0 3 ? alfa-romero gas std two \n", | |
| "1 3 ? alfa-romero gas std two \n", | |
| "2 1 ? alfa-romero gas std two \n", | |
| "3 2 164 audi gas std four \n", | |
| "4 2 164 audi gas std four \n", | |
| "\n", | |
| " body-style drive-wheels engine-location wheel-base ... engine-size \\\n", | |
| "0 convertible rwd front 88.6 ... 130 \n", | |
| "1 convertible rwd front 88.6 ... 130 \n", | |
| "2 hatchback rwd front 94.5 ... 152 \n", | |
| "3 sedan fwd front 99.8 ... 109 \n", | |
| "4 sedan 4wd front 99.4 ... 136 \n", | |
| "\n", | |
| " fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n", | |
| "0 mpfi 3.47 2.68 9.0 111 5000 21 \n", | |
| "1 mpfi 3.47 2.68 9.0 111 5000 21 \n", | |
| "2 mpfi 2.68 3.47 9.0 154 5000 19 \n", | |
| "3 mpfi 3.19 3.40 10.0 102 5500 24 \n", | |
| "4 mpfi 3.19 3.40 8.0 115 5500 18 \n", | |
| "\n", | |
| " highway-mpg price \n", | |
| "0 27 13495 \n", | |
| "1 27 16500 \n", | |
| "2 26 16500 \n", | |
| "3 30 13950 \n", | |
| "4 22 17450 \n", | |
| "\n", | |
| "[5 rows x 26 columns]" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# To see what the data set looks like, we'll use the head() method.\n", | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>symboling</th>\n", | |
| " <th>normalized-losses</th>\n", | |
| " <th>make</th>\n", | |
| " <th>fuel-type</th>\n", | |
| " <th>aspiration</th>\n", | |
| " <th>num-of-doors</th>\n", | |
| " <th>body-style</th>\n", | |
| " <th>drive-wheels</th>\n", | |
| " <th>engine-location</th>\n", | |
| " <th>wheel-base</th>\n", | |
| " <th>...</th>\n", | |
| " <th>engine-size</th>\n", | |
| " <th>fuel-system</th>\n", | |
| " <th>bore</th>\n", | |
| " <th>stroke</th>\n", | |
| " <th>compression-ratio</th>\n", | |
| " <th>horsepower</th>\n", | |
| " <th>peak-rpm</th>\n", | |
| " <th>city-mpg</th>\n", | |
| " <th>highway-mpg</th>\n", | |
| " <th>price</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>3</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>130</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27</td>\n", | |
| " <td>13495</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>3</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>130</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27</td>\n", | |
| " <td>16500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>hatchback</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>94.5</td>\n", | |
| " <td>...</td>\n", | |
| " <td>152</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>154</td>\n", | |
| " <td>5000</td>\n", | |
| " <td>19</td>\n", | |
| " <td>26</td>\n", | |
| " <td>16500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>fwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.8</td>\n", | |
| " <td>...</td>\n", | |
| " <td>109</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>10.0</td>\n", | |
| " <td>102</td>\n", | |
| " <td>5500</td>\n", | |
| " <td>24</td>\n", | |
| " <td>30</td>\n", | |
| " <td>13950</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>4wd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.4</td>\n", | |
| " <td>...</td>\n", | |
| " <td>136</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>115</td>\n", | |
| " <td>5500</td>\n", | |
| " <td>18</td>\n", | |
| " <td>22</td>\n", | |
| " <td>17450</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 26 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " symboling normalized-losses make fuel-type aspiration num-of-doors \\\n", | |
| "0 3 NaN alfa-romero gas std two \n", | |
| "1 3 NaN alfa-romero gas std two \n", | |
| "2 1 NaN alfa-romero gas std two \n", | |
| "3 2 164 audi gas std four \n", | |
| "4 2 164 audi gas std four \n", | |
| "\n", | |
| " body-style drive-wheels engine-location wheel-base ... engine-size \\\n", | |
| "0 convertible rwd front 88.6 ... 130 \n", | |
| "1 convertible rwd front 88.6 ... 130 \n", | |
| "2 hatchback rwd front 94.5 ... 152 \n", | |
| "3 sedan fwd front 99.8 ... 109 \n", | |
| "4 sedan 4wd front 99.4 ... 136 \n", | |
| "\n", | |
| " fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n", | |
| "0 mpfi 3.47 2.68 9.0 111 5000 21 \n", | |
| "1 mpfi 3.47 2.68 9.0 111 5000 21 \n", | |
| "2 mpfi 2.68 3.47 9.0 154 5000 19 \n", | |
| "3 mpfi 3.19 3.40 10.0 102 5500 24 \n", | |
| "4 mpfi 3.19 3.40 8.0 115 5500 18 \n", | |
| "\n", | |
| " highway-mpg price \n", | |
| "0 27 13495 \n", | |
| "1 27 16500 \n", | |
| "2 26 16500 \n", | |
| "3 30 13950 \n", | |
| "4 22 17450 \n", | |
| "\n", | |
| "[5 rows x 26 columns]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import numpy as np\n", | |
| "\n", | |
| "# replace \"?\" to NaN\n", | |
| "df.replace(\"?\", np.nan, inplace = True)\n", | |
| "df.head(5)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>symboling</th>\n", | |
| " <th>normalized-losses</th>\n", | |
| " <th>make</th>\n", | |
| " <th>fuel-type</th>\n", | |
| " <th>aspiration</th>\n", | |
| " <th>num-of-doors</th>\n", | |
| " <th>body-style</th>\n", | |
| " <th>drive-wheels</th>\n", | |
| " <th>engine-location</th>\n", | |
| " <th>wheel-base</th>\n", | |
| " <th>...</th>\n", | |
| " <th>engine-size</th>\n", | |
| " <th>fuel-system</th>\n", | |
| " <th>bore</th>\n", | |
| " <th>stroke</th>\n", | |
| " <th>compression-ratio</th>\n", | |
| " <th>horsepower</th>\n", | |
| " <th>peak-rpm</th>\n", | |
| " <th>city-mpg</th>\n", | |
| " <th>highway-mpg</th>\n", | |
| " <th>price</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>...</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>...</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>...</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>...</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>...</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 26 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " symboling normalized-losses make fuel-type aspiration num-of-doors \\\n", | |
| "0 False True False False False False \n", | |
| "1 False True False False False False \n", | |
| "2 False True False False False False \n", | |
| "3 False False False False False False \n", | |
| "4 False False False False False False \n", | |
| "\n", | |
| " body-style drive-wheels engine-location wheel-base ... engine-size \\\n", | |
| "0 False False False False ... False \n", | |
| "1 False False False False ... False \n", | |
| "2 False False False False ... False \n", | |
| "3 False False False False ... False \n", | |
| "4 False False False False ... False \n", | |
| "\n", | |
| " fuel-system bore stroke compression-ratio horsepower peak-rpm \\\n", | |
| "0 False False False False False False \n", | |
| "1 False False False False False False \n", | |
| "2 False False False False False False \n", | |
| "3 False False False False False False \n", | |
| "4 False False False False False False \n", | |
| "\n", | |
| " city-mpg highway-mpg price \n", | |
| "0 False False False \n", | |
| "1 False False False \n", | |
| "2 False False False \n", | |
| "3 False False False \n", | |
| "4 False False False \n", | |
| "\n", | |
| "[5 rows x 26 columns]" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "missing_data = df.isnull()\n", | |
| "missing_data.head(5)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "symboling\n", | |
| "False 205\n", | |
| "Name: symboling, dtype: int64\n", | |
| "\n", | |
| "normalized-losses\n", | |
| "False 164\n", | |
| "True 41\n", | |
| "Name: normalized-losses, dtype: int64\n", | |
| "\n", | |
| "make\n", | |
| "False 205\n", | |
| "Name: make, dtype: int64\n", | |
| "\n", | |
| "fuel-type\n", | |
| "False 205\n", | |
| "Name: fuel-type, dtype: int64\n", | |
| "\n", | |
| "aspiration\n", | |
| "False 205\n", | |
| "Name: aspiration, dtype: int64\n", | |
| "\n", | |
| "num-of-doors\n", | |
| "False 203\n", | |
| "True 2\n", | |
| "Name: num-of-doors, dtype: int64\n", | |
| "\n", | |
| "body-style\n", | |
| "False 205\n", | |
| "Name: body-style, dtype: int64\n", | |
| "\n", | |
| "drive-wheels\n", | |
| "False 205\n", | |
| "Name: drive-wheels, dtype: int64\n", | |
| "\n", | |
| "engine-location\n", | |
| "False 205\n", | |
| "Name: engine-location, dtype: int64\n", | |
| "\n", | |
| "wheel-base\n", | |
| "False 205\n", | |
| "Name: wheel-base, dtype: int64\n", | |
| "\n", | |
| "length\n", | |
| "False 205\n", | |
| "Name: length, dtype: int64\n", | |
| "\n", | |
| "width\n", | |
| "False 205\n", | |
| "Name: width, dtype: int64\n", | |
| "\n", | |
| "height\n", | |
| "False 205\n", | |
| "Name: height, dtype: int64\n", | |
| "\n", | |
| "curb-weight\n", | |
| "False 205\n", | |
| "Name: curb-weight, dtype: int64\n", | |
| "\n", | |
| "engine-type\n", | |
| "False 205\n", | |
| "Name: engine-type, dtype: int64\n", | |
| "\n", | |
| "num-of-cylinders\n", | |
| "False 205\n", | |
| "Name: num-of-cylinders, dtype: int64\n", | |
| "\n", | |
| "engine-size\n", | |
| "False 205\n", | |
| "Name: engine-size, dtype: int64\n", | |
| "\n", | |
| "fuel-system\n", | |
| "False 205\n", | |
| "Name: fuel-system, dtype: int64\n", | |
| "\n", | |
| "bore\n", | |
| "False 201\n", | |
| "True 4\n", | |
| "Name: bore, dtype: int64\n", | |
| "\n", | |
| "stroke\n", | |
| "False 201\n", | |
| "True 4\n", | |
| "Name: stroke, dtype: int64\n", | |
| "\n", | |
| "compression-ratio\n", | |
| "False 205\n", | |
| "Name: compression-ratio, dtype: int64\n", | |
| "\n", | |
| "horsepower\n", | |
| "False 203\n", | |
| "True 2\n", | |
| "Name: horsepower, dtype: int64\n", | |
| "\n", | |
| "peak-rpm\n", | |
| "False 203\n", | |
| "True 2\n", | |
| "Name: peak-rpm, dtype: int64\n", | |
| "\n", | |
| "city-mpg\n", | |
| "False 205\n", | |
| "Name: city-mpg, dtype: int64\n", | |
| "\n", | |
| "highway-mpg\n", | |
| "False 205\n", | |
| "Name: highway-mpg, dtype: int64\n", | |
| "\n", | |
| "price\n", | |
| "False 201\n", | |
| "True 4\n", | |
| "Name: price, dtype: int64\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for column in missing_data.columns.values.tolist():\n", | |
| " print(column)\n", | |
| " print (missing_data[column].value_counts())\n", | |
| " print(\"\") " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average of normalized-losses: 122.0\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "avg_norm_loss = df[\"normalized-losses\"].astype(\"float\").mean(axis=0)\n", | |
| "print(\"Average of normalized-losses:\", avg_norm_loss)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df[\"normalized-losses\"].replace(np.nan, avg_norm_loss, inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average of bore: 3.3297512437810943\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "avg_bore=df['bore'].astype('float').mean(axis=0)\n", | |
| "print(\"Average of bore:\", avg_bore)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df[\"bore\"].replace(np.nan, avg_bore, inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Question #1: \n", | |
| "According to the example above, replace NaN in \"stroke\" column by mean." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average of stroke: 3.255422885572139\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# calculate the mean vaule for \"stroke\" column\n", | |
| "avg_stroke = df[\"stroke\"].astype(\"float\").mean(axis = 0)\n", | |
| "print(\"Average of stroke:\", avg_stroke)\n", | |
| "\n", | |
| "# replace NaN by mean value in \"stroke\" column\n", | |
| "df[\"stroke\"].replace(np.nan, avg_stroke, inplace = True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average horsepower: 104.25615763546797\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#Calculate the mean value for the 'horsepower' column:\n", | |
| " \n", | |
| "avg_horsepower = df['horsepower'].astype('float').mean(axis=0)\n", | |
| "print(\"Average horsepower:\", avg_horsepower)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#Replace \"NaN\" by mean value:\n", | |
| "\n", | |
| "df['horsepower'].replace(np.nan, avg_horsepower, inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average peak rpm: 5125.369458128079\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#Calculate the mean value for 'peak-rpm' column:\n", | |
| "\n", | |
| "avg_peakrpm=df['peak-rpm'].astype('float').mean(axis=0)\n", | |
| "print(\"Average peak rpm:\", avg_peakrpm)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#Replace NaN by mean value:\n", | |
| "\n", | |
| "df['peak-rpm'].replace(np.nan, avg_peakrpm, inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "four 114\n", | |
| "two 89\n", | |
| "Name: num-of-doors, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#To see which values are present in a particular column, we can use the \".value_counts()\" method:\n", | |
| "\n", | |
| "df['num-of-doors'].value_counts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'four'" | |
| ] | |
| }, | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['num-of-doors'].value_counts().idxmax()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 24, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#replace the missing 'num-of-doors' values by the most frequent \n", | |
| "\n", | |
| "df[\"num-of-doors\"].replace(np.nan, \"four\", inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# simply drop whole row with NaN in \"price\" column\n", | |
| "\n", | |
| "df.dropna(subset=[\"price\"], axis=0, inplace=True)\n", | |
| "\n", | |
| "# reset index, because we droped two rows\n", | |
| "\n", | |
| "df.reset_index(drop=True, inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>symboling</th>\n", | |
| " <th>normalized-losses</th>\n", | |
| " <th>make</th>\n", | |
| " <th>fuel-type</th>\n", | |
| " <th>aspiration</th>\n", | |
| " <th>num-of-doors</th>\n", | |
| " <th>body-style</th>\n", | |
| " <th>drive-wheels</th>\n", | |
| " <th>engine-location</th>\n", | |
| " <th>wheel-base</th>\n", | |
| " <th>...</th>\n", | |
| " <th>engine-size</th>\n", | |
| " <th>fuel-system</th>\n", | |
| " <th>bore</th>\n", | |
| " <th>stroke</th>\n", | |
| " <th>compression-ratio</th>\n", | |
| " <th>horsepower</th>\n", | |
| " <th>peak-rpm</th>\n", | |
| " <th>city-mpg</th>\n", | |
| " <th>highway-mpg</th>\n", | |
| " <th>price</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>3</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>130</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27</td>\n", | |
| " <td>13495</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>3</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>130</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27</td>\n", | |
| " <td>16500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>hatchback</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>94.5</td>\n", | |
| " <td>...</td>\n", | |
| " <td>152</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>154</td>\n", | |
| " <td>5000</td>\n", | |
| " <td>19</td>\n", | |
| " <td>26</td>\n", | |
| " <td>16500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>fwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.8</td>\n", | |
| " <td>...</td>\n", | |
| " <td>109</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>10.0</td>\n", | |
| " <td>102</td>\n", | |
| " <td>5500</td>\n", | |
| " <td>24</td>\n", | |
| " <td>30</td>\n", | |
| " <td>13950</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>4wd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.4</td>\n", | |
| " <td>...</td>\n", | |
| " <td>136</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>115</td>\n", | |
| " <td>5500</td>\n", | |
| " <td>18</td>\n", | |
| " <td>22</td>\n", | |
| " <td>17450</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 26 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " symboling normalized-losses make fuel-type aspiration num-of-doors \\\n", | |
| "0 3 122 alfa-romero gas std two \n", | |
| "1 3 122 alfa-romero gas std two \n", | |
| "2 1 122 alfa-romero gas std two \n", | |
| "3 2 164 audi gas std four \n", | |
| "4 2 164 audi gas std four \n", | |
| "\n", | |
| " body-style drive-wheels engine-location wheel-base ... engine-size \\\n", | |
| "0 convertible rwd front 88.6 ... 130 \n", | |
| "1 convertible rwd front 88.6 ... 130 \n", | |
| "2 hatchback rwd front 94.5 ... 152 \n", | |
| "3 sedan fwd front 99.8 ... 109 \n", | |
| "4 sedan 4wd front 99.4 ... 136 \n", | |
| "\n", | |
| " fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n", | |
| "0 mpfi 3.47 2.68 9.0 111 5000 21 \n", | |
| "1 mpfi 3.47 2.68 9.0 111 5000 21 \n", | |
| "2 mpfi 2.68 3.47 9.0 154 5000 19 \n", | |
| "3 mpfi 3.19 3.40 10.0 102 5500 24 \n", | |
| "4 mpfi 3.19 3.40 8.0 115 5500 18 \n", | |
| "\n", | |
| " highway-mpg price \n", | |
| "0 27 13495 \n", | |
| "1 27 16500 \n", | |
| "2 26 16500 \n", | |
| "3 30 13950 \n", | |
| "4 22 17450 \n", | |
| "\n", | |
| "[5 rows x 26 columns]" | |
| ] | |
| }, | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "symboling int64\n", | |
| "normalized-losses object\n", | |
| "make object\n", | |
| "fuel-type object\n", | |
| "aspiration object\n", | |
| "num-of-doors object\n", | |
| "body-style object\n", | |
| "drive-wheels object\n", | |
| "engine-location object\n", | |
| "wheel-base float64\n", | |
| "length float64\n", | |
| "width float64\n", | |
| "height float64\n", | |
| "curb-weight int64\n", | |
| "engine-type object\n", | |
| "num-of-cylinders object\n", | |
| "engine-size int64\n", | |
| "fuel-system object\n", | |
| "bore object\n", | |
| "stroke object\n", | |
| "compression-ratio float64\n", | |
| "horsepower object\n", | |
| "peak-rpm object\n", | |
| "city-mpg int64\n", | |
| "highway-mpg int64\n", | |
| "price object\n", | |
| "dtype: object" | |
| ] | |
| }, | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#data types for each column\n", | |
| "\n", | |
| "df.dtypes" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#Convert data types to proper format\n", | |
| "\n", | |
| "df[[\"bore\", \"stroke\"]] = df[[\"bore\", \"stroke\"]].astype(\"float\")\n", | |
| "df[[\"normalized-losses\"]] = df[[\"normalized-losses\"]].astype(\"int\")\n", | |
| "df[[\"price\"]] = df[[\"price\"]].astype(\"float\")\n", | |
| "df[[\"peak-rpm\"]] = df[[\"peak-rpm\"]].astype(\"float\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "symboling int64\n", | |
| "normalized-losses int64\n", | |
| "make object\n", | |
| "fuel-type object\n", | |
| "aspiration object\n", | |
| "num-of-doors object\n", | |
| "body-style object\n", | |
| "drive-wheels object\n", | |
| "engine-location object\n", | |
| "wheel-base float64\n", | |
| "length float64\n", | |
| "width float64\n", | |
| "height float64\n", | |
| "curb-weight int64\n", | |
| "engine-type object\n", | |
| "num-of-cylinders object\n", | |
| "engine-size int64\n", | |
| "fuel-system object\n", | |
| "bore float64\n", | |
| "stroke float64\n", | |
| "compression-ratio float64\n", | |
| "horsepower object\n", | |
| "peak-rpm float64\n", | |
| "city-mpg int64\n", | |
| "highway-mpg int64\n", | |
| "price float64\n", | |
| "dtype: object" | |
| ] | |
| }, | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#List the columns after the conversion\n", | |
| "\n", | |
| "df.dtypes" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>symboling</th>\n", | |
| " <th>normalized-losses</th>\n", | |
| " <th>make</th>\n", | |
| " <th>fuel-type</th>\n", | |
| " <th>aspiration</th>\n", | |
| " <th>num-of-doors</th>\n", | |
| " <th>body-style</th>\n", | |
| " <th>drive-wheels</th>\n", | |
| " <th>engine-location</th>\n", | |
| " <th>wheel-base</th>\n", | |
| " <th>...</th>\n", | |
| " <th>engine-size</th>\n", | |
| " <th>fuel-system</th>\n", | |
| " <th>bore</th>\n", | |
| " <th>stroke</th>\n", | |
| " <th>compression-ratio</th>\n", | |
| " <th>horsepower</th>\n", | |
| " <th>peak-rpm</th>\n", | |
| " <th>city-mpg</th>\n", | |
| " <th>highway-mpg</th>\n", | |
| " <th>price</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>3</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>130</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27</td>\n", | |
| " <td>13495.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>3</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>130</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27</td>\n", | |
| " <td>16500.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>hatchback</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>94.5</td>\n", | |
| " <td>...</td>\n", | |
| " <td>152</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>154</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>19</td>\n", | |
| " <td>26</td>\n", | |
| " <td>16500.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>fwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.8</td>\n", | |
| " <td>...</td>\n", | |
| " <td>109</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>10.0</td>\n", | |
| " <td>102</td>\n", | |
| " <td>5500.0</td>\n", | |
| " <td>24</td>\n", | |
| " <td>30</td>\n", | |
| " <td>13950.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>4wd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.4</td>\n", | |
| " <td>...</td>\n", | |
| " <td>136</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>115</td>\n", | |
| " <td>5500.0</td>\n", | |
| " <td>18</td>\n", | |
| " <td>22</td>\n", | |
| " <td>17450.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 26 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " symboling normalized-losses make fuel-type aspiration \\\n", | |
| "0 3 122 alfa-romero gas std \n", | |
| "1 3 122 alfa-romero gas std \n", | |
| "2 1 122 alfa-romero gas std \n", | |
| "3 2 164 audi gas std \n", | |
| "4 2 164 audi gas std \n", | |
| "\n", | |
| " num-of-doors body-style drive-wheels engine-location wheel-base ... \\\n", | |
| "0 two convertible rwd front 88.6 ... \n", | |
| "1 two convertible rwd front 88.6 ... \n", | |
| "2 two hatchback rwd front 94.5 ... \n", | |
| "3 four sedan fwd front 99.8 ... \n", | |
| "4 four sedan 4wd front 99.4 ... \n", | |
| "\n", | |
| " engine-size fuel-system bore stroke compression-ratio horsepower \\\n", | |
| "0 130 mpfi 3.47 2.68 9.0 111 \n", | |
| "1 130 mpfi 3.47 2.68 9.0 111 \n", | |
| "2 152 mpfi 2.68 3.47 9.0 154 \n", | |
| "3 109 mpfi 3.19 3.40 10.0 102 \n", | |
| "4 136 mpfi 3.19 3.40 8.0 115 \n", | |
| "\n", | |
| " peak-rpm city-mpg highway-mpg price \n", | |
| "0 5000.0 21 27 13495.0 \n", | |
| "1 5000.0 21 27 16500.0 \n", | |
| "2 5000.0 19 26 16500.0 \n", | |
| "3 5500.0 24 30 13950.0 \n", | |
| "4 5500.0 18 22 17450.0 \n", | |
| "\n", | |
| "[5 rows x 26 columns]" | |
| ] | |
| }, | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 32, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Convert mpg to L/100km by mathematical operation (235 divided by mpg)\n", | |
| "df['city-L/100km'] = 235/df[\"city-mpg\"]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>symboling</th>\n", | |
| " <th>normalized-losses</th>\n", | |
| " <th>make</th>\n", | |
| " <th>fuel-type</th>\n", | |
| " <th>aspiration</th>\n", | |
| " <th>num-of-doors</th>\n", | |
| " <th>body-style</th>\n", | |
| " <th>drive-wheels</th>\n", | |
| " <th>engine-location</th>\n", | |
| " <th>wheel-base</th>\n", | |
| " <th>...</th>\n", | |
| " <th>fuel-system</th>\n", | |
| " <th>bore</th>\n", | |
| " <th>stroke</th>\n", | |
| " <th>compression-ratio</th>\n", | |
| " <th>horsepower</th>\n", | |
| " <th>peak-rpm</th>\n", | |
| " <th>city-mpg</th>\n", | |
| " <th>highway-mpg</th>\n", | |
| " <th>price</th>\n", | |
| " <th>city-L/100km</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>3</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27</td>\n", | |
| " <td>13495.0</td>\n", | |
| " <td>11.190476</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>3</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27</td>\n", | |
| " <td>16500.0</td>\n", | |
| " <td>11.190476</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>hatchback</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>94.5</td>\n", | |
| " <td>...</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>154</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>19</td>\n", | |
| " <td>26</td>\n", | |
| " <td>16500.0</td>\n", | |
| " <td>12.368421</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>fwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.8</td>\n", | |
| " <td>...</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>10.0</td>\n", | |
| " <td>102</td>\n", | |
| " <td>5500.0</td>\n", | |
| " <td>24</td>\n", | |
| " <td>30</td>\n", | |
| " <td>13950.0</td>\n", | |
| " <td>9.791667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>4wd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.4</td>\n", | |
| " <td>...</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>115</td>\n", | |
| " <td>5500.0</td>\n", | |
| " <td>18</td>\n", | |
| " <td>22</td>\n", | |
| " <td>17450.0</td>\n", | |
| " <td>13.055556</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 27 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " symboling normalized-losses make fuel-type aspiration \\\n", | |
| "0 3 122 alfa-romero gas std \n", | |
| "1 3 122 alfa-romero gas std \n", | |
| "2 1 122 alfa-romero gas std \n", | |
| "3 2 164 audi gas std \n", | |
| "4 2 164 audi gas std \n", | |
| "\n", | |
| " num-of-doors body-style drive-wheels engine-location wheel-base ... \\\n", | |
| "0 two convertible rwd front 88.6 ... \n", | |
| "1 two convertible rwd front 88.6 ... \n", | |
| "2 two hatchback rwd front 94.5 ... \n", | |
| "3 four sedan fwd front 99.8 ... \n", | |
| "4 four sedan 4wd front 99.4 ... \n", | |
| "\n", | |
| " fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n", | |
| "0 mpfi 3.47 2.68 9.0 111 5000.0 21 \n", | |
| "1 mpfi 3.47 2.68 9.0 111 5000.0 21 \n", | |
| "2 mpfi 2.68 3.47 9.0 154 5000.0 19 \n", | |
| "3 mpfi 3.19 3.40 10.0 102 5500.0 24 \n", | |
| "4 mpfi 3.19 3.40 8.0 115 5500.0 18 \n", | |
| "\n", | |
| " highway-mpg price city-L/100km \n", | |
| "0 27 13495.0 11.190476 \n", | |
| "1 27 16500.0 11.190476 \n", | |
| "2 26 16500.0 12.368421 \n", | |
| "3 30 13950.0 9.791667 \n", | |
| "4 22 17450.0 13.055556 \n", | |
| "\n", | |
| "[5 rows x 27 columns]" | |
| ] | |
| }, | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# check your transformed data \n", | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Question #2: \n", | |
| "According to the example above, transform mpg to L/100km in the column of \"highway-mpg\", and change the name of column to \"highway-L/100km\"." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# transform mpg to L/100km by mathematical operation (235 divided by mpg)\n", | |
| "df[\"highway-mpg\"] = 235/df[\"highway-mpg\"]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# rename column name from \"highway-mpg\" to \"highway-L/100km\"\n", | |
| "df.rename(columns={'\"highway-mpg\"':'highway-L/100km'}, inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>symboling</th>\n", | |
| " <th>normalized-losses</th>\n", | |
| " <th>make</th>\n", | |
| " <th>fuel-type</th>\n", | |
| " <th>aspiration</th>\n", | |
| " <th>num-of-doors</th>\n", | |
| " <th>body-style</th>\n", | |
| " <th>drive-wheels</th>\n", | |
| " <th>engine-location</th>\n", | |
| " <th>wheel-base</th>\n", | |
| " <th>...</th>\n", | |
| " <th>fuel-system</th>\n", | |
| " <th>bore</th>\n", | |
| " <th>stroke</th>\n", | |
| " <th>compression-ratio</th>\n", | |
| " <th>horsepower</th>\n", | |
| " <th>peak-rpm</th>\n", | |
| " <th>city-mpg</th>\n", | |
| " <th>highway-mpg</th>\n", | |
| " <th>price</th>\n", | |
| " <th>city-L/100km</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>3</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27.0</td>\n", | |
| " <td>13495.0</td>\n", | |
| " <td>11.190476</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>3</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>...</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27.0</td>\n", | |
| " <td>16500.0</td>\n", | |
| " <td>11.190476</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>hatchback</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>94.5</td>\n", | |
| " <td>...</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>2.68</td>\n", | |
| " <td>3.47</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>154</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>19</td>\n", | |
| " <td>26.0</td>\n", | |
| " <td>16500.0</td>\n", | |
| " <td>12.368421</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>fwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.8</td>\n", | |
| " <td>...</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>10.0</td>\n", | |
| " <td>102</td>\n", | |
| " <td>5500.0</td>\n", | |
| " <td>24</td>\n", | |
| " <td>30.0</td>\n", | |
| " <td>13950.0</td>\n", | |
| " <td>9.791667</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>gas</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>4wd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.4</td>\n", | |
| " <td>...</td>\n", | |
| " <td>mpfi</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>3.40</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>115</td>\n", | |
| " <td>5500.0</td>\n", | |
| " <td>18</td>\n", | |
| " <td>22.0</td>\n", | |
| " <td>17450.0</td>\n", | |
| " <td>13.055556</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 27 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " symboling normalized-losses make fuel-type aspiration \\\n", | |
| "0 3 122 alfa-romero gas std \n", | |
| "1 3 122 alfa-romero gas std \n", | |
| "2 1 122 alfa-romero gas std \n", | |
| "3 2 164 audi gas std \n", | |
| "4 2 164 audi gas std \n", | |
| "\n", | |
| " num-of-doors body-style drive-wheels engine-location wheel-base ... \\\n", | |
| "0 two convertible rwd front 88.6 ... \n", | |
| "1 two convertible rwd front 88.6 ... \n", | |
| "2 two hatchback rwd front 94.5 ... \n", | |
| "3 four sedan fwd front 99.8 ... \n", | |
| "4 four sedan 4wd front 99.4 ... \n", | |
| "\n", | |
| " fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n", | |
| "0 mpfi 3.47 2.68 9.0 111 5000.0 21 \n", | |
| "1 mpfi 3.47 2.68 9.0 111 5000.0 21 \n", | |
| "2 mpfi 2.68 3.47 9.0 154 5000.0 19 \n", | |
| "3 mpfi 3.19 3.40 10.0 102 5500.0 24 \n", | |
| "4 mpfi 3.19 3.40 8.0 115 5500.0 18 \n", | |
| "\n", | |
| " highway-mpg price city-L/100km \n", | |
| "0 27.0 13495.0 11.190476 \n", | |
| "1 27.0 16500.0 11.190476 \n", | |
| "2 26.0 16500.0 12.368421 \n", | |
| "3 30.0 13950.0 9.791667 \n", | |
| "4 22.0 17450.0 13.055556 \n", | |
| "\n", | |
| "[5 rows x 27 columns]" | |
| ] | |
| }, | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# check your transformed data \n", | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 45, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# replace (original value) by (original value)/(maximum value)\n", | |
| "df['length'] = df['length']/df['length'].max()\n", | |
| "df['width'] = df['width']/df['width'].max()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Questiont #3: \n", | |
| "According to the example above, normalize the column \"height\"." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>length</th>\n", | |
| " <th>width</th>\n", | |
| " <th>height</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.811148</td>\n", | |
| " <td>0.890278</td>\n", | |
| " <td>0.816054</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0.811148</td>\n", | |
| " <td>0.890278</td>\n", | |
| " <td>0.816054</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.822681</td>\n", | |
| " <td>0.909722</td>\n", | |
| " <td>0.876254</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0.848630</td>\n", | |
| " <td>0.919444</td>\n", | |
| " <td>0.908027</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0.848630</td>\n", | |
| " <td>0.922222</td>\n", | |
| " <td>0.908027</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " length width height\n", | |
| "0 0.811148 0.890278 0.816054\n", | |
| "1 0.811148 0.890278 0.816054\n", | |
| "2 0.822681 0.909722 0.876254\n", | |
| "3 0.848630 0.919444 0.908027\n", | |
| "4 0.848630 0.922222 0.908027" | |
| ] | |
| }, | |
| "execution_count": 46, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['height'] = df['height']/df['height'].max() \n", | |
| "# show the scaled columns\n", | |
| "df[[\"length\",\"width\",\"height\"]].head()\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Binning Data In Pandas" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 47, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df[\"horsepower\"]=df[\"horsepower\"].astype(int, copy=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 48, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#Plot the histogram of horspower, to see what the distribution of horsepower looks like.\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Text(0.5, 1.0, 'horsepower bins')" | |
| ] | |
| }, | |
| "execution_count": 49, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFV9JREFUeJzt3X+0ZWV93/H3hwHEH8CADIhgHKTEJVqDOkGoVSkm/gCbocafS83UkKBdanRFE8ekSU2rKRijUeuqCwthTFQgGoTGVqUogyaKDooIEgPioPyQGRAVbauC3/6xnynH8d65F5x9ztz7vF9rnXX2z7O/95kz53Oevc95TqoKSVK/dpt1AZKk2TIIJKlzBoEkdc4gkKTOGQSS1DmDQJI6ZxBoZpJsTvIrs65jV5fkuCQ37GD9u5P80TRr0vKy+6wLkPTzqaqXzboGLW32CLTkJVk2b2iW09+ipcMg0KwdleSKJN9Nck6SvbatSPLbSa5N8u0kFyR58MS6SvLyJNcA12TwtiRb2mNdkeRRbdv7JHlLkm8kuaWdSrlvW3dckhuS/EGSW9vpqhdOHGffJO9NsjXJ9Un+fZLd2rrrkzyuTb+o1XRkm/+tJB9u07slWZ/ka0luS3Jukv3butVtv5OTfAP4xHwNtYMaz0ryxu3+nte0trg5yUsmtj0hyVeS3JHkxiSv/Xn+8bQ8GASatecCTwcOAx4N/FuAJMcD/7mtPxi4Hjh7u31PAh4PHAk8FXgS8IvASuB5wG1tu9Pa8qOAfwYcAvzxxOM8CDigLV8HnJ7k4W3dO4F9gYcBTwZ+A9j2wroROK5NPwm4rm2zbX5jm/6dVuuTgQcDtwPv2u5veTLwCOBpc7TRQjXOte2+bduTgXcl2a+tOwN4aVXtDTyKHQSPOlJV3rzN5AZsBl40Mf9m4N1t+gzgzRPrHgD8GFjd5gs4fmL98cA/AccAu00sD/AD4PCJZccCX2/TxwF3AvefWH8u8EfACuCHwJET614KXNymTwYuaNNXA78FnN3mrwceO7HuKROPcXD7W3YHVre/5WE7aKd5a2zTZwFvnNj2/wC7T2y7BTimTX+j/Q37zPrf39uuc7NHoFn71sT0/2Z4wYfhnfP121ZU1fcZ3uEfMrH9NyfWfwL4LwzvtG9JcnqSfYBVwP2Ay5J8J8l3gI+25dvcXlU/mJi/vh3/AGDPyTra9LYaNgJPTPIghtA4B3hCktUM78gvb9s9FDhv4vhXA3cBB831t8xjvhrncltV3TkxP9muvw6cAFyfZGOSYxc4rjpgEGhXdRPDCygASe4PPBC4cWKbnxo6t6reUVWPAx7JcCro94BbGd4hP7KqVrbbvlX1gIld92uPv80vtOPfyvDO/aHbrbuxHe9ahhfZ3wEuqao7GILtFODTVfWTts83gWdMHH9lVe1VVfP+LXOYr8Z7pKo+X1VrgQOBDzP0LNQ5g0C7qvcDL0lyVJL7AH8KXFpVm+faOMkvJ3l8kj0YTgX9X+Cu9mL8HuBtSQ5s2x6SZPtz8X+SZM8kTwSeCfxNVd3F8EL5piR7J3ko8LvAX0/stxF4BXdfD7h4u3mAd7fHeGg7/qoka+9Fm/xMjfdk57bvC5PsW1U/Br7H0DNR5wwC7ZKq6iKG8/QfAm4GDgeev4Nd9mF4wb+d4bTJbcBb2rrXAdcCn03yPeB/AZMXWr/V9rsJeB/wsqr6x7bulQzBch3waYaAOnNi343A3sAl88wDvB24APh4kjuAzzJc5L4ndlTjPfFiYHNrh5cBL7oXj6FlJlX+MI36leQ44K+r6tBZ1yLNij0CSeqcQSBJnfPUkCR1btRxTZJsBu5g+GTCnVW1pn21/hyGL9JsBp5bVbePWYckaX6j9ghaEKypqlsnlr0Z+HZVnZpkPbBfVb1uR49zwAEH1OrVq0erU5KWo8suu+zWqlq10HazGOlwLXePz7KB4XPXOwyC1atXs2nTpnGrkqRlJsn1C281/sXiYvjs9GVJTmnLDqqqmwHa/YFz7ZjklCSbkmzaunXryGVKUr/G7hE8oapuat/ovDDJor8AU1WnA6cDrFmzxivakjSSUXsEVXVTu98CnAcczTAg2MEA7X7LmDVIknZstCBIcv8ke2+bZhgv/kqGr9qva5utA84fqwZJ0sLGPDV0EMPQu9uO8/6q+miSzwPnJjmZYWz054xYgyRpAaMFQVVdB/zSHMtvA54y1nElSfeMQ0xIUucMAknqnEEgSZ2bxTeLNbLV6z8yk+NuPvXEmRxX0s/HHoEkdc4gkKTOGQSS1DmDQJI6ZxBIUucMAknqnEEgSZ0zCCSpcwaBJHXOIJCkzhkEktQ5g0CSOuegcyOZ1cBvknRP2SOQpM4ZBJLUOYNAkjq37K8ReK5eknbMHoEkdc4gkKTOGQSS1DmDQJI6ZxBIUucMAknqnEEgSZ0zCCSpcwaBJHXOIJCkzhkEktQ5g0CSOmcQSFLnRg+CJCuSfDHJ37X5w5JcmuSaJOck2XPsGiRJ85tGj+BVwNUT86cBb6uqI4DbgZOnUIMkaR6jBkGSQ4ETgf/W5gMcD3ywbbIBOGnMGiRJOzZ2j+AvgN8HftLmHwh8p6rubPM3AIfMtWOSU5JsSrJp69atI5cpSf0aLQiSPBPYUlWXTS6eY9Oaa/+qOr2q1lTVmlWrVo1SoyRp3J+qfALwa0lOAPYC9mHoIaxMsnvrFRwK3DRiDZKkBYzWI6iq11fVoVW1Gng+8ImqeiHwSeDZbbN1wPlj1SBJWtgsvkfwOuB3k1zLcM3gjBnUIElqxjw19P9V1cXAxW36OuDoaRxXkrQwv1ksSZ0zCCSpcwaBJHXOIJCkzhkEktQ5g0CSOmcQSFLnDAJJ6pxBIEmdMwgkqXMGgSR1ziCQpM4ZBJLUOYNAkjpnEEhS5wwCSeqcQSBJnTMIJKlzBoEkdc4gkKTOGQSS1DmDQJI6ZxBIUucMAknqnEEgSZ0zCCSpcwaBJHXOIJCkzhkEktQ5g0CSOmcQSFLnDAJJ6pxBIEmdMwgkqXOjBUGSvZJ8LsmXklyV5E/a8sOSXJrkmiTnJNlzrBokSQsbs0fwQ+D4qvol4Cjg6UmOAU4D3lZVRwC3AyePWIMkaQGjBUENvt9m92i3Ao4HPtiWbwBOGqsGSdLCRr1GkGRFksuBLcCFwNeA71TVnW2TG4BD5tn3lCSbkmzaunXrmGVKUtdGDYKququqjgIOBY4GHjHXZvPse3pVramqNatWrRqzTEnq2lQ+NVRV3wEuBo4BVibZva06FLhpGjVIkuY25qeGViVZ2abvC/wKcDXwSeDZbbN1wPlj1SBJWtjuC29yrx0MbEiygiFwzq2qv0vyFeDsJG8EvgicMWINkqQFjBYEVXUF8Jg5ll/HcL1AkrQL8JvFktQ5g0CSOmcQSFLnDAJJ6tyigiDJRYtZJklaenb4qaEkewH3Aw5Ish+Qtmof4MEj1yZJmoKFPj76UuDVDC/6l3F3EHwPeNeIdUmSpmSHQVBVbwfenuSVVfXOKdUkSZqiRX2hrKremeRfAKsn96mq945UlyRpShYVBEn+CjgcuBy4qy0uwCCQpCVusUNMrAGOrKo5h4yWJC1di/0ewZXAg8YsRJI0G4vtERwAfCXJ5xh+ixiAqvq1UaqSJE3NYoPgDWMWIUmancV+amjj2IVIkmZjsZ8auoO7f1t4T2AP4AdVtc9YhUmSpmOxPYK9J+eTnIQ/LiNJy8K9Gn20qj4MHL+Ta5EkzcBiTw09a2J2N4bvFfidAklaBhb7qaF/PTF9J7AZWLvTq5EkTd1irxG8ZOxCtPStXv+RmR1786knzuzY0lK32B+mOTTJeUm2JLklyYeSHDp2cZKk8S32YvFfAhcw/C7BIcB/b8skSUvcYoNgVVX9ZVXd2W5nAatGrEuSNCWLDYJbk7woyYp2exFw25iFSZKmY7FB8JvAc4FvATcDzwa8gCxJy8BiPz76n4B1VXU7QJL9gbcwBIQkaQlbbI/g0dtCAKCqvg08ZpySJEnTtNgg2C3JfttmWo9gsb0JSdIubLEv5n8O/EOSDzIMLfFc4E2jVSVJmprFfrP4vUk2MQw0F+BZVfWVUSuTJE3Fok/vtBd+X/wlaZm5V8NQS5KWD4NAkjo3WhAkeUiSTya5OslVSV7Vlu+f5MIk17T7/RZ6LEnSeMbsEdwJvKaqHgEcA7w8yZHAeuCiqjoCuKjNS5JmZLQgqKqbq+oLbfoO4GqGkUvXAhvaZhuAk8aqQZK0sKlcI0iymuGbyJcCB1XVzTCEBXDgNGqQJM1t9CBI8gDgQ8Crq+p792C/U5JsSrJp69at4xUoSZ0bNQiS7MEQAu+rqr9ti29JcnBbfzCwZa59q+r0qlpTVWtWrfKnDyRpLGN+aijAGcDVVfXWiVUXAOva9Drg/LFqkCQtbMyB454AvBj4cpLL27I/AE4Fzk1yMvAN4Dkj1iBJWsBoQVBVn2YYl2guTxnruJKke8ZvFktS5wwCSeqcQSBJnTMIJKlzBoEkdc4gkKTOGQSS1DmDQJI6ZxBIUucMAknqnEEgSZ0zCCSpc2OOPiote6vXf2Rmx9586okzO7aWF3sEktQ5g0CSOmcQSFLnDAJJ6pxBIEmdMwgkqXMGgSR1zu8RaFmY5ef5paXOHoEkdc4gkKTOGQSS1DmDQJI6ZxBIUucMAknqnEEgSZ0zCCSpcwaBJHXOIJCkzhkEktQ5g0CSOmcQSFLnRguCJGcm2ZLkyoll+ye5MMk17X6/sY4vSVqcMXsEZwFP327ZeuCiqjoCuKjNS5JmaLQgqKpLgG9vt3gtsKFNbwBOGuv4kqTFmfY1goOq6maAdn/glI8vSdrOLnuxOMkpSTYl2bR169ZZlyNJy9a0g+CWJAcDtPst821YVadX1ZqqWrNq1aqpFShJvZl2EFwArGvT64Dzp3x8SdJ2xvz46AeAzwAPT3JDkpOBU4FfTXIN8KttXpI0Q7uP9cBV9YJ5Vj1lrGNKku65XfZisSRpOgwCSeqcQSBJnTMIJKlzBoEkdc4gkKTOGQSS1DmDQJI6ZxBIUucMAknqnEEgSZ0zCCSpcwaBJHXOIJCkzhkEktQ5g0CSOjfaD9NIGtfq9R+ZyXE3n3riTI6r8dgjkKTOGQSS1DmDQJI6ZxBIUue8WCzpHpnVRWrwQvVY7BFIUucMAknqnEEgSZ0zCCSpcwaBJHXOIJCkzhkEktQ5g0CSOmcQSFLnDAJJ6pxBIEmdc6whSVrAcv8RIHsEktS5mQRBkqcn+WqSa5Osn0UNkqTB1IMgyQrgXcAzgCOBFyQ5ctp1SJIGs+gRHA1cW1XXVdWPgLOBtTOoQ5LEbC4WHwJ8c2L+BuDx22+U5BTglDb7/SRf3QnHPgC4dSc8znJk28zPtpnfVNsmp03rSDvFz902O+HvfehiNppFEGSOZfUzC6pOB07fqQdONlXVmp35mMuFbTM/22Z+ts38llLbzOLU0A3AQybmDwVumkEdkiRmEwSfB45IcliSPYHnAxfMoA5JEjM4NVRVdyZ5BfAxYAVwZlVdNaXD79RTTcuMbTM/22Z+ts38lkzbpOpnTs9LkjriN4slqXMGgSR1btkGQZLNSb6c5PIkm9qy/ZNcmOSadr/frOucliRnJtmS5MqJZXO2RwbvaEOAXJHksbOrfHzztM0bktzYnj+XJzlhYt3rW9t8NcnTZlP1dCR5SJJPJrk6yVVJXtWWd//c2UHbLL3nTlUtyxuwGThgu2VvBta36fXAabOuc4rt8STgscCVC7UHcALwPxm+83EMcOms659B27wBeO0c2x4JfAm4D3AY8DVgxaz/hhHb5mDgsW16b+CfWht0/9zZQdssuefOsu0RzGMtsKFNbwBOmmEtU1VVlwDf3m7xfO2xFnhvDT4LrExy8HQqnb552mY+a4Gzq+qHVfV14FqGYVOWpaq6uaq+0KbvAK5mGB2g++fODtpmPrvsc2c5B0EBH09yWRuuAuCgqroZhn9E4MCZVbdrmK895hoGZEdP8OXqFe30xpkTpxG7bZskq4HHAJfic+enbNc2sMSeO8s5CJ5QVY9lGOX05UmeNOuClpBFDQOyzP1X4HDgKOBm4M/b8i7bJskDgA8Br66q7+1o0zmWLev2maNtltxzZ9kGQVXd1O63AOcxdMFu2dZNbfdbZlfhLmG+9uh+GJCquqWq7qqqnwDv4e4ufHdtk2QPhhe691XV37bFPneYu22W4nNnWQZBkvsn2XvbNPBU4EqGoSzWtc3WAefPpsJdxnztcQHwG+0TIMcA3912GqAX253X/jcMzx8Y2ub5Se6T5DDgCOBz065vWpIEOAO4uqreOrGq++fOfG2zJJ87s75aPcYNeBjD1fkvAVcBf9iWPxC4CLim3e8/61qn2CYfYOim/pjhncnJ87UHQxf2XQyfavgysGbW9c+gbf6q/e1XMPwHPnhi+z9sbfNV4Bmzrn/ktvmXDKcvrgAub7cTfO7ssG2W3HPHISYkqXPL8tSQJGnxDAJJ6pxBIEmdMwgkqXMGgSR1ziDQspJk9eQoopIWZhBITZKp/3TrvbFU6tTSYRBoOVqR5D1tjPiPJ7lvkqOSfLYNBHbexPj5Fyf50yQbgVcleU6SK5N8KcklbZsVSf4syefb/i9ty49Lckl7vK8keXeS3dq6F2T4PYwrk5zWlj03yVvb9KuSXNemD0/y6Tb9uCQb22CJH5sYxuGn6pxuc2q5852FlqMjgBdU1W8nORf4deD3gVdW1cYk/xH4D8Cr2/Yrq+rJAEm+DDytqm5MsrKtP5lhqIRfTnIf4O+TfLytO5phnPnrgY8Cz0ryD8BpwOOA2xlGwT0JuAT4vbbfE4HbkhzC8A3VT7Vxa94JrK2qrUmeB7wJ+M3t65R2JoNAy9HXq+ryNn0Zw0iQK6tqY1u2Afibie3PmZj+e+CsFiDbBlh7KvDoJM9u8/syhM2PgM9V1bZ39h9geFH/MXBxVW1ty98HPKmqPpzkAW0crIcA72f4UZwntmM9HHgUcOEwjA0rGIa+mKtOaacxCLQc/XBi+i5g5XwbNj/YNlFVL0vyeOBE4PIkRzGMn/PKqvrY5E5JjuNnhxEu5h5ueJvPAC9hGGvmUwzv9o8FXgP8AnBVVR27UJ3SzuQ1AvXgu8DtSZ7Y5l8MbJxrwySHV9WlVfXHwK0M79w/Bvy7duqGJL/YRrUFODrJYe3awPOATzP8OMmTkxyQZAXwgonjXQK8tt1/EfhXwA+r6rsM4bAqybHtOHskeeTOawZpbvYI1It1wLuT3A+4juFd+Vz+LMkRDO/qL2IYwfYKYDXwhTb08Fbu/mnGzwCnAv+c4cX9vKr6SZLXA59sj/M/qmrbMM2fYgiXS6rqriTfBP4RoKp+1E4/vSPJvgz/P/+CYQRdaTSOPirdS+3U0Gur6pmzrkX6eXhqSJI6Z49Akjpnj0CSOmcQSFLnDAJJ6pxBIEmdMwgkqXP/D0NhJEwAoV1NAAAAAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "%matplotlib inline\n", | |
| "import matplotlib as plt\n", | |
| "from matplotlib import pyplot\n", | |
| "plt.pyplot.hist(df[\"horsepower\"])\n", | |
| "\n", | |
| "# set x/y labels and plot title\n", | |
| "plt.pyplot.xlabel(\"horsepower\")\n", | |
| "plt.pyplot.ylabel(\"count\")\n", | |
| "plt.pyplot.title(\"horsepower bins\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([ 48. , 119.33333333, 190.66666667, 262. ])" | |
| ] | |
| }, | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "bins = np.linspace(min(df[\"horsepower\"]), max(df[\"horsepower\"]), 4)\n", | |
| "bins" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 51, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "group_names = ['Low', 'Medium', 'High']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 52, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>horsepower</th>\n", | |
| " <th>horsepower-binned</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>111</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>111</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>154</td>\n", | |
| " <td>Medium</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>102</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>115</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>110</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>110</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>110</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>140</td>\n", | |
| " <td>Medium</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>101</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>101</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>121</td>\n", | |
| " <td>Medium</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>121</td>\n", | |
| " <td>Medium</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>121</td>\n", | |
| " <td>Medium</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>182</td>\n", | |
| " <td>Medium</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td>182</td>\n", | |
| " <td>Medium</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16</th>\n", | |
| " <td>182</td>\n", | |
| " <td>Medium</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>17</th>\n", | |
| " <td>48</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>18</th>\n", | |
| " <td>70</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>19</th>\n", | |
| " <td>70</td>\n", | |
| " <td>Low</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " horsepower horsepower-binned\n", | |
| "0 111 Low\n", | |
| "1 111 Low\n", | |
| "2 154 Medium\n", | |
| "3 102 Low\n", | |
| "4 115 Low\n", | |
| "5 110 Low\n", | |
| "6 110 Low\n", | |
| "7 110 Low\n", | |
| "8 140 Medium\n", | |
| "9 101 Low\n", | |
| "10 101 Low\n", | |
| "11 121 Medium\n", | |
| "12 121 Medium\n", | |
| "13 121 Medium\n", | |
| "14 182 Medium\n", | |
| "15 182 Medium\n", | |
| "16 182 Medium\n", | |
| "17 48 Low\n", | |
| "18 70 Low\n", | |
| "19 70 Low" | |
| ] | |
| }, | |
| "execution_count": 52, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Apply the function \"cut\" the determine what each value of \"df['horsepower']\" belongs to.\n", | |
| "\n", | |
| "df['horsepower-binned'] = pd.cut(df['horsepower'], bins, labels=group_names, include_lowest=True )\n", | |
| "df[['horsepower','horsepower-binned']].head(20)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 53, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Low 153\n", | |
| "Medium 43\n", | |
| "High 5\n", | |
| "Name: horsepower-binned, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 53, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Lets see the number of vehicles in each bin\n", | |
| "\n", | |
| "df[\"horsepower-binned\"].value_counts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 54, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#Plot the distribution of each bin" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 55, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Text(0.5, 1.0, 'horsepower bins')" | |
| ] | |
| }, | |
| "execution_count": 55, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGStJREFUeJzt3XmUXnWd5/H3B1ARQQKmUEyAII1Oo8OgBkQdkJZuxWUMxxVGMSJ2xKFRR1GxnVZbpRuXadtt5MQGARcQF4RpbZVGIdgKGhRBwCWDLAGEQllcQeA7f9xfyUNxk1RinnoqqffrnOfUvb/7u8/9Vj2n6lN3+91UFZIkTbbJqAuQJM1MBoQkqZcBIUnqZUBIknoZEJKkXgaEJKmXAaEZKcmVSf5y1HXMdEn2S7JyNcuPS/J301mTNh6bjboAScNTVYePugZtuNyD0EYtyUbzT9DG9L1ow2BAaCbbI8nFSW5N8pkkm08sSPLXSVYk+WWSM5M8fGBZJTkiyU+Bn6bz/iQ3tve6OMljWt8HJHlfkquT3NAOyTywLdsvycokf5vkpnbY68UD29k6yclJxpNcleR/JdmkLbsqyePb9EtaTbu1+Vck+WKb3iTJ0Un+X5JfJDktybZt2YK23mFJrga+vqof1GpqPDHJuyZ9P69vP4vrkxw60PeZSS5L8qsk1yY56k/58LThMyA0k70QOADYGdgdeBlAkqcC/9iWbw9cBZw6ad0DgScAuwFPA/YFHgnMAV4E/KL1e3dr3wP4M2Ae8NaB93kYMLe1LwaWJnlUW/YhYGvgEcBTgJcCE39wzwX2a9P7Ale0PhPz57bpV7danwI8HLgZ+Mik7+UpwJ8DT+/5Ga2pxr6+W7e+hwEfSbJNW3Y88Mqq2gp4DKsJJM0SVeXL14x7AVcCLxmYfw9wXJs+HnjPwLItgT8AC9p8AU8dWP5U4CfA3sAmA+0BfgPsMtD2ROBnbXo/4E7gQQPLTwP+DtgUuB3YbWDZK4Fz2vRhwJlt+nLgFcCpbf4q4HEDy/YfeI/t2/eyGbCgfS+PWM3PaZU1tukTgXcN9P0dsNlA3xuBvdv01e17ePCoP39fM+PlHoRmsp8PTP+WLgig+0/7qokFVfVruj2CeQP9rxlY/nXgw3T/md+QZGmSBwNjwBbAhUluSXIL8JXWPuHmqvrNwPxVbftzgfsP1tGmJ2o4F9gnycPowuQzwJOTLKD7D/6i1m8n4PSB7V8O3AU8tO97WYVV1djnF1V158D84M/1ecAzgauSnJvkiWvYrjZyBoQ2RNfR/WEFIMmDgIcA1w70udcwxVX1wap6PPBoukNKbwBuovuP+tFVNae9tq6qLQdW3aa9/4Qd2/ZvovtPf6dJy65t21tB98f31cCyqvoVXeAtAb5ZVXe3da4BnjGw/TlVtXlVrfJ76bGqGtdKVX23qhYB2wFfpNsT0SxmQGhD9Gng0CR7JHkA8A/ABVV1ZV/nJHsmeUKS+9EdUvo9cFf7I/0x4P1Jtmt95yWZfKz/75PcP8k+wLOBz1bVXXR/QI9JslWSnYDXAZ8cWO9c4G+453zDOZPmAY5r77FT2/5YkkXr8DO5T41rs3Jb98VJtq6qPwC30e3JaBYzILTBqaqz6c4DfB64HtgFOGg1qzyYLghupjv88gvgfW3Zm4AVwPlJbgP+HRg8wfvztt51wKeAw6vqR23ZkXSBcwXwTbrgOmFg3XOBrYBlq5gH+ABwJvC1JL8Czqc7ub42Vlfj2jgEuLL9HA4HXrIO76GNSKp8YJDUJ8l+wCerav6oa5FGwT0ISVIvA0KS1GtoAZHkhHa35g8ntR+Z5MdJLk3ynoH2N7c7Y3/cc5JQmnZVdY6HlzSbDXNslxPprj0/eaIhyV8Ai4Ddq+r2gStHdqM7yfhouuu3/z3JI9uVIpKkERhaQFTVsnZT0KBXAcdW1e2tz42tfRHdXaa3Az9LsgLYC/j26rYxd+7cWrBg8iYkSatz4YUX3lRVY2vqN92jQz6S7u7SY+iuRT+qqr5Ld/fp+QP9VnLvu2L/KMkSupuN2HHHHVm+fPlwK5akjUySq9bca/pPUm8GbEM3Js4bgNOShG5MnMl6r7+tqqVVtbCqFo6NrTEAJUnraLoDYiXwhep8B7ibbkyblcAOA/3msw5DBUiS1p/pDogv0o2sSZJH0g12dhPdnaQHtbH5dwZ2Bb4zzbVJkgYM7RxEklPohheem+6ZuW+jG4bghHbp6x3A4upu5b40yWnAZXRDFx/hFUySNFob9FAbCxcuLE9SS9LaSXJhVS1cUz/vpJYk9TIgJEm9DAhJUi8DQpLUa7rvpJ4xFhz9pVGXsNG68thnjboESeuBexCSpF4GhCSplwEhSeplQEiSehkQkqReBoQkqZcBIUnqZUBIknoZEJKkXgaEJKmXASFJ6mVASJJ6GRCSpF5DC4gkJyS5sT1/evKyo5JUkrltPkk+mGRFkouTPG5YdUmSpmaYexAnAgdMbkyyA/BXwNUDzc8Adm2vJcBHh1iXJGkKhhYQVbUM+GXPovcDbwRqoG0RcHJ1zgfmJNl+WLVJktZsWs9BJHkOcG1V/WDSonnANQPzK1tb33ssSbI8yfLx8fEhVSpJmraASLIF8BbgrX2Le9qqp42qWlpVC6tq4djY2PosUZI0YDofOboLsDPwgyQA84HvJdmLbo9hh4G+84HrprE2SdIk07YHUVWXVNV2VbWgqhbQhcLjqurnwJnAS9vVTHsDt1bV9dNVmyTpvoZ5mespwLeBRyVZmeSw1XT/MnAFsAL4GPA/hlWXJGlqhnaIqaoOXsPyBQPTBRwxrFokSWvPO6klSb0MCElSLwNCktTLgJAk9TIgJEm9DAhJUi8DQpLUy4CQJPUyICRJvQwISVIvA0KS1MuAkCT1MiAkSb0MCElSLwNCktTLgJAk9TIgJEm9DAhJUq9hPpP6hCQ3JvnhQNt7k/woycVJTk8yZ2DZm5OsSPLjJE8fVl2SpKkZ5h7EicABk9rOAh5TVbsDPwHeDJBkN+Ag4NFtnf+TZNMh1iZJWoOhBURVLQN+Oanta1V1Z5s9H5jfphcBp1bV7VX1M2AFsNewapMkrdkoz0G8HPi3Nj0PuGZg2crWdh9JliRZnmT5+Pj4kEuUpNlrJAGR5C3AncCnJpp6ulXfulW1tKoWVtXCsbGxYZUoSbPeZtO9wSSLgWcD+1fVRAisBHYY6DYfuG66a5Mk3WNa9yCSHAC8CXhOVf12YNGZwEFJHpBkZ2BX4DvTWZsk6d6GtgeR5BRgP2BukpXA2+iuWnoAcFYSgPOr6vCqujTJacBldIeejqiqu4ZVmyRpzYYWEFV1cE/z8avpfwxwzLDqkSStHe+kliT1MiAkSb0MCElSLwNCktTLgJAk9TIgJEm9DAhJUi8DQpLUy4CQJPUyICRJvQwISVIvA0KS1MuAkCT1MiAkSb0MCElSLwNCktTLgJAk9TIgJEm9hhYQSU5IcmOSHw60bZvkrCQ/bV+3ae1J8sEkK5JcnORxw6pLkjQ1w9yDOBE4YFLb0cDZVbUrcHabB3gGsGt7LQE+OsS6JElTMLSAqKplwC8nNS8CTmrTJwEHDrSfXJ3zgTlJth9WbZKkNZvucxAPrarrAdrX7Vr7POCagX4rW9t9JFmSZHmS5ePj40MtVpJms5lykjo9bdXXsaqWVtXCqlo4NjY25LIkafaa7oC4YeLQUft6Y2tfCeww0G8+cN001yZJGjDdAXEmsLhNLwbOGGh/abuaaW/g1olDUZKk0dhsWG+c5BRgP2BukpXA24BjgdOSHAZcDbygdf8y8ExgBfBb4NBh1SVJmpqhBURVHbyKRfv39C3giGHVIklaezPlJLUkaYYxICRJvQwISVIvA0KS1MuAkCT1MiAkSb0MCElSLwNCktTLgJAk9TIgJEm9phQQSc6eSpskaeOx2rGYkmwObEE34N423PPchgcDDx9ybZKkEVrTYH2vBF5LFwYXck9A3AZ8ZIh1SZJGbLUBUVUfAD6Q5Miq+tA01SRJmgGmNNx3VX0oyZOABYPrVNXJQ6pLkjRiUwqIJJ8AdgEuAu5qzQUYEJK0kZrqA4MWAru1B/tIkmaBqd4H8UPgYcMsRJI0s0x1D2IucFmS7wC3TzRW1XPWZaNJ/ifwCrrDVJfQPYN6e+BUYFvge8AhVXXHury/JOlPN9WAePv62mCSecCr6Q5Z/S7JacBBwDOB91fVqUmOAw4DPrq+titJWjtTvYrp3CFs94FJ/kB3I971wFOB/96Wn0QXSgaEJI3IVIfa+FWS29rr90nuSnLbumywqq4F3gdcTRcMt9LdhHdLVd3Zuq0E5q3L+0uS1o+p7kFsNTif5EBgr3XZYBuyYxGwM3AL8FngGX2bXcX6S4AlADvuuOO6lCBJmoJ1Gs21qr5Id0hoXfwl8LOqGq+qPwBfAJ4EzEkyEVjzgetWse2lVbWwqhaOjY2tYwmSpDWZ6o1yzx2Y3YTuvoh1vSfiamDvJFsAvwP2B5YD3wCeT3cl02LgjHV8f0nSejDVq5j+28D0ncCVdIeJ1lpVXZDkc3SXst4JfB9YCnwJODXJu1rb8evy/pKk9WOq5yAOXZ8braq3AW+b1HwF63heQ5K0/k31Kqb5SU5PcmOSG5J8Psn8YRcnSRqdqZ6k/jhwJt1zIeYB/7e1SZI2UlMNiLGq+nhV3dleJwJeQiRJG7GpBsRNSV6SZNP2egnwi2EWJkkarakGxMuBFwI/p7v7+fl0A+xJkjZSU73M9Z3A4qq6GSDJtnTDZbx8WIVJkkZrqnsQu0+EA0BV/RJ47HBKkiTNBFMNiE3aGErAH/cgprr3IUnaAE31j/z/Br7V7oAuuvMRxwytKknSyE31TuqTkyynG6AvwHOr6rKhViZJGqkpHyZqgWAoSNIssU7DfUuSNn4GhCSplwEhSeplQEiSehkQkqReBoQkqZcBIUnqNZKASDInyeeS/CjJ5UmemGTbJGcl+Wn7us2a30mSNCyj2oP4APCVqvpPwH8BLgeOBs6uql2Bs9u8JGlEpj0gkjwY2Bc4HqCq7qiqW4BFwEmt20nAgdNdmyTpHqPYg3gEMA58PMn3k/xLkgcBD62q6wHa1+1GUJskqRlFQGwGPA74aFU9FvgNa3E4KcmSJMuTLB8fHx9WjZI0640iIFYCK6vqgjb/ObrAuCHJ9gDt6419K1fV0qpaWFULx8bGpqVgSZqNpj0gqurnwDVJHtWa9qcbJfZMYHFrWwycMd21SZLuMaqnwh0JfCrJ/YErgEPpwuq0JIcBVwMvGFFtkiRGFBBVdRGwsGfR/tNdiySpn3dSS5J6GRCSpF4GhCSplwEhSeplQEiSehkQkqReBoQkqZcBIUnqZUBIknoZEJKkXgaEJKmXASFJ6mVASJJ6GRCSpF4GhCSplwEhSeplQEiSehkQkqReo3omNUk2BZYD11bVs5PsDJwKbAt8Dzikqu4YVX2aeRYc/aVRl7DRuvLYZ426BM1Ao9yDeA1w+cD8u4H3V9WuwM3AYSOpSpIEjCggkswHngX8S5sP8FTgc63LScCBo6hNktQZ1R7EPwNvBO5u8w8BbqmqO9v8SmDeKAqTJHWmPSCSPBu4saouHGzu6VqrWH9JkuVJlo+Pjw+lRknSaPYgngw8J8mVdCeln0q3RzEnycRJ8/nAdX0rV9XSqlpYVQvHxsamo15JmpWmPSCq6s1VNb+qFgAHAV+vqhcD3wCe37otBs6Y7tokSfeYSfdBvAl4XZIVdOckjh9xPZI0q43sPgiAqjoHOKdNXwHsNcp6JEn3mEl7EJKkGcSAkCT1MiAkSb0MCElSLwNCktTLgJAk9TIgJEm9DAhJUi8DQpLUy4CQJPUyICRJvQwISVIvA0KS1MuAkCT1MiAkSb0MCElSLwNCktTLgJAk9TIgJEm9pj0gkuyQ5BtJLk9yaZLXtPZtk5yV5Kft6zbTXZsk6R6j2IO4E3h9Vf05sDdwRJLdgKOBs6tqV+DsNi9JGpFpD4iqur6qvtemfwVcDswDFgEntW4nAQdOd22SpHuM9BxEkgXAY4ELgIdW1fXQhQiw3SrWWZJkeZLl4+Pj01WqJM06IwuIJFsCnwdeW1W3TXW9qlpaVQurauHY2NjwCpSkWW4kAZHkfnTh8Kmq+kJrviHJ9m359sCNo6hNktQZxVVMAY4HLq+qfxpYdCawuE0vBs6Y7tokSffYbATbfDJwCHBJkota298CxwKnJTkMuBp4wQhqkyQ10x4QVfVNIKtYvP901iJJWjXvpJYk9TIgJEm9DAhJUi8DQpLUy4CQJPUyICRJvQwISVIvA0KS1MuAkCT1MiAkSb0MCElSLwNCktTLgJAk9TIgJEm9RvE8CEmzxIKjvzTqEjZaVx77rKFvwz0ISVIvA0KS1MuAkCT1mnEBkeSAJD9OsiLJ0aOuR5JmqxkVEEk2BT4CPAPYDTg4yW6jrUqSZqcZFRDAXsCKqrqiqu4ATgUWjbgmSZqVZtplrvOAawbmVwJPGOyQZAmwpM3+OsmPp6m2UZsL3DTqIqYi7x51BTOGn9mGZYP5vOBP/sx2mkqnmRYQ6Wmre81ULQWWTk85M0eS5VW1cNR1aOr8zDYsfl73NdMOMa0EdhiYnw9cN6JaJGlWm2kB8V1g1yQ7J7k/cBBw5ohrkqRZaUYdYqqqO5P8DfBVYFPghKq6dMRlzRSz7rDaRsDPbMPi5zVJqmrNvSRJs85MO8QkSZohDAhJUi8DYgZI8utR16BOkkryiYH5zZKMJ/nXtXyfc5IsbNNfTjJnfdeqqZn8+5XkZUk+3KYPT/LSNaz/x/6zzYw6SS3NAL8BHpPkgVX1O+CvgGv/lDesqmeul8q03lXVcaOuYSZzD2KGSrJTkrOTXNy+7phk0yRXpDMnyd1J9m39z0vyZ6OueyPxb8DE01gOBk6ZWJDkQUlOSPLdJN9Psqi1PzDJqe3z+gzwwIF1rkwyN8mCJD8caD8qydvb9DlJ3p9kWZLLk+yZ5AtJfprkXdPwPc9KSd6e5Kg2vWf7/L6d5L2DnxXw8CRfaZ/He0ZU7rQzIGauDwMnV9XuwKeAD1bVXcBP6AYy/K/AhcA+SR4AzK+qFSOrduNyKnBQks2B3YELBpa9Bfh6Ve0J/AXw3iQPAl4F/LZ9XscAj1+H7d5RVfsCxwFnAEcAjwFeluQh6/zd6IFJLpp4Ae9YRb+PA4dX1ROBuyYt2wN4EfCfgRcl2WHyyhsjA2LmeiLw6Tb9CbpAADgP2Le9/rG170l3k6HWg6q6GFhAt/fw5UmLnwYc3f7QnANsDuxI93l8cmD9i9dh0xM3hV4CXFpV11fV7cAV3HuEAa2d31XVHhMv4K2TO7RzRFtV1bda06cndTm7qm6tqt8DlzHFsYw2dAbEhmPihpXzgH3oRr79MjAH2A9YNpqyNlpnAu9j4PBSE+B5A39wdqyqy9uyNd1UdCf3/p3bfNLy29vXuwemJ+Y9XzhcfePADRr8PO5ilnweBsTM9S26oUYAXgx8s01fADwJuLv9N3MR8Eq64ND6cwLwjqq6ZFL7V4EjkwQgyWNb+zK6z4kkj6E7NDXZDcB2SR7SDgs+eyiVa61V1c3Ar5Ls3ZoOWl3/2cKAmBm2SLJy4PU64NXAoUkuBg4BXgPQDjlcA5zf1j0P2IrusITWk6paWVUf6Fn0TuB+wMXtJOY7W/tHgS3b5/VG4Ds97/kHuuPfFwD/CvxoGLVrnR0GLE3ybbo9iltHXM/IOdSGJAFJtqyqX7fpo4Htq+o1Iy5rpGbFcTRJmoJnJXkz3d/Fq4CXjbac0XMPQpLUy3MQkqReBoQkqZcBIUnqZUBoVpg8DpKkNTMgpDVIskFc7beh1KkNhwGh2WTTJB9LcmmSr7URWPdIcn4bxfP0JNvAH0dX/Yck5wKvSfKCJD9M8oMky1qfTduon99t67+yte/XRmU9PcllSY5LsklbdnCSS9p7vbu1vTDJP7Xp1yS5ok3vkuSbbfrxSc5NcmGSrybZvq/O6f1xamPnfxyaTXYFDq6qv05yGvA8uruej6yqc5O8A3gb8NrWf05VPQUgySXA06vq2tzz8J/DgFuras82dMZ/JPlaW7YX3ai7VwFfAZ6b5FvAu+lGer0Z+FqSA+mG6XhDW28f4BdJ5tENxHhekvsBHwIWVdV4khfRjRj78sl1SuuTAaHZ5GdVdVGbvhDYhe6P67mt7STgswP9PzMw/R/AiS1YvtDangbsnuT5bX5ruhC6A/hOVU3sCZxC98f+D8A5VTXe2j8F7FtVX0yyZZKt6EZt/TTd6LD7tG09im7Y77PaEFCbAtevok5pvTEgNJtMHpFzTY8B/c3ERFUdnuQJdA8SuijJHnTj9RxZVV8dXCnJftx3ZNdi9SOGfhs4FPgx3fhaL6cb8v31dMOJX9qeU7DaOqX1yXMQms1uBW5Osk+bPwQ4t69jkl2q6oKqeitwE91/+l8FXtUOAZHkke3hQQB7Jdm5nXt4Ed1ovBcAT0n3dLlN6Z43MbG9ZcBR7ev36R5GdHtV3UoXGmNJnti2c78kj15/Pwapn3sQmu0WA8cl2YLuwTyHrqLfe5PsSrcXcDbwA7qHAi0AvteG/x4HDmz9vw0cS/cEsmXA6VV1dxvr5xvtfb5cVWe0/ufRhc6yqroryTW00V6r6o52GOuDSbam+739Z+DS9fQzkHo5FpO0nrVDTEdVlc970AbNQ0ySpF7uQUiSerkHIUnqZUBIknoZEJKkXgaEJKmXASFJ6vX/ARI/imTARbPNAAAAAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "%matplotlib inline\n", | |
| "import matplotlib as plt\n", | |
| "from matplotlib import pyplot\n", | |
| "pyplot.bar(group_names, df[\"horsepower-binned\"].value_counts())\n", | |
| "\n", | |
| "# set x/y labels and plot title\n", | |
| "plt.pyplot.xlabel(\"horsepower\")\n", | |
| "plt.pyplot.ylabel(\"count\")\n", | |
| "plt.pyplot.title(\"horsepower bins\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Bins visualization\n", | |
| "Normally, a histogram is used to visualize the distribution of bins we created above." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 56, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Text(0.5, 1.0, 'horsepower bins')" | |
| ] | |
| }, | |
| "execution_count": 56, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGMVJREFUeJzt3Xu4XXV95/H3B1C8IQFzUEzAIBOdouMgniLUESl0FNAxjPUC4yWD2GgHUaeiQp2KnZEOqNWqtfLEgkBLQbwgzKNVKUqirYABkatIyjWA5HCnaoHgd/5Y6zSb4zrJIWbvfZL9fj3PfvZav7X2Xt/zy875nHX77VQVkiRNtcWwC5AkzU4GhCSpkwEhSepkQEiSOhkQkqROBoQkqZMBoVkpyY1Jfm/Ydcx2SfZNsmody09M8ieDrEmbj62GXYCk/qmqdwy7Bm263IPQZi3JZvNH0Ob0s2jTYEBoNts9yeVJ7kvyxSRPmFyQ5A+SrExyd5JzkzyzZ1klOSLJdcB1aXwyyer2vS5P8vx23a2TfDzJzUnuaA/JPLFdtm+SVUn+OMmd7WGvN/ZsZ9skpyWZSHJTkv+VZIt22U1JXtROv6mtabd2/m1JvtZOb5Hk6CT/nOSuJGcl2b5dtqB93eFJbga+M11HraPGU5J8ZMrP8962L25PcljPugcluTrJA0luTXLUb/KPp02fAaHZ7PXAAcAuwAuA/w6QZD/g/7bLdwRuAs6c8tqDgRcDuwEvB/YBngPMAd4A3NWud0Lbvjvw74B5wId63ucZwNy2fTGwNMlz22WfAbYFng28DHgLMPkLdxmwbzu9D3B9u87k/LJ2+l1trS8DngncA3x2ys/yMuC3gFd09NH6auxad9t23cOBzybZrl12EvD2qtoGeD7rCCSNiKry4WPWPYAbgTf1zH8UOLGdPgn4aM+ypwAPAwva+QL261m+H/BTYC9gi572AD8Hdu1p2xu4oZ3eF1gDPLln+VnAnwBbAg8Cu/UseztwQTt9OHBuO30N8DbgzHb+JmCPnmX797zHju3PshWwoP1Znr2Ofpq2xnb6FOAjPev+EtiqZ93VwF7t9M3tz/DUYf/7+5gdD/cgNJv9rGf6FzRBAM1f2jdNLqiqf6HZI5jXs/4tPcu/A/wlzV/mdyRZmuSpwBjwJOCSJPcmuRf4Zts+6Z6q+nnP/E3t9ucCj++to52erGEZ8NIkz6AJky8CL0mygOYv+Mva9Z4FnN2z/WuAR4Cnd/0s05iuxi53VdWanvnefv194CDgpiTLkuy9nu1qM2dAaFN0G80vVgCSPBl4GnBrzzqPGqa4qj5dVS8CnkdzSOl9wJ00f1E/r6rmtI9tq+opPS/drn3/STu327+T5i/9Z01Zdmu7vZU0v3zfBSyvqgdoAm8J8P2q+lX7mluAA3u2P6eqnlBV0/4sHaar8TGpqh9W1SJgB+BrNHsiGmEGhDZFfwcclmT3JFsDfwZcVFU3dq2c5LeTvDjJ42gOKf0r8Ej7S/rzwCeT7NCuOy/J1GP9f5rk8UleCrwK+FJVPULzC/S4JNskeRbwR8Df9rxuGfBO1p5vuGDKPMCJ7Xs8q93+WJJFG9Anv1bjY3lx+9o3Jtm2qh4G7qfZk9EIMyC0yamq82nOA3wFuB3YFThkHS95Kk0Q3ENz+OUu4OPtsg8AK4ELk9wP/APQe4L3Z+3rbgNOB95RVT9plx1JEzjXA9+nCa6Te167DNgGWD7NPMCngHOBbyd5ALiQ5uT6Y7GuGh+LNwM3tv3wDuBNG/Ae2oykyi8Mkrok2Rf426qaP+xapGFwD0KS1MmAkCR16ltAJDm5vVvzyintRya5NslVST7a035Me2fstR0nCaWBq6oLPLykUdbPsV1Oobn2/LTJhiS/CywCXlBVD/ZcObIbzUnG59Fcv/0PSZ7TXikiSRqCvgVEVS1vbwrq9YfA8VX1YLvO6rZ9Ec1dpg8CNyRZCewJ/GBd25g7d24tWDB1E5KkdbnkkkvurKqx9a036NEhn0Nzd+lxNNeiH1VVP6S5+/TCnvVW8ei7Yv9NkiU0Nxux8847s2LFiv5WLEmbmSQ3rX+twZ+k3grYjmZMnPcBZyUJzZg4U3Vef1tVS6tqvKrGx8bWG4CSpA006IBYBXy1GhcDv6IZ02YVsFPPevPZgKECJEkbz6AD4ms0I2uS5Dk0g53dSXMn6SHt2Py7AAuBiwdcmySpR9/OQSQ5g2Z44blpvjP3WJphCE5uL319CFhcza3cVyU5C7iaZujiI7yCSZKGa5MeamN8fLw8SS1Jj02SS6pqfH3reSe1JKmTASFJ6mRASJI6GRCSpE6DvpN61lhw9NeHXYI63Hj8K4ddgqSWexCSpE4GhCSpkwEhSepkQEiSOhkQkqROBoQkqZMBIUnqZEBIkjoZEJKkTgaEJKmTASFJ6mRASJI6GRCSpE59C4gkJydZ3X7/9NRlRyWpJHPb+ST5dJKVSS5Pske/6pIkzUw/9yBOAQ6Y2phkJ+A/Azf3NB8ILGwfS4DP9bEuSdIM9C0gqmo5cHfHok8C7weqp20RcFo1LgTmJNmxX7VJktZvoOcgkrwauLWqfjxl0Tzglp75VW1b13ssSbIiyYqJiYk+VSpJGlhAJHkS8EHgQ12LO9qqo42qWlpV41U1PjY2tjFLlCT1GORXju4K7AL8OAnAfODSJHvS7DHs1LPufOC2AdYmSZpiYHsQVXVFVe1QVQuqagFNKOxRVT8DzgXe0l7NtBdwX1XdPqjaJEm/rp+XuZ4B/AB4bpJVSQ5fx+rfAK4HVgKfB/5Hv+qSJM1M3w4xVdWh61m+oGe6gCP6VYsk6bHzTmpJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1Kmf30l9cpLVSa7saftYkp8kuTzJ2Unm9Cw7JsnKJNcmeUW/6pIkzUw/9yBOAQ6Y0nYe8PyqegHwU+AYgCS7AYcAz2tf81dJtuxjbZKk9ehbQFTVcuDuKW3frqo17eyFwPx2ehFwZlU9WFU3ACuBPftVmyRp/YZ5DuKtwN+30/OAW3qWrWrbfk2SJUlWJFkxMTHR5xIlaXQNJSCSfBBYA5w+2dSxWnW9tqqWVtV4VY2PjY31q0RJGnlbDXqDSRYDrwL2r6rJEFgF7NSz2nzgtkHXJklaa6B7EEkOAD4AvLqqftGz6FzgkCRbJ9kFWAhcPMjaJEmP1rc9iCRnAPsCc5OsAo6luWppa+C8JAAXVtU7quqqJGcBV9Mcejqiqh7pV22SpPXrW0BU1aEdzSetY/3jgOP6VY8k6bHxTmpJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1KlvAZHk5CSrk1zZ07Z9kvOSXNc+b9e2J8mnk6xMcnmSPfpVlyRpZvq5B3EKcMCUtqOB86tqIXB+Ow9wILCwfSwBPtfHuiRJM9C3gKiq5cDdU5oXAae206cCB/e0n1aNC4E5SXbsV22SpPUb9DmIp1fV7QDt8w5t+zzglp71VrVtvybJkiQrkqyYmJjoa7GSNMpmy0nqdLRV14pVtbSqxqtqfGxsrM9lSdLoGnRA3DF56Kh9Xt22rwJ26llvPnDbgGuTJPUYdECcCyxupxcD5/S0v6W9mmkv4L7JQ1GSpOHYql9vnOQMYF9gbpJVwLHA8cBZSQ4HbgZe167+DeAgYCXwC+CwftUlSZqZvgVEVR06zaL9O9Yt4Ih+1SJJeuxmy0lqSdIsY0BIkjoZEJKkTgaEJKmTASFJ6mRASJI6GRCSpE4GhCSpkwEhSepkQEiSOs0oIJKcP5M2SdLmY51jMSV5AvAkmgH3tmPt9zY8FXhmn2uTJA3R+gbrezvwHpowuIS1AXE/8Nk+1iVJGrJ1BkRVfQr4VJIjq+ozA6pJkjQLzGi476r6TJLfARb0vqaqTutTXZKkIZtRQCT5G2BX4DLgkba5AANCkjZTM/3CoHFgt/aLfSRJI2Cm90FcCTyjn4VIkmaXme5BzAWuTnIx8OBkY1W9ekM2muR/Am+jOUx1Bc13UO8InAlsD1wKvLmqHtqQ95ck/eZmGhAf3lgbTDIPeBfNIatfJjkLOAQ4CPhkVZ2Z5ETgcOBzG2u7kqTHZqZXMS3rw3afmORhmhvxbgf2A/5bu/xUmlAyICRpSGY61MYDSe5vH/+a5JEk92/IBqvqVuDjwM00wXAfzU1491bVmna1VcC8DXl/SdLGMdM9iG1655McDOy5IRtsh+xYBOwC3At8CTiwa7PTvH4JsARg55133pASJEkzsEGjuVbV12gOCW2I3wNuqKqJqnoY+CrwO8CcJJOBNR+4bZptL62q8aoaHxsb28ASJEnrM9Mb5V7TM7sFzX0RG3pPxM3AXkmeBPwS2B9YAXwXeC3NlUyLgXM28P0lSRvBTK9i+i8902uAG2kOEz1mVXVRki/TXMq6BvgRsBT4OnBmko+0bSdtyPtLkjaOmZ6DOGxjbrSqjgWOndJ8PRt4XkOStPHN9Cqm+UnOTrI6yR1JvpJkfr+LkyQNz0xPUn8BOJfmeyHmAf+vbZMkbaZmGhBjVfWFqlrTPk4BvIRIkjZjMw2IO5O8KcmW7eNNwF39LEySNFwzDYi3Aq8HfkZz9/NraQbYkyRtpmZ6mev/ARZX1T0ASbanGS7jrf0qTJI0XDPdg3jBZDgAVNXdwAv7U5IkaTaYaUBs0Y6hBPzbHsRM9z4kSZugmf6S/3Pgn9o7oIvmfMRxfatKkjR0M72T+rQkK2gG6Avwmqq6uq+VSZKGasaHidpAMBQkaURs0HDfkqTNnwEhSepkQEiSOhkQkqROBoQkqZMBIUnqZEBIkjoNJSCSzEny5SQ/SXJNkr2TbJ/kvCTXtc/brf+dJEn9Mqw9iE8B36yqfw/8R+Aa4Gjg/KpaCJzfzkuShmTgAZHkqcA+wEkAVfVQVd0LLAJObVc7FTh40LVJktYaxh7Es4EJ4AtJfpTkr5M8GXh6Vd0O0D7vMITaJEmtYQTEVsAewOeq6oXAz3kMh5OSLEmyIsmKiYmJftUoSSNvGAGxClhVVRe181+mCYw7kuwI0D6v7npxVS2tqvGqGh8bGxtIwZI0igYeEFX1M+CWJM9tm/anGSX2XGBx27YYOGfQtUmS1hrWt8IdCZye5PHA9cBhNGF1VpLDgZuB1w2pNkkSQwqIqroMGO9YtP+ga5EkdfNOaklSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUaVjfSU2SLYEVwK1V9aokuwBnAtsDlwJvrqqHhlWfhmPB0V8fdgma4sbjXznsEjQkw9yDeDdwTc/8CcAnq2ohcA9w+FCqkiQBQwqIJPOBVwJ/3c4H2A/4crvKqcDBw6hNktQY1h7EXwDvB37Vzj8NuLeq1rTzq4B5wyhMktQYeEAkeRWwuqou6W3uWLWmef2SJCuSrJiYmOhLjZKk4exBvAR4dZIbaU5K70ezRzEnyeRJ8/nAbV0vrqqlVTVeVeNjY2ODqFeSRtLAA6Kqjqmq+VW1ADgE+E5VvRH4LvDadrXFwDmDrk2StNZsug/iA8AfJVlJc07ipCHXI0kjbWj3QQBU1QXABe309cCew6xHkrTWbNqDkCTNIgaEJKmTASFJ6mRASJI6GRCSpE4GhCSpkwEhSepkQEiSOhkQkqROBoQkqZMBIUnqZEBIkjoZEJKkTgaEJKmTASFJ6mRASJI6GRCSpE4GhCSpkwEhSeo08IBIslOS7ya5JslVSd7dtm+f5Lwk17XP2w26NknSWsPYg1gDvLeqfgvYCzgiyW7A0cD5VbUQOL+dlyQNycADoqpur6pL2+kHgGuAecAi4NR2tVOBgwddmyRpraGeg0iyAHghcBHw9Kq6HZoQAXaY5jVLkqxIsmJiYmJQpUrSyBlaQCR5CvAV4D1Vdf9MX1dVS6tqvKrGx8bG+legJI24oQREksfRhMPpVfXVtvmOJDu2y3cEVg+jNklSYxhXMQU4Cbimqj7Rs+hcYHE7vRg4Z9C1SZLW2moI23wJ8GbgiiSXtW1/DBwPnJXkcOBm4HVDqE2S1Bp4QFTV94FMs3j/QdYiSZqed1JLkjoZEJKkTgaEJKmTASFJ6mRASJI6GRCSpE4GhCSpkwEhSepkQEiSOhkQkqROBoQkqZMBIUnqZEBIkjoZEJKkTsP4PghJm5AFR3992CWow43Hv7Lv23APQpLUyYCQJHUyICRJnWZdQCQ5IMm1SVYmOXrY9UjSqJpVAZFkS+CzwIHAbsChSXYbblWSNJpmVUAAewIrq+r6qnoIOBNYNOSaJGkkzbbLXOcBt/TMrwJe3LtCkiXAknb2X5JcuxG2Oxe4cyO8z+bIvpmefbNu9s/0fuO+yQm/0fafNZOVZltApKOtHjVTtRRYulE3mqyoqvGN+Z6bC/tmevbNutk/09tU+ma2HWJaBezUMz8fuG1ItUjSSJttAfFDYGGSXZI8HjgEOHfINUnSSJpVh5iqak2SdwLfArYETq6qqwaw6Y16yGozY99Mz75ZN/tneptE36Sq1r+WJGnkzLZDTJKkWcKAkCR1GsmASHJjkiuSXJZkRdu2fZLzklzXPm837DoHIcnJSVYnubKnrbMv0vh0OwzK5Un2GF7l/TdN33w4ya3tZ+eyJAf1LDum7Ztrk7xiOFUPRpKdknw3yTVJrkry7rZ95D876+ibTe+zU1Uj9wBuBOZOafsocHQ7fTRwwrDrHFBf7APsAVy5vr4ADgL+nuZ+lb2Ai4Zd/xD65sPAUR3r7gb8GNga2AX4Z2DLYf8MfeybHYE92ultgJ+2fTDyn5119M0m99kZyT2IaSwCTm2nTwUOHmItA1NVy4G7pzRP1xeLgNOqcSEwJ8mOg6l08Kbpm+ksAs6sqger6gZgJc3QMZulqrq9qi5tpx8ArqEZCWHkPzvr6JvpzNrPzqgGRAHfTnJJO3QHwNOr6nZo/oGBHYZW3fBN1xddQ6Gs64O/uXpne5jk5J5DkSPbN0kWAC8ELsLPzqNM6RvYxD47oxoQL6mqPWhGjT0iyT7DLmgTsd6hUEbA54Bdgd2B24E/b9tHsm+SPAX4CvCeqrp/Xat2tG3W/dPRN5vcZ2ckA6KqbmufVwNn0+zO3TG5y9s+rx5ehUM3XV+M/FAoVXVHVT1SVb8CPs/aQwEj1zdJHkfzC/D0qvpq2+xnh+6+2RQ/OyMXEEmenGSbyWng5cCVNEN6LG5XWwycM5wKZ4Xp+uJc4C3tFSl7AfdNHk4YFVOOm/9Xms8ONH1zSJKtk+wCLAQuHnR9g5IkwEnANVX1iZ5FI//Zma5vNsnPzrDPkg/6ATyb5oqBHwNXAR9s258GnA9c1z5vP+xaB9QfZ9Ds7j5M85fM4dP1Bc2u8GdprrK4Ahgfdv1D6Ju/aX/2y2n+Y+/Ys/4H2765Fjhw2PX3uW/+E81hkMuBy9rHQX521tk3m9xnx6E2JEmdRu4QkyRpZgwISVInA0KS1MmAkCR1MiAkSZ0MCI2EJAt6R2WVtH4GhLQeSWbVV/NOZ1OpU5sOA0KjZMskn2/H6P92kicm2T3Jhe0Aamf3fH/BBUn+LMky4N1JXpfkyiQ/TrK8XWfLJB9L8sP29W9v2/dNsrx9v6uTnJhki3bZoWm+i+TKJCe0ba9P8ol2+t1Jrm+nd03y/Xb6RUmWtQNMfqtnOItH1TnY7tTmzr84NEoWAodW1R8kOQv4feD9wJFVtSzJ/waOBd7Trj+nql4GkOQK4BVVdWuSOe3yw2mGjPjtJFsD/5jk2+2yPWnG+b8J+CbwmiT/BJwAvAi4h2ZE4YOB5cD72te9FLgryTyaO3K/147r8xlgUVVNJHkDcBzw1ql1ShuTAaFRckNVXdZOX0IzsuacqlrWtp0KfKln/S/2TP8jcEobLJMD070ceEGS17bz29KE0EPAxVU1uSdwBs0v+4eBC6pqom0/Hdinqr6W5CntGGE7AX9H82VFL2239Vzg+cB5zTA/bEkzBEhXndJGY0BolDzYM/0IMGe6FVs/n5yoqnckeTHwSuCyJLvTjC90ZFV9q/dFSfbl14drLrqHdZ70A+AwmrF4vkezd7A38F5gZ+Cqqtp7fXVKG5PnIDTK7gPuSfLSdv7NwLKuFZPsWlUXVdWHgDtp/tL/FvCH7SEgkjynHSEYYM8ku7TnHt4AfJ/mS2NelmRuki2BQ3u2txw4qn3+EfC7wINVdR9NaIwl2bvdzuOSPG/jdYPUzT0IjbrFwIlJngRcT/NXfJePJVlIsxdwPs1owJcDC4BL2yGeJ1j7FZs/AI4H/gPNL/2zq+pXSY4Bvtu+zzeqanI47O/RhM7yqnokyS3ATwCq6qH2MNank2xL8//2L2hGI5b6xtFcpY2sPcR0VFW9ati1SL8JDzFJkjq5ByFJ6uQehCSpkwEhSepkQEiSOhkQkqROBoQkqdP/B5Vxna43S1i+AAAAAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "%matplotlib inline\n", | |
| "import matplotlib as plt\n", | |
| "from matplotlib import pyplot\n", | |
| "\n", | |
| "a = (0,1,2)\n", | |
| "\n", | |
| "# draw historgram of attribute \"horsepower\" with bins = 3\n", | |
| "plt.pyplot.hist(df[\"horsepower\"], bins = 3)\n", | |
| "\n", | |
| "# set x/y labels and plot title\n", | |
| "plt.pyplot.xlabel(\"horsepower\")\n", | |
| "plt.pyplot.ylabel(\"count\")\n", | |
| "plt.pyplot.title(\"horsepower bins\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Indicator variable (or dummy variable)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 57, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#Use the panda's method 'get_dummies' to assign numerical values to different categories of fuel type." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 58, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Index(['symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',\n", | |
| " 'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',\n", | |
| " 'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type',\n", | |
| " 'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke',\n", | |
| " 'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg',\n", | |
| " 'highway-mpg', 'price', 'city-L/100km', 'horsepower-binned'],\n", | |
| " dtype='object')" | |
| ] | |
| }, | |
| "execution_count": 58, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.columns" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 59, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#Get indicator variables and assign it to data frame \"dummy_variable_1\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 60, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>diesel</th>\n", | |
| " <th>gas</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " diesel gas\n", | |
| "0 0 1\n", | |
| "1 0 1\n", | |
| "2 0 1\n", | |
| "3 0 1\n", | |
| "4 0 1" | |
| ] | |
| }, | |
| "execution_count": 60, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dummy_variable_1 = pd.get_dummies(df[\"fuel-type\"])\n", | |
| "dummy_variable_1.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 61, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#Change column names for clarity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 62, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>diesel</th>\n", | |
| " <th>gas</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " diesel gas\n", | |
| "0 0 1\n", | |
| "1 0 1\n", | |
| "2 0 1\n", | |
| "3 0 1\n", | |
| "4 0 1" | |
| ] | |
| }, | |
| "execution_count": 62, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dummy_variable_1.rename(columns={'fuel-type-diesel':'gas', 'fuel-type-diesel':'diesel'}, inplace=True)\n", | |
| "dummy_variable_1.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 63, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#The value 0 to represents \"gas\" and 1 to represent \"diesel\" in the column \"fuel-type\". We will now insert this column back into our original dataset." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 64, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# merge data frame \"df\" and \"dummy_variable_1\" \n", | |
| "df = pd.concat([df, dummy_variable_1], axis=1)\n", | |
| "\n", | |
| "# drop original column \"fuel-type\" from \"df\"\n", | |
| "df.drop(\"fuel-type\", axis = 1, inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 65, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>symboling</th>\n", | |
| " <th>normalized-losses</th>\n", | |
| " <th>make</th>\n", | |
| " <th>aspiration</th>\n", | |
| " <th>num-of-doors</th>\n", | |
| " <th>body-style</th>\n", | |
| " <th>drive-wheels</th>\n", | |
| " <th>engine-location</th>\n", | |
| " <th>wheel-base</th>\n", | |
| " <th>length</th>\n", | |
| " <th>...</th>\n", | |
| " <th>compression-ratio</th>\n", | |
| " <th>horsepower</th>\n", | |
| " <th>peak-rpm</th>\n", | |
| " <th>city-mpg</th>\n", | |
| " <th>highway-mpg</th>\n", | |
| " <th>price</th>\n", | |
| " <th>city-L/100km</th>\n", | |
| " <th>horsepower-binned</th>\n", | |
| " <th>diesel</th>\n", | |
| " <th>gas</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>3</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>0.811148</td>\n", | |
| " <td>...</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27.0</td>\n", | |
| " <td>13495.0</td>\n", | |
| " <td>11.190476</td>\n", | |
| " <td>Low</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>3</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>convertible</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>88.6</td>\n", | |
| " <td>0.811148</td>\n", | |
| " <td>...</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>111</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>21</td>\n", | |
| " <td>27.0</td>\n", | |
| " <td>16500.0</td>\n", | |
| " <td>11.190476</td>\n", | |
| " <td>Low</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>122</td>\n", | |
| " <td>alfa-romero</td>\n", | |
| " <td>std</td>\n", | |
| " <td>two</td>\n", | |
| " <td>hatchback</td>\n", | |
| " <td>rwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>94.5</td>\n", | |
| " <td>0.822681</td>\n", | |
| " <td>...</td>\n", | |
| " <td>9.0</td>\n", | |
| " <td>154</td>\n", | |
| " <td>5000.0</td>\n", | |
| " <td>19</td>\n", | |
| " <td>26.0</td>\n", | |
| " <td>16500.0</td>\n", | |
| " <td>12.368421</td>\n", | |
| " <td>Medium</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>fwd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.8</td>\n", | |
| " <td>0.848630</td>\n", | |
| " <td>...</td>\n", | |
| " <td>10.0</td>\n", | |
| " <td>102</td>\n", | |
| " <td>5500.0</td>\n", | |
| " <td>24</td>\n", | |
| " <td>30.0</td>\n", | |
| " <td>13950.0</td>\n", | |
| " <td>9.791667</td>\n", | |
| " <td>Low</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2</td>\n", | |
| " <td>164</td>\n", | |
| " <td>audi</td>\n", | |
| " <td>std</td>\n", | |
| " <td>four</td>\n", | |
| " <td>sedan</td>\n", | |
| " <td>4wd</td>\n", | |
| " <td>front</td>\n", | |
| " <td>99.4</td>\n", | |
| " <td>0.848630</td>\n", | |
| " <td>...</td>\n", | |
| " <td>8.0</td>\n", | |
| " <td>115</td>\n", | |
| " <td>5500.0</td>\n", | |
| " <td>18</td>\n", | |
| " <td>22.0</td>\n", | |
| " <td>17450.0</td>\n", | |
| " <td>13.055556</td>\n", | |
| " <td>Low</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 29 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " symboling normalized-losses make aspiration num-of-doors \\\n", | |
| "0 3 122 alfa-romero std two \n", | |
| "1 3 122 alfa-romero std two \n", | |
| "2 1 122 alfa-romero std two \n", | |
| "3 2 164 audi std four \n", | |
| "4 2 164 audi std four \n", | |
| "\n", | |
| " body-style drive-wheels engine-location wheel-base length ... \\\n", | |
| "0 convertible rwd front 88.6 0.811148 ... \n", | |
| "1 convertible rwd front 88.6 0.811148 ... \n", | |
| "2 hatchback rwd front 94.5 0.822681 ... \n", | |
| "3 sedan fwd front 99.8 0.848630 ... \n", | |
| "4 sedan 4wd front 99.4 0.848630 ... \n", | |
| "\n", | |
| " compression-ratio horsepower peak-rpm city-mpg highway-mpg price \\\n", | |
| "0 9.0 111 5000.0 21 27.0 13495.0 \n", | |
| "1 9.0 111 5000.0 21 27.0 16500.0 \n", | |
| "2 9.0 154 5000.0 19 26.0 16500.0 \n", | |
| "3 10.0 102 5500.0 24 30.0 13950.0 \n", | |
| "4 8.0 115 5500.0 18 22.0 17450.0 \n", | |
| "\n", | |
| " city-L/100km horsepower-binned diesel gas \n", | |
| "0 11.190476 Low 0 1 \n", | |
| "1 11.190476 Low 0 1 \n", | |
| "2 12.368421 Medium 0 1 \n", | |
| "3 9.791667 Low 0 1 \n", | |
| "4 13.055556 Low 0 1 \n", | |
| "\n", | |
| "[5 rows x 29 columns]" | |
| ] | |
| }, | |
| "execution_count": 65, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Question #4: \n", | |
| "As above, create indicator variable to the column of \"aspiration\": \"std\" to 0, while \"turbo\" to 1." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 66, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# get indicator variables of aspiration and assign it to data frame \"dummy_variable_2\"\n", | |
| "dummy_variable_2 = pd.get_dummies(df['aspiration'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 67, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# change column names for clarity\n", | |
| "dummy_variable_2.rename(columns={'std':'aspiration-std', 'turbo': 'aspiration-turbo'}, inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 68, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>aspiration-std</th>\n", | |
| " <th>aspiration-turbo</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " aspiration-std aspiration-turbo\n", | |
| "0 1 0\n", | |
| "1 1 0\n", | |
| "2 1 0\n", | |
| "3 1 0\n", | |
| "4 1 0" | |
| ] | |
| }, | |
| "execution_count": 68, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# show first 5 instances of data frame \"dummy_variable_1\"\n", | |
| "dummy_variable_2.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Question #5: \n", | |
| "Merge the new dataframe to the original dataframe then drop the column 'aspiration'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 69, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#merge the new dataframe to the original datafram\n", | |
| "df = pd.concat([df, dummy_variable_2], axis=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 70, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# drop original column \"aspiration\" from \"df\"\n", | |
| "df.drop('aspiration', axis = 1, inplace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Save the new csv" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 71, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df.to_csv('clean_df.csv')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.8" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment