Created
February 13, 2017 08:53
-
-
Save soumikghosal/34ce215b047056e258b31e17f7dd05b2 to your computer and use it in GitHub Desktop.
Implementing Naive Bayes without using sklearn.naive_bayes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "data=pd.read_csv(\"C:\\\\Users\\\\COM\\\\Desktop\\\\Test\\\\Q2-tennis.csv\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "df=pd.DataFrame(data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 36, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Outlook</th>\n", | |
| " <th>Temp.</th>\n", | |
| " <th>Humidity</th>\n", | |
| " <th>Windy</th>\n", | |
| " <th>Play</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>sunny</td>\n", | |
| " <td>hot</td>\n", | |
| " <td>high</td>\n", | |
| " <td>false</td>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>sunny</td>\n", | |
| " <td>hot</td>\n", | |
| " <td>high</td>\n", | |
| " <td>true</td>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>overcast</td>\n", | |
| " <td>hot</td>\n", | |
| " <td>high</td>\n", | |
| " <td>false</td>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>rainy</td>\n", | |
| " <td>mild</td>\n", | |
| " <td>high</td>\n", | |
| " <td>false</td>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>rainy</td>\n", | |
| " <td>cool</td>\n", | |
| " <td>normal</td>\n", | |
| " <td>false</td>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Outlook Temp. Humidity Windy Play\n", | |
| "0 sunny hot high false no\n", | |
| "1 sunny hot high true no\n", | |
| "2 overcast hot high false yes\n", | |
| "3 rainy mild high false yes\n", | |
| "4 rainy cool normal false yes" | |
| ] | |
| }, | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 37, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "RangeIndex(start=0, stop=14, step=1)" | |
| ] | |
| }, | |
| "execution_count": 37, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.index" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 38, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Index(['Outlook', 'Temp.', 'Humidity', 'Windy', 'Play'], dtype='object')" | |
| ] | |
| }, | |
| "execution_count": 38, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.columns" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "yes 0.642857\n", | |
| "no 0.357143\n", | |
| "Name: Play, dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 39, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "g=pd.value_counts(df.Play)/len(df.index)\n", | |
| "g" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Frequency Table" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>Play</th>\n", | |
| " <th>no</th>\n", | |
| " <th>yes</th>\n", | |
| " <th>All</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Outlook</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>overcast</th>\n", | |
| " <td>0</td>\n", | |
| " <td>4</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>rainy</th>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>sunny</th>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>All</th>\n", | |
| " <td>5</td>\n", | |
| " <td>9</td>\n", | |
| " <td>14</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "Play no yes All\n", | |
| "Outlook \n", | |
| "overcast 0 4 4\n", | |
| "rainy 2 3 5\n", | |
| "sunny 3 2 5\n", | |
| "All 5 9 14" | |
| ] | |
| }, | |
| "execution_count": 41, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Outlook_play=pd.crosstab(df.Outlook,df.Play,margins='TRUE')\n", | |
| "Outlook_play" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Smoothing: replace each zero count with 1 and recompute the affected totals" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 43, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "for i in range(len(Outlook_play.index)):\n", | |
| "    for j in range(len(Outlook_play.columns)):\n", | |
| "        if(Outlook_play.ix[i,j]==0):\n", | |
| "            Outlook_play.ix[i,j]=1\n", | |
| " \n", | |
| "Outlook_play.ix[0,2]=Outlook_play.ix[0,1]+Outlook_play.ix[0,0]\n", | |
| "Outlook_play.ix[3,0]=Outlook_play.ix[2,0]+Outlook_play.ix[1,0]+Outlook_play.ix[0,0]\n", | |
| "Outlook_play.ix[3,2]=Outlook_play.ix[3,0]+Outlook_play.ix[3,1]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>Play</th>\n", | |
| " <th>no</th>\n", | |
| " <th>yes</th>\n", | |
| " <th>All</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Outlook</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>overcast</th>\n", | |
| " <td>1</td>\n", | |
| " <td>4</td>\n", | |
| " <td>5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>rainy</th>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>sunny</th>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>All</th>\n", | |
| " <td>6</td>\n", | |
| " <td>9</td>\n", | |
| " <td>15</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "Play no yes All\n", | |
| "Outlook \n", | |
| "overcast 1 4 5\n", | |
| "rainy 2 3 5\n", | |
| "sunny 3 2 5\n", | |
| "All 6 9 15" | |
| ] | |
| }, | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Outlook_play" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 45, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>Play</th>\n", | |
| " <th>no</th>\n", | |
| " <th>yes</th>\n", | |
| " <th>All</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Temp.</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>cool</th>\n", | |
| " <td>1</td>\n", | |
| " <td>3</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>hot</th>\n", | |
| " <td>2</td>\n", | |
| " <td>2</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>mild</th>\n", | |
| " <td>2</td>\n", | |
| " <td>4</td>\n", | |
| " <td>6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>All</th>\n", | |
| " <td>5</td>\n", | |
| " <td>9</td>\n", | |
| " <td>14</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "Play no yes All\n", | |
| "Temp. \n", | |
| "cool 1 3 4\n", | |
| "hot 2 2 4\n", | |
| "mild 2 4 6\n", | |
| "All 5 9 14" | |
| ] | |
| }, | |
| "execution_count": 45, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Temp_play=pd.crosstab(df['Temp.'],df.Play,margins='TRUE')\n", | |
| "Temp_play" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>Play</th>\n", | |
| " <th>no</th>\n", | |
| " <th>yes</th>\n", | |
| " <th>All</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Humidity</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>high</th>\n", | |
| " <td>4</td>\n", | |
| " <td>3</td>\n", | |
| " <td>7</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>normal</th>\n", | |
| " <td>1</td>\n", | |
| " <td>6</td>\n", | |
| " <td>7</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>All</th>\n", | |
| " <td>5</td>\n", | |
| " <td>9</td>\n", | |
| " <td>14</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "Play no yes All\n", | |
| "Humidity \n", | |
| "high 4 3 7\n", | |
| "normal 1 6 7\n", | |
| "All 5 9 14" | |
| ] | |
| }, | |
| "execution_count": 46, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Humidity_play=pd.crosstab(df.Humidity,df.Play,margins='TRUE')\n", | |
| "Humidity_play" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 47, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>Play</th>\n", | |
| " <th>no</th>\n", | |
| " <th>yes</th>\n", | |
| " <th>All</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Windy</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>false</th>\n", | |
| " <td>2</td>\n", | |
| " <td>6</td>\n", | |
| " <td>8</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>true</th>\n", | |
| " <td>3</td>\n", | |
| " <td>3</td>\n", | |
| " <td>6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>All</th>\n", | |
| " <td>5</td>\n", | |
| " <td>9</td>\n", | |
| " <td>14</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "Play no yes All\n", | |
| "Windy \n", | |
| "false 2 6 8\n", | |
| "true 3 3 6\n", | |
| "All 5 9 14" | |
| ] | |
| }, | |
| "execution_count": 47, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Windy_play=pd.crosstab(df.Windy,df.Play,margins='TRUE')\n", | |
| "Windy_play" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 48, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Convert the frequency tables to probability tables" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "Outlook_play.ix[0:3:,1]=Outlook_play.ix[0:3,1]/Outlook_play.ix[3,1]\n", | |
| "Outlook_play.ix[0:3:,0]=Outlook_play.ix[0:3,0]/Outlook_play.ix[3,0]\n", | |
| "Outlook_play.ix[0:3:,2]=Outlook_play.ix[0:3,2]/Outlook_play.ix[3,2]\n", | |
| "Outlook_play.ix[3,:]=Outlook_play.ix[3,:]/Outlook_play.ix[3,2]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 50, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>Play</th>\n", | |
| " <th>no</th>\n", | |
| " <th>yes</th>\n", | |
| " <th>P(x)</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Outlook</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>overcast</th>\n", | |
| " <td>0.166667</td>\n", | |
| " <td>0.444444</td>\n", | |
| " <td>0.333333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>rainy</th>\n", | |
| " <td>0.333333</td>\n", | |
| " <td>0.333333</td>\n", | |
| " <td>0.333333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>sunny</th>\n", | |
| " <td>0.500000</td>\n", | |
| " <td>0.222222</td>\n", | |
| " <td>0.333333</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>P(c)</th>\n", | |
| " <td>0.400000</td>\n", | |
| " <td>0.600000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "Play no yes P(x)\n", | |
| "Outlook \n", | |
| "overcast 0.166667 0.444444 0.333333\n", | |
| "rainy 0.333333 0.333333 0.333333\n", | |
| "sunny 0.500000 0.222222 0.333333\n", | |
| "P(c) 0.400000 0.600000 1.000000" | |
| ] | |
| }, | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Outlook_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 51, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "Temp_play.ix[0:3:,1]=Temp_play.ix[0:3,1]/Temp_play.ix[3,1]\n", | |
| "Temp_play.ix[0:3:,0]=Temp_play.ix[0:3,0]/Temp_play.ix[3,0]\n", | |
| "Temp_play.ix[0:3:,2]=Temp_play.ix[0:3,2]/Temp_play.ix[3,2]\n", | |
| "Temp_play.ix[3,:]=Temp_play.ix[3,:]/Temp_play.ix[3,2]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 52, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>Play</th>\n", | |
| " <th>no</th>\n", | |
| " <th>yes</th>\n", | |
| " <th>P(x)</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Temp.</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>cool</th>\n", | |
| " <td>0.200000</td>\n", | |
| " <td>0.333333</td>\n", | |
| " <td>0.285714</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>hot</th>\n", | |
| " <td>0.400000</td>\n", | |
| " <td>0.222222</td>\n", | |
| " <td>0.285714</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>mild</th>\n", | |
| " <td>0.400000</td>\n", | |
| " <td>0.444444</td>\n", | |
| " <td>0.428571</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>P(c)</th>\n", | |
| " <td>0.357143</td>\n", | |
| " <td>0.642857</td>\n", | |
| " <td>1.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "Play no yes P(x)\n", | |
| "Temp. \n", | |
| "cool 0.200000 0.333333 0.285714\n", | |
| "hot 0.400000 0.222222 0.285714\n", | |
| "mild 0.400000 0.444444 0.428571\n", | |
| "P(c) 0.357143 0.642857 1.000000" | |
| ] | |
| }, | |
| "execution_count": 52, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Temp_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 53, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "Humidity_play.ix[0:2:,1]=Humidity_play.ix[0:2,1]/Humidity_play.ix[2,1]\n", | |
| "Humidity_play.ix[0:2:,0]=Humidity_play.ix[0:2,0]/Humidity_play.ix[2,0]\n", | |
| "Humidity_play.ix[0:2:,2]=Humidity_play.ix[0:2,2]/Humidity_play.ix[2,2]\n", | |
| "Humidity_play.ix[2,:]=Humidity_play.ix[2,:]/Humidity_play.ix[2,2]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 54, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>Play</th>\n", | |
| " <th>no</th>\n", | |
| " <th>yes</th>\n", | |
| " <th>P(x)</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Humidity</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>high</th>\n", | |
| " <td>0.800000</td>\n", | |
| " <td>0.333333</td>\n", | |
| " <td>0.5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>normal</th>\n", | |
| " <td>0.200000</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>P(c)</th>\n", | |
| " <td>0.357143</td>\n", | |
| " <td>0.642857</td>\n", | |
| " <td>1.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "Play no yes P(x)\n", | |
| "Humidity \n", | |
| "high 0.800000 0.333333 0.5\n", | |
| "normal 0.200000 0.666667 0.5\n", | |
| "P(c) 0.357143 0.642857 1.0" | |
| ] | |
| }, | |
| "execution_count": 54, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Humidity_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 55, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "Windy_play.ix[0:2:,1]=Windy_play.ix[0:2,1]/Windy_play.ix[2,1]\n", | |
| "Windy_play.ix[0:2:,0]=Windy_play.ix[0:2,0]/Windy_play.ix[2,0]\n", | |
| "Windy_play.ix[0:2:,2]=Windy_play.ix[0:2,2]/Windy_play.ix[2,2]\n", | |
| "Windy_play.ix[2,:]=Windy_play.ix[2,:]/Windy_play.ix[2,2]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 56, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>Play</th>\n", | |
| " <th>no</th>\n", | |
| " <th>yes</th>\n", | |
| " <th>P(x)</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Windy</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>false</th>\n", | |
| " <td>0.400000</td>\n", | |
| " <td>0.666667</td>\n", | |
| " <td>0.571429</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>true</th>\n", | |
| " <td>0.600000</td>\n", | |
| " <td>0.333333</td>\n", | |
| " <td>0.428571</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>P(c)</th>\n", | |
| " <td>0.357143</td>\n", | |
| " <td>0.642857</td>\n", | |
| " <td>1.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "Play no yes P(x)\n", | |
| "Windy \n", | |
| "false 0.400000 0.666667 0.571429\n", | |
| "true 0.600000 0.333333 0.428571\n", | |
| "P(c) 0.357143 0.642857 1.000000" | |
| ] | |
| }, | |
| "execution_count": 56, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Windy_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 57, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Prediction" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 58, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "pred_play=[]\n", | |
| "for i in range(len(df.index)):\n", | |
| "    pcxy=Outlook_play.ix[df.ix[i,0],'yes']*Temp_play.ix[df.ix[i,1],'yes']*Humidity_play.ix[df.ix[i,2],'yes']*Windy_play.ix[df.ix[i,3],'yes']*g['yes']\n", | |
| "    pcxn=Outlook_play.ix[df.ix[i,0],'no']*Temp_play.ix[df.ix[i,1],'no']*Humidity_play.ix[df.ix[i,2],'no']*Windy_play.ix[df.ix[i,3],'no']*g['no']\n", | |
| "    yes_prob=pcxy/(pcxy+pcxn)\n", | |
| "    no_prob=pcxn/(pcxy+pcxn)\n", | |
| "    if(yes_prob > no_prob):\n", | |
| "        pred_play.append(\"yes\")\n", | |
| "    elif(yes_prob < no_prob):\n", | |
| "        pred_play.append(\"no\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 59, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 0\n", | |
| "0 no\n", | |
| "1 no\n", | |
| "2 yes\n", | |
| "3 yes\n", | |
| "4 yes\n", | |
| "5 yes\n", | |
| "6 yes\n", | |
| "7 no\n", | |
| "8 yes\n", | |
| "9 yes\n", | |
| "10 yes\n", | |
| "11 yes\n", | |
| "12 yes\n", | |
| "13 no" | |
| ] | |
| }, | |
| "execution_count": 59, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pred_play=pd.DataFrame(pred_play)\n", | |
| "pred_play" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 60, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Play</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>yes</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>no</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Play\n", | |
| "0 no\n", | |
| "1 no\n", | |
| "2 yes\n", | |
| "3 yes\n", | |
| "4 yes\n", | |
| "5 no\n", | |
| "6 yes\n", | |
| "7 no\n", | |
| "8 yes\n", | |
| "9 yes\n", | |
| "10 yes\n", | |
| "11 yes\n", | |
| "12 yes\n", | |
| "13 no" | |
| ] | |
| }, | |
| "execution_count": 60, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Expected=df[['Play']]\n", | |
| "Expected" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Calculating the Accuracy" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 61, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from sklearn import metrics" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 62, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "0.928571428571\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(metrics.accuracy_score(Expected,pred_play))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "anaconda-cloud": {}, | |
| "kernelspec": { | |
| "display_name": "Python [default]", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 1 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
• Implement k-fold cross-validation (e.g., 5-fold) for the Naïve Bayesian classifier on a given dataset. Calculate the average accuracy of the classifier over the k folds and report the results.