Created
October 23, 2018 20:25
-
-
Save Katba-Caroline/e9dc33a73e4efeed059e5118ad16d69d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Incident ID</th>\n", | |
| " <th>Incident Date</th>\n", | |
| " <th>Incident Time</th>\n", | |
| " <th>Day</th>\n", | |
| " <th>Incident County Name</th>\n", | |
| " <th>Incident State</th>\n", | |
| " <th>Victim ID</th>\n", | |
| " <th>Gender Desc</th>\n", | |
| " <th>Age Start Description</th>\n", | |
| " <th>Age End Desc</th>\n", | |
| " <th>...</th>\n", | |
| " <th>Naloxone Administered</th>\n", | |
| " <th>Administration ID</th>\n", | |
| " <th>Dose Count</th>\n", | |
| " <th>Dose Unit</th>\n", | |
| " <th>Dose Desc</th>\n", | |
| " <th>Response Time Desc</th>\n", | |
| " <th>Survive</th>\n", | |
| " <th>Response_Desc</th>\n", | |
| " <th>Revive_Action_Desc</th>\n", | |
| " <th>Third_Party_Admin_Desc</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>UNKNOWN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>GOOD SAMARITAN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>2.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>4.0</td>\n", | |
| " <td>MG</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>UNKNOWN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>GOOD SAMARITAN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>2.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>4.0</td>\n", | |
| " <td>MG</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>26</td>\n", | |
| " <td>01/26/2018</td>\n", | |
| " <td>9:14:00</td>\n", | |
| " <td>Friday</td>\n", | |
| " <td>Chester</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>5</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>25</td>\n", | |
| " <td>29</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>27</td>\n", | |
| " <td>01/24/2018</td>\n", | |
| " <td>23:32:00</td>\n", | |
| " <td>Wednesday</td>\n", | |
| " <td>Beaver</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>4</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>30</td>\n", | |
| " <td>39</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>28</td>\n", | |
| " <td>01/15/2018</td>\n", | |
| " <td>23:41:00</td>\n", | |
| " <td>Monday</td>\n", | |
| " <td>Bucks</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>6</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>25</td>\n", | |
| " <td>29</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>29</td>\n", | |
| " <td>01/15/2018</td>\n", | |
| " <td>10:54:00</td>\n", | |
| " <td>Monday</td>\n", | |
| " <td>Bucks</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>7</td>\n", | |
| " <td>Female</td>\n", | |
| " <td>30</td>\n", | |
| " <td>39</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>30</td>\n", | |
| " <td>01/01/2018</td>\n", | |
| " <td>13:07:00</td>\n", | |
| " <td>Monday</td>\n", | |
| " <td>Philadelphia</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>8</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>32</td>\n", | |
| " <td>01/24/2018</td>\n", | |
| " <td>0:01:00</td>\n", | |
| " <td>Wednesday</td>\n", | |
| " <td>Cumberland</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>14</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>30</td>\n", | |
| " <td>39</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>32</td>\n", | |
| " <td>01/24/2018</td>\n", | |
| " <td>0:01:00</td>\n", | |
| " <td>Wednesday</td>\n", | |
| " <td>Cumberland</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>14</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>30</td>\n", | |
| " <td>39</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>33</td>\n", | |
| " <td>01/13/2018</td>\n", | |
| " <td>0:30:00</td>\n", | |
| " <td>Saturday</td>\n", | |
| " <td>Northumberland</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>10</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>30</td>\n", | |
| " <td>39</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>34</td>\n", | |
| " <td>01/22/2018</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Monday</td>\n", | |
| " <td>Montgomery</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>23</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>30</td>\n", | |
| " <td>39</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>35</td>\n", | |
| " <td>01/12/2018</td>\n", | |
| " <td>11:00:00</td>\n", | |
| " <td>Friday</td>\n", | |
| " <td>Pike</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>11</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>40</td>\n", | |
| " <td>49</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>36</td>\n", | |
| " <td>01/10/2018</td>\n", | |
| " <td>18:00:00</td>\n", | |
| " <td>Wednesday</td>\n", | |
| " <td>Armstrong</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>12</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>40</td>\n", | |
| " <td>49</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td>37</td>\n", | |
| " <td>01/01/2018</td>\n", | |
| " <td>19:10:00</td>\n", | |
| " <td>Monday</td>\n", | |
| " <td>Carbon</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>13</td>\n", | |
| " <td>Female</td>\n", | |
| " <td>30</td>\n", | |
| " <td>39</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16</th>\n", | |
| " <td>38</td>\n", | |
| " <td>01/23/2018</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Tuesday</td>\n", | |
| " <td>Carbon</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>15</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>30</td>\n", | |
| " <td>39</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>17</th>\n", | |
| " <td>39</td>\n", | |
| " <td>01/15/2018</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Monday</td>\n", | |
| " <td>Montgomery</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>18</td>\n", | |
| " <td>Female</td>\n", | |
| " <td>25</td>\n", | |
| " <td>29</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>18</th>\n", | |
| " <td>39</td>\n", | |
| " <td>01/15/2018</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Monday</td>\n", | |
| " <td>Montgomery</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>17</td>\n", | |
| " <td>Female</td>\n", | |
| " <td>30</td>\n", | |
| " <td>39</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>19</th>\n", | |
| " <td>39</td>\n", | |
| " <td>01/15/2018</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Monday</td>\n", | |
| " <td>Montgomery</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>16</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>20</td>\n", | |
| " <td>24</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>20 rows × 27 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Incident ID Incident Date Incident Time Day Incident County Name \\\n", | |
| "0 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "1 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "2 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "3 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "4 26 01/26/2018 9:14:00 Friday Chester \n", | |
| "5 27 01/24/2018 23:32:00 Wednesday Beaver \n", | |
| "6 28 01/15/2018 23:41:00 Monday Bucks \n", | |
| "7 29 01/15/2018 10:54:00 Monday Bucks \n", | |
| "8 30 01/01/2018 13:07:00 Monday Philadelphia \n", | |
| "9 32 01/24/2018 0:01:00 Wednesday Cumberland \n", | |
| "10 32 01/24/2018 0:01:00 Wednesday Cumberland \n", | |
| "11 33 01/13/2018 0:30:00 Saturday Northumberland \n", | |
| "12 34 01/22/2018 NaN Monday Montgomery \n", | |
| "13 35 01/12/2018 11:00:00 Friday Pike \n", | |
| "14 36 01/10/2018 18:00:00 Wednesday Armstrong \n", | |
| "15 37 01/01/2018 19:10:00 Monday Carbon \n", | |
| "16 38 01/23/2018 NaN Tuesday Carbon \n", | |
| "17 39 01/15/2018 NaN Monday Montgomery \n", | |
| "18 39 01/15/2018 NaN Monday Montgomery \n", | |
| "19 39 01/15/2018 NaN Monday Montgomery \n", | |
| "\n", | |
| " Incident State Victim ID Gender Desc Age Start Description Age End Desc \\\n", | |
| "0 Pennsylvania 1 Male 50 59 \n", | |
| "1 Pennsylvania 1 Male 50 59 \n", | |
| "2 Pennsylvania 1 Male 50 59 \n", | |
| "3 Pennsylvania 1 Male 50 59 \n", | |
| "4 Pennsylvania 5 Male 25 29 \n", | |
| "5 Pennsylvania 4 Male 30 39 \n", | |
| "6 Pennsylvania 6 Male 25 29 \n", | |
| "7 Pennsylvania 7 Female 30 39 \n", | |
| "8 Pennsylvania 8 Male 50 59 \n", | |
| "9 Pennsylvania 14 Male 30 39 \n", | |
| "10 Pennsylvania 14 Male 30 39 \n", | |
| "11 Pennsylvania 10 Male 30 39 \n", | |
| "12 Pennsylvania 23 Male 30 39 \n", | |
| "13 Pennsylvania 11 Male 40 49 \n", | |
| "14 Pennsylvania 12 Male 40 49 \n", | |
| "15 Pennsylvania 13 Female 30 39 \n", | |
| "16 Pennsylvania 15 Male 30 39 \n", | |
| "17 Pennsylvania 18 Female 25 29 \n", | |
| "18 Pennsylvania 17 Female 30 39 \n", | |
| "19 Pennsylvania 16 Male 20 24 \n", | |
| "\n", | |
| " ... Naloxone Administered Administration ID Dose Count \\\n", | |
| "0 ... Y 1.0 1.0 \n", | |
| "1 ... Y 2.0 1.0 \n", | |
| "2 ... Y 1.0 1.0 \n", | |
| "3 ... Y 2.0 1.0 \n", | |
| "4 ... N NaN NaN \n", | |
| "5 ... N NaN NaN \n", | |
| "6 ... N NaN NaN \n", | |
| "7 ... N NaN NaN \n", | |
| "8 ... N NaN NaN \n", | |
| "9 ... N NaN NaN \n", | |
| "10 ... N NaN NaN \n", | |
| "11 ... N NaN NaN \n", | |
| "12 ... N NaN NaN \n", | |
| "13 ... N NaN NaN \n", | |
| "14 ... N NaN NaN \n", | |
| "15 ... N NaN NaN \n", | |
| "16 ... N NaN NaN \n", | |
| "17 ... N NaN NaN \n", | |
| "18 ... N NaN NaN \n", | |
| "19 ... N NaN NaN \n", | |
| "\n", | |
| " Dose Unit Dose Desc Response Time Desc Survive Response_Desc \\\n", | |
| "0 0.0 UNKNOWN NaN N NO RESPONSE TO NALOXONE \n", | |
| "1 4.0 MG NaN N NO RESPONSE TO NALOXONE \n", | |
| "2 0.0 UNKNOWN NaN N NO RESPONSE TO NALOXONE \n", | |
| "3 4.0 MG NaN N NO RESPONSE TO NALOXONE \n", | |
| "4 NaN NaN NaN N NaN \n", | |
| "5 NaN NaN NaN Y NaN \n", | |
| "6 NaN NaN NaN Y NaN \n", | |
| "7 NaN NaN NaN Y NaN \n", | |
| "8 NaN NaN NaN Y NaN \n", | |
| "9 NaN NaN NaN N NaN \n", | |
| "10 NaN NaN NaN N NaN \n", | |
| "11 NaN NaN NaN N NaN \n", | |
| "12 NaN NaN NaN N NaN \n", | |
| "13 NaN NaN NaN N NaN \n", | |
| "14 NaN NaN NaN N NaN \n", | |
| "15 NaN NaN NaN N NaN \n", | |
| "16 NaN NaN NaN Y NaN \n", | |
| "17 NaN NaN NaN Y NaN \n", | |
| "18 NaN NaN NaN Y NaN \n", | |
| "19 NaN NaN NaN N NaN \n", | |
| "\n", | |
| " Revive_Action_Desc Third_Party_Admin_Desc \n", | |
| "0 NaN GOOD SAMARITAN \n", | |
| "1 NaN NaN \n", | |
| "2 NaN GOOD SAMARITAN \n", | |
| "3 NaN NaN \n", | |
| "4 NaN NaN \n", | |
| "5 NaN NaN \n", | |
| "6 NaN NaN \n", | |
| "7 NaN NaN \n", | |
| "8 NaN NaN \n", | |
| "9 NaN NaN \n", | |
| "10 NaN NaN \n", | |
| "11 NaN NaN \n", | |
| "12 NaN NaN \n", | |
| "13 NaN NaN \n", | |
| "14 NaN NaN \n", | |
| "15 NaN NaN \n", | |
| "16 NaN NaN \n", | |
| "17 NaN NaN \n", | |
| "18 NaN NaN \n", | |
| "19 NaN NaN \n", | |
| "\n", | |
| "[20 rows x 27 columns]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# Import libraries necessary for this project\n", | |
| "import numpy as np\n", | |
| "import pandas as pd\n", | |
| "from time import time\n", | |
| "import matplotlib.pyplot as plt\n", | |
| "import matplotlib.patches as mpatches\n", | |
| "\n", | |
| "\n", | |
| "# Pretty display for notebooks\n", | |
| "%matplotlib inline\n", | |
| "\n", | |
| "# Load the overdose incident dataset\n", | |
| "data = pd.read_csv(\"Overdose_info.csv\")\n", | |
| "\n", | |
| "# Success - Display the first 20 records\n", | |
| "display(data.head(n=20))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(4625, 27)" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Report the dataset dimensions as (rows, columns)\n", | |
| "data.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Incident ID</th>\n", | |
| " <th>Incident Date</th>\n", | |
| " <th>Incident Time</th>\n", | |
| " <th>Day</th>\n", | |
| " <th>Incident County Name</th>\n", | |
| " <th>Incident State</th>\n", | |
| " <th>Victim ID</th>\n", | |
| " <th>Gender Desc</th>\n", | |
| " <th>Age Start Description</th>\n", | |
| " <th>Age End Desc</th>\n", | |
| " <th>...</th>\n", | |
| " <th>Administration ID</th>\n", | |
| " <th>Dose Count</th>\n", | |
| " <th>Dose Unit</th>\n", | |
| " <th>Dose Desc</th>\n", | |
| " <th>Response Time Desc</th>\n", | |
| " <th>Survive</th>\n", | |
| " <th>Response_Desc</th>\n", | |
| " <th>Revive_Action_Desc</th>\n", | |
| " <th>Third_Party_Admin_Desc</th>\n", | |
| " <th>Avg_Age</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>UNKNOWN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>GOOD SAMARITAN</td>\n", | |
| " <td>80.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>2.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>4.0</td>\n", | |
| " <td>MG</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>80.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>UNKNOWN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>GOOD SAMARITAN</td>\n", | |
| " <td>80.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>2.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>4.0</td>\n", | |
| " <td>MG</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>80.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>26</td>\n", | |
| " <td>01/26/2018</td>\n", | |
| " <td>9:14:00</td>\n", | |
| " <td>Friday</td>\n", | |
| " <td>Chester</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>5</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>25</td>\n", | |
| " <td>29</td>\n", | |
| " <td>...</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>40.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 28 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Incident ID Incident Date Incident Time Day Incident County Name \\\n", | |
| "0 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "1 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "2 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "3 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "4 26 01/26/2018 9:14:00 Friday Chester \n", | |
| "\n", | |
| " Incident State Victim ID Gender Desc Age Start Description Age End Desc \\\n", | |
| "0 Pennsylvania 1 Male 50 59 \n", | |
| "1 Pennsylvania 1 Male 50 59 \n", | |
| "2 Pennsylvania 1 Male 50 59 \n", | |
| "3 Pennsylvania 1 Male 50 59 \n", | |
| "4 Pennsylvania 5 Male 25 29 \n", | |
| "\n", | |
| " ... Administration ID Dose Count Dose Unit Dose Desc \\\n", | |
| "0 ... 1.0 1.0 0.0 UNKNOWN \n", | |
| "1 ... 2.0 1.0 4.0 MG \n", | |
| "2 ... 1.0 1.0 0.0 UNKNOWN \n", | |
| "3 ... 2.0 1.0 4.0 MG \n", | |
| "4 ... NaN NaN NaN NaN \n", | |
| "\n", | |
| " Response Time Desc Survive Response_Desc Revive_Action_Desc \\\n", | |
| "0 NaN N NO RESPONSE TO NALOXONE NaN \n", | |
| "1 NaN N NO RESPONSE TO NALOXONE NaN \n", | |
| "2 NaN N NO RESPONSE TO NALOXONE NaN \n", | |
| "3 NaN N NO RESPONSE TO NALOXONE NaN \n", | |
| "4 NaN N NaN NaN \n", | |
| "\n", | |
| " Third_Party_Admin_Desc Avg_Age \n", | |
| "0 GOOD SAMARITAN 80.0 \n", | |
| "1 NaN 80.0 \n", | |
| "2 GOOD SAMARITAN 80.0 \n", | |
| "3 NaN 80.0 \n", | |
| "4 NaN 40.0 \n", | |
| "\n", | |
| "[5 rows x 28 columns]" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Create a calculated column with the midpoint of each victim's age bracket.\n", | |
| "# The sum is parenthesized so the division by 2 applies to both bounds;\n", | |
| "# without it only 'Age End Desc' is halved (e.g. 50-59 would yield 80, not the midpoint).\n", | |
| "data['Avg_Age'] = round((data['Age Start Description'] + data['Age End Desc']) / 2)\n", | |
| "data.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Incident ID</th>\n", | |
| " <th>Incident Date</th>\n", | |
| " <th>Incident Time</th>\n", | |
| " <th>Day</th>\n", | |
| " <th>Incident County Name</th>\n", | |
| " <th>Incident State</th>\n", | |
| " <th>Victim ID</th>\n", | |
| " <th>Gender Desc</th>\n", | |
| " <th>Age Start Description</th>\n", | |
| " <th>Age End Desc</th>\n", | |
| " <th>...</th>\n", | |
| " <th>Dose Count</th>\n", | |
| " <th>Dose Unit</th>\n", | |
| " <th>Dose Desc</th>\n", | |
| " <th>Response Time Desc</th>\n", | |
| " <th>Survive</th>\n", | |
| " <th>Response_Desc</th>\n", | |
| " <th>Revive_Action_Desc</th>\n", | |
| " <th>Third_Party_Admin_Desc</th>\n", | |
| " <th>Avg_Age</th>\n", | |
| " <th>Incident_Month</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>UNKNOWN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>GOOD SAMARITAN</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>4.0</td>\n", | |
| " <td>MG</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>UNKNOWN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>GOOD SAMARITAN</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>01/04/2018</td>\n", | |
| " <td>0:42:00</td>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>1</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>50</td>\n", | |
| " <td>59</td>\n", | |
| " <td>...</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>4.0</td>\n", | |
| " <td>MG</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NO RESPONSE TO NALOXONE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>26</td>\n", | |
| " <td>01/26/2018</td>\n", | |
| " <td>9:14:00</td>\n", | |
| " <td>Friday</td>\n", | |
| " <td>Chester</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>5</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>25</td>\n", | |
| " <td>29</td>\n", | |
| " <td>...</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>40.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 29 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Incident ID Incident Date Incident Time Day Incident County Name \\\n", | |
| "0 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "1 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "2 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "3 1 01/04/2018 0:42:00 Thursday Delaware \n", | |
| "4 26 01/26/2018 9:14:00 Friday Chester \n", | |
| "\n", | |
| " Incident State Victim ID Gender Desc Age Start Description Age End Desc \\\n", | |
| "0 Pennsylvania 1 Male 50 59 \n", | |
| "1 Pennsylvania 1 Male 50 59 \n", | |
| "2 Pennsylvania 1 Male 50 59 \n", | |
| "3 Pennsylvania 1 Male 50 59 \n", | |
| "4 Pennsylvania 5 Male 25 29 \n", | |
| "\n", | |
| " ... Dose Count Dose Unit Dose Desc Response Time Desc Survive \\\n", | |
| "0 ... 1.0 0.0 UNKNOWN NaN N \n", | |
| "1 ... 1.0 4.0 MG NaN N \n", | |
| "2 ... 1.0 0.0 UNKNOWN NaN N \n", | |
| "3 ... 1.0 4.0 MG NaN N \n", | |
| "4 ... NaN NaN NaN NaN N \n", | |
| "\n", | |
| " Response_Desc Revive_Action_Desc Third_Party_Admin_Desc Avg_Age \\\n", | |
| "0 NO RESPONSE TO NALOXONE NaN GOOD SAMARITAN 80.0 \n", | |
| "1 NO RESPONSE TO NALOXONE NaN NaN 80.0 \n", | |
| "2 NO RESPONSE TO NALOXONE NaN GOOD SAMARITAN 80.0 \n", | |
| "3 NO RESPONSE TO NALOXONE NaN NaN 80.0 \n", | |
| "4 NaN NaN NaN 40.0 \n", | |
| "\n", | |
| " Incident_Month \n", | |
| "0 1 \n", | |
| "1 1 \n", | |
| "2 1 \n", | |
| "3 1 \n", | |
| "4 1 \n", | |
| "\n", | |
| "[5 rows x 29 columns]" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Extracting only the month from the incident date\n", | |
| "data['Incident_Month'] = pd.DatetimeIndex(data['Incident Date']).month\n", | |
| "data.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Day</th>\n", | |
| " <th>Incident County Name</th>\n", | |
| " <th>Gender Desc</th>\n", | |
| " <th>Race</th>\n", | |
| " <th>Ethnicity Description</th>\n", | |
| " <th>Victim State</th>\n", | |
| " <th>Victim County</th>\n", | |
| " <th>Accidental Exposure</th>\n", | |
| " <th>Susp OD Drug Desc</th>\n", | |
| " <th>Naloxone Administered</th>\n", | |
| " <th>Survive</th>\n", | |
| " <th>Avg_Age</th>\n", | |
| " <th>Incident_Month</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>COCAINE/CRACK</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>COCAINE/CRACK</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>HEROIN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>HEROIN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Friday</td>\n", | |
| " <td>Chester</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Chester</td>\n", | |
| " <td>N</td>\n", | |
| " <td>HEROIN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>40.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Day Incident County Name Gender Desc Race Ethnicity Description \\\n", | |
| "0 Thursday Delaware Male White Not Hispanic \n", | |
| "1 Thursday Delaware Male White Not Hispanic \n", | |
| "2 Thursday Delaware Male White Not Hispanic \n", | |
| "3 Thursday Delaware Male White Not Hispanic \n", | |
| "4 Friday Chester Male White Not Hispanic \n", | |
| "\n", | |
| " Victim State Victim County Accidental Exposure Susp OD Drug Desc \\\n", | |
| "0 Pennsylvania Delaware N COCAINE/CRACK \n", | |
| "1 Pennsylvania Delaware N COCAINE/CRACK \n", | |
| "2 Pennsylvania Delaware N HEROIN \n", | |
| "3 Pennsylvania Delaware N HEROIN \n", | |
| "4 Pennsylvania Chester N HEROIN \n", | |
| "\n", | |
| " Naloxone Administered Survive Avg_Age Incident_Month \n", | |
| "0 Y N 80.0 1 \n", | |
| "1 Y N 80.0 1 \n", | |
| "2 Y N 80.0 1 \n", | |
| "3 Y N 80.0 1 \n", | |
| "4 N N 40.0 1 " | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Removing columns in place here\n", | |
| "#FYI it would be great if we could use Response Time, please fill it out!\n", | |
| "data.drop(columns = ['Incident ID','Incident Time','Incident Date','Incident State',\n", | |
| " 'Victim ID','Age Start Description','Age End Desc',\n", | |
| " 'Administration ID', 'Dose Count', 'Dose Unit','Dose Desc',\n", | |
| " 'Response Time Desc', 'Victim OD Drug ID', 'Response_Desc',\n", | |
| " 'Revive_Action_Desc', 'Third_Party_Admin_Desc'], inplace=True )\n", | |
| "data.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Day 0\n", | |
| "Incident County Name 0\n", | |
| "Gender Desc 0\n", | |
| "Race 0\n", | |
| "Ethnicity Description 0\n", | |
| "Victim State 0\n", | |
| "Victim County 0\n", | |
| "Accidental Exposure 0\n", | |
| "Susp OD Drug Desc 0\n", | |
| "Naloxone Administered 0\n", | |
| "Survive 0\n", | |
| "Avg_Age 0\n", | |
| "Incident_Month 0\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Checking how many null values each column contains\n", | |
| "data.isnull().sum()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(4625, 13)" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Day</th>\n", | |
| " <th>Incident_County_Name</th>\n", | |
| " <th>Gender</th>\n", | |
| " <th>Race</th>\n", | |
| " <th>Ethnicity</th>\n", | |
| " <th>Victim_State</th>\n", | |
| " <th>Victim_County</th>\n", | |
| " <th>Accidental_Exposure</th>\n", | |
| " <th>Susp_OD_Drug</th>\n", | |
| " <th>Naloxone_Administered</th>\n", | |
| " <th>Survive</th>\n", | |
| " <th>Avg_Age</th>\n", | |
| " <th>Incident_Month</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>COCAINE/CRACK</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>COCAINE/CRACK</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>HEROIN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>HEROIN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Friday</td>\n", | |
| " <td>Chester</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Chester</td>\n", | |
| " <td>N</td>\n", | |
| " <td>HEROIN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>40.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Day Incident_County_Name Gender Race Ethnicity Victim_State \\\n", | |
| "0 Thursday Delaware Male White Not Hispanic Pennsylvania \n", | |
| "1 Thursday Delaware Male White Not Hispanic Pennsylvania \n", | |
| "2 Thursday Delaware Male White Not Hispanic Pennsylvania \n", | |
| "3 Thursday Delaware Male White Not Hispanic Pennsylvania \n", | |
| "4 Friday Chester Male White Not Hispanic Pennsylvania \n", | |
| "\n", | |
| " Victim_County Accidental_Exposure Susp_OD_Drug Naloxone_Administered \\\n", | |
| "0 Delaware N COCAINE/CRACK Y \n", | |
| "1 Delaware N COCAINE/CRACK Y \n", | |
| "2 Delaware N HEROIN Y \n", | |
| "3 Delaware N HEROIN Y \n", | |
| "4 Chester N HEROIN N \n", | |
| "\n", | |
| " Survive Avg_Age Incident_Month \n", | |
| "0 N 80.0 1 \n", | |
| "1 N 80.0 1 \n", | |
| "2 N 80.0 1 \n", | |
| "3 N 80.0 1 \n", | |
| "4 N 40.0 1 " | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# renaming the columns for easier handling\n", | |
| "data.rename(columns={'Incident County Name': 'Incident_County_Name', \n", | |
| " 'Gender Desc': 'Gender',\n", | |
| " 'Ethnicity Description': 'Ethnicity', \n", | |
| " 'Victim State': 'Victim_State', \n", | |
| " 'Victim County': 'Victim_County',\n", | |
| " 'Accidental Exposure': 'Accidental_Exposure', \n", | |
| " 'Susp OD Drug Desc': 'Susp_OD_Drug',\n", | |
| " 'Naloxone Administered': 'Naloxone_Administered'}, inplace=True)\n", | |
| "data.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x2bd412a9cf8>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# here we set the figure size to 15x10\n", | |
| "plt.figure(figsize=(15, 10))\n", | |
| "\n", | |
| "plt.scatter(data.Avg_Age, data.Susp_OD_Drug)\n", | |
| "plt.xlabel(\"Average Age\", fontsize=20)\n", | |
| "plt.ylabel(\"Drug\", fontsize=20)\n", | |
| "plt.title(\"Scatter plot of Suspected Drugs and Age\",fontsize=22)\n", | |
| "plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Y 2993\n", | |
| "N 1632\n", | |
| "Name: Naloxone_Administered, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data['Naloxone_Administered'].value_counts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x2bd4190da58>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "plt.figure(figsize=(18,10))\n", | |
| "data.Naloxone_Administered.value_counts().nlargest(20).plot(kind='barh')\n", | |
| "plt.xlabel('Naloxone Administered', fontsize=20)\n", | |
| "plt.title(\"Number of Naloxone Doses Administered\",fontsize=20)\n", | |
| "plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Y 3445\n", | |
| "N 929\n", | |
| "U 251\n", | |
| "Name: Survive, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data['Survive'].value_counts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(4374, 13)" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Dropping the rows where Survive is 'U' (unknown whether the victim survived)\n", | |
| "\n", | |
| "data.drop(data[data['Survive']==\"U\"].index, inplace=True)\n", | |
| "data.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x2bd3f6bec50>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "plt.figure(figsize=(17,8))\n", | |
| "data.Survive.value_counts().plot(kind='bar')\n", | |
| "plt.xlabel('Survive', fontsize=20)\n", | |
| "plt.title(\"Number of Survivers of Overdoses\",fontsize=20)\n", | |
| "plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Correlation Matrix" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " Avg_Age Incident_Month\n", | |
| "Avg_Age 1.00000 0.03179\n", | |
| "Incident_Month 0.03179 1.00000\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x2bd439f9860>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "import seaborn as sns\n", | |
| "sns.set_style(\"white\")\n", | |
| "#Correlation Matrix\n", | |
| "# Generate a custom diverging colormap\n", | |
| "cmap = sns.diverging_palette(220, 10, as_cmap=True)\n", | |
| "# Set up the matplotlib figure\n", | |
| "f, ax = plt.subplots(figsize=(15, 10))\n", | |
| "# Compute the correlation matrix\n", | |
| "corr = data.corr()\n", | |
| "print(corr)\n", | |
| "# Generate a mask for the upper triangle\n", | |
| "mask = np.zeros_like(corr, dtype=np.bool)\n", | |
| "mask[np.triu_indices_from(mask)] = True\n", | |
| "# Draw the heatmap with the mask and correct aspect ratio\n", | |
| "sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,\n", | |
| " square=True, linewidths=.5, cbar_kws={\"shrink\": .5})\n", | |
| "plt.title('Correlation matrix', \n", | |
| " fontsize = 20)\n", | |
| "plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x2bd41965908>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "plt.matshow(data.corr())\n", | |
| "plt.xticks(range(len(data.columns)), data.columns)\n", | |
| "plt.yticks(range(len(data.columns)), data.columns)\n", | |
| "plt.colorbar()\n", | |
| "plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### The graphs above show that there is essentially no correlation between the numeric features, so we need to explore different avenues to understand the data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Day</th>\n", | |
| " <th>Incident_County_Name</th>\n", | |
| " <th>Gender</th>\n", | |
| " <th>Race</th>\n", | |
| " <th>Ethnicity</th>\n", | |
| " <th>Victim_State</th>\n", | |
| " <th>Victim_County</th>\n", | |
| " <th>Accidental_Exposure</th>\n", | |
| " <th>Susp_OD_Drug</th>\n", | |
| " <th>Naloxone_Administered</th>\n", | |
| " <th>Survive</th>\n", | |
| " <th>Avg_Age</th>\n", | |
| " <th>Incident_Month</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>COCAINE/CRACK</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>COCAINE/CRACK</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>HEROIN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Thursday</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Delaware</td>\n", | |
| " <td>N</td>\n", | |
| " <td>HEROIN</td>\n", | |
| " <td>Y</td>\n", | |
| " <td>N</td>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Friday</td>\n", | |
| " <td>Chester</td>\n", | |
| " <td>Male</td>\n", | |
| " <td>White</td>\n", | |
| " <td>Not Hispanic</td>\n", | |
| " <td>Pennsylvania</td>\n", | |
| " <td>Chester</td>\n", | |
| " <td>N</td>\n", | |
| " <td>HEROIN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>40.0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Day Incident_County_Name Gender Race Ethnicity Victim_State \\\n", | |
| "0 Thursday Delaware Male White Not Hispanic Pennsylvania \n", | |
| "1 Thursday Delaware Male White Not Hispanic Pennsylvania \n", | |
| "2 Thursday Delaware Male White Not Hispanic Pennsylvania \n", | |
| "3 Thursday Delaware Male White Not Hispanic Pennsylvania \n", | |
| "4 Friday Chester Male White Not Hispanic Pennsylvania \n", | |
| "\n", | |
| " Victim_County Accidental_Exposure Susp_OD_Drug Naloxone_Administered \\\n", | |
| "0 Delaware N COCAINE/CRACK Y \n", | |
| "1 Delaware N COCAINE/CRACK Y \n", | |
| "2 Delaware N HEROIN Y \n", | |
| "3 Delaware N HEROIN Y \n", | |
| "4 Chester N HEROIN N \n", | |
| "\n", | |
| " Survive Avg_Age Incident_Month \n", | |
| "0 N 80.0 1 \n", | |
| "1 N 80.0 1 \n", | |
| "2 N 80.0 1 \n", | |
| "3 N 80.0 1 \n", | |
| "4 N 40.0 1 " | |
| ] | |
| }, | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Dealing with Categorical Values\n", | |
| "## Preprocessing to clean up the data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "X = data[['Day', 'Incident_County_Name', 'Gender', 'Race', 'Ethnicity',\n", | |
| " 'Victim_State', 'Victim_County', 'Accidental_Exposure',\n", | |
| " 'Susp_OD_Drug','Naloxone_Administered', 'Avg_Age', 'Incident_Month' ]]\n", | |
| "Y = data[['Survive']]\n", | |
| "\n", | |
| "X = pd.get_dummies(data=X)\n", | |
| "Y = pd.get_dummies(data=Y)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Avg_Age</th>\n", | |
| " <th>Incident_Month</th>\n", | |
| " <th>Day_Friday</th>\n", | |
| " <th>Day_Monday</th>\n", | |
| " <th>Day_Saturday</th>\n", | |
| " <th>Day_Sunday</th>\n", | |
| " <th>Day_Thursday</th>\n", | |
| " <th>Day_Tuesday</th>\n", | |
| " <th>Day_Wednesday</th>\n", | |
| " <th>Incident_County_Name_Adams</th>\n", | |
| " <th>...</th>\n", | |
| " <th>Susp_OD_Drug_METHAMPHETAMINE</th>\n", | |
| " <th>Susp_OD_Drug_OTHER</th>\n", | |
| " <th>Susp_OD_Drug_PHARMACEUTICAL OPIOID</th>\n", | |
| " <th>Susp_OD_Drug_PHARMACEUTICAL OTHER</th>\n", | |
| " <th>Susp_OD_Drug_PHARMACEUTICAL STIMULANT</th>\n", | |
| " <th>Susp_OD_Drug_SUBOXONE</th>\n", | |
| " <th>Susp_OD_Drug_SYNTHETIC MARIJUANA</th>\n", | |
| " <th>Susp_OD_Drug_UNKNOWN</th>\n", | |
| " <th>Naloxone_Administered_N</th>\n", | |
| " <th>Naloxone_Administered_Y</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>40.0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 185 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Avg_Age Incident_Month Day_Friday Day_Monday Day_Saturday Day_Sunday \\\n", | |
| "0 80.0 1 0 0 0 0 \n", | |
| "1 80.0 1 0 0 0 0 \n", | |
| "2 80.0 1 0 0 0 0 \n", | |
| "3 80.0 1 0 0 0 0 \n", | |
| "4 40.0 1 1 0 0 0 \n", | |
| "\n", | |
| " Day_Thursday Day_Tuesday Day_Wednesday Incident_County_Name_Adams \\\n", | |
| "0 1 0 0 0 \n", | |
| "1 1 0 0 0 \n", | |
| "2 1 0 0 0 \n", | |
| "3 1 0 0 0 \n", | |
| "4 0 0 0 0 \n", | |
| "\n", | |
| " ... Susp_OD_Drug_METHAMPHETAMINE Susp_OD_Drug_OTHER \\\n", | |
| "0 ... 0 0 \n", | |
| "1 ... 0 0 \n", | |
| "2 ... 0 0 \n", | |
| "3 ... 0 0 \n", | |
| "4 ... 0 0 \n", | |
| "\n", | |
| " Susp_OD_Drug_PHARMACEUTICAL OPIOID Susp_OD_Drug_PHARMACEUTICAL OTHER \\\n", | |
| "0 0 0 \n", | |
| "1 0 0 \n", | |
| "2 0 0 \n", | |
| "3 0 0 \n", | |
| "4 0 0 \n", | |
| "\n", | |
| " Susp_OD_Drug_PHARMACEUTICAL STIMULANT Susp_OD_Drug_SUBOXONE \\\n", | |
| "0 0 0 \n", | |
| "1 0 0 \n", | |
| "2 0 0 \n", | |
| "3 0 0 \n", | |
| "4 0 0 \n", | |
| "\n", | |
| " Susp_OD_Drug_SYNTHETIC MARIJUANA Susp_OD_Drug_UNKNOWN \\\n", | |
| "0 0 0 \n", | |
| "1 0 0 \n", | |
| "2 0 0 \n", | |
| "3 0 0 \n", | |
| "4 0 0 \n", | |
| "\n", | |
| " Naloxone_Administered_N Naloxone_Administered_Y \n", | |
| "0 0 1 \n", | |
| "1 0 1 \n", | |
| "2 0 1 \n", | |
| "3 0 1 \n", | |
| "4 1 0 \n", | |
| "\n", | |
| "[5 rows x 185 columns]" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Survive_N</th>\n", | |
| " <th>Survive_Y</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Survive_N Survive_Y\n", | |
| "0 1 0\n", | |
| "1 1 0\n", | |
| "2 1 0\n", | |
| "3 1 0\n", | |
| "4 1 0" | |
| ] | |
| }, | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Y.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Survive</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Survive\n", | |
| "0 0\n", | |
| "1 0\n", | |
| "2 0\n", | |
| "3 0\n", | |
| "4 0" | |
| ] | |
| }, | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Y = Y.drop(['Survive_N'], axis=1)\n", | |
| "Y.rename(columns={'Survive_Y': 'Survive'}, inplace=True)\n", | |
| "Y.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Preparing and Training the Model" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "1 3445\n", | |
| "0 929\n", | |
| "Name: Survive, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Y.Survive.value_counts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 24, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "C:\\Users\\Frank the Tank\\Anaconda3\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", | |
| " from ._conv import register_converters as _register_converters\n", | |
| "Using TensorFlow backend.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[[80. 1. 0. ... 0. 0. 1.]\n", | |
| " [80. 1. 0. ... 0. 0. 1.]\n", | |
| " [80. 1. 0. ... 0. 0. 1.]\n", | |
| " ...\n", | |
| " [50. 1. 0. ... 0. 1. 0.]\n", | |
| " [80. 1. 0. ... 0. 1. 0.]\n", | |
| " [50. 1. 0. ... 0. 1. 0.]]\n", | |
| "[[1. 0.]\n", | |
| " [1. 0.]\n", | |
| " [1. 0.]\n", | |
| " [1. 0.]\n", | |
| " [1. 0.]\n", | |
| " [0. 1.]\n", | |
| " [0. 1.]\n", | |
| " [0. 1.]\n", | |
| " [0. 1.]\n", | |
| " [1. 0.]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import keras\n", | |
| "\n", | |
| "# Separate data and one-hot encode the output\n", | |
| "# Note: We're also turning the data into numpy arrays, in order to train the model in Keras\n", | |
| "X = np.array(X)\n", | |
| "Y = np.array(keras.utils.to_categorical(Y, 2))\n", | |
| "\n", | |
| "\n", | |
| "print(X[:10])\n", | |
| "print(Y[:10])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "_________________________________________________________________\n", | |
| "Layer (type) Output Shape Param # \n", | |
| "=================================================================\n", | |
| "dense_1 (Dense) (None, 286) 53196 \n", | |
| "_________________________________________________________________\n", | |
| "dropout_1 (Dropout) (None, 286) 0 \n", | |
| "_________________________________________________________________\n", | |
| "dense_2 (Dense) (None, 64) 18368 \n", | |
| "_________________________________________________________________\n", | |
| "dropout_2 (Dropout) (None, 64) 0 \n", | |
| "_________________________________________________________________\n", | |
| "dense_3 (Dense) (None, 2) 130 \n", | |
| "=================================================================\n", | |
| "Total params: 71,694\n", | |
| "Trainable params: 71,694\n", | |
| "Non-trainable params: 0\n", | |
| "_________________________________________________________________\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Imports\n", | |
| "import numpy as np\n", | |
| "from keras.models import Sequential\n", | |
| "from keras.layers.core import Dense, Dropout, Activation\n", | |
| "from keras.optimizers import SGD\n", | |
| "from keras.utils import np_utils\n", | |
| "\n", | |
| "# Building the model\n", | |
| "model = Sequential()\n", | |
| "\n", | |
| "#get number of columns in training data\n", | |
| "n_cols = X.shape[1]\n", | |
| "#relu is the rectified linear unit, the default go-to activation function; it can be tweaked\n", | |
| "model.add(Dense(286, activation='relu', input_shape=(n_cols,)))\n", | |
| "model.add(Dropout(.2))\n", | |
| "model.add(Dense(64, activation='relu'))\n", | |
| "model.add(Dropout(.1))\n", | |
| "#this is our output layer, with one unit per class,\n", | |
| "# so we use softmax to get a probability distribution over the classes\n", | |
| "model.add(Dense(2, activation='softmax'))\n", | |
| "\n", | |
| "# Compiling the model\n", | |
| "model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n", | |
| "model.summary()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Training the model\n", | |
| "from keras.callbacks import EarlyStopping\n", | |
| "#set early stopping monitor so the model stops training when it won't improve anymore\n", | |
| "early_stopping_monitor = EarlyStopping(patience=3)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Train on 3061 samples, validate on 1313 samples\n", | |
| "Epoch 1/200\n", | |
| "3061/3061 [==============================] - 1s 322us/step - loss: 0.6806 - acc: 0.7119 - val_loss: 0.5522 - val_acc: 0.8111\n", | |
| "Epoch 2/200\n", | |
| "3061/3061 [==============================] - 0s 86us/step - loss: 0.5405 - acc: 0.7615 - val_loss: 0.4793 - val_acc: 0.8111\n", | |
| "Epoch 3/200\n", | |
| "3061/3061 [==============================] - 0s 98us/step - loss: 0.5075 - acc: 0.7805 - val_loss: 0.4310 - val_acc: 0.8119\n", | |
| "Epoch 4/200\n", | |
| "3061/3061 [==============================] - 0s 82us/step - loss: 0.4653 - acc: 0.7893 - val_loss: 0.4634 - val_acc: 0.8119\n", | |
| "Epoch 5/200\n", | |
| "3061/3061 [==============================] - 0s 77us/step - loss: 0.4537 - acc: 0.7890 - val_loss: 0.4139 - val_acc: 0.8111\n", | |
| "Epoch 6/200\n", | |
| "3061/3061 [==============================] - 0s 80us/step - loss: 0.4406 - acc: 0.8063 - val_loss: 0.3995 - val_acc: 0.8324\n", | |
| "Epoch 7/200\n", | |
| "3061/3061 [==============================] - 0s 72us/step - loss: 0.4270 - acc: 0.8122 - val_loss: 0.4095 - val_acc: 0.8180\n", | |
| "Epoch 8/200\n", | |
| "3061/3061 [==============================] - 0s 71us/step - loss: 0.4175 - acc: 0.8118 - val_loss: 0.4069 - val_acc: 0.8126\n", | |
| "Epoch 9/200\n", | |
| "3061/3061 [==============================] - 0s 66us/step - loss: 0.4186 - acc: 0.8053 - val_loss: 0.4093 - val_acc: 0.8210\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<keras.callbacks.History at 0x2bd47b00a20>" | |
| ] | |
| }, | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "model.fit(X, Y, validation_split=0.3, epochs=200, batch_size=100, \n", | |
| " verbose=1, callbacks=[early_stopping_monitor] )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": { | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "4374/4374 [==============================] - 0s 35us/step\n", | |
| "\n", | |
| " Model Accuracy: 0.8214449016645612\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#Scoring the model\n", | |
| "# Evaluating the model on the full dataset (training and validation samples together)\n", | |
| "score = model.evaluate(X, Y)\n", | |
| "print(\"\\n Model Accuracy:\", score[1])\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Save the Model to .h5 format" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<keras.engine.sequential.Sequential at 0x2bd494b37b8>" | |
| ] | |
| }, | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "model.save('OD_Survive2.h5')\n", | |
| "Survive_model = keras.models.load_model('OD_Survive2.h5')\n", | |
| "Survive_model" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Making a new Prediction\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Avg_Age</th>\n", | |
| " <th>Incident_Month</th>\n", | |
| " <th>Day_Friday</th>\n", | |
| " <th>Day_Monday</th>\n", | |
| " <th>Day_Saturday</th>\n", | |
| " <th>Day_Sunday</th>\n", | |
| " <th>Day_Thursday</th>\n", | |
| " <th>Day_Tuesday</th>\n", | |
| " <th>Day_Wednesday</th>\n", | |
| " <th>Incident_County_Name_Adams</th>\n", | |
| " <th>...</th>\n", | |
| " <th>Susp_OD_Drug_METHAMPHETAMINE</th>\n", | |
| " <th>Susp_OD_Drug_OTHER</th>\n", | |
| " <th>Susp_OD_Drug_PHARMACEUTICAL OPIOID</th>\n", | |
| " <th>Susp_OD_Drug_PHARMACEUTICAL OTHER</th>\n", | |
| " <th>Susp_OD_Drug_PHARMACEUTICAL STIMULANT</th>\n", | |
| " <th>Susp_OD_Drug_SUBOXONE</th>\n", | |
| " <th>Susp_OD_Drug_SYNTHETIC MARIJUANA</th>\n", | |
| " <th>Susp_OD_Drug_UNKNOWN</th>\n", | |
| " <th>Naloxone_Administered_N</th>\n", | |
| " <th>Naloxone_Administered_Y</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>80.0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>40.0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 185 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Avg_Age Incident_Month Day_Friday Day_Monday Day_Saturday Day_Sunday \\\n", | |
| "0 80.0 1 0 0 0 0 \n", | |
| "1 80.0 1 0 0 0 0 \n", | |
| "2 80.0 1 0 0 0 0 \n", | |
| "3 80.0 1 0 0 0 0 \n", | |
| "4 40.0 1 1 0 0 0 \n", | |
| "\n", | |
| " Day_Thursday Day_Tuesday Day_Wednesday Incident_County_Name_Adams \\\n", | |
| "0 1 0 0 0 \n", | |
| "1 1 0 0 0 \n", | |
| "2 1 0 0 0 \n", | |
| "3 1 0 0 0 \n", | |
| "4 0 0 0 0 \n", | |
| "\n", | |
| " ... Susp_OD_Drug_METHAMPHETAMINE Susp_OD_Drug_OTHER \\\n", | |
| "0 ... 0 0 \n", | |
| "1 ... 0 0 \n", | |
| "2 ... 0 0 \n", | |
| "3 ... 0 0 \n", | |
| "4 ... 0 0 \n", | |
| "\n", | |
| " Susp_OD_Drug_PHARMACEUTICAL OPIOID Susp_OD_Drug_PHARMACEUTICAL OTHER \\\n", | |
| "0 0 0 \n", | |
| "1 0 0 \n", | |
| "2 0 0 \n", | |
| "3 0 0 \n", | |
| "4 0 0 \n", | |
| "\n", | |
| " Susp_OD_Drug_PHARMACEUTICAL STIMULANT Susp_OD_Drug_SUBOXONE \\\n", | |
| "0 0 0 \n", | |
| "1 0 0 \n", | |
| "2 0 0 \n", | |
| "3 0 0 \n", | |
| "4 0 0 \n", | |
| "\n", | |
| " Susp_OD_Drug_SYNTHETIC MARIJUANA Susp_OD_Drug_UNKNOWN \\\n", | |
| "0 0 0 \n", | |
| "1 0 0 \n", | |
| "2 0 0 \n", | |
| "3 0 0 \n", | |
| "4 0 0 \n", | |
| "\n", | |
| " Naloxone_Administered_N Naloxone_Administered_Y \n", | |
| "0 0 1 \n", | |
| "1 0 1 \n", | |
| "2 0 1 \n", | |
| "3 0 1 \n", | |
| "4 1 0 \n", | |
| "\n", | |
| "[5 rows x 185 columns]" | |
| ] | |
| }, | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# create a clean list of dummy variables for the prediction\n", | |
| "X_use = data[['Day', 'Incident_County_Name', 'Gender', 'Race', 'Ethnicity',\n", | |
| " 'Victim_State', 'Victim_County', 'Accidental_Exposure',\n", | |
| " 'Susp_OD_Drug','Naloxone_Administered', 'Avg_Age', 'Incident_Month' ]]\n", | |
| "Y_use = data.Survive\n", | |
| "X_use = pd.get_dummies(data=X_use)\n", | |
| "X_use.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 79, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# a good chunk of this code was adapted from the notebook linked below; they did great work\n", | |
| "#https://github.com/PaacMaan/cars-price-predictor/blob/master/cars_price_predictor.ipynb\n", | |
| "\n", | |
| "#user_input = [1, 35, 'Monday', 'Delaware', 'Male', 'White', 'Not Hispanic',\n", | |
| "# 'Pennsylvania', 'Delaware', 'N', 'COCAINE/CRACK', 'Y']\n", | |
| "user_input = {'Incident_Month':1, 'Avg_Age':35, 'Day':'Monday', \n", | |
| " 'Incident_County_Name':'Delaware', 'Gender':'Male',\n", | |
| " 'Race': 'White', 'Ethnicity': 'Not Hispanic',\n", | |
| " 'Victim_State': 'Pennsylvania','Victim_County': 'Delaware',\n", | |
| " 'Accidental_Exposure':'N','Susp_OD_Drug':'COCAINE/CRACK',\n", | |
| " 'Naloxone_Administered':'Y'}\n", | |
| "\n", | |
| "def input_to_one_hot(data):\n", | |
| "    \"\"\"Encode one incident record as a feature vector laid out like X_use's columns.\n", | |
| "\n", | |
| "    data: dict with numeric 'Avg_Age' and 'Incident_Month' values plus one string\n", | |
| "        value for each categorical field listed in categorical_fields below.\n", | |
| "    Returns a 1-D float array with one slot per X_use column.\n", | |
| "    Raises ValueError when a value has no matching '<field>_<value>' dummy column.\n", | |
| "    \"\"\"\n", | |
| "    columns = X_use.columns.tolist()\n", | |
| "    # size the vector from the frame instead of hard-coding the column count\n", | |
| "    enc_input = np.zeros(len(columns))\n", | |
| "\n", | |
| "    # numeric features: place them by column name so they land in the same slots\n", | |
| "    # as in X_use (Avg_Age comes first there, then Incident_Month)\n", | |
| "    for field in ('Avg_Age', 'Incident_Month'):\n", | |
| "        enc_input[columns.index(field)] = data[field]\n", | |
| "\n", | |
| "    # categorical features: set the matching get_dummies column to 1\n", | |
| "    categorical_fields = ['Day', 'Incident_County_Name', 'Gender', 'Race', 'Ethnicity',\n", | |
| "                          'Victim_State', 'Victim_County', 'Accidental_Exposure',\n", | |
| "                          'Susp_OD_Drug', 'Naloxone_Administered']\n", | |
| "    for field in categorical_fields:\n", | |
| "        dummy_name = field + '_' + data[field]\n", | |
| "        if dummy_name not in columns:\n", | |
| "            # same exception type list.index would raise, with a clearer message\n", | |
| "            raise ValueError('no dummy column named ' + repr(dummy_name))\n", | |
| "        enc_input[columns.index(dummy_name)] = 1\n", | |
| "    return enc_input\n", | |
| " " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 80, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[ 1. 35. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 1.]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(input_to_one_hot(user_input))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 81, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "a = input_to_one_hot(user_input)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 91, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# run the encoded record through the trained network; argmax over the two softmax\n", | |
| "# outputs gives the class (index 1 = Survive, from to_categorical on the Survive column)\n", | |
| "prediction1 = np.argmax(model.predict(np.array([a])), axis=1)[0]\n", | |
| "print(prediction1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## We can see that with this new random prediction, our person will actually survive. Great news!\n", | |
| "\n", | |
| "## Let's try for a different person" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 95, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#user_input = [7, 25, 'Monday', 'Allegheny', 'Female', 'Black', 'Not Hispanic',\n", | |
| "# 'Pennsylvania', 'Philadelphia', 'N', 'HEROIN', 'Y']\n", | |
| "user_input2 = {'Incident_Month':7, 'Avg_Age':25, 'Day':'Monday', \n", | |
| " 'Incident_County_Name':'Allegheny', 'Gender':'Female',\n", | |
| " 'Race': 'Black', 'Ethnicity': 'Not Hispanic',\n", | |
| " 'Victim_State': 'Pennsylvania','Victim_County': 'Philadelphia',\n", | |
| " 'Accidental_Exposure':'N','Susp_OD_Drug':'HEROIN',\n", | |
| " 'Naloxone_Administered':'N'}\n", | |
| "\n", | |
| "def input_to_one_hot(data):\n", | |
| "    \"\"\"Encode one incident record as a feature vector laid out like X_use's columns.\n", | |
| "\n", | |
| "    data: dict with numeric 'Avg_Age' and 'Incident_Month' values plus one string\n", | |
| "        value for each categorical field listed in categorical_fields below.\n", | |
| "    Returns a 1-D float array with one slot per X_use column.\n", | |
| "    Raises ValueError when a value has no matching '<field>_<value>' dummy column.\n", | |
| "    \"\"\"\n", | |
| "    columns = X_use.columns.tolist()\n", | |
| "    # size the vector from the frame instead of hard-coding the column count\n", | |
| "    enc_input = np.zeros(len(columns))\n", | |
| "\n", | |
| "    # numeric features: place them by column name so they land in the same slots\n", | |
| "    # as in X_use (Avg_Age comes first there, then Incident_Month)\n", | |
| "    for field in ('Avg_Age', 'Incident_Month'):\n", | |
| "        enc_input[columns.index(field)] = data[field]\n", | |
| "\n", | |
| "    # categorical features: set the matching get_dummies column to 1\n", | |
| "    categorical_fields = ['Day', 'Incident_County_Name', 'Gender', 'Race', 'Ethnicity',\n", | |
| "                          'Victim_State', 'Victim_County', 'Accidental_Exposure',\n", | |
| "                          'Susp_OD_Drug', 'Naloxone_Administered']\n", | |
| "    for field in categorical_fields:\n", | |
| "        dummy_name = field + '_' + data[field]\n", | |
| "        if dummy_name not in columns:\n", | |
| "            # same exception type list.index would raise, with a clearer message\n", | |
| "            raise ValueError('no dummy column named ' + repr(dummy_name))\n", | |
| "        enc_input[columns.index(dummy_name)] = 1\n", | |
| "    return enc_input\n", | |
| " " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 96, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[ 7. 25. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
| " 0. 0. 0. 1. 0.]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(input_to_one_hot(user_input2))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 97, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "b = input_to_one_hot(user_input2)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 98, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# run the encoded record through the trained network; argmax over the two softmax\n", | |
| "# outputs gives the class (index 1 = Survive, from to_categorical on the Survive column)\n", | |
| "prediction2 = np.argmax(model.predict(np.array([b])), axis=1)[0]\n", | |
| "print(prediction2)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python [conda env:Anaconda3]", | |
| "language": "python", | |
| "name": "conda-env-Anaconda3-py" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.4" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment