Skip to content

Instantly share code, notes, and snippets.

@osipov
Created April 22, 2019 21:55
Show Gist options
  • Select an option

  • Save osipov/af1f6420cecc29b9bd8afee190663ee5 to your computer and use it in GitHub Desktop.

Select an option

Save osipov/af1f6420cecc29b9bd8afee190663ee5 to your computer and use it in GitHub Desktop.
parks.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "parks.ipynb",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"metadata": {
"id": "CeoMKry-IWzB",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"#TODO import numpy and pandas, show pandas version\n",
"import numpy as np\n",
"import pandas as pd\n",
"print(pd.__version__)"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "Fx4hUgMhIfzA",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Load the national park visitation records (1904-2016), then summarize the columns.\n",
"df = pd.read_csv(\n",
"    filepath_or_buffer='National Parks Ranked by Total Visitation 1904-2016 and by Year.csv'\n",
")\n",
"df.describe()"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "UNPGw0fRJtuc",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Inspect the dtype pandas inferred for each column of the loaded frame.\n",
"df.dtypes"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "SJWg6sMqiSBI",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Display the column labels; look closely at the visitors column name, which has an\n",
"# issue (a later cell strips whitespace from the labels).\n",
"df.columns"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "jTp_ECWMiYrl",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Normalize the column labels by trimming surrounding whitespace, then show the result.\n",
"df = df.rename(columns=str.strip)\n",
"df.columns"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "KHR30Abpjr2c",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Drop the aggregate \"Total\" rows, then store the remaining years as integers.\n",
"# .copy() gives df its own data, so the column assignment below does not act on a\n",
"# filtered slice of the original frame (which raises SettingWithCopyWarning).\n",
"df = df.loc[~df[\"YearRaw\"].isin([\"Total\"])].copy()\n",
"df[\"YearRaw\"] = df[\"YearRaw\"].astype(int)"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "2NUZ5uMS9mpz",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Clean up the visitor counts (stored as text) and convert them to int.\n",
"# astype(str) keeps the cell re-runnable once the column already holds integers.\n",
"visitors_text = df[\"Visitors\"].astype(str).str.strip().str.replace(\",\", \"\", regex=False)\n",
"# Only a bare \"-\" is treated as a zero placeholder; Series.replace matches whole values,\n",
"# so hyphens inside other values are left alone instead of being turned into digits.\n",
"visitors_text = visitors_text.replace(\"-\", \"0\")\n",
"# assign() builds a new frame, avoiding assignment into a filtered slice of the data.\n",
"df = df.assign(Visitors=visitors_text.astype(int))"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "BBYqxvB2IjUp",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Pivot visitor counts into one row per (State, Unit Name) and one column per year,\n",
"# then keep the 2010-2016 columns.\n",
"# aggfunc=\"sum\" replaces np.sum: recent pandas versions emit a FutureWarning when a\n",
"# NumPy callable is passed here and recommend the string alias instead.\n",
"df_pivot = df.pivot_table(\n",
"    index=[\"State\", \"Unit Name\"],\n",
"    columns=[\"YearRaw\"],\n",
"    values=\"Visitors\",\n",
"    aggfunc=\"sum\",\n",
")\n",
"df_2010_2016 = df_pivot.loc[:, 2010:2016]\n",
"df_2010_2016"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "prJcmO7s8ZWt",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Total park attendance per state for 2010-2016, plus a \"Total\" column across the years.\n",
"# Start again from df_pivot rather than overwriting df_2010_2016 from itself, so that\n",
"# re-running this cell does not fold the \"Total\" column into the yearly sums.\n",
"df_2010_2016 = df_pivot.loc[:, 2010:2016].groupby(level=\"State\").sum()\n",
"df_2010_2016[\"Total\"] = df_2010_2016.sum(axis=1)\n",
"df_2010_2016"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "iJ-WfqfmAAPM",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Show the top 5 most visited parks from 2010-2016.\n",
"# df_2010_2016 holds per-state totals at this point, so ranking it would list states;\n",
"# rank the per-park rows of df_pivot instead.\n",
"df_parks_2010_2016 = df_pivot.loc[:, 2010:2016].assign(Total=lambda yearly: yearly.sum(axis=1))\n",
"df_parks_2010_2016.nlargest(5, columns=\"Total\")"
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment