Created
April 22, 2019 21:55
-
-
Save osipov/af1f6420cecc29b9bd8afee190663ee5 to your computer and use it in GitHub Desktop.
parks.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "name": "parks.ipynb", | |
| "version": "0.3.2", | |
| "provenance": [], | |
| "collapsed_sections": [] | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "metadata": { | |
| "id": "CeoMKry-IWzB", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "cell_type": "code", | |
| "source": [ | |
| "# Import numpy and pandas, then print the installed pandas version.\n", | |
| "import numpy as np\n", | |
| "import pandas as pd\n", | |
| "print(pd.__version__)" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "id": "Fx4hUgMhIfzA", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "cell_type": "code", | |
| "source": [ | |
| "#read the dataset about national parks from 1904-2016 and get its summary statistics\n", | |
| "df = pd.read_csv('National Parks Ranked by Total Visitation 1904-2016 and by Year.csv')\n", | |
| "df.describe()" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "id": "UNPGw0fRJtuc", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "cell_type": "code", | |
| "source": [ | |
| "# Review the dtype pandas inferred for each column of df.\n", | |
| "df.dtypes" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "id": "SJWg6sMqiSBI", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "cell_type": "code", | |
| "source": [ | |
| "# List the column labels exactly as loaded.\n", | |
| "# NOTE(review): the visitors column label has a naming issue -- presumably stray\n", | |
| "# whitespace, since the next cell strips the labels; confirm against the output.\n", | |
| "df.columns" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "id": "jTp_ECWMiYrl", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "cell_type": "code", | |
| "source": [ | |
| "#TODO: Make sure that the columns are properly named\n", | |
| "df.rename(str.strip, axis='columns', inplace=True)\n", | |
| "df.columns" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "id": "KHR30Abpjr2c", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "cell_type": "code", | |
| "source": [ | |
| "#TODO: convert Year values that are not \"Total\" to int\n", | |
| "df = df[~df.YearRaw.isin([\"Total\"])]\n", | |
| "df.YearRaw = df.YearRaw.astype(int)" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "id": "2NUZ5uMS9mpz", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "cell_type": "code", | |
| "source": [ | |
| "#TODO clean up visitors string values and return as int\n", | |
| "df.Visitors = df.Visitors.apply(lambda s: int(s.strip().replace(',', '').replace('-', '0'))).astype(int)" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "id": "BBYqxvB2IjUp", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "cell_type": "code", | |
| "source": [ | |
| "#TODO: create a pivot table showing park attendance in each park in each state from 2010-2016\n", | |
| "df_pivot = df.pivot_table(index=[\"State\",\"Unit Name\"], columns=[\"YearRaw\"], values='Visitors', aggfunc=np.sum)\n", | |
| "df_2010_2016 = df_pivot.loc[:, 2010:2016] \n", | |
| "df_2010_2016" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "id": "prJcmO7s8ZWt", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "cell_type": "code", | |
| "source": [ | |
| "#TODO: show total park attendance in every state from 2010-2016\n", | |
| "df_2010_2016 = df_2010_2016.groupby(\"State\").agg(np.sum)\n", | |
| "df_2010_2016[\"Total\"] = df_2010_2016.sum(axis=1)\n", | |
| "df_2010_2016" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "id": "iJ-WfqfmAAPM", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "cell_type": "code", | |
| "source": [ | |
| "#TODO: show the top 5 most visited parks from 2010-2016\n", | |
| "df_2010_2016.nlargest(5, columns=[\"Total\"])" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment