-
-
Save olivx/64d657ff67ecfcc38917f5f34cc1495b to your computer and use it in GitHub Desktop.
Annotations of Pandas DataFrame
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "import string\n", | |
| "# Reference for the DataFrame-construction patterns used below:\n", | |
| "# https://www.ritchieng.com/creating-dataframe-from-objects/" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'ABCDEFGHIJKLMNOPQRSTUVWXYZ'" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Show the uppercase ASCII alphabet; a slice of it labels the DataFrame columns in the next cell.\n", | |
| "string.ascii_uppercase" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>A</th>\n", | |
| " <th>B</th>\n", | |
| " <th>C</th>\n", | |
| " <th>D</th>\n", | |
| " <th>E</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>91</td>\n", | |
| " <td>81</td>\n", | |
| " <td>76</td>\n", | |
| " <td>30</td>\n", | |
| " <td>19</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>28</td>\n", | |
| " <td>30</td>\n", | |
| " <td>49</td>\n", | |
| " <td>71</td>\n", | |
| " <td>32</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>69</td>\n", | |
| " <td>77</td>\n", | |
| " <td>33</td>\n", | |
| " <td>16</td>\n", | |
| " <td>17</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>34</td>\n", | |
| " <td>59</td>\n", | |
| " <td>62</td>\n", | |
| " <td>87</td>\n", | |
| " <td>57</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>93</td>\n", | |
| " <td>64</td>\n", | |
| " <td>50</td>\n", | |
| " <td>33</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>46</td>\n", | |
| " <td>71</td>\n", | |
| " <td>35</td>\n", | |
| " <td>33</td>\n", | |
| " <td>31</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>3</td>\n", | |
| " <td>69</td>\n", | |
| " <td>65</td>\n", | |
| " <td>30</td>\n", | |
| " <td>92</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>33</td>\n", | |
| " <td>46</td>\n", | |
| " <td>64</td>\n", | |
| " <td>16</td>\n", | |
| " <td>82</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>26</td>\n", | |
| " <td>58</td>\n", | |
| " <td>78</td>\n", | |
| " <td>19</td>\n", | |
| " <td>87</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>16</td>\n", | |
| " <td>70</td>\n", | |
| " <td>68</td>\n", | |
| " <td>23</td>\n", | |
| " <td>11</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " A B C D E\n", | |
| "0 91 81 76 30 19\n", | |
| "1 28 30 49 71 32\n", | |
| "2 69 77 33 16 17\n", | |
| "3 34 59 62 87 57\n", | |
| "4 0 93 64 50 33\n", | |
| "5 46 71 35 33 31\n", | |
| "6 3 69 65 30 92\n", | |
| "7 33 46 64 16 82\n", | |
| "8 26 58 78 19 87\n", | |
| "9 16 70 68 23 11" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Shape of the demo frame and the largest integer a cell may hold.\n", | |
| "size_columns = 5\n", | |
| "size_rows = 10\n", | |
| "max_value = 100\n", | |
| "# One uppercase letter per column ('ABCDE' when size_columns is 5).\n", | |
| "_string = string.ascii_uppercase[:size_columns]\n", | |
| "# np.random.randint excludes its upper bound, so pass max_value + 1 to make\n", | |
| "# max_value itself reachable (inclusive range 0..max_value).\n", | |
| "df_int = pd.DataFrame(\n", | |
| "    np.random.randint(0, max_value + 1, size=(size_rows, size_columns)),\n", | |
| "    columns=list(_string),\n", | |
| ")\n", | |
| "df_int" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>A</th>\n", | |
| " <th>B</th>\n", | |
| " <th>C</th>\n", | |
| " <th>D</th>\n", | |
| " <th>E</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>-0.556790</td>\n", | |
| " <td>0.650430</td>\n", | |
| " <td>-1.894784</td>\n", | |
| " <td>-1.382816</td>\n", | |
| " <td>1.808950</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1.297879</td>\n", | |
| " <td>0.361739</td>\n", | |
| " <td>-1.239575</td>\n", | |
| " <td>0.769614</td>\n", | |
| " <td>0.574830</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.590421</td>\n", | |
| " <td>1.447465</td>\n", | |
| " <td>-0.868681</td>\n", | |
| " <td>-0.105521</td>\n", | |
| " <td>-1.796299</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0.035956</td>\n", | |
| " <td>2.398848</td>\n", | |
| " <td>0.889333</td>\n", | |
| " <td>1.445073</td>\n", | |
| " <td>0.310777</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>-0.035894</td>\n", | |
| " <td>-0.801347</td>\n", | |
| " <td>1.406680</td>\n", | |
| " <td>1.230958</td>\n", | |
| " <td>0.049223</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>0.338208</td>\n", | |
| " <td>0.073445</td>\n", | |
| " <td>1.074535</td>\n", | |
| " <td>0.448073</td>\n", | |
| " <td>-0.361343</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0.282894</td>\n", | |
| " <td>-1.440926</td>\n", | |
| " <td>-1.309745</td>\n", | |
| " <td>-0.264459</td>\n", | |
| " <td>-0.335074</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>0.853490</td>\n", | |
| " <td>1.519119</td>\n", | |
| " <td>0.574555</td>\n", | |
| " <td>1.323457</td>\n", | |
| " <td>0.727536</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>-1.823434</td>\n", | |
| " <td>0.010703</td>\n", | |
| " <td>-0.550191</td>\n", | |
| " <td>-0.639050</td>\n", | |
| " <td>1.143485</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>0.713230</td>\n", | |
| " <td>-0.980415</td>\n", | |
| " <td>-1.481190</td>\n", | |
| " <td>3.381801</td>\n", | |
| " <td>-0.971942</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " A B C D E\n", | |
| "0 -0.556790 0.650430 -1.894784 -1.382816 1.808950\n", | |
| "1 1.297879 0.361739 -1.239575 0.769614 0.574830\n", | |
| "2 0.590421 1.447465 -0.868681 -0.105521 -1.796299\n", | |
| "3 0.035956 2.398848 0.889333 1.445073 0.310777\n", | |
| "4 -0.035894 -0.801347 1.406680 1.230958 0.049223\n", | |
| "5 0.338208 0.073445 1.074535 0.448073 -0.361343\n", | |
| "6 0.282894 -1.440926 -1.309745 -0.264459 -0.335074\n", | |
| "7 0.853490 1.519119 0.574555 1.323457 0.727536\n", | |
| "8 -1.823434 0.010703 -0.550191 -0.639050 1.143485\n", | |
| "9 0.713230 -0.980415 -1.481190 3.381801 -0.971942" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Standard-normal floats, shaped and labelled with the same size_rows,\n", | |
| "# size_columns and _string values used for df_int.\n", | |
| "df = pd.DataFrame(\n", | |
| "    data=np.random.randn(size_rows, size_columns),\n", | |
| "    columns=list(_string),\n", | |
| ")\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>city_id</th>\n", | |
| " <th>population</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>100</td>\n", | |
| " <td>23</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>101</td>\n", | |
| " <td>14</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>102</td>\n", | |
| " <td>43</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>103</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>104</td>\n", | |
| " <td>12</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>105</td>\n", | |
| " <td>92</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>106</td>\n", | |
| " <td>46</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>107</td>\n", | |
| " <td>53</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>108</td>\n", | |
| " <td>25</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>109</td>\n", | |
| " <td>62</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " city_id population\n", | |
| "0 100 23\n", | |
| "1 101 14\n", | |
| "2 102 43\n", | |
| "3 103 4\n", | |
| "4 104 12\n", | |
| "5 105 92\n", | |
| "6 106 46\n", | |
| "7 107 53\n", | |
| "8 108 25\n", | |
| "9 109 62" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Ten consecutive city ids (100..109), each paired with a random\n", | |
| "# population drawn from 0..100 (randint's upper bound 101 is exclusive).\n", | |
| "new_dict = dict(\n", | |
| "    city_id=np.arange(100, 110),\n", | |
| "    population=np.random.randint(low=0, high=101, size=10),\n", | |
| ")\n", | |
| "df_pop = pd.DataFrame(data=new_dict)\n", | |
| "df_pop" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>city_id</th>\n", | |
| " <th>population</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>count</th>\n", | |
| " <td>10.00000</td>\n", | |
| " <td>10.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>mean</th>\n", | |
| " <td>104.50000</td>\n", | |
| " <td>37.400000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>std</th>\n", | |
| " <td>3.02765</td>\n", | |
| " <td>27.089153</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>min</th>\n", | |
| " <td>100.00000</td>\n", | |
| " <td>4.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>25%</th>\n", | |
| " <td>102.25000</td>\n", | |
| " <td>16.250000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>50%</th>\n", | |
| " <td>104.50000</td>\n", | |
| " <td>34.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>75%</th>\n", | |
| " <td>106.75000</td>\n", | |
| " <td>51.250000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>max</th>\n", | |
| " <td>109.00000</td>\n", | |
| " <td>92.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " city_id population\n", | |
| "count 10.00000 10.000000\n", | |
| "mean 104.50000 37.400000\n", | |
| "std 3.02765 27.089153\n", | |
| "min 100.00000 4.000000\n", | |
| "25% 102.25000 16.250000\n", | |
| "50% 104.50000 34.000000\n", | |
| "75% 106.75000 51.250000\n", | |
| "max 109.00000 92.000000" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Summary statistics (count, mean, std, min, quartiles, max) for each column of df_pop.\n", | |
| "df_pop.describe()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment