Created
July 1, 2016 00:31
-
-
Save wlattner/4c34b9f0c6fe6dcd79983bc9fe6e70a2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 38, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "cases = pd.read_csv(\"https://raw.githubusercontent.com/rstudio/EDAWR/master/data-raw/cases.csv\")\n", | |
| "storms = pd.read_csv(\"https://raw.githubusercontent.com/rstudio/EDAWR/master/data-raw/storms.csv\")\n", | |
| "pollution = pd.read_csv(\"https://raw.githubusercontent.com/rstudio/EDAWR/master/data-raw/pollution.csv\")\n", | |
| "tb = pd.read_csv(\"https://raw.githubusercontent.com/rstudio/EDAWR/master/data-raw/tb.csv\")\n", | |
| "iris = pd.read_csv(\"./iris.csv\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Gather columns into rows." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "source": [ | |
| "```r\n", | |
| "> tidyr::gather(cases, \"year\", \"n\", 2:4)\n", | |
| " country year n\n", | |
| "1 FR 2011 7000\n", | |
| "2 DE 2011 5800\n", | |
| "3 US 2011 15000\n", | |
| "4 FR 2012 6900\n", | |
| "5 DE 2012 6000\n", | |
| "6 US 2012 14000\n", | |
| "7 FR 2013 7000\n", | |
| "8 DE 2013 6200\n", | |
| "9 US 2013 13000\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>country</th>\n", | |
| " <th>year</th>\n", | |
| " <th>n</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>FR</td>\n", | |
| " <td>2011</td>\n", | |
| " <td>7000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>DE</td>\n", | |
| " <td>2011</td>\n", | |
| " <td>5800</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>US</td>\n", | |
| " <td>2011</td>\n", | |
| " <td>15000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>FR</td>\n", | |
| " <td>2012</td>\n", | |
| " <td>6900</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>DE</td>\n", | |
| " <td>2012</td>\n", | |
| " <td>6000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>US</td>\n", | |
| " <td>2012</td>\n", | |
| " <td>14000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>FR</td>\n", | |
| " <td>2013</td>\n", | |
| " <td>7000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>DE</td>\n", | |
| " <td>2013</td>\n", | |
| " <td>6200</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>US</td>\n", | |
| " <td>2013</td>\n", | |
| " <td>13000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " country year n\n", | |
| "0 FR 2011 7000\n", | |
| "1 DE 2011 5800\n", | |
| "2 US 2011 15000\n", | |
| "3 FR 2012 6900\n", | |
| "4 DE 2012 6000\n", | |
| "5 US 2012 14000\n", | |
| "6 FR 2013 7000\n", | |
| "7 DE 2013 6200\n", | |
| "8 US 2013 13000" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.melt(cases, id_vars=\"country\", var_name=\"year\", value_name=\"n\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Spread rows into columns." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "source": [ | |
| "```r\n", | |
| "> tidyr::spread(pollution, size, amount)\n", | |
| " city large small\n", | |
| "1 Beijing 121 56\n", | |
| "2 London 22 16\n", | |
| "3 New York 23 14\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>size</th>\n", | |
| " <th>large</th>\n", | |
| " <th>small</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>city</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>Beijing</th>\n", | |
| " <td>121</td>\n", | |
| " <td>56</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>London</th>\n", | |
| " <td>22</td>\n", | |
| " <td>16</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>New York</th>\n", | |
| " <td>23</td>\n", | |
| " <td>14</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "size large small\n", | |
| "city \n", | |
| "Beijing 121 56\n", | |
| "London 22 16\n", | |
| "New York 23 14" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pollution.pivot(index=\"city\", columns=\"size\", values=\"amount\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Separate one column into several." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> tidyr::separate(storms, date, c(\"y\", \"m\", \"d\"))\n", | |
| "# A tibble: 6 x 6\n", | |
| " storm wind pressure y m d\n", | |
| "* <chr> <int> <int> <chr> <chr> <chr>\n", | |
| "1 Alberto 110 1007 2000 08 03\n", | |
| "2 Alex 45 1009 1998 07 27\n", | |
| "3 Allison 65 1005 1995 06 03\n", | |
| "4 Ana 40 1013 1997 06 30\n", | |
| "5 Arlene 50 1010 1999 06 11\n", | |
| "6 Arthur 45 1010 1996 06 17\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>storm</th>\n", | |
| " <th>wind</th>\n", | |
| " <th>pressure</th>\n", | |
| " <th>d</th>\n", | |
| " <th>m</th>\n", | |
| " <th>y</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Alberto</td>\n", | |
| " <td>110</td>\n", | |
| " <td>1007</td>\n", | |
| " <td>03</td>\n", | |
| " <td>08</td>\n", | |
| " <td>2000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Alex</td>\n", | |
| " <td>45</td>\n", | |
| " <td>1009</td>\n", | |
| " <td>27</td>\n", | |
| " <td>07</td>\n", | |
| " <td>1998</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Allison</td>\n", | |
| " <td>65</td>\n", | |
| " <td>1005</td>\n", | |
| " <td>03</td>\n", | |
| " <td>06</td>\n", | |
| " <td>1995</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Ana</td>\n", | |
| " <td>40</td>\n", | |
| " <td>1013</td>\n", | |
| " <td>30</td>\n", | |
| " <td>06</td>\n", | |
| " <td>1997</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Arlene</td>\n", | |
| " <td>50</td>\n", | |
| " <td>1010</td>\n", | |
| " <td>11</td>\n", | |
| " <td>06</td>\n", | |
| " <td>1999</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>Arthur</td>\n", | |
| " <td>45</td>\n", | |
| " <td>1010</td>\n", | |
| " <td>17</td>\n", | |
| " <td>06</td>\n", | |
| " <td>1996</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " storm wind pressure d m y\n", | |
| "0 Alberto 110 1007 03 08 2000\n", | |
| "1 Alex 45 1009 27 07 1998\n", | |
| "2 Allison 65 1005 03 06 1995\n", | |
| "3 Ana 40 1013 30 06 1997\n", | |
| "4 Arlene 50 1010 11 06 1999\n", | |
| "5 Arthur 45 1010 17 06 1996" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "storms.assign(\n", | |
| " y = storms.date.map(lambda x: x.split(\"-\")[0]),\n", | |
| " m = storms.date.map(lambda x: x.split(\"-\")[1]),\n", | |
| " d = storms.date.map(lambda x: x.split(\"-\")[2])\n", | |
| ").drop(['date'], axis=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Extract rows that meet logical criteria" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::filter(iris, Sepal.Length > 7)\n", | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "1 7.1 3.0 5.9 2.1 virginica\n", | |
| "2 7.6 3.0 6.6 2.1 virginica\n", | |
| "3 7.3 2.9 6.3 1.8 virginica\n", | |
| "4 7.2 3.6 6.1 2.5 virginica\n", | |
| "5 7.7 3.8 6.7 2.2 virginica\n", | |
| "6 7.7 2.6 6.9 2.3 virginica\n", | |
| "7 7.7 2.8 6.7 2.0 virginica\n", | |
| "8 7.2 3.2 6.0 1.8 virginica\n", | |
| "9 7.2 3.0 5.8 1.6 virginica\n", | |
| "10 7.4 2.8 6.1 1.9 virginica\n", | |
| "11 7.9 3.8 6.4 2.0 virginica\n", | |
| "12 7.7 3.0 6.1 2.3 virginica\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sepal.Length</th>\n", | |
| " <th>Sepal.Width</th>\n", | |
| " <th>Petal.Length</th>\n", | |
| " <th>Petal.Width</th>\n", | |
| " <th>Species</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>102</th>\n", | |
| " <td>7.1</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>5.9</td>\n", | |
| " <td>2.1</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>105</th>\n", | |
| " <td>7.6</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>6.6</td>\n", | |
| " <td>2.1</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>107</th>\n", | |
| " <td>7.3</td>\n", | |
| " <td>2.9</td>\n", | |
| " <td>6.3</td>\n", | |
| " <td>1.8</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>109</th>\n", | |
| " <td>7.2</td>\n", | |
| " <td>3.6</td>\n", | |
| " <td>6.1</td>\n", | |
| " <td>2.5</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>117</th>\n", | |
| " <td>7.7</td>\n", | |
| " <td>3.8</td>\n", | |
| " <td>6.7</td>\n", | |
| " <td>2.2</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>118</th>\n", | |
| " <td>7.7</td>\n", | |
| " <td>2.6</td>\n", | |
| " <td>6.9</td>\n", | |
| " <td>2.3</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>122</th>\n", | |
| " <td>7.7</td>\n", | |
| " <td>2.8</td>\n", | |
| " <td>6.7</td>\n", | |
| " <td>2.0</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>125</th>\n", | |
| " <td>7.2</td>\n", | |
| " <td>3.2</td>\n", | |
| " <td>6.0</td>\n", | |
| " <td>1.8</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>129</th>\n", | |
| " <td>7.2</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>5.8</td>\n", | |
| " <td>1.6</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>130</th>\n", | |
| " <td>7.4</td>\n", | |
| " <td>2.8</td>\n", | |
| " <td>6.1</td>\n", | |
| " <td>1.9</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>131</th>\n", | |
| " <td>7.9</td>\n", | |
| " <td>3.8</td>\n", | |
| " <td>6.4</td>\n", | |
| " <td>2.0</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>135</th>\n", | |
| " <td>7.7</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>6.1</td>\n", | |
| " <td>2.3</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "102 7.1 3.0 5.9 2.1 virginica\n", | |
| "105 7.6 3.0 6.6 2.1 virginica\n", | |
| "107 7.3 2.9 6.3 1.8 virginica\n", | |
| "109 7.2 3.6 6.1 2.5 virginica\n", | |
| "117 7.7 3.8 6.7 2.2 virginica\n", | |
| "118 7.7 2.6 6.9 2.3 virginica\n", | |
| "122 7.7 2.8 6.7 2.0 virginica\n", | |
| "125 7.2 3.2 6.0 1.8 virginica\n", | |
| "129 7.2 3.0 5.8 1.6 virginica\n", | |
| "130 7.4 2.8 6.1 1.9 virginica\n", | |
| "131 7.9 3.8 6.4 2.0 virginica\n", | |
| "135 7.7 3.0 6.1 2.3 virginica" | |
| ] | |
| }, | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris[iris['Sepal.Length'] > 7]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Remove duplicate rows." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::distinct(iris)\n", | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "1 5.1 3.5 1.4 0.2 setosa\n", | |
| "2 4.9 3.0 1.4 0.2 setosa\n", | |
| "3 4.7 3.2 1.3 0.2 setosa\n", | |
| "4 4.6 3.1 1.5 0.2 setosa\n", | |
| "5 5.0 3.6 1.4 0.2 setosa\n", | |
| "6 5.4 3.9 1.7 0.4 setosa\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sepal.Length</th>\n", | |
| " <th>Sepal.Width</th>\n", | |
| " <th>Petal.Length</th>\n", | |
| " <th>Petal.Width</th>\n", | |
| " <th>Species</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>5.1</td>\n", | |
| " <td>3.5</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>4.9</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>4.7</td>\n", | |
| " <td>3.2</td>\n", | |
| " <td>1.3</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4.6</td>\n", | |
| " <td>3.1</td>\n", | |
| " <td>1.5</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5.0</td>\n", | |
| " <td>3.6</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "0 5.1 3.5 1.4 0.2 setosa\n", | |
| "1 4.9 3.0 1.4 0.2 setosa\n", | |
| "2 4.7 3.2 1.3 0.2 setosa\n", | |
| "3 4.6 3.1 1.5 0.2 setosa\n", | |
| "4 5.0 3.6 1.4 0.2 setosa" | |
| ] | |
| }, | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris.drop_duplicates().head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Randomly select fraction of rows." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::sample_frac(iris, 0.5, replace = TRUE)\n", | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "94 5.0 2.3 3.3 1.0 versicolor\n", | |
| "24 5.1 3.3 1.7 0.5 setosa\n", | |
| "4 4.6 3.1 1.5 0.2 setosa\n", | |
| "139 6.0 3.0 4.8 1.8 virginica\n", | |
| "82 5.5 2.4 3.7 1.0 versicolor\n", | |
| "128 6.1 3.0 4.9 1.8 virginica\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sepal.Length</th>\n", | |
| " <th>Sepal.Width</th>\n", | |
| " <th>Petal.Length</th>\n", | |
| " <th>Petal.Width</th>\n", | |
| " <th>Species</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>36</th>\n", | |
| " <td>5.5</td>\n", | |
| " <td>3.5</td>\n", | |
| " <td>1.3</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>37</th>\n", | |
| " <td>4.9</td>\n", | |
| " <td>3.6</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.1</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>132</th>\n", | |
| " <td>6.4</td>\n", | |
| " <td>2.8</td>\n", | |
| " <td>5.6</td>\n", | |
| " <td>2.2</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>106</th>\n", | |
| " <td>4.9</td>\n", | |
| " <td>2.5</td>\n", | |
| " <td>4.5</td>\n", | |
| " <td>1.7</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>97</th>\n", | |
| " <td>6.2</td>\n", | |
| " <td>2.9</td>\n", | |
| " <td>4.3</td>\n", | |
| " <td>1.3</td>\n", | |
| " <td>versicolor</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "36 5.5 3.5 1.3 0.2 setosa\n", | |
| "37 4.9 3.6 1.4 0.1 setosa\n", | |
| "132 6.4 2.8 5.6 2.2 virginica\n", | |
| "106 4.9 2.5 4.5 1.7 virginica\n", | |
| "97 6.2 2.9 4.3 1.3 versicolor" | |
| ] | |
| }, | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris.sample(frac=0.5, replace=True).head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Randomly select n rows." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::sample_n(iris, 10, replace = TRUE)\n", | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "13 4.8 3.0 1.4 0.1 setosa\n", | |
| "135 6.1 2.6 5.6 1.4 virginica\n", | |
| "69 6.2 2.2 4.5 1.5 versicolor\n", | |
| "31 4.8 3.1 1.6 0.2 setosa\n", | |
| "27 5.0 3.4 1.6 0.4 setosa\n", | |
| "100 5.7 2.8 4.1 1.3 versicolor\n", | |
| "86 6.0 3.4 4.5 1.6 versicolor\n", | |
| "150 5.9 3.0 5.1 1.8 virginica\n", | |
| "125 6.7 3.3 5.7 2.1 virginica\n", | |
| "122 5.6 2.8 4.9 2.0 virginica\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sepal.Length</th>\n", | |
| " <th>Sepal.Width</th>\n", | |
| " <th>Petal.Length</th>\n", | |
| " <th>Petal.Width</th>\n", | |
| " <th>Species</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>81</th>\n", | |
| " <td>5.5</td>\n", | |
| " <td>2.4</td>\n", | |
| " <td>3.7</td>\n", | |
| " <td>1.0</td>\n", | |
| " <td>versicolor</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>107</th>\n", | |
| " <td>7.3</td>\n", | |
| " <td>2.9</td>\n", | |
| " <td>6.3</td>\n", | |
| " <td>1.8</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>143</th>\n", | |
| " <td>6.8</td>\n", | |
| " <td>3.2</td>\n", | |
| " <td>5.9</td>\n", | |
| " <td>2.3</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>127</th>\n", | |
| " <td>6.1</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>4.9</td>\n", | |
| " <td>1.8</td>\n", | |
| " <td>virginica</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>77</th>\n", | |
| " <td>6.7</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>1.7</td>\n", | |
| " <td>versicolor</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>5.4</td>\n", | |
| " <td>3.9</td>\n", | |
| " <td>1.7</td>\n", | |
| " <td>0.4</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>64</th>\n", | |
| " <td>5.6</td>\n", | |
| " <td>2.9</td>\n", | |
| " <td>3.6</td>\n", | |
| " <td>1.3</td>\n", | |
| " <td>versicolor</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>4.8</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.1</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>55</th>\n", | |
| " <td>5.7</td>\n", | |
| " <td>2.8</td>\n", | |
| " <td>4.5</td>\n", | |
| " <td>1.3</td>\n", | |
| " <td>versicolor</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>31</th>\n", | |
| " <td>5.4</td>\n", | |
| " <td>3.4</td>\n", | |
| " <td>1.5</td>\n", | |
| " <td>0.4</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "81 5.5 2.4 3.7 1.0 versicolor\n", | |
| "107 7.3 2.9 6.3 1.8 virginica\n", | |
| "143 6.8 3.2 5.9 2.3 virginica\n", | |
| "127 6.1 3.0 4.9 1.8 virginica\n", | |
| "77 6.7 3.0 5.0 1.7 versicolor\n", | |
| "5 5.4 3.9 1.7 0.4 setosa\n", | |
| "64 5.6 2.9 3.6 1.3 versicolor\n", | |
| "12 4.8 3.0 1.4 0.1 setosa\n", | |
| "55 5.7 2.8 4.5 1.3 versicolor\n", | |
| "31 5.4 3.4 1.5 0.4 setosa" | |
| ] | |
| }, | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris.sample(n=10, replace=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Select rows by position" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::slice(iris, 10:15)\n", | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "1 4.9 3.1 1.5 0.1 setosa\n", | |
| "2 5.4 3.7 1.5 0.2 setosa\n", | |
| "3 4.8 3.4 1.6 0.2 setosa\n", | |
| "4 4.8 3.0 1.4 0.1 setosa\n", | |
| "5 4.3 3.0 1.1 0.1 setosa\n", | |
| "6 5.8 4.0 1.2 0.2 setosa\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sepal.Length</th>\n", | |
| " <th>Sepal.Width</th>\n", | |
| " <th>Petal.Length</th>\n", | |
| " <th>Petal.Width</th>\n", | |
| " <th>Species</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>4.9</td>\n", | |
| " <td>3.1</td>\n", | |
| " <td>1.5</td>\n", | |
| " <td>0.1</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>5.4</td>\n", | |
| " <td>3.7</td>\n", | |
| " <td>1.5</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>4.8</td>\n", | |
| " <td>3.4</td>\n", | |
| " <td>1.6</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>4.8</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.1</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>4.3</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>1.1</td>\n", | |
| " <td>0.1</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "9 4.9 3.1 1.5 0.1 setosa\n", | |
| "10 5.4 3.7 1.5 0.2 setosa\n", | |
| "11 4.8 3.4 1.6 0.2 setosa\n", | |
| "12 4.8 3.0 1.4 0.1 setosa\n", | |
| "13 4.3 3.0 1.1 0.1 setosa" | |
| ] | |
| }, | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris.iloc[9:14] # 0-based vs 1-based indexing in R" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Select and order by top n entires." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::top_n(storms, 2, date)\n", | |
| "# A tibble: 2 x 4\n", | |
| " storm wind pressure date\n", | |
| " <chr> <int> <int> <date>\n", | |
| "1 Alberto 110 1007 2000-08-03\n", | |
| "2 Arlene 50 1010 1999-06-11\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>storm</th>\n", | |
| " <th>wind</th>\n", | |
| " <th>pressure</th>\n", | |
| " <th>date</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Alberto</td>\n", | |
| " <td>110</td>\n", | |
| " <td>1007</td>\n", | |
| " <td>2000-08-03</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Arlene</td>\n", | |
| " <td>50</td>\n", | |
| " <td>1010</td>\n", | |
| " <td>1999-06-11</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " storm wind pressure date\n", | |
| "0 Alberto 110 1007 2000-08-03\n", | |
| "4 Arlene 50 1010 1999-06-11" | |
| ] | |
| }, | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "storms.sort_values('date', ascending=False).head(n=2)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Select columns by name." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::select(iris, Sepal.Width, Petal.Length, Species)\n", | |
| " Sepal.Width Petal.Length Species\n", | |
| "1 3.5 1.4 setosa\n", | |
| "2 3.0 1.4 setosa\n", | |
| "3 3.2 1.3 setosa\n", | |
| "4 3.1 1.5 setosa\n", | |
| "5 3.6 1.4 setosa\n", | |
| "6 3.9 1.7 setosa\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sepal.Width</th>\n", | |
| " <th>Petal.Length</th>\n", | |
| " <th>Species</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>3.5</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>3.0</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3.2</td>\n", | |
| " <td>1.3</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>3.1</td>\n", | |
| " <td>1.5</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>3.6</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>setosa</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sepal.Width Petal.Length Species\n", | |
| "0 3.5 1.4 setosa\n", | |
| "1 3.0 1.4 setosa\n", | |
| "2 3.2 1.3 setosa\n", | |
| "3 3.1 1.5 setosa\n", | |
| "4 3.6 1.4 setosa" | |
| ] | |
| }, | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris[['Sepal.Width', 'Petal.Length', 'Species']].head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Summarize data into a single row of values." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::summarize(iris, avg = mean(Sepal.Length))\n", | |
| " avg\n", | |
| "1 5.843333\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 36, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.8433333333333337" | |
| ] | |
| }, | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris['Sepal.Length'].mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Apply a summary function to each column" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::summarize_each(iris, dplyr::funs(mean))\n", | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "1 5.843333 3.057333 3.758 1.199333 NA\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Sepal.Length 5.843333\n", | |
| "Sepal.Width 3.057333\n", | |
| "Petal.Length 3.758000\n", | |
| "Petal.Width 1.199333\n", | |
| "dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris.drop(['Species'], axis=1).apply(np.mean)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Count number of rows with each unique value of a variable." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::count(iris, Species)\n", | |
| "# A tibble: 3 x 2\n", | |
| " Species n\n", | |
| " <fctr> <int>\n", | |
| "1 setosa 50\n", | |
| "2 versicolor 50\n", | |
| "3 virginica 50\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Species\n", | |
| "setosa 50\n", | |
| "versicolor 50\n", | |
| "virginica 50\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 42, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris.groupby('Species').size()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::mutate(iris, sepal = Sepal.Length + Sepal.Width)\n", | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species sepal\n", | |
| "1 5.1 3.5 1.4 0.2 setosa 8.6\n", | |
| "2 4.9 3.0 1.4 0.2 setosa 7.9\n", | |
| "3 4.7 3.2 1.3 0.2 setosa 7.9\n", | |
| "4 4.6 3.1 1.5 0.2 setosa 7.7\n", | |
| "5 5.0 3.6 1.4 0.2 setosa 8.6\n", | |
| "6 5.4 3.9 1.7 0.4 setosa 9.3\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 43, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sepal.Length</th>\n", | |
| " <th>Sepal.Width</th>\n", | |
| " <th>Petal.Length</th>\n", | |
| " <th>Petal.Width</th>\n", | |
| " <th>Species</th>\n", | |
| " <th>sepal</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>5.1</td>\n", | |
| " <td>3.5</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " <td>8.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>4.9</td>\n", | |
| " <td>3.0</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " <td>7.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>4.7</td>\n", | |
| " <td>3.2</td>\n", | |
| " <td>1.3</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " <td>7.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4.6</td>\n", | |
| " <td>3.1</td>\n", | |
| " <td>1.5</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " <td>7.7</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5.0</td>\n", | |
| " <td>3.6</td>\n", | |
| " <td>1.4</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>setosa</td>\n", | |
| " <td>8.6</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species sepal\n", | |
| "0 5.1 3.5 1.4 0.2 setosa 8.6\n", | |
| "1 4.9 3.0 1.4 0.2 setosa 7.9\n", | |
| "2 4.7 3.2 1.3 0.2 setosa 7.9\n", | |
| "3 4.6 3.1 1.5 0.2 setosa 7.7\n", | |
| "4 5.0 3.6 1.4 0.2 setosa 8.6" | |
| ] | |
| }, | |
| "execution_count": 43, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris.assign(\n", | |
| " sepal = iris['Sepal.Length'] + iris['Sepal.Width']\n", | |
| ").head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Apply window function to each column." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::mutate_each(iris, funs(min_rank))\n", | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "1 33 126 12 6 1\n", | |
| "2 17 58 12 6 1\n", | |
| "3 10 95 5 6 1\n", | |
| "4 6 84 25 6 1\n", | |
| "5 23 132 12 6 1\n", | |
| "6 47 145 45 42 1\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sepal.Length</th>\n", | |
| " <th>Sepal.Width</th>\n", | |
| " <th>Petal.Length</th>\n", | |
| " <th>Petal.Width</th>\n", | |
| " <th>Species</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>33.0</td>\n", | |
| " <td>126.0</td>\n", | |
| " <td>12.0</td>\n", | |
| " <td>6.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>17.0</td>\n", | |
| " <td>58.0</td>\n", | |
| " <td>12.0</td>\n", | |
| " <td>6.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>10.0</td>\n", | |
| " <td>95.0</td>\n", | |
| " <td>5.0</td>\n", | |
| " <td>6.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>6.0</td>\n", | |
| " <td>84.0</td>\n", | |
| " <td>25.0</td>\n", | |
| " <td>6.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>23.0</td>\n", | |
| " <td>132.0</td>\n", | |
| " <td>12.0</td>\n", | |
| " <td>6.0</td>\n", | |
| " <td>1.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
| "0 33.0 126.0 12.0 6.0 1.0\n", | |
| "1 17.0 58.0 12.0 6.0 1.0\n", | |
| "2 10.0 95.0 5.0 6.0 1.0\n", | |
| "3 6.0 84.0 25.0 6.0 1.0\n", | |
| "4 23.0 132.0 12.0 6.0 1.0" | |
| ] | |
| }, | |
| "execution_count": 49, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris.apply(lambda x: x.rank(method=\"min\")).head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Compute one or more new columns. Drop original columns." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "```r\n", | |
| "> dplyr::transmute(iris, sepal = Sepal.Length + Sepal.Width)\n", | |
| " sepal\n", | |
| "1 8.6\n", | |
| "2 7.9\n", | |
| "3 7.9\n", | |
| "4 7.7\n", | |
| "5 8.6\n", | |
| "6 9.3\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 56, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>sepal</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>8.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>7.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>7.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>7.7</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>8.6</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " sepal\n", | |
| "0 8.6\n", | |
| "1 7.9\n", | |
| "2 7.9\n", | |
| "3 7.7\n", | |
| "4 8.6" | |
| ] | |
| }, | |
| "execution_count": 56, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "iris.assign(\n", | |
| " sepal = iris['Sepal.Length'] + iris['Sepal.Width']\n", | |
| ")[['sepal',]].head()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment