Created
March 27, 2023 15:44
-
-
Save justmarkham/6ff904431a9209c8d9b500b8c3435f46 to your computer and use it in GitHub Desktop.
Data School's Tuesday Tip #9
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "4958ee13", | |
| "metadata": {}, | |
| "source": [ | |
| "# [Tuesday Tip #9](https://tuesday.tips/): Calculate scoring runs in basketball 🏀\n", | |
| "\n", | |
| "Shout out to Josh Devlin's excellent blog post, [Calculating Streaks in Pandas](https://joshdevlin.com/blog/calculate-streaks-in-pandas/), for teaching me this approach!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "dea20756", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "888b33ce", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>team</th>\n", | |
| " <th>points</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>B</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>A</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>A</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " team points\n", | |
| "0 A 2\n", | |
| "1 B 1\n", | |
| "2 B 1\n", | |
| "3 B 3\n", | |
| "4 A 2\n", | |
| "5 A 3\n", | |
| "6 A 2\n", | |
| "7 A 2\n", | |
| "8 B 1\n", | |
| "9 A 3" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df = pd.DataFrame({'team':'A B B B A A A A B A'.split(),\n", | |
| " 'points':[2, 1, 1, 3, 2, 3, 2, 2, 1, 3]})\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "66ad0f45", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>team</th>\n", | |
| " <th>points</th>\n", | |
| " <th>previous_team</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>None</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " <td>A</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " <td>B</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>B</td>\n", | |
| " <td>3</td>\n", | |
| " <td>B</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>B</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>A</td>\n", | |
| " <td>3</td>\n", | |
| " <td>A</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>A</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>A</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " <td>A</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>A</td>\n", | |
| " <td>3</td>\n", | |
| " <td>B</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " team points previous_team\n", | |
| "0 A 2 None\n", | |
| "1 B 1 A\n", | |
| "2 B 1 B\n", | |
| "3 B 3 B\n", | |
| "4 A 2 B\n", | |
| "5 A 3 A\n", | |
| "6 A 2 A\n", | |
| "7 A 2 A\n", | |
| "8 B 1 A\n", | |
| "9 A 3 B" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['previous_team'] = df['team'].shift(1)\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "c7efd8b1", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>team</th>\n", | |
| " <th>points</th>\n", | |
| " <th>previous_team</th>\n", | |
| " <th>start_of_run</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>None</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " <td>A</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " <td>B</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>B</td>\n", | |
| " <td>3</td>\n", | |
| " <td>B</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>B</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>A</td>\n", | |
| " <td>3</td>\n", | |
| " <td>A</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>A</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>A</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " <td>A</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>A</td>\n", | |
| " <td>3</td>\n", | |
| " <td>B</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " team points previous_team start_of_run\n", | |
| "0 A 2 None True\n", | |
| "1 B 1 A True\n", | |
| "2 B 1 B False\n", | |
| "3 B 3 B False\n", | |
| "4 A 2 B True\n", | |
| "5 A 3 A False\n", | |
| "6 A 2 A False\n", | |
| "7 A 2 A False\n", | |
| "8 B 1 A True\n", | |
| "9 A 3 B True" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['start_of_run'] = df['team'] != df['previous_team']\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "id": "9a8df09d", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>team</th>\n", | |
| " <th>points</th>\n", | |
| " <th>previous_team</th>\n", | |
| " <th>start_of_run</th>\n", | |
| " <th>run_id</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>None</td>\n", | |
| " <td>True</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " <td>A</td>\n", | |
| " <td>True</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " <td>B</td>\n", | |
| " <td>False</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>B</td>\n", | |
| " <td>3</td>\n", | |
| " <td>B</td>\n", | |
| " <td>False</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>B</td>\n", | |
| " <td>True</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>A</td>\n", | |
| " <td>3</td>\n", | |
| " <td>A</td>\n", | |
| " <td>False</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>A</td>\n", | |
| " <td>False</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>A</td>\n", | |
| " <td>2</td>\n", | |
| " <td>A</td>\n", | |
| " <td>False</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>B</td>\n", | |
| " <td>1</td>\n", | |
| " <td>A</td>\n", | |
| " <td>True</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>A</td>\n", | |
| " <td>3</td>\n", | |
| " <td>B</td>\n", | |
| " <td>True</td>\n", | |
| " <td>5</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " team points previous_team start_of_run run_id\n", | |
| "0 A 2 None True 1\n", | |
| "1 B 1 A True 2\n", | |
| "2 B 1 B False 2\n", | |
| "3 B 3 B False 2\n", | |
| "4 A 2 B True 3\n", | |
| "5 A 3 A False 3\n", | |
| "6 A 2 A False 3\n", | |
| "7 A 2 A False 3\n", | |
| "8 B 1 A True 4\n", | |
| "9 A 3 B True 5" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['run_id'] = df['start_of_run'].cumsum()\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "e125fdf4", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "run_id team\n", | |
| "1 A 2\n", | |
| "2 B 5\n", | |
| "3 A 9\n", | |
| "4 B 1\n", | |
| "5 A 3\n", | |
| "Name: points, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.groupby(['run_id', 'team'])['points'].sum()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.10.9" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment