Last active
August 2, 2024 16:05
-
-
Save camriddell/bfbe9c7425e230bcfe3c246f21c3329f to your computer and use it in GitHub Desktop.
notes-2024-07-16.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "604c1eec", | |
| "metadata": {}, | |
| "source": [ | |
| "# How do I even get started with Data Visualization?\n", | |
| "\n", | |
| "## Agenda\n", | |
| "\n", | |
| "0. Why bother with programmatic visualization at all?\n", | |
| "1. The types of visualizations\n", | |
| "2. The types of data-viz tools\n", | |
| "3. Demo & categorize data-viz tools\n", | |
| "4. Your turn!\n", | |
| "5. Final discussion & demonstrations" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "8d2a131a", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from IPython.display import display\n", | |
| "\n", | |
| "display(\"Let's Get Started\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "ab7f49a3", | |
| "metadata": {}, | |
| "source": [ | |
| "## Google Colab: Install Missing Packages\n", | |
| "\n", | |
| "Google Colab comes with many Python packages pre-installed, but we will need the\n", | |
| "following packages as well." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "8c1d2eb4", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "!pip install flexitext==0.2.0" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "67445e46", | |
| "metadata": {}, | |
| "source": [ | |
| "## Types of Visualizations\n", | |
| "- Exploratory\n", | |
| "- Communicative\n", | |
| "\n", | |
| "## Why even use programming in the first place?\n", | |
| "\n", | |
| "## Types of Data Visualization Tools\n", | |
| "- Non-programmatic\n", | |
| " - low : hand/computer drawing\n", | |
| " - high: GUI chart builders* (excel, tableau, ...)\n", | |
| "- Programmatic\n", | |
| " - low : programmatic drawing\n", | |
| " - high: declarative & convenience" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "eca0d185", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from IPython.display import display\n", | |
| "from pandas import read_csv\n", | |
| "\n", | |
| "anscombe = read_csv('https://raw.githubusercontent.com/dutc-io/agu-data-viz/main/data/anscombe.csv')\n", | |
| "\n", | |
| "display(\n", | |
| " # anscombe.head(),\n", | |
| " # anscombe.tail(),\n", | |
| " anscombe.groupby('id')[['x', 'y']].agg(['mean', 'std']),\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "10a39e18", | |
| "metadata": {}, | |
| "source": [ | |
| "## Common Data Visualization APIs\n", | |
| "\n", | |
| "### Drawing (non-declarative)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "07bd9d3b", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from pandas import read_csv\n", | |
| "from matplotlib.pyplot import subplots, show\n", | |
| "\n", | |
| "anscombe = read_csv('https://raw.githubusercontent.com/dutc-io/agu-data-viz/main/data/anscombe.csv')\n", | |
| "\n", | |
| "# print(anscombe['id'].unique())\n", | |
| "fig, axes = subplots(nrows=2, ncols=2)\n", | |
| "\n", | |
| "for (label, group), ax in zip(anscombe.groupby('id'), axes.flat):\n", | |
| " ax.scatter(group['x'], group['y'], s=24)\n", | |
| " ax.set_title(label)\n", | |
| "\n", | |
| "show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "d2d22ced", | |
| "metadata": {}, | |
| "source": [ | |
| "### High Level - Declarative" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "a83af00d", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from matplotlib.pyplot import show\n", | |
| "from plotnine import ggplot, facet_wrap, geom_point, geom_smooth, labs, theme_minimal, aes\n", | |
| "from pandas import read_csv\n", | |
| "\n", | |
| "anscombe = read_csv('https://raw.githubusercontent.com/dutc-io/agu-data-viz/main/data/anscombe.csv')\n", | |
| "\n", | |
| "fig = (\n", | |
| " ggplot(anscombe, aes(x='x', y='y'))\n", | |
| " + facet_wrap('id', ncol=2)\n", | |
| " + geom_point()\n", | |
| " + labs(x='x variable', y='y variable', title='Anscombe’s Quartet')\n", | |
| " + theme_minimal()\n", | |
| ")\n", | |
| "\n", | |
| "print(fig)\n", | |
| "\n", | |
| "show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "8abd78d1", | |
| "metadata": {}, | |
| "source": [ | |
| "### High Level - Convenience" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "0462567b", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from matplotlib.pyplot import show\n", | |
| "from seaborn import lmplot\n", | |
| "from pandas import read_csv\n", | |
| "\n", | |
| "anscombe = read_csv('https://raw.githubusercontent.com/dutc-io/agu-data-viz/main/data/anscombe.csv')\n", | |
| "\n", | |
| "fg = lmplot(anscombe, x='x', y='y', col='id', col_wrap=2)\n", | |
| "print(fg)\n", | |
| "# show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "d7c1239f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from seaborn.objects import Plot, Dots, PolyFit, Line\n", | |
| "from pandas import read_csv\n", | |
| "\n", | |
| "anscombe = read_csv('https://raw.githubusercontent.com/dutc-io/agu-data-viz/main/data/anscombe.csv')\n", | |
| "\n", | |
| "(\n", | |
| " Plot(anscombe, x='x', y='y')\n", | |
| " .facet(col='id', wrap=2)\n", | |
| " .add(Dots(color='black'))\n", | |
| " .add(Line(), PolyFit(1))\n", | |
| ").show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "c28501cb", | |
| "metadata": {}, | |
| "source": [ | |
| "### Is it worth it to learn multiple data visualization libraries/languages?\n", | |
| "\n", | |
| "## Let’s Make Some Viz!\n", | |
| "\n", | |
| "### Static Data Visualizations (explore, communicative, fun)\n", | |
| "\n", | |
| "**matplotlib**" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "a0080d56", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from matplotlib.pyplot import figure, show\n", | |
| "from matplotlib.patches import Circle, Rectangle\n", | |
| "\n", | |
| "fig = figure(figsize=(6,6))\n", | |
| "\n", | |
| "c = Circle((.5, .8), .1)\n", | |
| "fig.add_artist(c)\n", | |
| "\n", | |
| "## keep the two lines below uncommented if running from Jupyter Notebook/Google Colab\n", | |
| "ax = fig.add_axes([0, 0, 0, 0])\n", | |
| "ax.set_visible(False)\n", | |
| "\n", | |
| "# body_rect = Rectangle((.47, .75), .06, -.5)\n", | |
| "# fig.add_artist(body_rect)\n", | |
| "\n", | |
| "# arms_rect = Rectangle((.3, .55), .4, .05)\n", | |
| "# fig.add_artist(arms_rect)\n", | |
| "\n", | |
| "# lleg_rect = Rectangle((.5, .3), .3, .05, angle=225)\n", | |
| "# fig.add_artist(lleg_rect)\n", | |
| "\n", | |
| "# rleg_rect = Rectangle((.47, .26), .3, .05, angle=-45)\n", | |
| "# fig.add_artist(rleg_rect)\n", | |
| "\n", | |
| "show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "17b7e400", | |
| "metadata": {}, | |
| "source": [ | |
| "**Useful Things to draw for Data Viz**" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "f40441fa", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from numpy import linspace, pi, sin, cos\n", | |
| "from matplotlib.pyplot import figure, show, plot, rc\n", | |
| "\n", | |
| "rc('font', size=16)\n", | |
| "\n", | |
| "xs = linspace(0, 2 * pi)\n", | |
| "\n", | |
| "fig = figure()\n", | |
| " #X Y W H\n", | |
| "ax = fig.add_axes([.3, .3, .5, .5])\n", | |
| "\n", | |
| "ax.plot(xs, sin(xs))\n", | |
| "ax.plot(xs, cos(xs))\n", | |
| "\n", | |
| "ax.set_ylabel('this is my y label')\n", | |
| "fig.supylabel('this my figure y label')\n", | |
| "\n", | |
| "show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "6c0e14c6", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from IPython.display import display\n", | |
| "\n", | |
| "from numpy import linspace, pi, sin, cos\n", | |
| "from matplotlib.pyplot import subplots, show\n", | |
| "\n", | |
| "# fig, ax = subplots()\n", | |
| "\n", | |
| "# fig, axes = subplots(nrows=1, ncols=2)\n", | |
| "fig, axes = subplots(nrows=2, ncols=2)\n", | |
| "# print(axes)\n", | |
| "# print(axes[:, 0])\n", | |
| "\n", | |
| "xs = linspace(0, 2 * pi)\n", | |
| "\n", | |
| "# display(axes)\n", | |
| "display(type(axes))\n", | |
| "\n", | |
| "for ax in axes[0, :]:\n", | |
| " ax.plot(xs, sin(xs))\n", | |
| "\n", | |
| "\n", | |
| "# display(axes, type(axes), axes[0])\n", | |
| "show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "ec54e83b", | |
| "metadata": {}, | |
| "source": [ | |
| "Matplotlib is object oriented\n", | |
| "- Containers → Artists\n", | |
| "- Figure → Axes →\n", | |
| " - X/YAxis\n", | |
| " - ticks\n", | |
| " - ticklabels\n", | |
| " - axis label\n", | |
| " - Primitives\n", | |
| " - Patches (Circle, Rectangle)\n", | |
| " - Line2d\n", | |
| " - Annotations/Text\n", | |
| " - ...\n", | |
| " - Legend\n", | |
| " - Primitives\n", | |
| " - Text (label & title)\n", | |
| "\n", | |
| "- Coordinate Spaces: values → ... → screen\n", | |
| " - Proportional coordinate space (Figure & Axes)\n", | |
| " - Data coordinate space (Axes)\n", | |
| " - Identity/point space (Figure)\n", | |
| "\n", | |
| "**Applied to Star Trader Data - Tracking Ship Failures**" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "639b0358", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from matplotlib.pyplot import subplots, show\n", | |
| "from pandas import read_csv, to_datetime\n", | |
| "\n", | |
| "df = (\n", | |
| " read_csv(\n", | |
| " 'https://raw.githubusercontent.com/dutc-io/agu-data-viz/main/data/failures.csv',\n", | |
| " index_col=['date', 'player','ship'],\n", | |
| " parse_dates=['date'],\n", | |
| " )\n", | |
| " .sort_index()\n", | |
| ")\n", | |
| "\n", | |
| "plot_data = (\n", | |
| " df.pivot_table(index='date', columns='player', values='faults', aggfunc='sum')\n", | |
| " .rolling('90D').mean()\n", | |
| ")\n", | |
| "\n", | |
| "ax = plot_data.plot(legend=False)\n", | |
| "for line in ax.lines:\n", | |
| " x, y = line.get_data()\n", | |
| " ax.annotate(\n", | |
| " line.get_label(), xy=(x[-1], y[-1]),\n", | |
| " xytext=(5, 0), textcoords='offset points',\n", | |
| " color=line.get_color()\n", | |
| " )\n", | |
| "\n", | |
| "show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "ee66b5fd", | |
| "metadata": {}, | |
| "source": [ | |
| "### Interactive Data Visualizations (explore, fun)\n", | |
| "\n", | |
| "- System/function Exploration\n", | |
| "- Data Exploration\n", | |
| "- System Observability\n", | |
| "\n", | |
| "*Limited Communicative Ability unless STRONGLY guided*\n", | |
| "\n", | |
| "**bokeh & panel** - a powerful way to share your data on the web!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "04a3f3af", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from panel import extension\n", | |
| "\n", | |
| "# Increase font size of widgets\n", | |
| "css = '''\n", | |
| ".bk-root .bk, .bk-root .bk:before, .bk-root .bk:after {\n", | |
| " font-size: 110%;\n", | |
| "}\n", | |
| "'''\n", | |
| "extension(raw_css=[css]) # Connect `panel` application to notebook runtime" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "b35b4016", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from panel import Column\n", | |
| "from bokeh.plotting import figure\n", | |
| "from bokeh.models import ColumnDataSource\n", | |
| "\n", | |
| "from numpy import linspace, zeros\n", | |
| "from scipy.stats import skewnorm\n", | |
| "\n", | |
| "loc = 5\n", | |
| "scale = 1\n", | |
| "skew = 0\n", | |
| "\n", | |
| "cds = ColumnDataSource({\n", | |
| " 'x': linspace(-10, 10, 500),\n", | |
| " 'y1': zeros(shape=500),\n", | |
| "})\n", | |
| "cds.data['y2'] = skewnorm(loc=loc, scale=scale, a=skew).pdf(cds.data['x'])\n", | |
| "\n", | |
| "def update_plot(loc, scale, skew):\n", | |
| " cds.data['y2'] = skewnorm.pdf(x=cds.data['x'], a=skew, loc=loc, scale=scale)\n", | |
| "\n", | |
| "p = figure(y_range=(0, .5), width=500, height=300)\n", | |
| "p.varea(x='x', y1='y1', y2='y2', source=cds, alpha=.3)\n", | |
| "p.line(x='x', y='y2', source=cds, line_width=4)\n", | |
| "p.yaxis.major_label_text_font_size = \"20pt\"\n", | |
| "p.xaxis.major_label_text_font_size = \"20pt\"\n", | |
| "\n", | |
| "Column(p).servable()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "119af6e3", | |
| "metadata": {}, | |
| "source": [ | |
| "adding interactivity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "320bab7f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from panel import Column, bind\n", | |
| "from panel.widgets import FloatSlider\n", | |
| "from bokeh.plotting import figure\n", | |
| "from bokeh.models import ColumnDataSource\n", | |
| "\n", | |
| "from numpy import linspace, zeros\n", | |
| "from scipy.stats import skewnorm\n", | |
| "\n", | |
| "loc = FloatSlider(name='mean', value=0, start=-10, end=10)\n", | |
| "scale = FloatSlider(name='std. dev', value=1, start=.1, end=10)\n", | |
| "skew = FloatSlider(name='skew', value=0, start=-6, end=6)\n", | |
| "\n", | |
| "# Data abstraction\n", | |
| "cds = ColumnDataSource({\n", | |
| " 'x': linspace(-10, 10, 500),\n", | |
| " 'y1': zeros(shape=500),\n", | |
| " 'y2': zeros(shape=500),\n", | |
| "})\n", | |
| "\n", | |
| "def update_plot(loc, scale, skew):\n", | |
| " cds.data['y2'] = skewnorm.pdf(x=cds.data['x'], a=skew, loc=loc, scale=scale)\n", | |
| "\n", | |
| "p = figure(y_range=(0, .5), width=500, height=300)\n", | |
| "p.varea(x='x', y1='y1', y2='y2', source=cds, alpha=.3)\n", | |
| "p.line(x='x', y='y2', source=cds, line_width=4)\n", | |
| "p.yaxis.major_label_text_font_size = \"20pt\"\n", | |
| "p.xaxis.major_label_text_font_size = \"20pt\"\n", | |
| "\n", | |
| "Column(\n", | |
| " Column(loc, scale, skew), # render widgets\n", | |
| " p, # render plot\n", | |
| " bind(update_plot, skew=skew, loc=loc, scale=scale) # bind widgets to `update_plot` function\n", | |
| ").servable()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "68868054", | |
| "metadata": {}, | |
| "source": [ | |
| "**Applied to our Star Trader Data - Planetary Weather**" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "8d5425bc", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from pandas import read_csv\n", | |
| "\n", | |
| "df = (\n", | |
| " read_csv(\n", | |
| " 'https://raw.githubusercontent.com/dutc-io/agu-data-viz/main/data/weather_york.csv',\n", | |
| " usecols=['date', 'temperature_max', 'temperature_min'],\n", | |
| " parse_dates=['date'],\n", | |
| " index_col='date'\n", | |
| " )\n", | |
| ").loc['1990':'2000']\n", | |
| "\n", | |
| "# Long timeseries Zoom\n", | |
| "from bokeh.models import RangeTool\n", | |
| "from bokeh.plotting import figure, ColumnDataSource\n", | |
| "from panel import Column\n", | |
| "from pandas import to_datetime, DateOffset\n", | |
| "\n", | |
| "cds = ColumnDataSource(df)\n", | |
| "p = figure(\n", | |
| " width=1000, height=500, x_axis_type='datetime', y_range=[0, 110],\n", | |
| " x_range=[df.index.min(), df.index.min() + DateOffset(years=1, days=-1)],\n", | |
| ")\n", | |
| "p.vbar(x='date', bottom='temperature_min', top='temperature_max', source=cds, width=(24 * 60 * 60 * 1000))\n", | |
| "\n", | |
| "range_p = figure(\n", | |
| " width=p.width, height=p.height // 2, x_axis_type='datetime', y_range=[0, 110],\n", | |
| " x_range=[df.index.min(), df.index.max()],\n", | |
| ")\n", | |
| "range_p.vbar(x='date', bottom='temperature_min', top='temperature_max', source=cds, width=(24 * 60 * 60 * 1000))\n", | |
| "\n", | |
| "rangetool = RangeTool(x_range=p.x_range)\n", | |
| "range_p.add_tools(rangetool)\n", | |
| "\n", | |
| "Column(p, range_p).servable()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "ee6be309", | |
| "metadata": {}, | |
| "source": [ | |
| "**Looking for more resources to help get you started?** - try recreating examples from the documentation for these tools\n", | |
| "(see **Useful Links** below), or follow along with a tutorial for a tool of your choice!\n", | |
| "\n", | |
| "## Useful Links\n", | |
| "\n", | |
| "### Conceptual Guides\n", | |
| "\n", | |
| "[Data to Viz](https://www.data-to-viz.com/) provides a flowchart-style guide to the\n", | |
| "types of charts one can create given various types of data.\n", | |
| "\n", | |
| "[R Graph Gallery](https://r-graph-gallery.com/) and its counterpart [Python Graph Gallery](https://python-graph-gallery.com/) provide a vast number of high-quality charts written in either R or Python.\n", | |
| "\n", | |
| "### Tools\n", | |
| "\n", | |
| "**Matplotlib**\n", | |
| "- Tutorial: https://matplotlib.org/stable/tutorials/index.html\n", | |
| "- Cheatsheets: https://matplotlib.org/cheatsheets/\n", | |
| "- Examples: https://matplotlib.org/stable/gallery/index.html\n", | |
| "\n", | |
| "**Plotnine**\n", | |
| "- Tutorial: http://r-statistics.co/Complete-Ggplot2-Tutorial-Part1-With-R-Code.html (note that plotnine does not have official tutorials, so please refer to ggplot2)\n", | |
| "- Examples: https://plotnine.readthedocs.io/en/stable/gallery.html#\n", | |
| "\n", | |
| "**Bokeh**\n", | |
| "- Tutorial: https://docs.bokeh.org/en/latest/docs/first_steps.html#first-steps\n", | |
| "- Examples: https://docs.bokeh.org/en/latest/docs/gallery.html#gallery\n", | |
| "\n", | |
| "**Seaborn**\n", | |
| "- Tutorial: https://seaborn.pydata.org/tutorial/introduction.html\n", | |
| "- Examples: https://seaborn.pydata.org/examples/index.html\n", | |
| "\n", | |
| "## Your Turn…\n", | |
| "\n", | |
| "Take any of the tools we have discussed today and make 1 static or 1 interactive (web) chart.\n", | |
| "Remember: before you start, think about what you want to create. Something exploratory? Something communicative?\n", | |
| "\n", | |
| "### Suggested Starting Points\n", | |
| "\n", | |
| "*static* using the data/weather_york.csv dataset\n", | |
| "- Explore: visualize as much of the data as possible, do you notice any trends?\n", | |
| "- Communicate: Select one interesting feature to highlight and create a chart\n", | |
| " that communicates that feature." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "7aa0b19a", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from warnings import simplefilter\n", | |
| "simplefilter('ignore')\n", | |
| "from pandas import read_csv\n", | |
| "\n", | |
| "df = (\n", | |
| " read_csv(\n", | |
| " 'https://raw.githubusercontent.com/dutc-io/agu-data-viz/main/data/weather_york.csv',\n", | |
| " usecols=['date', 'temperature_max', 'temperature_min'],\n", | |
| " parse_dates=['date'],\n", | |
| " )\n", | |
| ")\n", | |
| "\n", | |
| "from matplotlib.pyplot import subplots, show\n", | |
| "from pandas import DateOffset\n", | |
| "\n", | |
| "interest = 2022\n", | |
| "\n", | |
| "fig, ax = subplots()\n", | |
| "for year, group in df.groupby(df['date'].dt.year):\n", | |
| " normed_year = group['date'] + DateOffset(year=1900)\n", | |
| " if year == interest:\n", | |
| " ax.plot(normed_year, group['temperature_max'], alpha=1, lw=1, color='tab:red')\n", | |
| " else:\n", | |
| " ax.plot(normed_year, group['temperature_max'], alpha=.1, lw=.5, color='gray')\n", | |
| "\n", | |
| "from matplotlib.dates import MonthLocator, DateFormatter\n", | |
| "from matplotlib.ticker import NullFormatter\n", | |
| "\n", | |
| "ax.xaxis.set_major_locator(MonthLocator())\n", | |
| "ax.xaxis.set_major_formatter(NullFormatter())\n", | |
| "ax.xaxis.set_minor_locator(MonthLocator(bymonthday=15))\n", | |
| "ax.xaxis.set_minor_formatter(DateFormatter('%b'))\n", | |
| "ax.xaxis.set_tick_params(which='both', length=0)\n", | |
| "\n", | |
| "ax.margins(x=0)\n", | |
| "\n", | |
| "from itertools import pairwise\n", | |
| "for i, (left, right) in enumerate(pairwise(ax.get_xticks())):\n", | |
| " if i % 2 == 0:\n", | |
| " ax.axvspan(left, right, 0, 1, color='gainsboro', alpha=.2)\n", | |
| "\n", | |
| "show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "b9ae3180", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from warnings import simplefilter\n", | |
| "simplefilter('ignore')\n", | |
| "from pandas import read_csv\n", | |
| "\n", | |
| "df = (\n", | |
| " read_csv(\n", | |
| " 'https://raw.githubusercontent.com/dutc-io/agu-data-viz/main/data/weather_york.csv',\n", | |
| " usecols=['date', 'temperature_max', 'temperature_min'],\n", | |
| " parse_dates=['date'],\n", | |
| " )\n", | |
| " .assign(\n", | |
| " year= lambda d: d['date'].dt.year,\n", | |
| " month=lambda d: d['date'].dt.month,\n", | |
| " day= lambda d: d['date'].dt.day,\n", | |
| " )\n", | |
| ")\n", | |
| "\n", | |
| "ref_years = (1960, 1990)\n", | |
| "show_year = 2002\n", | |
| "\n", | |
| "ref_raw = df.loc[lambda d: d['year'].between(*ref_years)]\n", | |
| "show_raw = df.loc[lambda d: d['year'] == show_year]\n", | |
| "\n", | |
| "plot_df = (\n", | |
| " ref_raw.groupby(['month', 'day'])['temperature_max'].agg(\n", | |
| " ref_ub =lambda g: g.quantile(.95),\n", | |
| " ref_lb =lambda g: g.quantile(.05),\n", | |
| " ref_mean='mean',\n", | |
| " )\n", | |
| " .merge(show_raw, left_index=True, right_on=['month', 'day'])\n", | |
| " .assign(\n", | |
| " distance=lambda d: d['temperature_max'] - d['ref_mean'],\n", | |
| " norm_distance=lambda d: d['distance'] / d['distance'].abs().max()\n", | |
| " )\n", | |
| ")\n", | |
| "\n", | |
| "from matplotlib.pyplot import subplots, show\n", | |
| "from matplotlib.colors import TwoSlopeNorm\n", | |
| "\n", | |
| "fig, ax = subplots(gridspec_kw={'top': .8, 'bottom': .2})\n", | |
| "lb_line, = ax.plot('date', 'ref_lb', color='k', data=plot_df, ls='--', lw=.7, zorder=6)\n", | |
| "ub_line, = ax.plot('date', 'ref_ub', color='k', data=plot_df, ls='--', lw=.7, zorder=6)\n", | |
| "context_pc = ax.fill_between(\n", | |
| " 'date', 'ref_lb', 'ref_ub', data=plot_df, color='gainsboro', ec='none', zorder=5\n", | |
| ")\n", | |
| "avg_line = ax.plot('date', 'ref_mean', data=plot_df, color='k', lw=1, zorder=6)\n", | |
| "\n", | |
| "raw_pc = ax.fill_between(\n", | |
| " 'date', 'temperature_max', 'ref_mean', data=plot_df, fc='none', ec=ax.get_facecolor(), zorder=5, lw=.1\n", | |
| ")\n", | |
| "\n", | |
| "arr = raw_pc.get_paths()[0].vertices\n", | |
| "(x0, y0), (x1, y1) = arr.min(axis=0), arr.max(axis=0)\n", | |
| "\n", | |
| "gradient = ax.imshow(\n", | |
| " plot_df['norm_distance'].to_numpy().reshape(1, -1),\n", | |
| " extent=[x0, x1, y0, y1],\n", | |
| " aspect='auto',\n", | |
| " cmap='RdBu_r',\n", | |
| " norm=TwoSlopeNorm(0),\n", | |
| " interpolation='bicubic',\n", | |
| " zorder=5,\n", | |
| ")\n", | |
| "\n", | |
| "gradient.set_clip_path(raw_pc.get_paths()[0], transform=ax.transData)\n", | |
| "\n", | |
| "from matplotlib.dates import MonthLocator, DateFormatter\n", | |
| "from matplotlib.ticker import NullFormatter\n", | |
| "\n", | |
| "ax.xaxis.set_major_locator(MonthLocator())\n", | |
| "ax.xaxis.set_major_formatter(NullFormatter())\n", | |
| "ax.xaxis.set_minor_locator(MonthLocator(bymonthday=15))\n", | |
| "ax.xaxis.set_minor_formatter(DateFormatter('%b'))\n", | |
| "ax.xaxis.set_tick_params(which='both', length=0)\n", | |
| "\n", | |
| "ax.margins(x=0)\n", | |
| "\n", | |
| "from itertools import pairwise\n", | |
| "for i, (left, right) in enumerate(pairwise(ax.get_xticks())):\n", | |
| " if i % 2 == 0:\n", | |
| " ax.axvspan(left, right, 0, 1, color='gainsboro', alpha=.2)\n", | |
| "\n", | |
| "\n", | |
| "from flexitext import flexitext\n", | |
| "\n", | |
| "flexitext(\n", | |
| " s=(\n", | |
| " f'<size:medium,weight:bold>{show_year}</> <color:tab:red>Hot</> and <color:tab:blue>Cold</>'\n", | |
| " ' Temperature deviations from historical average in planet York'\n", | |
| " ),\n", | |
| " x=0, y=1.01,\n", | |
| " va='bottom',\n", | |
| " ax=ax\n", | |
| ")\n", | |
| "\n", | |
| "show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "5ed04a51", | |
| "metadata": {}, | |
| "source": [ | |
| "*interactive* Using the failures.csv dataset,\n", | |
| " - Plot the total failures for each 'player' for each day.\n", | |
| " - Apply a smoothing factor (rolling average) of 90 days prior to plotting the data.\n", | |
| " - Create a slider widget that controls the number of days involved in the smoothing.\n", | |
| " - e.g. this slider should allow me to apply 0 days of smoothing all the way up to 90 days of smoothing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "15b1a115", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from matplotlib.pyplot import subplots, show\n", | |
| "from pandas import read_csv, to_datetime\n", | |
| "\n", | |
| "df = (\n", | |
| " read_csv(\n", | |
| " 'https://raw.githubusercontent.com/dutc-io/agu-data-viz/main/data/failures.csv',\n", | |
| " parse_dates=['date'],\n", | |
| " )\n", | |
| ")\n", | |
| "\n", | |
| "print(df.head())" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "jupytext": { | |
| "cell_metadata_filter": "-all", | |
| "main_language": "python", | |
| "notebook_metadata_filter": "-all" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment