Skip to content

Instantly share code, notes, and snippets.

@thomasht86
Created September 4, 2025 05:29
Show Gist options
  • Select an option

  • Save thomasht86/e7749da4d8c9d6be7bde3e3d974ae998 to your computer and use it in GitHub Desktop.

Select an option

Save thomasht86/e7749da4d8c9d6be7bde3e3d974ae998 to your computer and use it in GitHub Desktop.
Create monthly aggregated PyPI downloads plot
# /// script
# requires-python = ">=3.9"
# dependencies = [
# "matplotlib",
# "pandas",
# "requests",
# ]
# ///
# Create a standalone PNG with just the monthly downloads chart
import json
import sys
import matplotlib.pyplot as plt
import pandas as pd
import requests
def download_pypi_data(package_name, timeout=30):
    """Download the "overall" download stats for a package from pypistats.org.

    Args:
        package_name: Package name; interpolated into the API URL as-is.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response.

    On any request failure, HTTP error status, or undecodable body, the
    error is printed and the process exits with status 1.
    """
    url = f"https://pypistats.org/api/packages/{package_name}/overall"
    print(f"Downloading data from: {url}")
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # Depending on the requests version, a non-JSON body can surface as a
        # plain ValueError, which RequestException alone would not catch.
        print(f"Error downloading data: {e}")
        sys.exit(1)
def main():
    """Download stats for the package named on the command line and save them.

    Expects exactly one command-line argument (the package name). Otherwise
    prints a usage message and exits with status 1. The downloaded data is
    written to ``<package_name>_pypidata.json`` in the working directory.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        for usage_line in (
            "Usage: python pypiplot.py <package_name>",
            "Example: python pypiplot.py pyvespa",
        ):
            print(usage_line)
        sys.exit(1)

    (package_name,) = cli_args

    # Fetch the stats from the API.
    stats = download_pypi_data(package_name)

    # Keep a local copy of the raw response for reference.
    data_file = f"{package_name}_pypidata.json"
    with open(data_file, "w") as out:
        json.dump(stats, out, indent=2)
    print(f"Data saved to: {data_file}")
if __name__ == "__main__":
    main()

# Choose the input file: the per-package file (the name main() saves to) when
# a package name was given, otherwise the legacy fixed-name file.
if len(sys.argv) != 2:
    # Fallback for when run without arguments (backward compatibility)
    data_file = "pypidata.json"
    package_name = "pyvespa"
else:
    package_name = sys.argv[1]
    data_file = f"{package_name}_pypidata.json"

with open(data_file, "r") as f:
    j = json.load(f)
# Build a date-sorted frame from the records under the "data" key.
df = pd.DataFrame(j["data"])
if df.empty:
    # With no rows there is no "date" column to parse and nothing to plot.
    print(f"No download data available for {package_name}")
    sys.exit(1)
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values("date")

# Filter from the most recent April 1 on or before the latest data point.
# Always using April 1 of the latest *year* would drop every row whenever the
# newest data falls in January-March, so step back one year in that case.
last_date = df["date"].max()
start_date = pd.Timestamp(year=last_date.year, month=4, day=1)
if start_date > last_date:
    start_date = pd.Timestamp(year=last_date.year - 1, month=4, day=1)
df = df[df["date"] >= start_date]

# Pivot and compute monthly sums for "without_mirrors" (demand proxy)
pv = df.pivot_table(
    index="date", columns="category", values="downloads", aggfunc="sum"
).fillna(0)
if "without_mirrors" not in pv.columns:
    # Keep the downstream column lookup valid even if the category is absent.
    pv["without_mirrors"] = 0
monthly = pv["without_mirrors"].resample("MS").sum()
# Draw the chart in the same minimal, Tufte-inspired style used before.
fig, ax = plt.subplots(figsize=(8, 3), dpi=180)
ax.bar(monthly.index, monthly.values)

# Write each month's total just above its bar.
for month_start, total in zip(monthly.index, monthly.values):
    ax.text(
        month_start,
        total,
        f"{int(total):,}",
        ha="center",
        va="bottom",
        fontsize=8,
    )

# Hide the frame; the y axis is dropped below since the bar labels carry
# the numbers.
for side in ax.spines.values():
    side.set_visible(False)

# One x tick per month, labelled like "Apr 2025".
month_labels = [d.strftime("%b %Y") for d in monthly.index]
ax.set_xticks(monthly.index)
ax.set_xticklabels(month_labels, rotation=0, fontsize=8)
ax.set_yticks([])

ax.set_title(
    f"{package_name} monthly downloads (without mirrors)", loc="left", fontsize=11
)
fig.tight_layout()

out_path = f"{package_name}_monthly_downloads_apr1.png"
fig.savefig(out_path, bbox_inches="tight")
print(f"Chart saved to: {out_path}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment