Skip to content

Instantly share code, notes, and snippets.

@qgallouedec
Last active October 28, 2024 10:37
Show Gist options
  • Select an option

  • Save qgallouedec/86f9a53111b9cca7194f4d845dea7949 to your computer and use it in GitHub Desktop.

Select an option

Save qgallouedec/86f9a53111b9cca7194f4d845dea7949 to your computer and use it in GitHub Desktop.
from datetime import datetime, timedelta, timezone

import pandas as pd
from datasets import load_dataset
# Helper function to filter data based on time range
def filter_date_range(df, date_col, start_date, end_date):
    """Return the rows of ``df`` whose ``date_col`` lies in ``[start_date, end_date)``.

    The interval is half-open: ``start_date`` is included and ``end_date`` is
    excluded, so back-to-back windows never count the same row twice.

    Args:
        df: DataFrame to filter.
        date_col: Name of the column holding the values to compare.
        start_date: Inclusive lower bound; must be comparable with the column.
        end_date: Exclusive upper bound; must be comparable with the column.

    Returns:
        The subset of ``df`` selected by the boolean mask.
    """
    dates = df[date_col]
    in_window = (dates >= start_date) & (dates < end_date)
    return df[in_window]
# Get the current time as a *naive UTC* timestamp. The stargazer and model
# timestamps are turned into naive UTC values with ``tz_convert(None)``
# elsewhere in this script, so a naive local-time ``now`` would shift every
# window by the host machine's UTC offset.
now = datetime.now(timezone.utc).replace(tzinfo=None)
# Define the time periods for short-term (2 weeks)
one_week_ago = now - timedelta(weeks=1)
two_weeks_ago = now - timedelta(weeks=2)
three_weeks_ago = now - timedelta(weeks=3)
four_weeks_ago = now - timedelta(weeks=4)
five_weeks_ago = now - timedelta(weeks=5)
# Define the time periods for long-term (4 months).
# A "month" is approximated as 4 weeks, so these are 16 and 32 weeks back.
four_months_ago = now - timedelta(weeks=16)
eight_months_ago = now - timedelta(weeks=32)
# Load datasets: each config of the metrics repository becomes one DataFrame
_METRICS_REPO = "qgallouedec/trl-metrics"


def _load_table(config_name):
    """Fetch the ``train`` split of one config and convert it to pandas."""
    return load_dataset(_METRICS_REPO, config_name, split="train").to_pandas()


issues_df = _load_table("issues")
comments_df = _load_table("issue_comments")
stargazers_df = _load_table("stargazers")
downloads_df = _load_table("pypi-downloads")
models_df = _load_table("models")

# Strip the timezone from the tz-aware timestamp columns so they can be
# compared with the naive window bounds (tz_convert(None) yields naive UTC)
for _frame, _column in ((stargazers_df, "starred_at"), (models_df, "created_at")):
    _frame[_column] = _frame[_column].dt.tz_convert(None)
def calculate_issues_stats(start_date, end_date):
    """Count issues created in ``[start_date, end_date)`` not answered within a week.

    An issue counts as unanswered when it has no comment at all, or when its
    earliest comment was posted more than 7 days after the issue was created.

    Args:
        start_date: Inclusive lower bound on the issue's ``created_at``.
        end_date: Exclusive upper bound on the issue's ``created_at``.

    Returns:
        int: Number of such issues in the window.
    """
    # Filter issues in the given period
    issues = filter_date_range(issues_df, "created_at", start_date, end_date)
    # Earliest comment timestamp per issue. min() is used instead of taking the
    # first row of each group so the result does not depend on the row order
    # of comments_df.
    first_comments = comments_df.groupby("issue_number")["created_at"].min().reset_index()
    # Left merge keeps issues without any comment; their first-comment time is NaT
    issues = issues.merge(
        first_comments,
        left_on="number",
        right_on="issue_number",
        how="left",
        suffixes=("", "_first_comment"),
    )
    # Response time in days (NaN when the issue has no comment)
    elapsed = issues["created_at_first_comment"] - issues["created_at"]
    response_days = elapsed.dt.total_seconds() / (3600 * 24)
    # Count issues not answered within a week
    unanswered = response_days.isna() | (response_days > 7)
    return int(unanswered.sum())
def calculate_stargazers_stats(start_date, end_date):
    """Return the number of ``stargazers_df`` rows with ``starred_at`` in ``[start_date, end_date)``."""
    new_stars = filter_date_range(stargazers_df, "starred_at", start_date, end_date)
    return len(new_stars)
def calculate_downloads_stats(start_date, end_date):
    """Sum ``num_downloads`` over the days in ``[start_date.date(), end_date.date())``.

    The bounds are reduced to calendar dates before comparing with the ``day``
    column, so the day containing ``end_date`` is excluded entirely.

    Args:
        start_date: ``datetime`` whose date is the inclusive first day.
        end_date: ``datetime`` whose date is the exclusive last day.

    Returns:
        The summed ``num_downloads`` of the selected rows.
    """
    # Reuse the shared half-open-interval helper instead of repeating the mask
    in_window = filter_date_range(downloads_df, "day", start_date.date(), end_date.date())
    return in_window["num_downloads"].sum()
def calculate_models_stats(start_date, end_date):
    """Return the number of ``models_df`` rows with ``created_at`` in ``[start_date, end_date)``."""
    new_models = filter_date_range(models_df, "created_at", start_date, end_date)
    return len(new_models)
def calculate_model_likes_stats():
    """Return the sum of the ``likes`` column over every row of ``models_df``.

    This is an all-time total: no date window is applied.
    """
    return models_df["likes"].sum()
### Short-Term (2 Weeks) Stats ###
short_term_window = (two_weeks_ago, now)
short_term_window_prev = (four_weeks_ago, two_weeks_ago)
# The issue windows end one week before the other short-term windows,
# presumably so every issue in them has had a full week to get an answer.
unanswered_issues_short_term = calculate_issues_stats(three_weeks_ago, one_week_ago)
unanswered_issues_short_term_prev = calculate_issues_stats(five_weeks_ago, three_weeks_ago)
stargazers_short_term = calculate_stargazers_stats(*short_term_window)
stargazers_short_term_prev = calculate_stargazers_stats(*short_term_window_prev)
downloads_short_term = calculate_downloads_stats(*short_term_window)
downloads_short_term_prev = calculate_downloads_stats(*short_term_window_prev)
models_short_term = calculate_models_stats(*short_term_window)
models_short_term_prev = calculate_models_stats(*short_term_window_prev)

### Long-Term (4 Months) Stats ###
long_term_window = (four_months_ago, now)
long_term_window_prev = (eight_months_ago, four_months_ago)
# NOTE(review): unlike the short-term issue window, this one runs up to `now`,
# so issues opened during the last week are included - confirm this is intended.
unanswered_issues_long_term = calculate_issues_stats(*long_term_window)
unanswered_issues_long_term_prev = calculate_issues_stats(*long_term_window_prev)
stargazers_long_term = calculate_stargazers_stats(*long_term_window)
stargazers_long_term_prev = calculate_stargazers_stats(*long_term_window_prev)
downloads_long_term = calculate_downloads_stats(*long_term_window)
downloads_long_term_prev = calculate_downloads_stats(*long_term_window_prev)
models_long_term = calculate_models_stats(*long_term_window)
models_long_term_prev = calculate_models_stats(*long_term_window_prev)

### Total Stats ###
# All-time figures: every row of each table, no date filtering
stargazers_total = len(stargazers_df)
downloads_total = downloads_df["num_downloads"].sum()
models_total = len(models_df)
### Calculate Relative Changes ###
def calculate_relative_change(current, previous):
    """Return the change from ``previous`` to ``current`` as a percentage.

    Args:
        current: Value for the current period.
        previous: Value for the previous period (the baseline).

    Returns:
        ``(current - previous) / previous * 100`` when ``previous`` is non-zero.
        With a zero baseline the ratio is undefined: ``0.0`` is returned when
        ``current`` is also zero (nothing changed), otherwise an infinity
        carrying the sign of ``current``.
    """
    if previous == 0:
        if current == 0:
            # 0 -> 0 is "no change", not an infinite increase
            return 0.0
        return float("inf") if current > 0 else float("-inf")
    return (current - previous) / previous * 100
# Percentage change of each metric versus its previous period, computed in
# one pass; the order of the targets matches the order of the pairs below.
(
    unanswered_issues_short_term_change,
    stargazers_short_term_change,
    downloads_short_term_change,
    models_short_term_change,
    unanswered_issues_long_term_change,
    stargazers_long_term_change,
    downloads_long_term_change,
    models_long_term_change,
) = (
    calculate_relative_change(current_value, previous_value)
    for current_value, previous_value in (
        (unanswered_issues_short_term, unanswered_issues_short_term_prev),
        (stargazers_short_term, stargazers_short_term_prev),
        (downloads_short_term, downloads_short_term_prev),
        (models_short_term, models_short_term_prev),
        (unanswered_issues_long_term, unanswered_issues_long_term_prev),
        (stargazers_long_term, stargazers_long_term_prev),
        (downloads_long_term, downloads_long_term_prev),
        (models_long_term, models_long_term_prev),
    )
)
# Trend markers shown in front of each report line: red when the metric moved
# in the undesirable direction, green otherwise (including no change).
# For unanswered issues an increase is bad; for stars, downloads and models a
# decrease is bad.
unanswered_issues_short_term_emoji = "🔴" if unanswered_issues_short_term_change > 0 else "🟢"
stargazers_short_term_emoji = "🔴" if stargazers_short_term_change < 0 else "🟢"
downloads_short_term_emoji = "🔴" if downloads_short_term_change < 0 else "🟢"
models_short_term_emoji = "🔴" if models_short_term_change < 0 else "🟢"
unanswered_issues_long_term_emoji = "🔴" if unanswered_issues_long_term_change > 0 else "🟢"
stargazers_long_term_emoji = "🔴" if stargazers_long_term_change < 0 else "🟢"
downloads_long_term_emoji = "🔴" if downloads_long_term_change < 0 else "🟢"
models_long_term_emoji = "🔴" if models_long_term_change < 0 else "🟢"
# Per-period like statistics are not computed by this script; every such field
# is the literal placeholder "XXX" and is printed as-is in the report.
model_likes_short_term_emoji = model_likes_short_term = "XXX"
model_likes_short_term_prev = model_likes_short_term_change = "XXX"
model_likes_long_term_emoji = model_likes_long_term = "XXX"
model_likes_long_term_prev = model_likes_long_term_change = "XXX"
# Only the all-time total of likes is actually calculated
model_likes_total = calculate_model_likes_stats()
### Generate the Report ###
# Despite its name, `today` holds only the year and month ("YYYY-MM").
today = now.strftime("%Y-%m")
# The report is one f-string using :name: emoji shortcodes and *asterisk* emphasis.
# Short-term downloads are printed in thousands (K), long-term and total downloads
# in millions (M); numeric changes are printed with an explicit sign and two decimals.
# The model-likes fields are interpolated without a format spec because they are
# placeholder strings rather than numbers (except the total).
report = f"""
:trl:
*Monthly [TRL](https://github.com/huggingface/trl) Metrics Report*
:date: {today}
*:ultrafast_parrot: Short-Term* (Last 2 Weeks)
- {downloads_short_term_emoji} PyPI downloads: {downloads_short_term / 1_000:.1f}K (prev: {downloads_short_term_prev / 1_000:.1f}K, change: {downloads_short_term_change:+.2f}%)
- {stargazers_short_term_emoji} New GH :star:: {stargazers_short_term} (prev: {stargazers_short_term_prev}, change: {stargazers_short_term_change:+.2f}%)
- {models_short_term_emoji} New models on :hugging_face: Hub: {models_short_term} (prev: {models_short_term_prev}, change: {models_short_term_change:+.2f}%)
- {model_likes_short_term_emoji} New likes on :hugging_face: Hub: {model_likes_short_term} (prev: {model_likes_short_term_prev}, change: {model_likes_short_term_change}%)
- {unanswered_issues_short_term_emoji} Issues not answered within a week: {unanswered_issues_short_term} (prev: {unanswered_issues_short_term_prev}, change: {unanswered_issues_short_term_change:+.2f}%)
*:60fps_parrot: Long-Term* (Last 4 Months)
- {downloads_long_term_emoji} PyPI downloads: {downloads_long_term / 1_000_000:.1f}M (prev: {downloads_long_term_prev/1_000_000:.1f}M, change: {downloads_long_term_change:+.2f}%, total: {downloads_total/1_000_000:.1f}M)
- {stargazers_long_term_emoji} New GH :star:: {stargazers_long_term} (prev: {stargazers_long_term_prev}, change: {stargazers_long_term_change:+.2f}%, total: {stargazers_total})
- {models_long_term_emoji} New models on :hugging_face: Hub: {models_long_term} (prev: {models_long_term_prev}, change: {models_long_term_change:+.2f}%, total: {models_total})
- {model_likes_long_term_emoji} New likes on :hugging_face: Hub: {model_likes_long_term} (prev: {model_likes_long_term_prev}, change: {model_likes_long_term_change}%, total: {model_likes_total})
- {unanswered_issues_long_term_emoji} Issues not answered within a week: {unanswered_issues_long_term} (prev: {unanswered_issues_long_term_prev}, change: {unanswered_issues_long_term_change:+.2f}%)
Further comments and analysis :arrow_down:
[Code for generating this report](https://gist.github.com/qgallouedec/86f9a53111b9cca7194f4d845dea7949)
"""
# Write the finished report to standard output
print(report)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment