Last active
October 28, 2024 10:37
-
-
Save qgallouedec/86f9a53111b9cca7194f4d845dea7949 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime, timedelta, timezone

import pandas as pd
from datasets import load_dataset
# Helper function to filter data based on time range
def filter_date_range(df, date_col, start_date, end_date):
    """Return the rows of ``df`` whose ``date_col`` lies in ``[start_date, end_date)``.

    Args:
        df: DataFrame to filter.
        date_col: Name of the column holding the values to compare.
        start_date: Inclusive lower bound.
        end_date: Exclusive upper bound.

    Returns:
        The matching rows, with their original index labels.
    """
    values = df[date_col]
    not_too_early = values >= start_date
    not_too_late = values < end_date
    return df.loc[not_too_early & not_too_late]
# Get the current time as a *naive UTC* datetime. The timestamp columns it is
# compared against are naive as well, and tz_convert(None) converts to UTC
# before dropping the timezone, so using local wall-clock time here would shift
# every window by the machine's UTC offset.
now = datetime.now(timezone.utc).replace(tzinfo=None)

# Define the time periods for short-term (2 weeks)
one_week_ago = now - timedelta(weeks=1)
two_weeks_ago = now - timedelta(weeks=2)
three_weeks_ago = now - timedelta(weeks=3)
four_weeks_ago = now - timedelta(weeks=4)
five_weeks_ago = now - timedelta(weeks=5)

# Define the time periods for long-term (4 months, approximated as 16 weeks)
four_months_ago = now - timedelta(weeks=16)
eight_months_ago = now - timedelta(weeks=32)
# Load datasets
METRICS_REPO = "qgallouedec/trl-metrics"


def _load_metrics(config_name):
    """Return the ``train`` split of one ``METRICS_REPO`` configuration as a DataFrame."""
    return load_dataset(METRICS_REPO, config_name, split="train").to_pandas()


issues_df = _load_metrics("issues")
comments_df = _load_metrics("issue_comments")
stargazers_df = _load_metrics("stargazers")
downloads_df = _load_metrics("pypi-downloads")
models_df = _load_metrics("models")

# Convert stargazers_df "starred_at" and models_df "created_at" to naive datetime
# (tz_convert(None) converts to UTC, then removes the timezone information)
for _frame, _column in ((stargazers_df, "starred_at"), (models_df, "created_at")):
    _frame[_column] = _frame[_column].dt.tz_convert(None)
def calculate_issues_stats(start_date, end_date):
    """Count issues opened in ``[start_date, end_date)`` with no comment within 7 days.

    Args:
        start_date: Inclusive lower bound on the issue's ``created_at``.
        end_date: Exclusive upper bound on the issue's ``created_at``.

    Returns:
        int: Number of issues in the window that have no comment at all, or
        whose earliest comment was posted more than 7 days after the issue
        was created.
    """
    # Filter issues in the given period
    issues = filter_date_range(issues_df, "created_at", start_date, end_date)

    # Earliest comment timestamp per issue. Taking the minimum, rather than the
    # first row of each group, stays correct even when comments_df is not
    # sorted chronologically.
    first_comments = comments_df.groupby("issue_number")["created_at"].min().reset_index()

    # Left merge keeps issues without any comment; their first-comment timestamp is NaT
    issues = issues.merge(
        first_comments,
        left_on="number",
        right_on="issue_number",
        how="left",
        suffixes=("", "_first_comment"),
    )

    # Calculate the response time in days (NaN when the issue has no comment)
    elapsed = issues["created_at_first_comment"] - issues["created_at"]
    response_time_days = elapsed.dt.total_seconds() / (3600 * 24)

    # Count issues not answered within a week
    unanswered = response_time_days.isna() | (response_time_days > 7)
    return int(unanswered.sum())
def calculate_stargazers_stats(start_date, end_date):
    """Return how many rows of ``stargazers_df`` have ``starred_at`` in ``[start_date, end_date)``."""
    in_window = filter_date_range(stargazers_df, "starred_at", start_date, end_date)
    return in_window.shape[0]
def calculate_downloads_stats(start_date, end_date):
    """Sum ``num_downloads`` over the rows of ``downloads_df`` inside the window.

    The ``day`` column is compared against calendar dates, so both bounds are
    reduced to their date part: ``start_date.date()`` is inclusive and
    ``end_date.date()`` is exclusive.
    """
    first_day = start_date.date()
    day_after_last = end_date.date()
    days = downloads_df["day"]
    in_window = (days >= first_day) & (days < day_after_last)
    return downloads_df.loc[in_window, "num_downloads"].sum()
def calculate_models_stats(start_date, end_date):
    """Return how many rows of ``models_df`` have ``created_at`` in ``[start_date, end_date)``."""
    in_window = filter_date_range(models_df, "created_at", start_date, end_date)
    return in_window.shape[0]
def calculate_model_likes_stats():
    """Return the sum of the ``likes`` column over every row of ``models_df``.

    This is an all-time total: the function takes no date range and applies
    no filtering.
    """
    likes = models_df["likes"]
    return likes.sum()
### Short-Term (2 Weeks) Stats ###
# Issue windows end one week ago so that every counted issue has had a full
# week in which it could have been answered.
unanswered_issues_short_term = calculate_issues_stats(three_weeks_ago, one_week_ago)
unanswered_issues_short_term_prev = calculate_issues_stats(five_weeks_ago, three_weeks_ago)
stargazers_short_term = calculate_stargazers_stats(two_weeks_ago, now)
stargazers_short_term_prev = calculate_stargazers_stats(four_weeks_ago, two_weeks_ago)
downloads_short_term = calculate_downloads_stats(two_weeks_ago, now)
downloads_short_term_prev = calculate_downloads_stats(four_weeks_ago, two_weeks_ago)
models_short_term = calculate_models_stats(two_weeks_ago, now)
models_short_term_prev = calculate_models_stats(four_weeks_ago, two_weeks_ago)

### Long-Term (4 Months) Stats ###
# Apply the same one-week lag to the long-term issue windows. Ending the
# current window at `now` would count issues younger than a week as "not
# answered within a week" before the week is over, inflating the current
# period relative to the previous one.
issue_answer_lag = timedelta(weeks=1)
unanswered_issues_long_term = calculate_issues_stats(four_months_ago - issue_answer_lag, now - issue_answer_lag)
unanswered_issues_long_term_prev = calculate_issues_stats(
    eight_months_ago - issue_answer_lag, four_months_ago - issue_answer_lag
)
stargazers_long_term = calculate_stargazers_stats(four_months_ago, now)
stargazers_long_term_prev = calculate_stargazers_stats(eight_months_ago, four_months_ago)
downloads_long_term = calculate_downloads_stats(four_months_ago, now)
downloads_long_term_prev = calculate_downloads_stats(eight_months_ago, four_months_ago)
models_long_term = calculate_models_stats(four_months_ago, now)
models_long_term_prev = calculate_models_stats(eight_months_ago, four_months_ago)

### Total Stats ###
stargazers_total = stargazers_df.shape[0]
downloads_total = downloads_df["num_downloads"].sum()
models_total = models_df.shape[0]
### Calculate Relative Changes ###
def calculate_relative_change(current, previous):
    """Return the change from ``previous`` to ``current`` as a percentage.

    Args:
        current: Value for the current period.
        previous: Value for the previous period (the baseline).

    Returns:
        ``(current - previous) / previous * 100`` when ``previous`` is non-zero.
        With a zero baseline the ratio is undefined: ``0.0`` is returned when
        ``current`` is also zero (nothing changed), otherwise positive or
        negative infinity according to the sign of ``current``.
    """
    if previous == 0:
        if current == 0:
            # 0 -> 0 is "no change", not an infinite increase
            return 0.0
        return float("inf") if current > 0 else float("-inf")
    return (current - previous) / previous * 100
# Relative change of each metric between the previous and the current window
unanswered_issues_short_term_change = calculate_relative_change(unanswered_issues_short_term, unanswered_issues_short_term_prev)
stargazers_short_term_change = calculate_relative_change(stargazers_short_term, stargazers_short_term_prev)
downloads_short_term_change = calculate_relative_change(downloads_short_term, downloads_short_term_prev)
models_short_term_change = calculate_relative_change(models_short_term, models_short_term_prev)
unanswered_issues_long_term_change = calculate_relative_change(unanswered_issues_long_term, unanswered_issues_long_term_prev)
stargazers_long_term_change = calculate_relative_change(stargazers_long_term, stargazers_long_term_prev)
downloads_long_term_change = calculate_relative_change(downloads_long_term, downloads_long_term_prev)
models_long_term_change = calculate_relative_change(models_long_term, models_long_term_prev)

# Trend markers: red circle (U+1F534) for a change in the unwanted direction,
# green circle (U+1F7E2) otherwise. For unanswered issues an increase is the
# unwanted direction; for every other metric a decrease is.
unanswered_issues_short_term_emoji = "🔴" if unanswered_issues_short_term_change > 0 else "🟢"
stargazers_short_term_emoji = "🔴" if stargazers_short_term_change < 0 else "🟢"
downloads_short_term_emoji = "🔴" if downloads_short_term_change < 0 else "🟢"
models_short_term_emoji = "🔴" if models_short_term_change < 0 else "🟢"
unanswered_issues_long_term_emoji = "🔴" if unanswered_issues_long_term_change > 0 else "🟢"
stargazers_long_term_emoji = "🔴" if stargazers_long_term_change < 0 else "🟢"
downloads_long_term_emoji = "🔴" if downloads_long_term_change < 0 else "🟢"
models_long_term_emoji = "🔴" if models_long_term_change < 0 else "🟢"
# Per-period "likes" figures are not computed by this script: the literal
# "XXX" placeholders below are interpolated into the report as-is.
model_likes_short_term_emoji = "XXX"
model_likes_short_term = "XXX"
model_likes_short_term_prev = "XXX"
model_likes_short_term_change = "XXX"
model_likes_long_term_emoji = "XXX"
model_likes_long_term = "XXX"
model_likes_long_term_prev = "XXX"
model_likes_long_term_change = "XXX"
# Only the total comes from the data (see calculate_model_likes_stats)
model_likes_total = calculate_model_likes_stats()
### Generate the Report ###
# Despite its name, `today` holds only the year and month ("%Y-%m")
today = now.strftime("%Y-%m")
# The template uses `:name:` emoji shortcodes and `*bold*` markers.
# Short-term download counts are printed in thousands (K), long-term ones in millions (M).
report = f"""
:trl:
*Monthly [TRL](https://github.com/huggingface/trl) Metrics Report*
:date: {today}
*:ultrafast_parrot: Short-Term* (Last 2 Weeks)
- {downloads_short_term_emoji} PyPI downloads: {downloads_short_term / 1_000:.1f}K (prev: {downloads_short_term_prev / 1_000:.1f}K, change: {downloads_short_term_change:+.2f}%)
- {stargazers_short_term_emoji} New GH :star:: {stargazers_short_term} (prev: {stargazers_short_term_prev}, change: {stargazers_short_term_change:+.2f}%)
- {models_short_term_emoji} New models on :hugging_face: Hub: {models_short_term} (prev: {models_short_term_prev}, change: {models_short_term_change:+.2f}%)
- {model_likes_short_term_emoji} New likes on :hugging_face: Hub: {model_likes_short_term} (prev: {model_likes_short_term_prev}, change: {model_likes_short_term_change}%)
- {unanswered_issues_short_term_emoji} Issues not answered within a week: {unanswered_issues_short_term} (prev: {unanswered_issues_short_term_prev}, change: {unanswered_issues_short_term_change:+.2f}%)
*:60fps_parrot: Long-Term* (Last 4 Months)
- {downloads_long_term_emoji} PyPI downloads: {downloads_long_term / 1_000_000:.1f}M (prev: {downloads_long_term_prev/1_000_000:.1f}M, change: {downloads_long_term_change:+.2f}%, total: {downloads_total/1_000_000:.1f}M)
- {stargazers_long_term_emoji} New GH :star:: {stargazers_long_term} (prev: {stargazers_long_term_prev}, change: {stargazers_long_term_change:+.2f}%, total: {stargazers_total})
- {models_long_term_emoji} New models on :hugging_face: Hub: {models_long_term} (prev: {models_long_term_prev}, change: {models_long_term_change:+.2f}%, total: {models_total})
- {model_likes_long_term_emoji} New likes on :hugging_face: Hub: {model_likes_long_term} (prev: {model_likes_long_term_prev}, change: {model_likes_long_term_change}%, total: {model_likes_total})
- {unanswered_issues_long_term_emoji} Issues not answered within a week: {unanswered_issues_long_term} (prev: {unanswered_issues_long_term_prev}, change: {unanswered_issues_long_term_change:+.2f}%)
Further comments and analysis :arrow_down:
[Code for generating this report](https://gist.github.com/qgallouedec/86f9a53111b9cca7194f4d845dea7949)
"""
# Write the finished report to stdout
print(report)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment