Last active
October 27, 2020 17:45
-
-
Save immuntasir/b1d193a06e136f6f219bbf700331181c to your computer and use it in GitHub Desktop.
How to use the GitHub Search API to download repositories that use a particular library
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import requests\n", | |
| "from tqdm import tqdm\n", | |
| "import pandas as pd\n", | |
| "import time\n", | |
| "from github import Github" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with open('../../api_keys/github.txt', \"r\") as f:\n", | |
| " API_KEY = f.read()\n", | |
| " \n", | |
| "headers = {'Authorization': 'token %s' % API_KEY}\n", | |
| " \n", | |
| "REPO_DIR_PARENT = '../../data/package_popularity/numpy/clones/'\n", | |
| "\n", | |
| "LIBRARY = 'numpy'\n", | |
| "LANGUAGE = 'python'\n", | |
| "\n", | |
| "URL = 'https://api.github.com/search/repositories?q=%s+language:%s&sort=stars&order=desc&page=' % (LIBRARY, LANGUAGE)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "dict_keys(['total_count', 'incomplete_results', 'items'])\n", | |
| "Total Repositories: 10854\n", | |
| "Total number of items in a page: 30\n", | |
| "Keys in a item: dict_keys(['id', 'node_id', 'name', 'full_name', 'private', 'owner', 'html_url', 'description', 'fork', 'url', 'forks_url', 'keys_url', 'collaborators_url', 'teams_url', 'hooks_url', 'issue_events_url', 'events_url', 'assignees_url', 'branches_url', 'tags_url', 'blobs_url', 'git_tags_url', 'git_refs_url', 'trees_url', 'statuses_url', 'languages_url', 'stargazers_url', 'contributors_url', 'subscribers_url', 'subscription_url', 'commits_url', 'git_commits_url', 'comments_url', 'issue_comment_url', 'contents_url', 'compare_url', 'merges_url', 'archive_url', 'downloads_url', 'issues_url', 'pulls_url', 'milestones_url', 'notifications_url', 'labels_url', 'releases_url', 'deployments_url', 'created_at', 'updated_at', 'pushed_at', 'git_url', 'ssh_url', 'clone_url', 'svn_url', 'homepage', 'size', 'stargazers_count', 'watchers_count', 'language', 'has_issues', 'has_projects', 'has_downloads', 'has_wiki', 'has_pages', 'forks_count', 'mirror_url', 'archived', 'disabled', 'open_issues_count', 'license', 'forks', 'open_issues', 'watchers', 'default_branch', 'permissions', 'score'])\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "r = requests.get(URL + '1', headers=headers)\n", | |
| "json_response = r.json()\n", | |
| "\n", | |
| "print(json_response.keys())\n", | |
| "print('Total Repositories:', json_response['total_count'])\n", | |
| "print('Total number of items in a page:', len(json_response['items']))\n", | |
| "\n", | |
| "print('Keys in a item:', json_response['items'][0].keys())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "100%|██████████| 35/35 [08:17<00:00, 14.22s/it]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "keys = ['name', 'full_name', 'html_url', 'clone_url', 'size', 'stargazers_count']\n", | |
| "NUMBER_OF_PAGES_TO_ITERATE = 35\n", | |
| "\n", | |
| "repo_dict = dict([(key, []) for key in keys])\n", | |
| "\n", | |
| "for page_num in tqdm(range(0, 35)):\n", | |
| " r = requests.get(URL + str(page_num))\n", | |
| " contents = r.json()\n", | |
| " \n", | |
| " for item in contents['items']:\n", | |
| " for key in keys:\n", | |
| " repo_dict[key].append(item[key])\n", | |
| " \n", | |
| " if page_num % 5 == 0:\n", | |
| " time.sleep(60)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>name</th>\n", | |
| " <th>full_name</th>\n", | |
| " <th>html_url</th>\n", | |
| " <th>clone_url</th>\n", | |
| " <th>size</th>\n", | |
| " <th>stargazers_count</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>data-science-ipython-notebooks</td>\n", | |
| " <td>donnemartin/data-science-ipython-notebooks</td>\n", | |
| " <td>https://github.com/donnemartin/data-science-ip...</td>\n", | |
| " <td>https://github.com/donnemartin/data-science-ip...</td>\n", | |
| " <td>49025</td>\n", | |
| " <td>19568</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>ML-From-Scratch</td>\n", | |
| " <td>eriklindernoren/ML-From-Scratch</td>\n", | |
| " <td>https://github.com/eriklindernoren/ML-From-Scr...</td>\n", | |
| " <td>https://github.com/eriklindernoren/ML-From-Scr...</td>\n", | |
| " <td>553</td>\n", | |
| " <td>16847</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>numpy</td>\n", | |
| " <td>numpy/numpy</td>\n", | |
| " <td>https://github.com/numpy/numpy</td>\n", | |
| " <td>https://github.com/numpy/numpy.git</td>\n", | |
| " <td>84293</td>\n", | |
| " <td>15014</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>jax</td>\n", | |
| " <td>google/jax</td>\n", | |
| " <td>https://github.com/google/jax</td>\n", | |
| " <td>https://github.com/google/jax.git</td>\n", | |
| " <td>28075</td>\n", | |
| " <td>9795</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>numpy-ml</td>\n", | |
| " <td>ddbourgin/numpy-ml</td>\n", | |
| " <td>https://github.com/ddbourgin/numpy-ml</td>\n", | |
| " <td>https://github.com/ddbourgin/numpy-ml.git</td>\n", | |
| " <td>10416</td>\n", | |
| " <td>8963</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " name full_name \\\n", | |
| "0 data-science-ipython-notebooks donnemartin/data-science-ipython-notebooks \n", | |
| "1 ML-From-Scratch eriklindernoren/ML-From-Scratch \n", | |
| "2 numpy numpy/numpy \n", | |
| "3 jax google/jax \n", | |
| "4 numpy-ml ddbourgin/numpy-ml \n", | |
| "\n", | |
| " html_url \\\n", | |
| "0 https://github.com/donnemartin/data-science-ip... \n", | |
| "1 https://github.com/eriklindernoren/ML-From-Scr... \n", | |
| "2 https://github.com/numpy/numpy \n", | |
| "3 https://github.com/google/jax \n", | |
| "4 https://github.com/ddbourgin/numpy-ml \n", | |
| "\n", | |
| " clone_url size stargazers_count \n", | |
| "0 https://github.com/donnemartin/data-science-ip... 49025 19568 \n", | |
| "1 https://github.com/eriklindernoren/ML-From-Scr... 553 16847 \n", | |
| "2 https://github.com/numpy/numpy.git 84293 15014 \n", | |
| "3 https://github.com/google/jax.git 28075 9795 \n", | |
| "4 https://github.com/ddbourgin/numpy-ml.git 10416 8963 " | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "repo_df = pd.DataFrame(repo_dict)\n", | |
| "repo_df.to_csv('../../data/package_popularity/numpy/repo_info.csv')\n", | |
| "\n", | |
| "repo_df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.8.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment