Created
October 19, 2022 11:32
-
-
Save bendichter/41a81a23ece06c74c868a470cc630370 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "a228faf2", | |
| "metadata": {}, | |
| "source": [ | |
| "This notebook uses the NIH RePORTER API to collect the project numbers of neuroscience projects and the PubMed IDs (PMIDs) of the papers linked to them. The BRAIN Initiative funding opportunity announcement (FOA) award list is then used to find the projects and papers funded by the BRAIN Initiative." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "6e901831", | |
| "metadata": {}, | |
| "source": [ | |
| "# 1. All NIH-funded neuroscience papers" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "5e0fbb6e", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "100%|███████████████████████████████████████████| 24/24 [00:42<00:00, 1.78s/it]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import requests\n", | |
| "\n", | |
| "import pandas as pd\n", | |
| "from tqdm import tqdm, trange\n", | |
| "import numpy as np\n", | |
| "\n", | |
| "def paginate_query(endpoint, query, step_size=500):\n", | |
| "    \"\"\"Collect the results of every page of a paginated search endpoint.\n", | |
| "\n", | |
| "    The query is POSTed as JSON with ``offset`` and ``limit`` added. Each\n", | |
| "    response must be a JSON object with a ``results`` list; the first one\n", | |
| "    must also carry ``meta.total``, which decides how many pages to request.\n", | |
| "\n", | |
| "    Parameters\n", | |
| "    ----------\n", | |
| "    endpoint : str\n", | |
| "        URL of the search endpoint.\n", | |
| "    query : dict\n", | |
| "        Search payload. It is copied for each request, so the caller's dict\n", | |
| "        is not modified.\n", | |
| "    step_size : int, optional\n", | |
| "        Number of records requested per page.\n", | |
| "\n", | |
| "    Returns\n", | |
| "    -------\n", | |
| "    list\n", | |
| "        The ``results`` entries of all pages, in the order received.\n", | |
| "\n", | |
| "    Raises\n", | |
| "    ------\n", | |
| "    requests.HTTPError\n", | |
| "        If any page request comes back with an HTTP error status.\n", | |
| "    \"\"\"\n", | |
| "    def fetch_page(offset):\n", | |
| "        # Fresh payload per request: never mutate the dict the caller passed in.\n", | |
| "        payload = dict(query, offset=offset, limit=step_size)\n", | |
| "        response = requests.post(endpoint, json=payload)\n", | |
| "        # Fail on 4xx/5xx here instead of with an opaque KeyError further down.\n", | |
| "        response.raise_for_status()\n", | |
| "        return response.json()\n", | |
| "\n", | |
| "    first_page = fetch_page(0)\n", | |
| "    total = first_page[\"meta\"][\"total\"]\n", | |
| "    results = list(first_page[\"results\"])\n", | |
| "\n", | |
| "    # The first page is already in hand; request the remaining offsets.\n", | |
| "    for offset in trange(step_size, total, step_size):\n", | |
| "        results.extend(fetch_page(offset)[\"results\"])\n", | |
| "\n", | |
| "    return results\n", | |
| "\n", | |
| "def clean_project_num(x):\n", | |
| "    \"\"\"Drop the first character of ``x`` and everything from the first '-' onward.\n", | |
| "\n", | |
| "    For example, '1R01NS120819-01' becomes 'R01NS120819'.\n", | |
| "    \"\"\"\n", | |
| "    core, _, _ = x[1:].partition('-')\n", | |
| "    return core\n", | |
| "\n", | |
| " \n", | |
| "endpoint = \"https://api.reporter.nih.gov/v2/projects/search/\"\n", | |
| "\n", | |
| "# Projects of the listed agencies whose start date falls inside the given window,\n", | |
| "# sorted by start date, newest first.\n", | |
| "query = dict(\n", | |
| "    criteria=dict(\n", | |
| "        agencies=[\"NINDS\", \"NIMH\"],\n", | |
| "        project_start_date=dict(\n", | |
| "            from_date=\"2019-09-30T12:09:00Z\",\n", | |
| "            to_date=\"2022-10-30T12:09:00Z\",\n", | |
| "        ),\n", | |
| "    ),\n", | |
| "    # Left commented out by the author: ApplId, SubprojectId, Organization,\n", | |
| "    # ContactPiName, AllText, FullStudySection.\n", | |
| "    include_fields=[\n", | |
| "        \"FiscalYear\",\n", | |
| "        \"ProjectNum\",\n", | |
| "        \"OrgCountry\",\n", | |
| "        \"ProjectNumSplit\",\n", | |
| "        \"ProjectStartDate\",\n", | |
| "        \"ProjectEndDate\",\n", | |
| "    ],\n", | |
| "    sort_field=\"project_start_date\",\n", | |
| "    sort_order=\"desc\",\n", | |
| ")\n", | |
| "\n", | |
| "# Page through every matching project record.\n", | |
| "results = paginate_query(endpoint, query)\n", | |
| "\n", | |
| "# Clean each record's project number; the set removes repeats before listing them.\n", | |
| "project_numbers = list(set(clean_project_num(record[\"project_num\"]) for record in results))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "b0c6b974", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "7029" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Number of distinct cleaned project numbers gathered by the projects search.\n", | |
| "len(project_numbers)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "b228085f", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 0%| | 0/8 [00:00<?, ?it/s]\n", | |
| "0it [00:00, ?it/s]\u001b[A\n", | |
| " 12%|█████▋ | 1/8 [00:01<00:13, 1.92s/it]\n", | |
| "0it [00:00, ?it/s]\u001b[A\n", | |
| " 25%|███████████▎ | 2/8 [00:04<00:12, 2.08s/it]\n", | |
| "0it [00:00, ?it/s]\u001b[A\n", | |
| " 38%|████████████████▉ | 3/8 [00:06<00:10, 2.13s/it]\n", | |
| "0it [00:00, ?it/s]\u001b[A\n", | |
| " 50%|██████████████████████▌ | 4/8 [00:08<00:08, 2.09s/it]\n", | |
| "0it [00:00, ?it/s]\u001b[A\n", | |
| " 62%|████████████████████████████▏ | 5/8 [00:10<00:06, 2.06s/it]\n", | |
| "0it [00:00, ?it/s]\u001b[A\n", | |
| " 75%|█████████████████████████████████▊ | 6/8 [00:12<00:04, 2.03s/it]\n", | |
| "0it [00:00, ?it/s]\u001b[A\n", | |
| " 88%|███████████████████████████████████████▍ | 7/8 [00:14<00:02, 2.07s/it]\n", | |
| "0it [00:00, ?it/s]\u001b[A\n", | |
| "100%|█████████████████████████████████████████████| 8/8 [00:15<00:00, 1.89s/it]\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "16055" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "endpoint = \"https://api.reporter.nih.gov/v2/publications/search/\"\n", | |
| "\n", | |
| "# Publication-search payload; the project-number filter is filled in per chunk below,\n", | |
| "# so it starts empty instead of being built once for every project and then discarded.\n", | |
| "# NOTE(review): the \"string\" values look like placeholders copied from an example\n", | |
| "# payload - confirm whether sortField/sortOrder have any effect on this endpoint.\n", | |
| "pub_query = dict(\n", | |
| "    criteria=dict(core_project_nums=[]),\n", | |
| "    sortField=\"string\",\n", | |
| "    sortOrder=\"string\",\n", | |
| ")\n", | |
| "\n", | |
| "results = []\n", | |
| "# Search in chunks of 1000 project numbers; '*' is appended to each number\n", | |
| "# (presumably a wildcard for the API - confirm).\n", | |
| "for i in trange(0, len(project_numbers), 1000):\n", | |
| "    pub_query[\"criteria\"].update(core_project_nums=[x + '*' for x in project_numbers[i:(i+1000)]])\n", | |
| "    results += paginate_query(endpoint, pub_query, 8000)\n", | |
| "\n", | |
| "# Unique PubMed IDs across all chunks.\n", | |
| "nih_pmids = {result[\"pmid\"] for result in results}\n", | |
| "len(nih_pmids)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "f9119ec5", | |
| "metadata": {}, | |
| "source": [ | |
| "# 2. All BRAIN-funded papers" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "98e34452", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\r", | |
| " 0%| | 0/2 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1135\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "0it [00:00, ?it/s]\u001b[A\n", | |
| " 50%|██████████████████████▌ | 1/2 [00:02<00:02, 2.93s/it]\n", | |
| "0it [00:00, ?it/s]\u001b[A\n", | |
| "100%|█████████████████████████████████████████████| 2/2 [00:03<00:00, 1.85s/it]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "5501\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "\n", | |
| "# NOTE(review): absolute path on the original author's machine - point this at your own\n", | |
| "# copy of the spreadsheet (presumably the BRAIN funded-awards export; confirm the source).\n", | |
| "fpath = \"/Users/bendichter/Downloads/funded_awards-2022-10-18T11-36-02.xlsx\"\n", | |
| "\n", | |
| "df = pd.read_excel(fpath)\n", | |
| "\n", | |
| "def clean1(x):\n", | |
| "    \"\"\"Return the piece of ``x`` after its first '>' (up to the next '>'), minus its last four characters.\n", | |
| "\n", | |
| "    Values that contain no '>' are returned unchanged.\n", | |
| "    \"\"\"\n", | |
| "    if \">\" in x:\n", | |
| "        # NOTE(review): presumably unwraps an HTML link such as '<a ...>NUM</a>'. For that\n", | |
| "        # shape the four-character trim also removes the last character of NUM - confirm\n", | |
| "        # against the spreadsheet whether that is intended.\n", | |
| "        return x.split(\">\")[1][:-4]\n", | |
| "    return x\n", | |
| "\n", | |
| "# Drop rows whose \"Project Number\" cell is a float (presumably empty cells read as NaN).\n", | |
| "df = df[df[\"Project Number\"].apply(lambda x: not isinstance(x, float))]\n", | |
| "# Convert to a plain list so the chunk slicing below is unambiguously positional;\n", | |
| "# the filtered Series would keep its original integer labels.\n", | |
| "project_numbers = df[\"Project Number\"].apply(clean1).apply(clean_project_num).tolist()\n", | |
| "\n", | |
| "print(len(project_numbers))\n", | |
| "\n", | |
| "endpoint = \"https://api.reporter.nih.gov/v2/publications/search/\"\n", | |
| "\n", | |
| "results = []\n", | |
| "for i in trange(0, len(project_numbers), 1000):\n", | |
| "    # Per-chunk payload derived from pub_query, so the shared dict is not mutated here.\n", | |
| "    chunk_query = dict(\n", | |
| "        pub_query,\n", | |
| "        criteria=dict(core_project_nums=[x + '*' for x in project_numbers[i:(i+1000)]]),\n", | |
| "    )\n", | |
| "    results += paginate_query(endpoint, chunk_query, 8000)\n", | |
| "\n", | |
| "# Unique PubMed IDs across all chunks.\n", | |
| "brain_pmids = {result[\"pmid\"] for result in results}\n", | |
| "print(len(brain_pmids))\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "c7a05305", | |
| "metadata": {}, | |
| "source": [ | |
| "# 3. Recent BRAIN-funded neurophysiology projects" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "id": "b6de19bf", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "data=\"\"\"1R01NS120819-01\n", | |
| "1R01NS123663-01\n", | |
| "1R01NS123916-01\n", | |
| "1R01NS122742-01\n", | |
| "1U01NS122082-01\n", | |
| "1R01NS120594-01\n", | |
| "1U01NS122040-01\n", | |
| "1R01NS121773-01\n", | |
| "1R01NS121913-01\n", | |
| "1R01NS123424-01\n", | |
| "1U01NS123658-01\n", | |
| "1R01NS120850-01\n", | |
| "1R34NS123819-01\n", | |
| "1R01NS123912-01\n", | |
| "1R01NS124592-01\n", | |
| "1R01NS124017-01\n", | |
| "1R34NS122272-01\n", | |
| "1R01NS123665-01\n", | |
| "1R34NS121898-01\n", | |
| "1U01NS122124-01\n", | |
| "1R34NS123913-01\n", | |
| "1R01NS123887-01\n", | |
| "1R01NS124564-01\n", | |
| "1U01NS120824-01\n", | |
| "1R01NS120832-01\n", | |
| "1UF1MH128337-01\n", | |
| "1R21EY033080-01\n", | |
| "1R01NS120289-01A1\n", | |
| "1R01NS123899-01\n", | |
| "1R01NS121874-01\n", | |
| "1R01NS123681-01\n", | |
| "1R01NS123890-01\n", | |
| "1R01NS123778-01\n", | |
| "1R01NS121772-01\n", | |
| "1R01NS121904-01\n", | |
| "1R01NS121764-01\n", | |
| "1U19NS123719-01\n", | |
| "1R01NS123918-01\n", | |
| "1R34NS121875-01\n", | |
| "1U19NS123716-01\n", | |
| "1R01NS112183-01A1\n", | |
| "1U01NS122123-01\n", | |
| "1UG3MH126864-01\n", | |
| "1R01NS124590-01\n", | |
| "1U01NS120822-01\n", | |
| "1R01NS121911-01\n", | |
| "1R34NS121873-01\n", | |
| "1R01NS120828-01\n", | |
| "1R01NS121918-01\n", | |
| "1R01NS123903-01\n", | |
| "1R01NS123842-01\n", | |
| "1R01NS121919-01\n", | |
| "1U01NS120820-01\n", | |
| "1R34NS123876-01\n", | |
| "1U01NS123668-01\n", | |
| "1R01NS121776-01\n", | |
| "1R34NS121766-01\n", | |
| "1R01NS120851-01A1\"\"\"\n", | |
| "\n", | |
| "# One award number per line of `data`; clean each one with clean_project_num.\n", | |
| "project_numbers = list(map(clean_project_num, data.split(\"\\n\")))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "848de072", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import requests\n", | |
| "\n", | |
| "import pandas as pd\n", | |
| "from tqdm import tqdm\n", | |
| "\n", | |
| "\n", | |
| "query = dict(\n", | |
| "    criteria=dict(core_project_nums=[x+'*' for x in project_numbers]),\n", | |
| "    sortField=\"string\",\n", | |
| "    sortOrder=\"string\",\n", | |
| ")\n", | |
| "# Go through paginate_query, as the earlier publication searches do, so that matches\n", | |
| "# beyond the first page of 8000 are fetched rather than silently dropped.\n", | |
| "results = paginate_query(\"https://api.reporter.nih.gov/v2/publications/search/\", query, 8000)\n", | |
| "# Unique PubMed IDs of the returned publications.\n", | |
| "results_pmids = {result[\"pmid\"] for result in results}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "fc111967", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "42" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Number of distinct PubMed IDs returned for the project list above.\n", | |
| "len(results_pmids)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.9.5" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment