Skip to content

Instantly share code, notes, and snippets.

@fnielsen
Created February 27, 2026 22:38
Show Gist options
  • Select an option

  • Save fnielsen/70069b61da9f7eb09b721e4d82b63710 to your computer and use it in GitHub Desktop.

Select an option

Save fnielsen/70069b61da9f7eb09b721e4d82b63710 to your computer and use it in GitHub Desktop.
import requests
from textwrap import dedent
from datetime import datetime
# DOIs of the works to resolve against Wikidata (energy / sustainability
# social-science literature).
DOIS = [
    "10.1016/j.erss.2023.103213",
    "10.1016/j.erss.2021.102482",
    "10.1016/j.erss.2023.103053",
    "10.1016/j.erss.2021.102444",
    "10.1016/j.erss.2020.101837",
    "10.1016/j.erss.2022.102714",
    "10.1007/s40518-021-00184-6",
    "10.1016/j.esd.2024.101546",
    "10.1016/j.erss.2023.103010",
    "10.1002/wcc.932",
    "10.1016/j.erss.2025.104036",
    "10.1016/j.erss.2015.10.004",
    "10.1016/j.erss.2020.101907",
    "10.1002/geo2.70040",
    "10.1016/j.erss.2025.104149",
    "10.1016/j.erss.2022.102862",
    "10.1007/s13280-025-02202-z",
    "10.1016/j.erss.2025.104422",
    "10.1016/j.erss.2020.101716",
    "10.3390/en17143512",
    "10.1016/j.erss.2020.101768",
    "10.1016/j.erss.2025.104016",
    "10.1016/j.erss.2018.06.010",
    "10.1016/j.copsyc.2024.101987",
    "10.3390/su13042128",
    "10.1080/14693062.2023.2256697",
    "10.1016/j.erss.2020.101834",
    "10.1016/j.rser.2025.115892",
    "10.1146/annurev-environ-112621-063400",
    "10.1016/j.erss.2020.101871",
    "10.1016/j.egyr.2024.01.040",
    "10.1016/j.crm.2022.100438",
    "10.1146/annurev-environ-101718-033103",
    "10.1016/j.agsy.2025.104512",
    "10.3390/su15032441",
    "10.1016/j.erss.2025.104067",
    "10.1007/s43621-021-00024-z",
    "10.1186/s13705-021-00330-4",
    "10.3390/su11041023",
    "10.1016/j.rser.2021.111504",
    "10.17573/cepar.2025.2.09",
    "10.1111/gec3.12662",
    "10.1016/j.erss.2020.101876",
    "10.1016/j.eiar.2025.108187",
    "10.1016/j.erss.2023.103221",
    "10.1016/j.erss.2016.04.001",
    "10.1016/j.erss.2021.102333",
    "10.1016/j.erss.2022.102913",
    "10.1017/sus.2025.2",
    "10.1016/j.erss.2021.102257",
    "10.1016/j.jenvman.2024.120804",
    "10.1007/s13412-021-00726-w",
    "10.3390/su16198700",
    "10.1017/sus.2024.24",
    "10.1016/j.jclepro.2024.143470",
    "10.1139/er-2024-0018",
]
def sparql_escape_string(s: str) -> str:
    """Escape a value for embedding inside a double-quoted SPARQL string literal.

    Backslashes are doubled first, then double quotes are backslash-escaped,
    so that escaping the backslash does not clobber the quote escape.
    """
    escaped = s
    for raw, quoted in (("\\", "\\\\"), ('"', '\\"')):
        escaped = escaped.replace(raw, quoted)
    return escaped
def parse_year(date_str: str | None) -> str:
if not date_str:
return ""
# Scholarly/Wikidata returns ISO-ish strings; safest is take first 4 digits if present.
if len(date_str) >= 4 and date_str[:4].isdigit():
return date_str[:4]
return ""
def wiki_escape_cell(text: str) -> str:
if text is None:
return ""
# Avoid breaking table cells
text = text.replace("|", "|")
text = text.replace("\n", " ").replace("\r", " ")
return text
def qid_from_uri(uri: str) -> str:
    """Return the trailing path segment of an entity URI.

    e.g. http://www.wikidata.org/entity/Q123 -> Q123.  A string with no
    slash is returned unchanged.
    """
    _head, _sep, qid = uri.rpartition("/")
    return qid
def build_table_for_dois(dois: list[str]) -> str:
    """Resolve DOIs to Wikidata items and render a MediaWiki table.

    Queries the scholarly Wikidata SPARQL endpoint for works carrying each
    DOI (P356) and returns wiki markup: optional warning lines (a DOI that
    matches several items, an item carrying several DOIs, or no matches at
    all) followed by a sortable table with columns QID / Year / DOI / Title.

    Parameters:
        dois: DOI strings; blanks are dropped and the rest upper-cased,
              since Wikidata stores DOIs in upper case.

    Raises:
        requests.HTTPError: for a non-2xx endpoint response.
    """
    endpoint = "https://query-scholarly.wikidata.org/sparql"
    ua = "fnielsen mailto:faan@dtu.dk"
    dois_up = [d.strip().upper() for d in dois if d.strip()]
    values_block = "\n".join(f'"{sparql_escape_string(d)}"' for d in dois_up)
    # One row per (work, matched-doi). Also fetch:
    # - earliest publication date (MIN)
    # - one title sample (prefer P1476, else English label)
    # - all DOIs on the work (GROUP_CONCAT) so we can warn on multi-DOI items
    query = dedent(f"""
    SELECT ?work ?doi
    (MIN(?pubDate) AS ?earliestDate)
    (SAMPLE(?titleStr) AS ?titleSample)
    (GROUP_CONCAT(DISTINCT ?allDoi; separator="|") AS ?allDois)
    WHERE {{
    VALUES ?doi {{ {values_block} }}
    ?work wdt:P356 ?doi .
    OPTIONAL {{ ?work wdt:P577 ?pubDate . }}
    OPTIONAL {{ ?work wdt:P1476 ?titleLit . }}
    OPTIONAL {{
    ?work rdfs:label ?labelEn .
    FILTER(LANG(?labelEn) = "en")
    }}
    # Prefer P1476, falling back to the English label.  COALESCE skips the
    # error STR() raises on an unbound variable.  Bug fix: bind into a
    # FRESH variable -- the original re-bound ?titleStr0, which is illegal
    # (BIND may not assign a variable already in scope, SPARQL 1.1 sec 10.1)
    # and is rejected by strict engines such as Blazegraph/WDQS.
    BIND(COALESCE(STR(?titleLit), STR(?labelEn)) AS ?titleStr)
    OPTIONAL {{ ?work wdt:P356 ?allDoi . }}
    }}
    GROUP BY ?work ?doi
    ORDER BY ?doi ?work
    """)
    r = requests.get(
        endpoint,
        params={"format": "json", "query": query},
        headers={"Accept": "application/sparql-results+json", "User-Agent": ua},
        timeout=120,
    )
    r.raise_for_status()
    bindings = r.json()["results"]["bindings"]

    # Collect one row per binding plus the DOI<->QID maps used for warnings.
    rows = []  # (doi, qid, year, title, all_dois_set)
    doi_to_qids: dict[str, set[str]] = {}
    qid_to_dois: dict[str, set[str]] = {}
    for b in bindings:
        qid = qid_from_uri(b["work"]["value"])
        doi = b["doi"]["value"]
        year = parse_year(b.get("earliestDate", {}).get("value"))
        title = wiki_escape_cell(b.get("titleSample", {}).get("value", ""))
        all_dois_raw = b.get("allDois", {}).get("value", "")
        all_dois_set = {d for d in all_dois_raw.split("|") if d}
        rows.append((doi, qid, year, title, all_dois_set))
        doi_to_qids.setdefault(doi, set()).add(qid)
        qid_to_dois.setdefault(qid, set()).update(all_dois_set)

    warn_lines = _build_warning_lines(doi_to_qids, qid_to_dois)
    table_lines = _build_table_lines(rows)
    # If no matches, still output something useful.
    if not rows:
        warn_lines.insert(0, "**No matches found for the provided DOIs on query-scholarly.wikidata.org.**")
    return "\n".join(warn_lines + [""] + table_lines)


def _build_warning_lines(doi_to_qids: dict[str, set[str]],
                         qid_to_dois: dict[str, set[str]]) -> list[str]:
    """Format warning bullet lists for ambiguous DOI<->item mappings."""
    warn_lines: list[str] = []
    multi_qids = {doi: qids for doi, qids in doi_to_qids.items() if len(qids) > 1}
    multi_dois = {qid: doiset for qid, doiset in qid_to_dois.items() if len(doiset) > 1}
    if multi_qids:
        warn_lines.append("**Warning: DOIs with multiple QIDs (possible duplicates in Wikidata):**")
        for doi in sorted(multi_qids):
            qids = ", ".join(sorted(multi_qids[doi]))
            warn_lines.append(f"* {doi} → {qids}")
    if multi_dois:
        warn_lines.append("**Warning: QIDs with multiple DOIs (multiple P356 on the same item):**")
        for qid in sorted(multi_dois):
            dois = ", ".join(sorted(multi_dois[qid]))
            warn_lines.append(f"* {qid} → {dois}")
    return warn_lines


def _build_table_lines(rows: list[tuple]) -> list[str]:
    """Render rows as a sortable MediaWiki table with QID and DOI hyperlinks."""
    table_lines = ['{| class="wikitable sortable"', "! QID !! Year !! DOI !! Title"]
    for doi, qid, year, title, _all_dois_set in rows:
        qid_link = f"[[d:{qid}|{qid}]]"
        doi_link = f"[https://doi.org/{doi} {doi}]"
        table_lines.append("|-")
        table_lines.append(f"| {qid_link} || {year} || {doi_link} || {title}")
    table_lines.append("|}")
    return table_lines
if __name__ == "__main__":
    # Emit the MediaWiki markup to stdout for copy/paste into a wiki page.
    print(build_table_for_dois(DOIS))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment