Skip to content

Instantly share code, notes, and snippets.

@mbollmann
Created January 18, 2026 12:31
Show Gist options
  • Select an option

  • Save mbollmann/963fc0f01dbe4354b3f58b84a546bcf0 to your computer and use it in GitHub Desktop.

Select an option

Save mbollmann/963fc0f01dbe4354b3f58b84a546bcf0 to your computer and use it in GitHub Desktop.
Script for acl-org/acl-anthology to dump all paper–author assignments into a file
# -*- coding: utf-8 -*-
#
# Copyright 2026 Marcel Bollmann <marcel@bollmann.me>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Writes paper–author assignments to a file, for debugging purposes.

Usage:
    print_author_assignments.py [OUTFILE]

Arguments:
    OUTFILE     The file to write the assignments to.  If not given,
                defaults to `paperid-authorid-{git rev-parse --short HEAD}.tsv`
"""
from docopt import docopt
from pathlib import Path
import git
import itertools as it
import logging as log
from acl_anthology import Anthology
from acl_anthology.utils.logging import setup_rich_logging
if __name__ == "__main__":
    # Dump one "<paper full_id>\t<person id>" line per author/editor of every
    # paper in the Anthology data directory of the enclosing git checkout.
    args = docopt(__doc__)
    # Keep the object returned by the logging setup: its `highest` attribute
    # is compared against log.ERROR below to decide whether to abort.
    tracker = setup_rich_logging(level=log.INFO)

    # Find the repository containing this script by walking up the parent
    # directories, and read the data from its "data" folder.
    repo = git.Repo(__file__, search_parent_directories=True)
    anthology = Anthology(datadir=Path(repo.working_dir) / "data")
    anthology.load_all()  # since we need to resolve authors

    if not (outfile := args["OUTFILE"]):
        # No output file given: derive the name from the first 8 hex digits
        # of the HEAD commit hash.  `hexsha` is read directly from the commit
        # object, so no extra `git name-rev` invocation is needed.
        ref = repo.rev_parse("HEAD").hexsha[:8]
        outfile = f"paperid-authorid-{ref}.tsv"

    # Each row is (sort key tuple, paper ID string, person ID); the first
    # element is used for ordering only and is never written out.
    rows = []
    for paper in anthology.papers():
        for namespec in it.chain(paper.authors, paper.editors):
            person = anthology.resolve(namespec)
            rows.append((paper.full_id_tuple, paper.full_id, person.id))

    # Abort before writing anything if the tracker reports ERROR or above.
    # `raise SystemExit` is used instead of the `exit()` helper, which is
    # only injected by the `site` module and meant for interactive use.
    if tracker.highest >= log.ERROR:
        raise SystemExit(1)

    # Sort by paper only.  list.sort() is stable, so the authors-then-editors
    # order collected above is preserved within each paper.
    rows.sort(key=lambda row: row[:2])

    # Explicit encoding keeps the dump identical regardless of the locale.
    with open(outfile, "w", encoding="utf-8") as f:
        f.writelines("\t".join(row[1:]) + "\n" for row in rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment