# -*- coding: utf-8 -*-
#
# Copyright 2026 Marcel Bollmann <marcel@bollmann.me>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Writes paper–author assignments to a file, for debugging purposes.

Usage:
    print_author_assignments.py [OUTFILE]

Arguments:
    OUTFILE     The file to write the assignments to.  If not given,
                defaults to `paperid-authorid-{git rev-parse --short HEAD}.tsv`
"""

import itertools as it
import logging as log
import sys
from pathlib import Path

import git
from docopt import docopt

from acl_anthology import Anthology
from acl_anthology.utils.logging import setup_rich_logging


if __name__ == "__main__":
    # The command-line interface is derived from the module docstring.
    args = docopt(__doc__)
    # Keep the returned tracker: it is consulted below (`tracker.highest`)
    # to find out whether anything at ERROR level or above was logged.
    tracker = setup_rich_logging(level=log.INFO)

    # Find the Git repository containing this script; the Anthology data is
    # read from the `data` directory at the root of its working tree.
    repo = git.Repo(__file__, search_parent_directories=True)
    anthology = Anthology(datadir=Path(repo.working_dir) / "data")
    anthology.load_all()  # since we need to resolve authors

    if not (outfile := args["OUTFILE"]):
        # No OUTFILE given: build a default name from the first eight
        # characters of HEAD's `name_rev`.
        # NOTE(review): presumably this is the abbreviated commit SHA --
        # confirm against GitPython's `Commit.name_rev` documentation.
        ref = repo.rev_parse("HEAD").name_rev[:8]
        outfile = f"paperid-authorid-{ref}.tsv"

    # One row per (paper, author-or-editor) pair.  The ID tuple is carried
    # along only as a sort key and is dropped again when writing.
    output = []
    for paper in anthology.papers():
        for namespec in it.chain(paper.authors, paper.editors):
            person = anthology.resolve(namespec)
            output.append((paper.full_id_tuple, paper.full_id, person.id))

    # Abort with a non-zero exit status, before writing any output, if an
    # error was logged up to this point.  `sys.exit` is used rather than the
    # `exit` helper injected by the `site` module, which is intended for
    # interactive sessions and is unavailable under `python -S`.
    if tracker.highest >= log.ERROR:
        sys.exit(1)

    output.sort(key=lambda x: (x[0], x[1]))
    # Explicit encoding so the output file does not depend on the platform's
    # locale settings.
    with open(outfile, "w", encoding="utf-8") as f:
        for ids in output:
            # Tab-separated: paper ID, then person ID.
            print("\t".join(ids[1:]), file=f)