Skip to content

Instantly share code, notes, and snippets.

@hrchu
Created July 18, 2025 07:48
Show Gist options
  • Select an option

  • Save hrchu/fb0e5ac82284474eb37e175e5c7abc5f to your computer and use it in GitHub Desktop.

Select an option

Save hrchu/fb0e5ac82284474eb37e175e5c7abc5f to your computer and use it in GitHub Desktop.
COSCUP 2025 pretalx submission translate
"""
Script to automatically translate and fill missing translated titles and
abstracts for non-English submissions using DeepL API.
Flow:
1. Iterate all submissions or a specific submission
2. Filter those where language is not English and translated title/abstract
is empty
3. Read original title/abstract, translate using DeepL, and fill the answers
4. Update submissions with the translations
Usage:
python auto_translate_submissions.py --all # Scan all submissions
python auto_translate_submissions.py --code ABC123 # Scan specific one
python auto_translate_submissions.py --all --dry-run # Test mode
python auto_translate_submissions.py --help # Show help
"""
import argparse
import os

import requests
"""Test site"""
# Q_PRESENTATION_LANGUAGE = 351
# Q_TRANSLATED_TITLE = 353
# Q_TRANSLATED_ABSTRACT = 354
# BASE_URL = "https://pretalx.coscup.org/api/events/coscup-playground-2023/"
# API_TOKEN =

# COSCUP 2025
# IDs of the pretalx questions that hold the presentation language and the
# translated title/abstract answers.
Q_PRESENTATION_LANGUAGE = 269
Q_TRANSLATED_TITLE = 257
Q_TRANSLATED_ABSTRACT = 259
BASE_URL = "https://pretalx.coscup.org/api/events/coscup-2025/"

# Secrets are read from the environment so they never have to be written
# into (or redacted from) the source file. An unset variable yields an empty
# string; the module still imports, but the remote API will reject requests.
API_TOKEN = os.environ.get("PRETALX_API_TOKEN", "")
HEADERS = {
    "Authorization": f"Token {API_TOKEN}",
    "Content-Type": "application/json",
}

# DeepL API Key
DEEPL_API_KEY = os.environ.get("DEEPL_API_KEY", "")
def detect_language_with_deepl(text, timeout=30):
    """
    Detect the language of text using the DeepL API.

    A short sample of the text is sent to the translate endpoint with
    Chinese as the target, and the ``detected_source_language`` field of
    the first returned translation is used as the detection result.

    Args:
        text (str): Text to detect the language of. ``None`` is treated
            like an empty string.
        timeout (float): Seconds to wait for the HTTP request before
            giving up.

    Returns:
        tuple: ``(True, detected_language_code)`` on success, otherwise
        ``(False, error_message)``. This function does not raise.
    """
    if not text or not text.strip():
        return False, "Empty text"

    # Strip BEFORE slicing so leading whitespace cannot eat up the sample;
    # only the first 100 characters are sent to keep the request small.
    sample_text = text.strip()[:100]

    url = "https://api-free.deepl.com/v2/translate"
    headers = {
        "Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # Try to translate to Chinese to detect source language
    data = {
        "text": sample_text,
        "target_lang": "ZH",
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.post(
            url, headers=headers, data=data, timeout=timeout
        )
        if response.status_code != 200:
            return False, f"API error {response.status_code}: {response.text}"
        translations = response.json().get("translations", [])
        if not translations:
            return False, "No detection result"
        return True, translations[0].get("detected_source_language", "")
    except Exception as e:
        # Callers rely on the (success, message) contract, so every
        # failure is reported through the return value.
        return False, f"Exception: {e}"
def translate_text(text, source_lang="ZH", target_lang="EN", timeout=30):
    """
    Translate text using the DeepL API.

    Args:
        text (str): Text to translate. ``None`` is treated like an empty
            string.
        source_lang (str): Source language code sent to DeepL.
        target_lang (str): Target language code sent to DeepL.
        timeout (float): Seconds to wait for the HTTP request before
            giving up.

    Returns:
        tuple: ``(True, translated_text)`` on success, otherwise
        ``(False, error_message)``. This function does not raise.
    """
    if not text or not text.strip():
        return False, "Empty text"

    url = "https://api-free.deepl.com/v2/translate"
    headers = {
        "Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    data = {
        "text": text,
        "source_lang": source_lang,
        "target_lang": target_lang,
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.post(
            url, headers=headers, data=data, timeout=timeout
        )
        if response.status_code != 200:
            return False, f"API error {response.status_code}: {response.text}"
        translations = response.json().get("translations", [])
        if not translations:
            return False, "No translation returned"
        return True, translations[0].get("text", "")
    except Exception as e:
        # Callers rely on the (success, message) contract, so every
        # failure is reported through the return value.
        return False, f"Exception: {e}"
def get_all_submissions(timeout=30):
    """Fetch all submissions from the API with answers expanded.

    Pages are followed through the ``next`` link of each response until it
    is empty. If a request fails, the error is printed and the submissions
    collected so far are returned, so the result may be partial.

    Args:
        timeout (float): Seconds to wait for each HTTP request.

    Returns:
        list: The items of every fetched page's ``results`` list.
    """
    all_submissions = []
    url = f"{BASE_URL}submissions/?expand=answers"
    while url:
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=HEADERS, timeout=timeout)
            response.raise_for_status()
            data = response.json()
            all_submissions.extend(data.get("results", []))
            url = data.get("next")
        except Exception as e:
            print(f"❌ Error fetching submissions: {e}")
            break
    return all_submissions
def get_submission_by_code(submission_code, timeout=30):
    """Fetch a specific submission by its code, with answers expanded.

    Args:
        submission_code (str): Code of the submission to fetch.
        timeout (float): Seconds to wait for the HTTP request.

    Returns:
        list: A one-element list holding the submission on HTTP 200, or an
        empty list when the submission is not found or any error occurs
        (a message is printed in that case).
    """
    try:
        url = f"{BASE_URL}submissions/{submission_code}/?expand=answers"
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        if response.status_code == 200:
            return [response.json()]  # Return as list for consistency
        if response.status_code == 404:
            print(f"❌ Submission '{submission_code}' not found")
        else:
            print(f"❌ Error fetching submission: {response.status_code}")
        return []
    except Exception as e:
        print(f"❌ Error fetching submission: {e}")
        return []
def get_answer_value(answers, question_id):
    """Get the answer value for a specific question ID.

    Args:
        answers (list): Answer dicts, each with ``question`` and ``answer``
            keys. ``None`` is treated like an empty list.
        question_id (int): ID of the question to look up.

    Returns:
        str | None: The stripped answer text of the first matching answer
        (``""`` when its value is missing or null), or ``None`` when no
        answer for the question exists.
    """
    for answer in answers or ():
        if answer.get("question") == question_id:
            # `.get("answer", "")` would still return None when the key is
            # present with a null value, so normalise with `or`.
            return (answer.get("answer") or "").strip()
    return None
def has_answer(answers, question_id):
    """Tell whether the given question has a non-empty answer.

    Returns False both when no answer exists for the question and when the
    stored answer is an empty string.
    """
    found = get_answer_value(answers, question_id)
    if found is None:
        return False
    return found != ""
def create_answer(submission_code, question_id, answer_text, dry_run=False,
                  timeout=30):
    """Create an answer for a specific question of a submission.

    Args:
        submission_code (str): Code of the submission the answer belongs to.
        question_id (int): ID of the question being answered.
        answer_text (str): Text to store as the answer.
        dry_run (bool): If True, only print a preview (first 50 characters)
            and send nothing to the API.
        timeout (float): Seconds to wait for the HTTP request.

    Returns:
        tuple: ``(True, message)`` when the answer was created (HTTP 200 or
        201) or the call was a dry run, otherwise ``(False, error_message)``.
    """
    if dry_run:
        preview = (
            answer_text[:50] + "..."
            if len(answer_text) > 50 else answer_text
        )
        print(f" 🔍 DRY RUN: Would create Q{question_id} = '{preview}'")
        return True, "Dry run - no actual creation"

    answer_data = {
        "question": question_id,
        "answer": answer_text,
        "submission": submission_code,
    }
    try:
        answers_url = f"{BASE_URL}answers/"
        # Without a timeout a stalled connection would block forever.
        response = requests.post(
            answers_url, json=answer_data, headers=HEADERS, timeout=timeout
        )
        if response.status_code in (200, 201):
            return True, "Success"
        return False, f"HTTP {response.status_code}: {response.text}"
    except Exception as e:
        # Callers rely on the (success, message) contract, so every
        # failure is reported through the return value.
        return False, f"Exception: {e}"
def determine_source_language(presentation_language):
    """Translate a presentation-language answer into a DeepL source code.

    Every known label ("Mandarin", "Others", "Chinese") as well as any
    unknown value currently resolves to "ZH".
    """
    fallback_code = "ZH"
    # "Others" is assumed to be Chinese for now, could be expanded
    known_codes = dict.fromkeys(("Mandarin", "Others", "Chinese"), "ZH")
    if presentation_language in known_codes:
        return known_codes[presentation_language]
    return fallback_code
def _needs_translation(text, answers, question_id, label):
    """Return True when *text* still needs an English translation.

    No translation is needed when the question already has an answer, when
    the original text is blank, or when DeepL detects the text as English.
    The stored answer is checked first so that no DeepL request is spent on
    content that is already translated. *label* ("title" / "abstract") is
    only used in the progress messages.
    """
    if has_answer(answers, question_id) or not text.strip():
        return False

    print(f" 🔍 Detecting {label} language...")
    success, detected_lang = detect_language_with_deepl(text)
    if not success:
        # Detection failed: fall through and translate anyway.
        return True
    if detected_lang == "EN":
        print(f" 🔍 {label.capitalize()} detected as English")
        return False
    print(f" 🔍 {label.capitalize()} detected as {detected_lang}")
    return True


def _collect_candidates(submissions):
    """Return the accepted, non-English submissions missing a translation.

    Each candidate is a dict with the keys ``code``, ``title``,
    ``abstract``, ``language``, ``needs_title`` and ``needs_abstract``.
    """
    accepted_states = ("accepted", "confirmed")
    # Various representations of "English"; the last entry is Chinese.
    english_values = ("English", "en", "EN", "english", "En", "英文")

    candidates = []
    for submission in submissions:
        code = submission.get("code")
        # `or` also covers keys that are present with a null value, which
        # a plain `.get(key, default)` would pass through as None.
        title = submission.get("title") or ""
        abstract = submission.get("abstract") or ""
        answers = submission.get("answers") or []
        state = submission.get("state", "")

        presentation_language = get_answer_value(
            answers, Q_PRESENTATION_LANGUAGE
        )

        # Debug: show what language we found
        title_preview = title[:30] + "..." if len(title) > 30 else title
        print(f" 📝 [{code}] State: {state} | Lang: {presentation_language}")
        print(f" Title: {title_preview}")

        # Skip if not accepted/confirmed
        if state not in accepted_states:
            print(f" ⏭️ Skipping submission (state: {state})")
            continue

        # Skip if English or no language specified
        if (not presentation_language or
                presentation_language in english_values):
            print(" ⏭️ Skipping English submission")
            continue

        needs_title = _needs_translation(
            title, answers, Q_TRANSLATED_TITLE, "title"
        )
        needs_abstract = _needs_translation(
            abstract, answers, Q_TRANSLATED_ABSTRACT, "abstract"
        )
        if not (needs_title or needs_abstract):
            print(" ✅ Already has translations or content is English")
            continue

        needs_msg = f"Title: {needs_title}, Abstract: {needs_abstract}"
        print(f" 🔄 Needs translation ({needs_msg})")
        candidates.append({
            "code": code,
            "title": title,
            "abstract": abstract,
            "language": presentation_language,
            "needs_title": needs_title,
            "needs_abstract": needs_abstract,
        })
    return candidates


def process_submission_translations(submission_code=None, dry_run=False):
    """
    Main function to process and translate submissions.

    Flow: fetch the submission(s), keep the accepted/confirmed non-English
    ones whose translated title or abstract answer is missing, translate
    the missing parts to English with DeepL, and store each translation as
    an answer. Progress and a final summary are printed.

    Args:
        submission_code (str, optional): Specific submission code to process.
            If None (or empty), processes all submissions.
        dry_run (bool): If True, only shows what would be done without
            actually creating answers.
    """
    mode_text = " (DRY RUN MODE)" if dry_run else ""

    # Step 1: Fetch the submission(s)
    if submission_code:
        print(f"🌐 Automatic Translation for Submission: "
              f"{submission_code}{mode_text}")
        print("=" * 65)
        print(f"🔍 Step 1: Fetching submission {submission_code}...")
        submissions = get_submission_by_code(submission_code)
        if not submissions:
            print("❌ Submission not found or error occurred")
            return
        print(" Found 1 submission")
    else:
        print(f"🌐 Automatic Translation for Missing Titles and "
              f"Abstracts{mode_text}")
        print("=" * 65)
        print("🔍 Step 1: Fetching all submissions...")
        submissions = get_all_submissions()
        print(f" Found {len(submissions)} total submissions")

    # Step 2: Filter non-English submissions with missing translations
    print("\n🔍 Step 2: Filtering submissions that need translation...")
    candidates = _collect_candidates(submissions)
    print(f" Found {len(candidates)} submissions needing translation")
    if not candidates:
        print("\n✅ All submissions already have translations!")
        return

    # Step 3: Process each candidate
    print("\n🔄 Step 3: Processing translations...")
    # Appended to every machine-translated abstract before it is stored.
    notice = (
        " (Notice: The English content is automatically "
        "translated and may contain inaccuracies or "
        "misinterpretations. Please refer to the original version "
        "for the most accurate information.)"
    )
    success_count = 0
    for i, candidate in enumerate(candidates, 1):
        code = candidate["code"]
        title = candidate["title"]
        abstract = candidate["abstract"]
        language = candidate["language"]
        print(f"\n{i}. Processing [{code}] ({language})")
        print(f" Title: {title}")

        # Determine source language for DeepL
        source_lang = determine_source_language(language)

        # Translate title if needed
        if candidate["needs_title"]:
            print(" 🔄 Translating title...")
            success, translated_title = translate_text(
                title, source_lang, "EN"
            )
            if success:
                success_create, message = create_answer(
                    code, Q_TRANSLATED_TITLE, translated_title, dry_run
                )
                if success_create:
                    print(f" ✅ Title: {translated_title}")
                    success_count += 1
                else:
                    print(f" ❌ Failed to save title: {message}")
            else:
                # On failure the second tuple element is the error message.
                print(f" ❌ Translation failed: {translated_title}")

        # Translate abstract if needed
        if candidate["needs_abstract"]:
            print(" 🔄 Translating abstract...")
            success, translated_abstract = translate_text(
                abstract, source_lang, "EN"
            )
            if success:
                success_create, message = create_answer(
                    code, Q_TRANSLATED_ABSTRACT,
                    translated_abstract + notice, dry_run
                )
                if success_create:
                    print(f" ✅ Abstract: {translated_abstract[:50]}...")
                    success_count += 1
                else:
                    print(f" ❌ Failed to save abstract: {message}")
            else:
                # On failure the second tuple element is the error message.
                print(f" ❌ Translation failed: {translated_abstract}")
        print(" " + "-" * 50)

    # Step 4: Summary
    print("\n🎉 Translation Summary:")
    print(f" Submissions processed: {len(candidates)}")
    print(f" Successful translations: {success_count}")
    print(" ✅ Translation automation complete!")
def main():
    """Parse the command-line arguments and run the translation process.

    Exactly one of ``--all`` / ``--code CODE`` must be given; ``--dry-run``
    may be added to either. If processing raises, the error is printed and
    the script exits with status 1 so callers can detect the failure.
    """
    parser = argparse.ArgumentParser(
        description="Automatically translate missing titles and abstracts "
                    "for non-English submissions"
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--all",
        action="store_true",
        help="Process all submissions"
    )
    group.add_argument(
        "--code",
        type=str,
        help="Process specific submission by code (e.g., ABC123)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without actually creating answers"
    )
    args = parser.parse_args()

    try:
        # With --all, args.code is None, which selects "all submissions".
        process_submission_translations(args.code, dry_run=args.dry_run)
    except Exception as e:
        print(f"❌ Error: {e}")
        # Signal the failure to the calling shell instead of exiting 0.
        raise SystemExit(1)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment