Created
July 18, 2025 07:48
-
-
Save hrchu/fb0e5ac82284474eb37e175e5c7abc5f to your computer and use it in GitHub Desktop.
COSCUP 2025 pretalx submission translate
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Script to automatically translate and fill missing translated titles and | |
| abstracts for non-English submissions using DeepL API. | |
| Flow: | |
| 1. Iterate all submissions or a specific submission | |
| 2. Filter those where language is not English and translated title/abstract | |
| is empty | |
| 3. Read original title/abstract, translate using DeepL, and fill the answers | |
| 4. Update submissions with the translations | |
| Usage: | |
| python auto_translate_submissions.py --all # Scan all submissions | |
| python auto_translate_submissions.py --code ABC123 # Scan specific one | |
| python auto_translate_submissions.py --all --dry-run # Test mode | |
| python auto_translate_submissions.py --help # Show help | |
| """ | |
import argparse
import os

import requests
| """Test site""" | |
# Commented-out configuration for the playground (test) event:
# Q_PRESENTATION_LANGUAGE = 351
# Q_TRANSLATED_TITLE = 353
# Q_TRANSLATED_ABSTRACT = 354
# BASE_URL = "https://pretalx.coscup.org/api/events/coscup-playground-2023/"

# COSCUP 2025
# Question IDs that answers are matched against (see get_answer_value).
Q_PRESENTATION_LANGUAGE = 269
Q_TRANSLATED_TITLE = 257
Q_TRANSLATED_ABSTRACT = 259
BASE_URL = "https://pretalx.coscup.org/api/events/coscup-2025/"

# Secrets are read from the environment so they never live in the source
# file. An empty string is used when the variable is unset; requests will
# then be rejected by the remote API rather than failing at import time.
API_TOKEN = os.environ.get("PRETALX_API_TOKEN", "")
HEADERS = {
    "Authorization": f"Token {API_TOKEN}",
    "Content-Type": "application/json",
}

# DeepL API Key
DEEPL_API_KEY = os.environ.get("DEEPL_API_KEY", "")
def detect_language_with_deepl(text):
    """
    Detect the language of text using DeepL API.

    A short sample of the text is sent to the translate endpoint without a
    source language; the ``detected_source_language`` field of the first
    returned translation is reported as the detected language.

    Args:
        text (str): Text to detect language for. ``None`` is treated as
            empty text.
    Returns:
        tuple: (True, detected_language_code) on success, otherwise
            (False, error_message).
    """
    if not text or not text.strip():
        return False, "Empty text"
    # Use first 100 characters for detection to save API calls. Strip
    # before slicing so leading whitespace cannot consume the whole sample.
    sample_text = text.strip()[:100].strip()
    url = "https://api-free.deepl.com/v2/translate"
    headers = {
        "Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}",
        "Content-Type": "application/x-www-form-urlencoded"
    }
    # Try to translate to Chinese to detect source language
    data = {
        "text": sample_text,
        "target_lang": "ZH"
    }
    try:
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.post(url, headers=headers, data=data, timeout=30)
        if response.status_code != 200:
            return False, f"API error {response.status_code}: {response.text}"
        translations = response.json().get("translations", [])
        if not translations:
            return False, "No detection result"
        return True, translations[0].get("detected_source_language", "")
    except Exception as e:
        # Contract: never raise; report every failure through the tuple.
        return False, f"Exception: {e}"
def translate_text(text, source_lang="ZH", target_lang="EN"):
    """
    Translate text using DeepL API.

    Args:
        text (str): Text to translate. ``None`` is treated as empty text.
        source_lang (str): Source language code
        target_lang (str): Target language code
    Returns:
        tuple: (True, translated_text) on success, otherwise
            (False, error_message).
    """
    if not text or not text.strip():
        return False, "Empty text"
    url = "https://api-free.deepl.com/v2/translate"
    headers = {
        "Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}",
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {
        "text": text,
        "source_lang": source_lang,
        "target_lang": target_lang
    }
    try:
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.post(url, headers=headers, data=data, timeout=30)
        if response.status_code != 200:
            return False, f"API error {response.status_code}: {response.text}"
        translations = response.json().get("translations", [])
        if not translations:
            return False, "No translation returned"
        return True, translations[0].get("text", "")
    except Exception as e:
        # Contract: never raise; report every failure through the tuple.
        return False, f"Exception: {e}"
def get_all_submissions():
    """
    Fetch all submissions from the API with answers expanded.

    Follows the ``next`` link of each page until it is empty. If any page
    fails, the error is printed and the submissions collected so far are
    returned (possibly an empty list).

    Returns:
        list: The ``results`` entries accumulated across all fetched pages.
    """
    all_submissions = []
    url = f"{BASE_URL}submissions/?expand=answers"
    while url:
        try:
            # A timeout keeps a stalled connection from hanging the loop.
            response = requests.get(url, headers=HEADERS, timeout=30)
            response.raise_for_status()
            data = response.json()
            all_submissions.extend(data.get("results", []))
            url = data.get("next")
        except Exception as e:
            print(f"β Error fetching submissions: {e}")
            break
    return all_submissions
def get_submission_by_code(submission_code):
    """
    Fetch a specific submission by its code.

    Args:
        submission_code (str): Code of the submission to fetch.
    Returns:
        list: A one-element list holding the submission on HTTP 200, or an
            empty list when it is not found or any error occurs (the
            reason is printed).
    """
    try:
        url = f"{BASE_URL}submissions/{submission_code}/?expand=answers"
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(url, headers=HEADERS, timeout=30)
        if response.status_code == 200:
            return [response.json()]  # Return as list for consistency
        if response.status_code == 404:
            print(f"β Submission '{submission_code}' not found")
        else:
            print(f"β Error fetching submission: {response.status_code}")
        return []
    except Exception as e:
        print(f"β Error fetching submission: {e}")
        return []
def get_answer_value(answers, question_id):
    """
    Get the answer value for a specific question ID.

    Args:
        answers (list): Answer dicts, each with ``question`` and ``answer``
            keys. ``None`` is treated as an empty list.
        question_id: Value compared against each answer's ``question``.
    Returns:
        str | None: The stripped answer text of the first matching answer
            ("" when that answer is null), or None when no answer matches.
    """
    for answer in answers or []:
        if answer.get("question") == question_id:
            value = answer.get("answer")
            if value is None:
                # A null answer is an unanswered question, not an error.
                return ""
            return str(value).strip()
    return None
def has_answer(answers, question_id):
    """Report whether the given question has a non-empty answer."""
    answer_text = get_answer_value(answers, question_id)
    if answer_text is None:
        # No answer entry exists for this question at all.
        return False
    return answer_text != ""
def create_answer(submission_code, question_id, answer_text, dry_run=False):
    """
    Create an answer for a specific question.

    Args:
        submission_code (str): Code of the submission the answer belongs to.
        question_id (int): ID of the question being answered.
        answer_text (str): Text to store as the answer.
        dry_run (bool): If True, only print a preview (first 50 characters)
            and make no request.
    Returns:
        tuple: (success, message). ``success`` is True for a dry run or an
            HTTP 200/201 response; otherwise the message carries the HTTP
            status and body or the exception text.
    """
    if dry_run:
        preview = (
            answer_text[:50] + "..."
            if len(answer_text) > 50 else answer_text
        )
        print(f" π DRY RUN: Would create Q{question_id} = '{preview}'")
        return True, "Dry run - no actual creation"
    answer_data = {
        "question": question_id,
        "answer": answer_text,
        "submission": submission_code
    }
    try:
        answers_url = f"{BASE_URL}answers/"
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.post(
            answers_url, json=answer_data, headers=HEADERS, timeout=30
        )
        if response.status_code in (200, 201):
            return True, "Success"
        return False, f"HTTP {response.status_code}: {response.text}"
    except Exception as e:
        # Contract: never raise; report every failure through the tuple.
        return False, f"Exception: {e}"
def determine_source_language(presentation_language):
    """Translate a presentation-language answer into a DeepL language code.

    Every known value, and any unknown value, currently resolves to "ZH".
    """
    fallback_code = "ZH"
    # "Others" is assumed to be Chinese for now; could be expanded.
    known_codes = dict.fromkeys(("Mandarin", "Others", "Chinese"), "ZH")
    try:
        return known_codes[presentation_language]
    except KeyError:
        return fallback_code
def _is_english_text(text, label):
    """Return True when DeepL detects *text* as English, logging the result.

    ``label`` is "title" or "abstract" and is only used in the log lines.
    A failed detection is treated as "not English".
    """
    print(f" π Detecting {label} language...")
    success, detected_lang = detect_language_with_deepl(text)
    if success and detected_lang == "EN":
        print(f" π {label.capitalize()} detected as English")
        return True
    if success:
        print(f" π {label.capitalize()} detected as {detected_lang}")
    return False


def process_submission_translations(submission_code=None, dry_run=False):
    """
    Main function to process and translate submissions.

    Steps:
      1. Fetch one submission (by code) or all submissions.
      2. Keep accepted/confirmed, non-English submissions whose translated
         title and/or abstract answer is missing and whose original text is
         non-empty and not already detected as English.
      3. Translate each missing field to English with DeepL and store it as
         an answer (the abstract gets an auto-translation notice appended).
      4. Print a summary.

    Args:
        submission_code (str, optional): Specific submission code to process.
            If None, processes all submissions.
        dry_run (bool): If True, only shows what would be done without
            actually creating answers.
    """
    # NOTE(review): the symbol prefixes in the log strings below look
    # mis-encoded in this copy of the file; they are left as found.
    mode_text = " (DRY RUN MODE)" if dry_run else ""
    if submission_code:
        print(f"π Automatic Translation for Submission: "
              f"{submission_code}{mode_text}")
        print("=" * 65)
        # Step 1: Fetch specific submission
        print(f"π Step 1: Fetching submission {submission_code}...")
        submissions = get_submission_by_code(submission_code)
        if not submissions:
            print("β Submission not found or error occurred")
            return
        print(" Found 1 submission")
    else:
        print(f"π Automatic Translation for Missing Titles and "
              f"Abstracts{mode_text}")
        print("=" * 65)
        # Step 1: Fetch all submissions
        print("π Step 1: Fetching all submissions...")
        submissions = get_all_submissions()
        print(f" Found {len(submissions)} total submissions")

    # Step 2: Filter non-English submissions with missing translations
    print("\nπ Step 2: Filtering submissions that need translation...")
    accepted_states = ("accepted", "confirmed")
    # Compared case-insensitively; "英文" replaces a garbled literal that
    # could never match the real answer text.
    english_values = {"english", "en", "英文"}
    candidates = []
    for submission in submissions:
        code = submission.get("code")
        # `or` also covers keys that are present but null in the payload.
        title = submission.get("title") or ""
        abstract = submission.get("abstract") or ""
        answers = submission.get("answers") or []
        state = submission.get("state", "")
        # Check presentation language
        presentation_language = get_answer_value(
            answers, Q_PRESENTATION_LANGUAGE
        )
        # Debug: Show what language we found
        title_preview = title[:30] + "..." if len(title) > 30 else title
        print(f" π [{code}] State: {state} | Lang: {presentation_language}")
        print(f" Title: {title_preview}")
        # Skip if not accepted/confirmed
        if state not in accepted_states:
            print(f" βοΈ Skipping submission (state: {state})")
            continue
        # Skip if English or no language specified
        if (not presentation_language or
                presentation_language.strip().casefold() in english_values):
            print(" βοΈ Skipping English submission")
            continue
        # A field needs translation only when it has text, has no stored
        # translation yet, and is not already English. The checks
        # short-circuit so DeepL is queried only when the answer matters.
        needs_title_translation = (
            bool(title.strip())
            and not has_answer(answers, Q_TRANSLATED_TITLE)
            and not _is_english_text(title, "title")
        )
        needs_abstract_translation = (
            bool(abstract.strip())
            and not has_answer(answers, Q_TRANSLATED_ABSTRACT)
            and not _is_english_text(abstract, "abstract")
        )
        if needs_title_translation or needs_abstract_translation:
            needs_msg = (f"Title: {needs_title_translation}, "
                         f"Abstract: {needs_abstract_translation}")
            print(f" π Needs translation ({needs_msg})")
            candidates.append({
                "code": code,
                "title": title,
                "abstract": abstract,
                "language": presentation_language,
                "needs_title": needs_title_translation,
                "needs_abstract": needs_abstract_translation
            })
        else:
            print(" β Already has translations or content is English")
    print(f" Found {len(candidates)} submissions needing translation")
    if not candidates:
        print("\nβ All submissions already have translations!")
        return

    # Step 3: Process each candidate
    print("\nπ Step 3: Processing translations...")
    # Appended to every machine-translated abstract before it is stored.
    notice = (
        " (Notice: The English content is automatically "
        "translated and may contain inaccuracies or "
        "misinterpretations. Please refer to the original version "
        "for the most accurate information.)"
    )
    success_count = 0
    for i, candidate in enumerate(candidates, 1):
        code = candidate["code"]
        title = candidate["title"]
        abstract = candidate["abstract"]
        language = candidate["language"]
        print(f"\n{i}. Processing [{code}] ({language})")
        print(f" Title: {title}")
        # Determine source language for DeepL
        source_lang = determine_source_language(language)
        # Translate title if needed
        if candidate["needs_title"]:
            print(" π Translating title...")
            success, translated_title = translate_text(
                title, source_lang, "EN"
            )
            if success:
                success_create, message = create_answer(
                    code, Q_TRANSLATED_TITLE, translated_title, dry_run
                )
                if success_create:
                    print(f" β Title: {translated_title}")
                    success_count += 1
                else:
                    print(f" β Failed to save title: {message}")
            else:
                # On failure the second tuple element is the error message.
                print(f" β Translation failed: {translated_title}")
        # Translate abstract if needed
        if candidate["needs_abstract"]:
            print(" π Translating abstract...")
            success, translated_abstract = translate_text(
                abstract, source_lang, "EN"
            )
            if success:
                success_create, message = create_answer(
                    code, Q_TRANSLATED_ABSTRACT,
                    translated_abstract + notice, dry_run
                )
                if success_create:
                    print(f" β Abstract: {translated_abstract[:50]}...")
                    success_count += 1
                else:
                    print(f" β Failed to save abstract: {message}")
            else:
                print(f" β Translation failed: {translated_abstract}")
        print(" " + "-" * 50)

    # Step 4: Summary
    print("\nπ Translation Summary:")
    print(f" Submissions processed: {len(candidates)}")
    print(f" Successful translations: {success_count}")
    print(" β Translation automation complete!")
def main():
    """
    Main function with command-line argument parsing.

    Exactly one of ``--all`` or ``--code CODE`` is required; ``--dry-run``
    is optional. Any exception raised while processing is printed and the
    process exits with status 1 so callers can detect the failure.
    """
    parser = argparse.ArgumentParser(
        description="Automatically translate missing titles and abstracts "
                    "for non-English submissions"
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--all",
        action="store_true",
        help="Process all submissions"
    )
    group.add_argument(
        "--code",
        type=str,
        help="Process specific submission by code (e.g., ABC123)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without actually creating answers"
    )
    args = parser.parse_args()
    # An empty --code is falsy downstream and would otherwise silently
    # process ALL submissions; reject it up front.
    if not args.all and not (args.code or "").strip():
        parser.error("--code requires a non-empty submission code")
    try:
        if args.all:
            process_submission_translations(dry_run=args.dry_run)
        else:
            process_submission_translations(args.code, dry_run=args.dry_run)
    except Exception as e:
        print(f"β Error: {e}")
        # Signal the failure through the exit status as well as the log.
        raise SystemExit(1)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment