Last active
October 26, 2025 13:11
-
-
Save star26bsd/b4b07339238e63b5deb033989d67bf84 to your computer and use it in GitHub Desktop.
Unscrew NDS AWK Export
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # Before running this script | |
| # - export the attendance XLS from the NDS system. | |
| # - convert it to CSV format using LibreOffice or a similar tool, UTF-8 encoded, comma-separated. | |
| # | |
| # DO NOT remove or change data and DO NOT change its name! | |
| # | |
| # Hosted on https://gist.github.com/star26bsd/b4b07339238e63b5deb033989d67bf84 | |
| import csv | |
| import argparse | |
| import os | |
| def transform_attendance_csv(input_file, output_file): | |
| """ | |
| Transforms a human-readable attendance CSV with metadata to a machine-optimized format. | |
| Expected CSV format: | |
| - Row 7 contains "Datum" and the training dates starting from column 5 | |
| - Student data rows have format: Number, Name, Firstname, Function, Age, attendance_data... | |
| - Attendance is marked with 'K', 'J', 'X', or combinations like 'K, J' (only empty cells represent non-attendance) | |
| Args: | |
| input_file (str): Path to the input CSV file | |
| output_file (str): Path to the output CSV file | |
| """ | |
| # Read input from the CSV file | |
| print(f"๐ Reading input file: {input_file}") | |
| with open(input_file, 'r', encoding='utf-8') as f: | |
| reader = list(csv.reader(f)) | |
| print(f" Total rows read: {len(reader)}") | |
| # Extract class number from filename (last number before file extension) | |
| import re | |
| filename = os.path.basename(input_file) | |
| filename_without_ext = os.path.splitext(filename)[0] | |
| # Find all numbers in the filename and take the last one | |
| numbers = re.findall(r'\d+', filename_without_ext) | |
| class_number = numbers[-1] if numbers else "" | |
| print(f"\n๐ข Extracting class number from filename...") | |
| print(f" Filename: {filename}") | |
| print(f" Found numbers: {numbers}") | |
| print(f" Class number: {class_number or 'NOT FOUND'}") | |
| # Extract Angebot number from the first row, second column | |
| angebot_number = "" | |
| print(f"\n๐ Checking for Angebot number...") | |
| if len(reader) > 0 and len(reader[0]) > 1 and reader[0][0] == "Angebot": | |
| angebot_number = reader[0][1].strip() | |
| print(f" Angebot: {angebot_number}") | |
| else: | |
| print(f" Angebot: NOT FOUND (not in expected location)") | |
| # Find the dates row (should be row with "Datum" in first column) | |
| dates = [] | |
| dates_row_index = -1 | |
| print(f"\n๐ Searching for dates row...") | |
| for i, row in enumerate(reader): | |
| if len(row) > 0 and row[0] == "Datum": | |
| # Extract dates starting from column 5 (index 4) | |
| dates = [cell.strip() for cell in row[4:] if cell.strip() and "2025" in str(cell)] | |
| dates_row_index = i | |
| print(f" Found 'Datum' at row {dates_row_index + 1}") | |
| print(f" Extracted {len(dates)} dates: {dates[:3]}{'...' if len(dates) > 3 else ''}") | |
| break | |
| if not dates: | |
| raise ValueError("Could not find dates row. Expected row with 'Datum' in first column.") | |
| # Generate output filename based on class number | |
| if class_number and output_file == 'output.csv': # Only modify if using default filename | |
| output_file = f'output_{class_number}.csv' | |
| output_records = [] | |
| # Process student rows starting after the dates row | |
| in_participants_section = False | |
| students_processed = 0 | |
| students_skipped = 0 | |
| print(f"\n๐ฅ Processing student rows...") | |
| for row_index, student_row in enumerate(reader): | |
| if row_index <= dates_row_index: | |
| continue | |
| if len(student_row) < 6: # Need at least 6 columns for student data | |
| continue | |
| # Check for section headers to track which section we're in | |
| row_content = ''.join(student_row[:3]).strip() | |
| # Check if we're entering the participants section | |
| # Handle both "Teilnehmer/in" and "Teilnehmer/-in" formats | |
| if "Teilnehmer" in row_content and ("/-in" in row_content or "/in" in row_content): | |
| in_participants_section = True | |
| print(f" โก๏ธ Entering PARTICIPANTS section (row {row_index + 1})") | |
| continue | |
| # Check if we're entering the instructors section or any other section | |
| # Handle both "Leiter/in" and "Leiter/-in" formats | |
| is_leiter = "Leiter" in row_content and ("/-in" in row_content or "/in" in row_content) | |
| is_teilnehmer = "Teilnehmer" in row_content and ("/-in" in row_content or "/in" in row_content) | |
| if is_leiter or (row_content.endswith(':') and not is_teilnehmer): | |
| if in_participants_section: | |
| print(f" โฌ ๏ธ Leaving participants section - entering '{row_content}' (row {row_index + 1})") | |
| in_participants_section = False | |
| continue | |
| # Skip if we're not in the participants section | |
| if not in_participants_section: | |
| continue | |
| # Skip section headers, empty rows, and the column header row | |
| if (not row_content or | |
| row_content.endswith(':') or | |
| not any(c.isalpha() for c in row_content) or | |
| (student_row[1] == "Name" and student_row[2] == "Vorname")): # Skip header row | |
| students_skipped += 1 | |
| continue | |
| # Extract student information | |
| # Format: Number, LastName, FirstName, Birthday, Age, attendance_data... | |
| last_name = student_row[1].strip() | |
| first_name = student_row[2].strip() | |
| birthday_raw = student_row[3].strip() if len(student_row) > 3 else "" | |
| attendance_data = student_row[5:] # Attendance starts from column 6 (index 5) | |
| # Convert birthday from DD.MM.YYYY to YYYY-MM-DD format | |
| birthday_formatted = "" | |
| if birthday_raw and "." in birthday_raw: | |
| try: | |
| # Split DD.MM.YYYY and rearrange to YYYY-MM-DD | |
| day, month, year = birthday_raw.split(".") | |
| birthday_formatted = f"{year}-{month.zfill(2)}-{day.zfill(2)}" | |
| except ValueError: | |
| print(f" โ ๏ธ Birthday parsing failed for '{birthday_raw}' on row {row_index + 1}") | |
| birthday_formatted = "" # If parsing fails, leave empty | |
| # Skip if no name data | |
| if not last_name and not first_name: | |
| students_skipped += 1 | |
| continue | |
| # Check attendance for each date | |
| attendance_count_for_student = 0 | |
| for i, date in enumerate(dates): | |
| if i < len(attendance_data): | |
| attendance = attendance_data[i].strip() | |
| # Check if attendance contains 'K', 'J', or 'X' (only empty cells represent non-attendance) | |
| if attendance and ('K' in attendance or 'J' in attendance or 'X' in attendance): | |
| output_records.append([class_number, date, f"{first_name} {last_name}", birthday_formatted]) | |
| attendance_count_for_student += 1 | |
| students_processed += 1 | |
| if attendance_count_for_student > 0: | |
| print(f" โ {first_name} {last_name}: {attendance_count_for_student} attendance records") | |
| # Progress update every 10 students | |
| if students_processed % 10 == 0: | |
| print(f" ๐ Processed {students_processed} students so far...") | |
| # Write to the new CSV file | |
| with open(output_file, 'w', newline='', encoding='utf-8') as f: | |
| writer = csv.writer(f) | |
| writer.writerow(['class_number', 'training_date', 'student_name', 'birthday']) | |
| writer.writerows(output_records) | |
| print(f"\n" + "="*60) | |
| print(f"๐ TRANSFORMATION SUMMARY") | |
| print(f"="*60) | |
| print(f" Class number: {class_number}") | |
| print(f" Training dates found: {len(dates)}") | |
| print(f" Students processed: {students_processed}") | |
| print(f" Students skipped: {students_skipped}") | |
| print(f" Total attendance records: {len(output_records)}") | |
| print(f" Output file: {output_file}") | |
| print(f"="*60) | |
| print(f"\nTransformation complete! {len(output_records)} attendance records written to {output_file}") | |
| # Return the actual output filename used | |
| return output_file | |
| # Main execution | |
| if __name__ == "__main__": | |
| # Set up command line argument parsing | |
| parser = argparse.ArgumentParser(description='Transform attendance CSV from human-readable to machine-optimized format.') | |
| parser.add_argument('-f', '--file', | |
| required=True, | |
| help='Input CSV file path (required)') | |
| parser.add_argument('-o', '--output', | |
| default='output.csv', | |
| help='Output CSV file path (default: output.csv, will be modified to include Angebot number)') | |
| args = parser.parse_args() | |
| input_filename = args.file | |
| output_filename = args.output | |
| try: | |
| actual_output_filename = transform_attendance_csv(input_filename, output_filename) | |
| print(f"Successfully transformed {input_filename} to {actual_output_filename}") | |
| except FileNotFoundError: | |
| print(f"Error: Could not find input file '{input_filename}'") | |
| print("Please ensure the CSV file exists and the path is correct.") | |
| print("Available CSV files in current directory:") | |
| # Try to find available CSV files | |
| csv_files = [f for f in os.listdir('.') if f.endswith('.csv')] | |
| for csv_file in csv_files: | |
| print(f" - {csv_file}") | |
| except Exception as e: | |
| print(f"An error occurred: {e}") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment