Skip to content

Instantly share code, notes, and snippets.

@star26bsd
Last active October 26, 2025 13:11
Show Gist options
  • Select an option

  • Save star26bsd/b4b07339238e63b5deb033989d67bf84 to your computer and use it in GitHub Desktop.

Select an option

Save star26bsd/b4b07339238e63b5deb033989d67bf84 to your computer and use it in GitHub Desktop.
Unscrew NDS AWK Export
#!/usr/bin/env python3
# Before running this script
# - export the attendance XLS from the NDS system.
# - convert it to CSV format using LibreOffice or a similar tool, UTF-8 encoded, comma-separated.
#
# DO NOT remove or change data and DO NOT change its name!
#
# Hosted on https://gist.github.com/star26bsd/b4b07339238e63b5deb033989d67bf84
import csv
import argparse
import os
def transform_attendance_csv(input_file, output_file):
"""
Transforms a human-readable attendance CSV with metadata to a machine-optimized format.
Expected CSV format:
- Row 7 contains "Datum" and the training dates starting from column 5
- Student data rows have format: Number, Name, Firstname, Function, Age, attendance_data...
- Attendance is marked with 'K', 'J', 'X', or combinations like 'K, J' (only empty cells represent non-attendance)
Args:
input_file (str): Path to the input CSV file
output_file (str): Path to the output CSV file
"""
# Read input from the CSV file
print(f"๐Ÿ“– Reading input file: {input_file}")
with open(input_file, 'r', encoding='utf-8') as f:
reader = list(csv.reader(f))
print(f" Total rows read: {len(reader)}")
# Extract class number from filename (last number before file extension)
import re
filename = os.path.basename(input_file)
filename_without_ext = os.path.splitext(filename)[0]
# Find all numbers in the filename and take the last one
numbers = re.findall(r'\d+', filename_without_ext)
class_number = numbers[-1] if numbers else ""
print(f"\n๐Ÿ”ข Extracting class number from filename...")
print(f" Filename: {filename}")
print(f" Found numbers: {numbers}")
print(f" Class number: {class_number or 'NOT FOUND'}")
# Extract Angebot number from the first row, second column
angebot_number = ""
print(f"\n๐Ÿ“‹ Checking for Angebot number...")
if len(reader) > 0 and len(reader[0]) > 1 and reader[0][0] == "Angebot":
angebot_number = reader[0][1].strip()
print(f" Angebot: {angebot_number}")
else:
print(f" Angebot: NOT FOUND (not in expected location)")
# Find the dates row (should be row with "Datum" in first column)
dates = []
dates_row_index = -1
print(f"\n๐Ÿ“… Searching for dates row...")
for i, row in enumerate(reader):
if len(row) > 0 and row[0] == "Datum":
# Extract dates starting from column 5 (index 4)
dates = [cell.strip() for cell in row[4:] if cell.strip() and "2025" in str(cell)]
dates_row_index = i
print(f" Found 'Datum' at row {dates_row_index + 1}")
print(f" Extracted {len(dates)} dates: {dates[:3]}{'...' if len(dates) > 3 else ''}")
break
if not dates:
raise ValueError("Could not find dates row. Expected row with 'Datum' in first column.")
# Generate output filename based on class number
if class_number and output_file == 'output.csv': # Only modify if using default filename
output_file = f'output_{class_number}.csv'
output_records = []
# Process student rows starting after the dates row
in_participants_section = False
students_processed = 0
students_skipped = 0
print(f"\n๐Ÿ‘ฅ Processing student rows...")
for row_index, student_row in enumerate(reader):
if row_index <= dates_row_index:
continue
if len(student_row) < 6: # Need at least 6 columns for student data
continue
# Check for section headers to track which section we're in
row_content = ''.join(student_row[:3]).strip()
# Check if we're entering the participants section
# Handle both "Teilnehmer/in" and "Teilnehmer/-in" formats
if "Teilnehmer" in row_content and ("/-in" in row_content or "/in" in row_content):
in_participants_section = True
print(f" โžก๏ธ Entering PARTICIPANTS section (row {row_index + 1})")
continue
# Check if we're entering the instructors section or any other section
# Handle both "Leiter/in" and "Leiter/-in" formats
is_leiter = "Leiter" in row_content and ("/-in" in row_content or "/in" in row_content)
is_teilnehmer = "Teilnehmer" in row_content and ("/-in" in row_content or "/in" in row_content)
if is_leiter or (row_content.endswith(':') and not is_teilnehmer):
if in_participants_section:
print(f" โฌ…๏ธ Leaving participants section - entering '{row_content}' (row {row_index + 1})")
in_participants_section = False
continue
# Skip if we're not in the participants section
if not in_participants_section:
continue
# Skip section headers, empty rows, and the column header row
if (not row_content or
row_content.endswith(':') or
not any(c.isalpha() for c in row_content) or
(student_row[1] == "Name" and student_row[2] == "Vorname")): # Skip header row
students_skipped += 1
continue
# Extract student information
# Format: Number, LastName, FirstName, Birthday, Age, attendance_data...
last_name = student_row[1].strip()
first_name = student_row[2].strip()
birthday_raw = student_row[3].strip() if len(student_row) > 3 else ""
attendance_data = student_row[5:] # Attendance starts from column 6 (index 5)
# Convert birthday from DD.MM.YYYY to YYYY-MM-DD format
birthday_formatted = ""
if birthday_raw and "." in birthday_raw:
try:
# Split DD.MM.YYYY and rearrange to YYYY-MM-DD
day, month, year = birthday_raw.split(".")
birthday_formatted = f"{year}-{month.zfill(2)}-{day.zfill(2)}"
except ValueError:
print(f" โš ๏ธ Birthday parsing failed for '{birthday_raw}' on row {row_index + 1}")
birthday_formatted = "" # If parsing fails, leave empty
# Skip if no name data
if not last_name and not first_name:
students_skipped += 1
continue
# Check attendance for each date
attendance_count_for_student = 0
for i, date in enumerate(dates):
if i < len(attendance_data):
attendance = attendance_data[i].strip()
# Check if attendance contains 'K', 'J', or 'X' (only empty cells represent non-attendance)
if attendance and ('K' in attendance or 'J' in attendance or 'X' in attendance):
output_records.append([class_number, date, f"{first_name} {last_name}", birthday_formatted])
attendance_count_for_student += 1
students_processed += 1
if attendance_count_for_student > 0:
print(f" โœ“ {first_name} {last_name}: {attendance_count_for_student} attendance records")
# Progress update every 10 students
if students_processed % 10 == 0:
print(f" ๐Ÿ“Š Processed {students_processed} students so far...")
# Write to the new CSV file
with open(output_file, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(['class_number', 'training_date', 'student_name', 'birthday'])
writer.writerows(output_records)
print(f"\n" + "="*60)
print(f"๐Ÿ“Š TRANSFORMATION SUMMARY")
print(f"="*60)
print(f" Class number: {class_number}")
print(f" Training dates found: {len(dates)}")
print(f" Students processed: {students_processed}")
print(f" Students skipped: {students_skipped}")
print(f" Total attendance records: {len(output_records)}")
print(f" Output file: {output_file}")
print(f"="*60)
print(f"\nTransformation complete! {len(output_records)} attendance records written to {output_file}")
# Return the actual output filename used
return output_file
# Main execution
if __name__ == "__main__":
# Set up command line argument parsing
parser = argparse.ArgumentParser(description='Transform attendance CSV from human-readable to machine-optimized format.')
parser.add_argument('-f', '--file',
required=True,
help='Input CSV file path (required)')
parser.add_argument('-o', '--output',
default='output.csv',
help='Output CSV file path (default: output.csv, will be modified to include Angebot number)')
args = parser.parse_args()
input_filename = args.file
output_filename = args.output
try:
actual_output_filename = transform_attendance_csv(input_filename, output_filename)
print(f"Successfully transformed {input_filename} to {actual_output_filename}")
except FileNotFoundError:
print(f"Error: Could not find input file '{input_filename}'")
print("Please ensure the CSV file exists and the path is correct.")
print("Available CSV files in current directory:")
# Try to find available CSV files
csv_files = [f for f in os.listdir('.') if f.endswith('.csv')]
for csv_file in csv_files:
print(f" - {csv_file}")
except Exception as e:
print(f"An error occurred: {e}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment