Last active
February 23, 2026 15:20
-
-
Save EinLama/455cd7ce3772755e4532138524f596fc to your computer and use it in GitHub Desktop.
Group CSV file by column name
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| require "csv" | |
# Interactively asks the user to pick one of the CSV files in the current
# directory.
#
# Prints a numbered menu (0 aborts) and reads the answer from standard input.
# Exits the process when there are no CSV files, when the user aborts (0 or
# end of input), or when the answer is not one of the listed numbers.
#
# @return [String] name of the selected CSV file
def list_files_and_select
  # Sorted so the menu numbering does not depend on directory order.
  files = Dir.glob("*").select { |f| File.file?(f) && f.end_with?(".csv") }.sort

  if files.empty?
    puts "No CSV files found in the current directory."
    exit
  end

  puts "Select a file by entering its number:"
  puts "0: ABORT without selecting a file"
  files.each_with_index { |file, i| puts "#{i + 1}: #{file}" }
  print "Enter the number of the file: "

  # Read from $stdin explicitly: a bare `gets` reads from the files named in
  # ARGV whenever extra command-line arguments are present.
  answer = $stdin.gets
  exit if answer.nil? # end of input counts as an abort

  # Require a plain non-negative integer; `to_i` alone would silently turn
  # typos such as "abc" into 0 (abort) and "2x" into 2.
  answer = answer.strip
  unless answer.match?(/\A\d+\z/)
    puts "Invalid selection."
    exit
  end

  choice = answer.to_i
  exit if choice.zero?

  if choice > files.size
    puts "Invalid selection."
    exit
  end

  files[choice - 1]
end
# Splits a CSV file into one CSV document per distinct value of +column+.
#
# Rows whose "Type" is "Feed" are first relabelled "Feed_<Start Location>" so
# that each feed subtype ends up in its own group. Within a group, rows are
# ordered by their "Start" value (plain string comparison); rows with an empty
# "Start" sort first and rows with equal "Start" keep their file order.
#
# Prints an error and exits the process if the file has no +column+ header.
#
# @param filename [String] path of the CSV file to read (first line is the header)
# @param column [String] header name to group by
# @return [Hash{String => String}] generated CSV text keyed by output filename
#   ("<input basename>_<group value>.csv")
def group_csv_by_column(filename, column)
  rows = CSV.read(filename, headers: true)

  unless rows.headers.include?(column)
    puts "Error: CSV file does not contain a '#{column}' column."
    exit
  end

  # Subgroup by feed subtype (breast or bottle)
  rows.each do |r|
    r["Type"] = "Feed_#{r['Start Location']}" if r["Type"] == "Feed"
  end

  file_base = File.basename(filename, ".csv")
  headers = transform_headers(rows.headers)

  rows.group_by { |row| row[column] }.each_with_object({}) do |(group_value, group_rows), csvs|
    csvs["#{file_base}_#{group_value}.csv"] = CSV.generate do |csv|
      csv << headers
      group_rows
        .each_with_index
        # reorder oldest to newest; `to_s` keeps rows with an empty Start from
        # raising on nil-vs-String comparison, and the index makes ties stable
        .sort_by { |row, index| [row["Start"].to_s, index] }
        .each { |row, _index| csv << transform_row(row) }
    end
  end
end
# Builds the output header row: the given headers followed by the two derived
# time columns, "Start Time" and "End Time".
#
# Returns a new array, so the caller's +headers+ is left untouched and frozen
# arrays are accepted.
#
# @param headers [Array<String>] header names of the input CSV
# @return [Array<String>] +headers+ plus "Start Time" and "End Time"
def transform_headers(headers)
  headers + ["Start Time", "End Time"]
end
# Rewrites a row in place for output and returns its field values.
#
# - "Start" and "End" are split at the first run of whitespace: the first part
#   stays in the original column, the remainder goes to "Start Time" /
#   "End Time".
# - A trailing "ml" unit is stripped from a numeric "End Condition".
#
# @param row [CSV::Row] row to transform (mutated)
# @return [Array] the row's field values, with "Start Time" and "End Time" last
#   when those headers were not already present
def transform_row(row)
  # Both time fields are assigned unconditionally (nil when the source value
  # is missing) so every row has the same number of fields and "End Time"
  # never slides into the "Start Time" position.
  [["Start", "Start Time"], ["End", "End Time"]].each do |source, time_header|
    # Limit of 2 keeps everything after the date together, e.g. "10:00 PM".
    date, time = row[source]&.split(" ", 2)
    row[source] = date if row[source]
    row[time_header] = time
  end

  # Remove "ml" suffix for bottle feedings, etc. The optional fractional part
  # keeps "120.5ml" as "120.5" rather than truncating it to "5".
  if (match = row["End Condition"]&.match(/(\d+(?:[.,]\d+)?)ml\z/))
    row["End Condition"] = match[1]
  end

  row.fields
end
| ############# main ############# | |
# Run from the script's own directory so that CSV discovery and the output
# files are relative to where the script lives.
Dir.chdir(File.dirname(File.expand_path(__FILE__)))

# Exactly one argument names the input file; otherwise ask interactively.
filename = ARGV.size == 1 ? ARGV[0] : list_files_and_select

# Phase 1: read and group the input. Only failures from this phase are
# reported as problems with the input file.
csvs = begin
  group_csv_by_column(filename, "Type")
rescue Errno::ENOENT
  puts "Error: File '#{filename}' not found."
  exit
rescue StandardError => e
  puts "Error reading file: #{e.message}"
  exit
end

# Phase 2: write one file per group. Handled separately so a write failure is
# not misreported as the input file being missing or unreadable. The block
# parameter is named differently from the input `filename` to avoid shadowing.
begin
  csvs.each do |output_name, csv|
    File.write(output_name, csv)
    puts "Written file #{output_name}"
  end
rescue StandardError => e
  puts "Error writing file: #{e.message}"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment