EinLama/csv_groupby.rb

## csv_groupby.rb
#!/usr/bin/env ruby

require "csv"


# Interactively asks the user to pick one of the CSV files in the current
# working directory.
#
# Prints a numbered menu to stdout and reads the answer from standard input.
# Terminates the process via `exit` when there are no CSV files, when the
# input ends without an answer, when the user enters 0 (abort), or when the
# answer is not one of the listed numbers.
#
# @return [String] name of the selected file, relative to the current directory
def list_files_and_select
  # Sorted so the menu numbering is stable regardless of directory order.
  files = Dir.glob("*").select { |f| File.file?(f) && f.end_with?(".csv") }.sort
  if files.empty?
    puts "No CSV files found in the current directory."
    exit
  end

  puts "Select a file by entering its number:"
  puts "0: ABORT without selecting a file"
  files.each_with_index { |file, i| puts "#{i + 1}: #{file}" }

  print "Enter the number of the file: "
  # Read from $stdin explicitly: a bare `gets` reads from ARGF and would try to
  # open leftover command-line arguments as files instead of prompting.
  answer = $stdin.gets
  exit if answer.nil? # end of input: nothing was selected

  # Strict parse: non-numeric input yields nil instead of silently becoming 0.
  choice = Integer(answer.strip, 10, exception: false)
  exit if choice == 0 # explicit abort

  unless choice && choice.between?(1, files.size)
    puts "Invalid selection."
    exit
  end

  files[choice - 1]
end

# Splits a CSV file into one CSV document per distinct value of +column+.
#
# Rows whose "Type" is "Feed" are first relabelled "Feed_<Start Location>" so
# that feeds are grouped per start location. Each group is ordered by its
# "Start" value and every row is passed through transform_row. Nothing is
# written to disk; the generated CSV text is returned to the caller.
#
# Prints an error and terminates the process via `exit` when +column+ is not
# among the file's headers.
#
# @param filename [String] path of the CSV file to read (first line is headers)
# @param column [String] header of the column to group by
# @return [Hash{String => String}] output file name ("<basename>_<value>.csv")
#   mapped to the CSV text of that group
def group_csv_by_column(filename, column)
  rows = CSV.read(filename, headers: true)
  unless rows.headers.include?(column)
    puts "Error: CSV file does not contain a '#{column}' column."
    exit
  end

  # Subgroup by feed subtype (breast or bottle)
  rows.each do |r|
    r["Type"] = "Feed_#{r['Start Location']}" if r["Type"] == "Feed"
  end

  file_base = File.basename(filename, ".csv")
  headers = transform_headers(rows.headers)

  # Group on a file-name-safe form of the value: path separators coming from
  # the data must not turn the output name into a path outside the target
  # directory. Values that differ only by a separator end up in the same file.
  grouped = rows.group_by { |row| row[column].to_s.gsub(%r{[/\\]}, "_") }

  grouped.each_with_object({}) do |(label, group_rows), csvs|
    csvs["#{file_base}_#{label}.csv"] = CSV.generate do |csv|
      csv << headers
      group_rows
        # Oldest to newest, assuming "Start" strings sort chronologically.
        # `to_s` lets rows with a blank Start sort first instead of raising.
        .sort_by { |r| r["Start"].to_s }
        .each { |row| csv << transform_row(row) }
    end
  end
end

# Builds the header row for the generated CSV files: the given headers
# followed by the extra "Start Time" and "End Time" columns.
#
# Returns a new array; the argument is left untouched (so frozen arrays work).
#
# @param headers [Array<String>] headers of the source CSV
# @return [Array<String>] headers of the output CSV
def transform_headers(headers)
  headers + ["Start Time", "End Time"]
end

# Normalises one CSV row for output and returns its field values.
#
# - "Start" / "End" are split at the first run of whitespace: the first part
#   stays in the original column, the remainder goes to "Start Time" /
#   "End Time" (appended to the row if those columns do not exist yet).
# - A trailing "<number>ml" in "End Condition" is reduced to the number.
#
# The row is modified in place.
#
# @param row [CSV::Row] row to transform
# @return [Array] the row's field values, in column order
def transform_row(row)
  if row["Start"]
    # Limit 2 keeps everything after the date together (e.g. "10:00 PM").
    start_date, start_time = row["Start"].split(" ", 2)
    row["Start"] = start_date
    row["Start Time"] = start_time
  elsif !row.header?("Start Time")
    # Always create the column so later appended fields stay aligned with
    # the "Start Time", "End Time" header order.
    row["Start Time"] = nil
  end

  if row["End"]
    end_date, end_time = row["End"].split(" ", 2)
    row["End"] = end_date
    row["End Time"] = end_time
  elsif !row.header?("End Time")
    row["End Time"] = nil
  end

  if (s = row["End Condition"])
    # Remove "ml" suffix for bottle feedings, etc. The optional fraction keeps
    # "12.5ml" as "12.5" instead of truncating it to "5".
    if (match = s.match(/(\d+(?:[.,]\d+)?)ml\z/))
      row["End Condition"] = match[1]
    end
  end

  row.fields
end

############# main #############
# Work from the directory that contains this script, so the file menu and all
# relative paths (including a path given on the command line) resolve there.
Dir.chdir(File.dirname(File.expand_path(__FILE__)))

# Exactly one argument names the input file; otherwise show the menu.
filename = ARGV.size == 1 ? ARGV[0] : list_files_and_select

begin
  csvs = group_csv_by_column(filename, "Type")

  # `out_name` is deliberately distinct from `filename`, which the rescue
  # clause below reports as the input file.
  csvs.each do |out_name, csv|
    File.write(out_name, csv)
    puts "Written file #{out_name}"
  end
rescue Errno::ENOENT
  # Report failures on stderr and with a non-zero status so callers can
  # detect them.
  warn "Error: File '#{filename}' not found."
  exit 1
rescue StandardError => e
  warn "Error reading file: #{e.message}"
  exit 1
end
	#!/usr/bin/env ruby

	require "csv"


	# Interactively asks the user to pick one of the CSV files in the current
	# working directory.
	#
	# Prints a numbered menu to stdout and reads the answer from standard input.
	# Terminates the process via `exit` when there are no CSV files, when the
	# input ends without an answer, when the user enters 0 (abort), or when the
	# answer is not one of the listed numbers.
	#
	# @return [String] name of the selected file, relative to the current directory
	def list_files_and_select
	  # Sorted so the menu numbering is stable regardless of directory order.
	  files = Dir.glob("*").select { |f| File.file?(f) && f.end_with?(".csv") }.sort
	  if files.empty?
	    puts "No CSV files found in the current directory."
	    exit
	  end

	  puts "Select a file by entering its number:"
	  puts "0: ABORT without selecting a file"
	  files.each_with_index { |file, i| puts "#{i + 1}: #{file}" }

	  print "Enter the number of the file: "
	  # Read from $stdin explicitly: a bare `gets` reads from ARGF and would try to
	  # open leftover command-line arguments as files instead of prompting.
	  answer = $stdin.gets
	  exit if answer.nil? # end of input: nothing was selected

	  # Strict parse: non-numeric input yields nil instead of silently becoming 0.
	  choice = Integer(answer.strip, 10, exception: false)
	  exit if choice == 0 # explicit abort

	  unless choice && choice.between?(1, files.size)
	    puts "Invalid selection."
	    exit
	  end

	  files[choice - 1]
	end

	# Splits a CSV file into one CSV document per distinct value of +column+.
	#
	# Rows whose "Type" is "Feed" are first relabelled "Feed_<Start Location>" so
	# that feeds are grouped per start location. Each group is ordered by its
	# "Start" value and every row is passed through transform_row. Nothing is
	# written to disk; the generated CSV text is returned to the caller.
	#
	# Prints an error and terminates the process via `exit` when +column+ is not
	# among the file's headers.
	#
	# @param filename [String] path of the CSV file to read (first line is headers)
	# @param column [String] header of the column to group by
	# @return [Hash{String => String}] output file name ("<basename>_<value>.csv")
	#   mapped to the CSV text of that group
	def group_csv_by_column(filename, column)
	  rows = CSV.read(filename, headers: true)
	  unless rows.headers.include?(column)
	    puts "Error: CSV file does not contain a '#{column}' column."
	    exit
	  end

	  # Subgroup by feed subtype (breast or bottle)
	  rows.each do |r|
	    r["Type"] = "Feed_#{r['Start Location']}" if r["Type"] == "Feed"
	  end

	  file_base = File.basename(filename, ".csv")
	  headers = transform_headers(rows.headers)

	  # Group on a file-name-safe form of the value: path separators coming from
	  # the data must not turn the output name into a path outside the target
	  # directory. Values that differ only by a separator end up in the same file.
	  grouped = rows.group_by { |row| row[column].to_s.gsub(%r{[/\\]}, "_") }

	  grouped.each_with_object({}) do |(label, group_rows), csvs|
	    csvs["#{file_base}_#{label}.csv"] = CSV.generate do |csv|
	      csv << headers
	      group_rows
	        # Oldest to newest, assuming "Start" strings sort chronologically.
	        # `to_s` lets rows with a blank Start sort first instead of raising.
	        .sort_by { |r| r["Start"].to_s }
	        .each { |row| csv << transform_row(row) }
	    end
	  end
	end

	# Builds the header row for the generated CSV files: the given headers
	# followed by the extra "Start Time" and "End Time" columns.
	#
	# Returns a new array; the argument is left untouched (so frozen arrays work).
	#
	# @param headers [Array<String>] headers of the source CSV
	# @return [Array<String>] headers of the output CSV
	def transform_headers(headers)
	  headers + ["Start Time", "End Time"]
	end

	# Normalises one CSV row for output and returns its field values.
	#
	# - "Start" / "End" are split at the first run of whitespace: the first part
	#   stays in the original column, the remainder goes to "Start Time" /
	#   "End Time" (appended to the row if those columns do not exist yet).
	# - A trailing "<number>ml" in "End Condition" is reduced to the number.
	#
	# The row is modified in place.
	#
	# @param row [CSV::Row] row to transform
	# @return [Array] the row's field values, in column order
	def transform_row(row)
	  if row["Start"]
	    # Limit 2 keeps everything after the date together (e.g. "10:00 PM").
	    start_date, start_time = row["Start"].split(" ", 2)
	    row["Start"] = start_date
	    row["Start Time"] = start_time
	  elsif !row.header?("Start Time")
	    # Always create the column so later appended fields stay aligned with
	    # the "Start Time", "End Time" header order.
	    row["Start Time"] = nil
	  end

	  if row["End"]
	    end_date, end_time = row["End"].split(" ", 2)
	    row["End"] = end_date
	    row["End Time"] = end_time
	  elsif !row.header?("End Time")
	    row["End Time"] = nil
	  end

	  if (s = row["End Condition"])
	    # Remove "ml" suffix for bottle feedings, etc. The optional fraction keeps
	    # "12.5ml" as "12.5" instead of truncating it to "5".
	    if (match = s.match(/(\d+(?:[.,]\d+)?)ml\z/))
	      row["End Condition"] = match[1]
	    end
	  end

	  row.fields
	end

	############# main #############
	# Work from the directory that contains this script, so the file menu and all
	# relative paths (including a path given on the command line) resolve there.
	Dir.chdir(File.dirname(File.expand_path(__FILE__)))

	# Exactly one argument names the input file; otherwise show the menu.
	filename = ARGV.size == 1 ? ARGV[0] : list_files_and_select

	begin
	  csvs = group_csv_by_column(filename, "Type")

	  # `out_name` is deliberately distinct from `filename`, which the rescue
	  # clause below reports as the input file.
	  csvs.each do |out_name, csv|
	    File.write(out_name, csv)
	    puts "Written file #{out_name}"
	  end
	rescue Errno::ENOENT
	  # Report failures on stderr and with a non-zero status so callers can
	  # detect them.
	  warn "Error: File '#{filename}' not found."
	  exit 1
	rescue StandardError => e
	  warn "Error reading file: #{e.message}"
	  exit 1
	end
No results found