Last active
January 12, 2026 01:35
-
-
Save mthadley/fcab97d78b2dc4dd3741bff2641036af to your computer and use it in GitHub Desktop.
Fetch all of your NetNewsWire feeds, ordered by oldest published, to help with pruning out of date feeds.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # frozen_string_literal: true | |
| require "bundler/inline" | |
| require "stringio" | |
| require "time" | |
| require "zlib" | |
# Inline Gemfile (bundler/inline): installs/loads dependencies at runtime
# so this script stays a single self-contained file.
gemfile do
  source "https://rubygems.org"

  gem "async"      # concurrent per-feed fetching (Async tasks below)
  gem "http"       # HTTP client used by perform_get
  gem "mime-types" # Content-Type normalization (MIME::Type.simplified)
  gem "nokogiri"   # RSS/Atom XML parsing in FeedResult#last_published_at
  gem "brotli"     # decodes "br" Content-Encoding bodies in decode_body
  gem "lipgloss"   # terminal table rendering/styling in display
  gem "time-lord"  # humanized relative timestamps (TimeLord::Period)
end
# Immutable per-feed fetch outcome. `xml` is the decoded response body
# (nil on failure), `error` a human-readable failure message (nil on
# success), and `duration` the fetch time in seconds.
FeedResult = Data.define(:title, :url, :xml, :error, :duration) do
  # Returns the most recent publication Time (UTC) found in the feed, or
  # nil when there is no XML, the XML is blank, or no timestamp parses.
  # Handles both RSS (<item>) and Atom (<entry>); namespaces are stripped
  # first so prefixed elements such as dc:date still match.
  def last_published_at
    return if xml.nil?

    sanitized_xml = sanitize_xml(xml)
    return if sanitized_xml.strip.empty?

    doc = Nokogiri::XML(sanitized_xml)
    doc.remove_namespaces!

    candidates = doc.xpath("//item").flat_map do |item|
      %w[pubDate published updated date].map { |tag| parse_time(item.at_xpath(tag)&.text) }
    end
    candidates += doc.xpath("//entry").flat_map do |entry|
      %w[updated published].map { |tag| parse_time(entry.at_xpath(tag)&.text) }
    end

    candidates.compact.max
  end

  private

  # Coerces a possibly mis-encoded byte string into valid UTF-8; invalid
  # byte sequences become U+FFFD via String#scrub.
  #
  # Fix: the previous version rescued Encoding::UndefinedConversionError /
  # Encoding::InvalidByteSequenceError around force_encoding + scrub, but
  # neither method ever raises those (only String#encode does), so that
  # fallback branch was unreachable dead code. Behavior on all reachable
  # paths is unchanged.
  def sanitize_xml(raw)
    raw.dup.force_encoding(Encoding::UTF_8).scrub
  end

  # Parses a timestamp string into a UTC Time; returns nil for nil/blank
  # or unparseable input (Time.parse raises ArgumentError on garbage).
  def parse_time(value)
    return if value.nil? || value.strip.empty?

    Time.parse(value).utc
  rescue ArgumentError
    nil
  end
end
# Gathers every feed subscribed in NetNewsWire (via AppleScript), fetches
# each one concurrently, and renders a terminal table sorted oldest-first
# by last publication date — feeds at the top are pruning candidates.
class FeedReporter
  USER_AGENT = "NNW Feed Inspector/1.0"
  # Per-feed fetch timeout in seconds (FEED_TIMEOUT env var overrides).
  FEED_TIMEOUT = ENV.fetch("FEED_TIMEOUT", 15).to_f
  # NetNewsWire account to scan; set NNW_ACCOUNT_NAME="" to scan all.
  ACCOUNT_NAME = ENV.fetch("NNW_ACCOUNT_NAME", "iCloud")
  ACCEPT_HEADER = "application/rss+xml, application/atom+xml, application/xml;q=0.9, text/xml;q=0.8, */*;q=0.5"
  ACCEPT_ENCODING_HEADER = "gzip, deflate, br"

  # Entry point: enumerate feeds, fetch them all, print the report.
  # Warns and returns early when NetNewsWire reports no feeds.
  def run
    feeds = fetch_feeds_from_netnewswire
    if feeds.empty?
      warn "No feeds found in NetNewsWire."
      return
    end
    fetch_latest_publications(feeds).then { display(_1) }
  end

  private

  # Builds and runs an AppleScript that walks every matching NetNewsWire
  # account — and each account's folders, recursively — collecting
  # "name<TAB>url" rows, then parses them into { title:, url: } hashes.
  # When a specific account name is configured but missing, the script
  # itself raises inside osascript.
  #
  # NOTE: heredoc bodies are kept at uniform indentation on purpose —
  # <<~ strips the common prefix, so the generated AppleScript text is
  # exactly flush-left either way.
  def fetch_feeds_from_netnewswire
    account_name = ACCOUNT_NAME&.strip
    account_filter = if account_name.nil? || account_name.empty?
      "set matchingAccounts to accounts"
    else
      # Escape embedded double quotes so interpolation into the
      # AppleScript string literal stays well-formed.
      escaped = account_name.gsub("\"", "\\\"")
      <<~FILTER.chomp
        set matchingAccounts to every account whose name is "#{escaped}"
        if (count of matchingAccounts) is 0 then
        error "No NetNewsWire account named #{escaped}."
        end if
      FILTER
    end
    script = <<~APPLESCRIPT
      on collectFeedsFromFolder(aFolder)
      tell application "NetNewsWire"
      set collected to {}
      repeat with f in feeds of aFolder
      set end of collected to (name of f & "\t" & URL of f)
      end repeat
      try
      set subFolders to folders of aFolder
      on error
      set subFolders to {}
      end try
      repeat with subFolder in subFolders
      set collected to collected & my collectFeedsFromFolder(subFolder)
      end repeat
      return collected
      end tell
      end collectFeedsFromFolder
      on collectFeedsFromAccount(anAccount)
      tell application "NetNewsWire"
      set collected to {}
      repeat with f in feeds of anAccount
      set end of collected to (name of f & "\t" & URL of f)
      end repeat
      try
      set accountFolders to folders of anAccount
      on error
      set accountFolders to {}
      end try
      repeat with aFolder in accountFolders
      set collected to collected & my collectFeedsFromFolder(aFolder)
      end repeat
      return collected
      end tell
      end collectFeedsFromAccount
      set feedOutput to {}
      tell application "NetNewsWire"
      #{account_filter}
      repeat with anAccount in matchingAccounts
      set feedOutput to feedOutput & my collectFeedsFromAccount(anAccount)
      end repeat
      end tell
      set AppleScript's text item delimiters to "\n"
      return feedOutput as string
    APPLESCRIPT
    # split("\t", 2) keeps a URL intact even if the feed title parsing
    # leaves extra tabs in the remainder; rows without both fields drop.
    run_applescript(script)
      .split(/\r?\n/)
      .map { _1.split("\t", 2) }
      .select { _1.length == 2 }
      .map do |title, url|
        { title: title.strip, url: url.strip }
      end
  end

  # Pipes the script to `osascript` over stdin and returns its stripped
  # stdout. Raises with the exit status when osascript fails, or with a
  # friendlier message when the binary is absent (non-macOS host).
  def run_applescript(script)
    output = nil
    IO.popen(["osascript"], "r+") do |io|
      io.write(script)
      io.close_write
      output = io.read.to_s
    end
    # $? holds the status of the child reaped by IO.popen's block form.
    raise "osascript exited with status #{$?.exitstatus}" unless $?.success?
    output.strip
  rescue Errno::ENOENT
    raise "osascript not available. Are you running on macOS?"
  end

  # Fetches every feed concurrently (one Async task per feed), each task
  # bounded by FEED_TIMEOUT. Returns an array of FeedResult in the same
  # order as `feeds`; a timed-out fetch becomes an error result rather
  # than propagating.
  def fetch_latest_publications(feeds)
    Async do |task|
      feeds.map do |feed|
        task.async do |subtask|
          subtask.with_timeout(FEED_TIMEOUT) do
            fetch_single_feed(feed)
          end
        rescue Async::TimeoutError => e
          FeedResult.new(
            title: feed[:title],
            url: feed[:url],
            xml: nil,
            error: "Timeout: #{e.message}",
            duration: FEED_TIMEOUT
          )
        end
      end.map(&:wait)
    end.wait
  end

  # Downloads and decodes one feed, wrapping the outcome in a FeedResult.
  # Any StandardError (HTTP failure, decode failure, ...) is captured as
  # an error result instead of raising. Durations use the monotonic clock
  # so wall-clock adjustments cannot skew them.
  def fetch_single_feed(feed)
    start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    body = perform_get(feed[:url]).then { decode_body(_1) }
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
    FeedResult.new(
      title: feed[:title],
      url: feed[:url],
      xml: body,
      error: nil,
      duration: duration
    )
  rescue StandardError => e
    # duration is nil here when the fetch raised before the success-path
    # assignment above ran.
    duration ||= Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
    FeedResult.new(
      title: feed[:title],
      url: feed[:url],
      xml: nil,
      error: e.message,
      duration: duration
    )
  end

  # GETs the feed URL (following up to 5 redirects) and returns the raw
  # http.rb response. Raises on non-success status or on a Content-Type
  # that does not look like a feed.
  def perform_get(url)
    response = HTTP
      .follow(max_hops: 5)
      .timeout(connect: FEED_TIMEOUT, read: FEED_TIMEOUT)
      .headers(
        "User-Agent" => USER_AGENT,
        "Accept" => ACCEPT_HEADER,
        "Accept-Encoding" => ACCEPT_ENCODING_HEADER
      )
      .get(url)
    raise "HTTP #{response.status}" unless response.status.success?
    content_type = response.headers["Content-Type"].to_s
    unless content_type.empty?
      # NOTE(review): MIME::Type.simplified may return nil for headers
      # carrying parameters (e.g. "text/xml; charset=utf-8") — confirm,
      # otherwise the include? calls below raise NoMethodError on nil.
      simplified = MIME::Type.simplified(content_type)
      unless simplified.include?("xml") ||
          simplified.include?("rss") ||
          simplified.include?("atom")
        raise "Unexpected content type #{simplified}"
      end
    end
    response
  end

  # Decompresses the response body according to its Content-Encoding.
  # Compression failures are re-raised as plain RuntimeErrors so callers
  # treat them like any other fetch error.
  #
  # NOTE(review): the when-clauses are substring regex checks, so a
  # stacked header such as "gzip, br" hits only the /br/ branch —
  # confirm single-encoding responses are the only case in practice.
  def decode_body(response)
    encoding = response.headers["Content-Encoding"].to_s.downcase
    body = response.body.to_s
    return body if encoding.empty? || encoding == "identity"
    case encoding
    when /br/
      Brotli.inflate(body)
    when /gzip/
      Zlib::GzipReader.new(StringIO.new(body)).read
    when /deflate/
      Zlib::Inflate.inflate(body)
    else
      raise "Unsupported content encoding #{encoding}"
    end
  rescue Brotli::Error, Zlib::Error => e
    raise "Failed to decode #{encoding}: #{e.message}"
  end

  # Renders the results as a styled table, sorted ascending by last
  # publication time. Feeds with no parseable date sort as epoch 0, which
  # places the most likely dead feeds at the very top.
  def display(results)
    sorted = results.sort_by { _1.last_published_at || Time.at(0) }
    headers = ["Last Published", "Feed Title", "Fetch (s)", "Feed URL"]
    rows = sorted.map do |res|
      duration = res.duration ? format("%.2f", res.duration) : "--"
      timestamp = res.last_published_at ? relative_time(res.last_published_at) : "--"
      title = truncate(res.title, 30)
      # Failed fetches show the error inline next to the URL.
      url_text = res.error ? "#{res.url} (#{res.error})" : res.url
      [
        style(:timestamp).render(timestamp),
        style(:title).render(title),
        style(:duration).render(duration),
        (res.error ? style(:error) : style(:url)).render(url_text)
      ]
    end
    # rows.length + 1 accounts for the header row in the style callback.
    table = Lipgloss::Table.new
      .headers(headers)
      .rows(rows)
      .border(:rounded)
      .style_func(rows: rows.length + 1, columns: headers.length) do |row, _column|
        if row == Lipgloss::Table::HEADER_ROW
          style(:header)
        else
          style(:row)
        end
      end
    puts table.render
  end

  # Truncates text to `length` characters, ending in an ellipsis when
  # anything was cut.
  def truncate(text, length)
    return text if text.length <= length
    text[0, length - 1] + "…"
  end

  # Humanized relative time, e.g. "3 months ago" (time-lord gem).
  def relative_time(time) = TimeLord::Period.new(time, Time.now).to_words

  # Shared Lipgloss styles keyed by role; looked up via #style below.
  STYLES = {
    timestamp: Lipgloss::Style.new.foreground("#8BE9FD"),
    title: Lipgloss::Style.new.bold(true),
    duration: Lipgloss::Style.new.align(:right).foreground("#50FA7B"),
    url: Lipgloss::Style.new,
    error: Lipgloss::Style.new.foreground("#FF6B6B"),
    header: Lipgloss::Style.new.bold(true).foreground("#F8F8F2").background("#44475A").padding(0, 1),
    row: Lipgloss::Style.new.padding(0, 1)
  }.freeze

  # Fetches a style by name, failing loudly on a typo'd key.
  def style(name) = STYLES.fetch(name) { raise KeyError, "Unknown style: #{name}" }
end
| FeedReporter.new.run |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment