Last active
January 12, 2026 01:35
-
-
Save mthadley/fcab97d78b2dc4dd3741bff2641036af to your computer and use it in GitHub Desktop.
Fetch all of your NetNewsWire feeds, ordered by oldest published, to help with pruning out of date feeds.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # frozen_string_literal: true | |
| require "bundler/inline" | |
| require "stringio" | |
| require "time" | |
| require "zlib" | |
# Inline Gemfile (bundler/inline): installs/loads dependencies at runtime
# so this script stays a single self-contained file.
gemfile do
  source "https://rubygems.org"

  gem "async"      # concurrent per-feed fetching (Async tasks below)
  gem "http"       # HTTP client used by perform_get
  gem "mime-types" # Content-Type normalization (MIME::Type.simplified)
  gem "nokogiri"   # RSS/Atom XML parsing in FeedResult#last_published_at
  gem "brotli"     # decodes "br" Content-Encoding bodies in decode_body
  gem "lipgloss"   # terminal table rendering/styling in display
  gem "time-lord"  # humanized relative timestamps (TimeLord::Period)
end
# Immutable per-feed fetch outcome. `xml` is the decoded response body
# (nil on failure), `error` a human-readable failure message (nil on
# success), and `duration` the fetch time in seconds.
FeedResult = Data.define(:title, :url, :xml, :error, :duration) do
  # Returns the most recent publication Time (UTC) found in the feed, or
  # nil when there is no XML, the XML is blank, or no timestamp parses.
  # Handles both RSS (<item>) and Atom (<entry>); namespaces are stripped
  # first so prefixed elements such as dc:date still match.
  def last_published_at
    return if xml.nil?

    sanitized_xml = sanitize_xml(xml)
    return if sanitized_xml.strip.empty?

    doc = Nokogiri::XML(sanitized_xml)
    doc.remove_namespaces!

    candidates = doc.xpath("//item").flat_map do |item|
      %w[pubDate published updated date].map { |tag| parse_time(item.at_xpath(tag)&.text) }
    end
    candidates += doc.xpath("//entry").flat_map do |entry|
      %w[updated published].map { |tag| parse_time(entry.at_xpath(tag)&.text) }
    end

    candidates.compact.max
  end

  private

  # Coerces a possibly mis-encoded byte string into valid UTF-8; invalid
  # byte sequences become U+FFFD via String#scrub.
  #
  # Fix: the previous version rescued Encoding::UndefinedConversionError /
  # Encoding::InvalidByteSequenceError around force_encoding + scrub, but
  # neither method ever raises those (only String#encode does), so that
  # fallback branch was unreachable dead code. Behavior on all reachable
  # paths is unchanged.
  def sanitize_xml(raw)
    raw.dup.force_encoding(Encoding::UTF_8).scrub
  end

  # Parses a timestamp string into a UTC Time; returns nil for nil/blank
  # or unparseable input (Time.parse raises ArgumentError on garbage).
  def parse_time(value)
    return if value.nil? || value.strip.empty?

    Time.parse(value).utc
  rescue ArgumentError
    nil
  end
end
# Gathers every feed subscribed in NetNewsWire (via AppleScript), fetches
# each one concurrently, and renders a terminal table sorted oldest-first
# by last publication date — feeds at the top are pruning candidates.
class FeedReporter
  USER_AGENT = "NNW Feed Inspector/1.0"
  # Per-feed fetch timeout in seconds (FEED_TIMEOUT env var overrides).
  FEED_TIMEOUT = ENV.fetch("FEED_TIMEOUT", 15).to_f
  # NetNewsWire account to scan; set NNW_ACCOUNT_NAME="" to scan all.
  ACCOUNT_NAME = ENV.fetch("NNW_ACCOUNT_NAME", "iCloud")
  ACCEPT_HEADER = "application/rss+xml, application/atom+xml, application/xml;q=0.9, text/xml;q=0.8, */*;q=0.5"
  ACCEPT_ENCODING_HEADER = "gzip, deflate, br"

  # Entry point: enumerate feeds, fetch them all, print the report.
  # Warns and returns early when NetNewsWire reports no feeds.
  def run
    feeds = fetch_feeds_from_netnewswire
    if feeds.empty?
      warn "No feeds found in NetNewsWire."
      return
    end
    fetch_latest_publications(feeds).then { display(_1) }
  end

  private

  # Builds and runs an AppleScript that walks every matching NetNewsWire
  # account — and each account's folders, recursively — collecting
  # "name<TAB>url" rows, then parses them into { title:, url: } hashes.
  # When a specific account name is configured but missing, the script
  # itself raises inside osascript.
  #
  # NOTE: heredoc bodies are kept at uniform indentation on purpose —
  # <<~ strips the common prefix, so the generated AppleScript text is
  # exactly flush-left either way.
  def fetch_feeds_from_netnewswire
    account_name = ACCOUNT_NAME&.strip
    account_filter = if account_name.nil? || account_name.empty?
      "set matchingAccounts to accounts"
    else
      # Escape embedded double quotes so interpolation into the
      # AppleScript string literal stays well-formed.
      escaped = account_name.gsub("\"", "\\\"")
      <<~FILTER.chomp
        set matchingAccounts to every account whose name is "#{escaped}"
        if (count of matchingAccounts) is 0 then
        error "No NetNewsWire account named #{escaped}."
        end if
      FILTER
    end
    script = <<~APPLESCRIPT
      on collectFeedsFromFolder(aFolder)
      tell application "NetNewsWire"
      set collected to {}
      repeat with f in feeds of aFolder
      set end of collected to (name of f & "\t" & URL of f)
      end repeat
      try
      set subFolders to folders of aFolder
      on error
      set subFolders to {}
      end try
      repeat with subFolder in subFolders
      set collected to collected & my collectFeedsFromFolder(subFolder)
      end repeat
      return collected
      end tell
      end collectFeedsFromFolder
      on collectFeedsFromAccount(anAccount)
      tell application "NetNewsWire"
      set collected to {}
      repeat with f in feeds of anAccount
      set end of collected to (name of f & "\t" & URL of f)
      end repeat
      try
      set accountFolders to folders of anAccount
      on error
      set accountFolders to {}
      end try
      repeat with aFolder in accountFolders
      set collected to collected & my collectFeedsFromFolder(aFolder)
      end repeat
      return collected
      end tell
      end collectFeedsFromAccount
      set feedOutput to {}
      tell application "NetNewsWire"
      #{account_filter}
      repeat with anAccount in matchingAccounts
      set feedOutput to feedOutput & my collectFeedsFromAccount(anAccount)
      end repeat
      end tell
      set AppleScript's text item delimiters to "\n"
      return feedOutput as string
    APPLESCRIPT
    # split("\t", 2) keeps a URL intact even if the feed title parsing
    # leaves extra tabs in the remainder; rows without both fields drop.
    run_applescript(script)
      .split(/\r?\n/)
      .map { _1.split("\t", 2) }
      .select { _1.length == 2 }
      .map do |title, url|
        { title: title.strip, url: url.strip }
      end
  end

  # Pipes the script to `osascript` over stdin and returns its stripped
  # stdout. Raises with the exit status when osascript fails, or with a
  # friendlier message when the binary is absent (non-macOS host).
  def run_applescript(script)
    output = nil
    IO.popen(["osascript"], "r+") do |io|
      io.write(script)
      io.close_write
      output = io.read.to_s
    end
    # $? holds the status of the child reaped by IO.popen's block form.
    raise "osascript exited with status #{$?.exitstatus}" unless $?.success?
    output.strip
  rescue Errno::ENOENT
    raise "osascript not available. Are you running on macOS?"
  end

  # Fetches every feed concurrently (one Async task per feed), each task
  # bounded by FEED_TIMEOUT. Returns an array of FeedResult in the same
  # order as `feeds`; a timed-out fetch becomes an error result rather
  # than propagating.
  def fetch_latest_publications(feeds)
    Async do |task|
      feeds.map do |feed|
        task.async do |subtask|
          subtask.with_timeout(FEED_TIMEOUT) do
            fetch_single_feed(feed)
          end
        rescue Async::TimeoutError => e
          FeedResult.new(
            title: feed[:title],
            url: feed[:url],
            xml: nil,
            error: "Timeout: #{e.message}",
            duration: FEED_TIMEOUT
          )
        end
      end.map(&:wait)
    end.wait
  end

  # Downloads and decodes one feed, wrapping the outcome in a FeedResult.
  # Any StandardError (HTTP failure, decode failure, ...) is captured as
  # an error result instead of raising. Durations use the monotonic clock
  # so wall-clock adjustments cannot skew them.
  def fetch_single_feed(feed)
    start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    body = perform_get(feed[:url]).then { decode_body(_1) }
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
    FeedResult.new(
      title: feed[:title],
      url: feed[:url],
      xml: body,
      error: nil,
      duration: duration
    )
  rescue StandardError => e
    # duration is nil here when the fetch raised before the success-path
    # assignment above ran.
    duration ||= Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
    FeedResult.new(
      title: feed[:title],
      url: feed[:url],
      xml: nil,
      error: e.message,
      duration: duration
    )
  end

  # GETs the feed URL (following up to 5 redirects) and returns the raw
  # http.rb response. Raises on non-success status or on a Content-Type
  # that does not look like a feed.
  def perform_get(url)
    response = HTTP
      .follow(max_hops: 5)
      .timeout(connect: FEED_TIMEOUT, read: FEED_TIMEOUT)
      .headers(
        "User-Agent" => USER_AGENT,
        "Accept" => ACCEPT_HEADER,
        "Accept-Encoding" => ACCEPT_ENCODING_HEADER
      )
      .get(url)
    raise "HTTP #{response.status}" unless response.status.success?
    content_type = response.headers["Content-Type"].to_s
    unless content_type.empty?
      # NOTE(review): MIME::Type.simplified may return nil for headers
      # carrying parameters (e.g. "text/xml; charset=utf-8") — confirm,
      # otherwise the include? calls below raise NoMethodError on nil.
      simplified = MIME::Type.simplified(content_type)
      unless simplified.include?("xml") ||
          simplified.include?("rss") ||
          simplified.include?("atom")
        raise "Unexpected content type #{simplified}"
      end
    end
    response
  end

  # Decompresses the response body according to its Content-Encoding.
  # Compression failures are re-raised as plain RuntimeErrors so callers
  # treat them like any other fetch error.
  #
  # NOTE(review): the when-clauses are substring regex checks, so a
  # stacked header such as "gzip, br" hits only the /br/ branch —
  # confirm single-encoding responses are the only case in practice.
  def decode_body(response)
    encoding = response.headers["Content-Encoding"].to_s.downcase
    body = response.body.to_s
    return body if encoding.empty? || encoding == "identity"
    case encoding
    when /br/
      Brotli.inflate(body)
    when /gzip/
      Zlib::GzipReader.new(StringIO.new(body)).read
    when /deflate/
      Zlib::Inflate.inflate(body)
    else
      raise "Unsupported content encoding #{encoding}"
    end
  rescue Brotli::Error, Zlib::Error => e
    raise "Failed to decode #{encoding}: #{e.message}"
  end

  # Renders the results as a styled table, sorted ascending by last
  # publication time. Feeds with no parseable date sort as epoch 0, which
  # places the most likely dead feeds at the very top.
  def display(results)
    sorted = results.sort_by { _1.last_published_at || Time.at(0) }
    headers = ["Last Published", "Feed Title", "Fetch (s)", "Feed URL"]
    rows = sorted.map do |res|
      duration = res.duration ? format("%.2f", res.duration) : "--"
      timestamp = res.last_published_at ? relative_time(res.last_published_at) : "--"
      title = truncate(res.title, 30)
      # Failed fetches show the error inline next to the URL.
      url_text = res.error ? "#{res.url} (#{res.error})" : res.url
      [
        style(:timestamp).render(timestamp),
        style(:title).render(title),
        style(:duration).render(duration),
        (res.error ? style(:error) : style(:url)).render(url_text)
      ]
    end
    # rows.length + 1 accounts for the header row in the style callback.
    table = Lipgloss::Table.new
      .headers(headers)
      .rows(rows)
      .border(:rounded)
      .style_func(rows: rows.length + 1, columns: headers.length) do |row, _column|
        if row == Lipgloss::Table::HEADER_ROW
          style(:header)
        else
          style(:row)
        end
      end
    puts table.render
  end

  # Truncates text to `length` characters, ending in an ellipsis when
  # anything was cut.
  def truncate(text, length)
    return text if text.length <= length
    text[0, length - 1] + "…"
  end

  # Humanized relative time, e.g. "3 months ago" (time-lord gem).
  def relative_time(time) = TimeLord::Period.new(time, Time.now).to_words

  # Shared Lipgloss styles keyed by role; looked up via #style below.
  STYLES = {
    timestamp: Lipgloss::Style.new.foreground("#8BE9FD"),
    title: Lipgloss::Style.new.bold(true),
    duration: Lipgloss::Style.new.align(:right).foreground("#50FA7B"),
    url: Lipgloss::Style.new,
    error: Lipgloss::Style.new.foreground("#FF6B6B"),
    header: Lipgloss::Style.new.bold(true).foreground("#F8F8F2").background("#44475A").padding(0, 1),
    row: Lipgloss::Style.new.padding(0, 1)
  }.freeze

  # Fetches a style by name, failing loudly on a typo'd key.
  def style(name) = STYLES.fetch(name) { raise KeyError, "Unknown style: #{name}" }
end
| FeedReporter.new.run |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment