Skip to content

Instantly share code, notes, and snippets.

@dmgerman
Created December 30, 2025 19:08
Show Gist options
  • Select an option

  • Save dmgerman/9534bf8f1b3733c28d9d433b458b8609 to your computer and use it in GitHub Desktop.

Select an option

Save dmgerman/9534bf8f1b3733c28d9d433b458b8609 to your computer and use it in GitHub Desktop.

important, this should the first thing of the file

;;; -*- lexical-binding: t; -*-

save URL for reading later

Download URL using readability-cli and save as org file to ~/sync/roam/read-later/

(defgroup dmg-read-later nil
  "Customization options for the dmg read-later article capture workflow."
  :group 'applications)

(defcustom dmg-read-later-template "#+STARTUP: showall\n"
  "Template text inserted at the top of read-later files."
  :type 'string
  :group 'dmg-read-later)

(defcustom dmg-read-later-default-topic "general"
  "Default topic or subdirectory used when none is specified."
  :type 'string
  :group 'dmg-read-later)

(defcustom dmg-read-later-summary-heading "Generated Summary"
  "Heading text for the AI-generated summary section."
  :type 'string
  :group 'dmg-read-later)

(defcustom dmg-read-later-summary-prompt
  "Summarize the main points of this article in an itemized form."
  "Prompt used for gptel to generate article summaries."
  :type 'string
  :group 'dmg-read-later)

(defcustom dmg-read-later-allowed-image-extensions
  '("jpg" "jpeg" "png" "gif" "webp" "svg")
  "List of allowed image file extensions to download."
  :type '(repeat string)
  :group 'dmg-read-later)

(defcustom dmg-read-later-max-image-size (* 5 1024 1024)
  "Maximum image file size in bytes."
  :type 'integer
  :group 'dmg-read-later)

(defun dmg-url-for-later--fetch-content (url)
  "Fetch and convert URL content to org format.
Returns a plist with :title and :content keys."
  (let ((title nil)
        (content nil))
    ;; Extract title
    (let ((title-output (with-output-to-string
                          (with-current-buffer standard-output
                            (unless (zerop (call-process "readable" nil t nil
                                                        url "--json" "--properties" "title"))
                              (error "Failed to fetch URL with readable"))))))
      (if (string-match "\"title\":\\s-*\"\\([^\"]+\\)\"" title-output)
          (setq title (match-string 1 title-output))
        (setq title url)))

    ;; Download content
    (setq content
          (with-output-to-string
            (with-current-buffer standard-output
              (let ((readable-exit
                     (call-process "readable" nil '(t nil) nil url)))
                (unless (zerop readable-exit)
                  (error "readable failed with exit code %d" readable-exit)))
              (goto-char (point-min))
              (let ((pandoc-exit
                     (call-process-region (point-min) (point-max)
                                         "pandoc" t t nil
                                         "-f" "html" "-t" "org")))
                (unless (zerop pandoc-exit)
                  (error "pandoc failed with exit code %d" pandoc-exit))))))

    (when (or (not content) (string-empty-p (string-trim content)))
      (error "Downloaded content is empty for URL: %s" url))

    ;; Clean up content
    (setq content (replace-regexp-in-string "<<[^>]+>>\n?" "" content))

    (list :title title :content content)))

(defun dmg-url-for-later--build-filepath (title topic base-dir)
  "Build filepath for article with TITLE and TOPIC in BASE-DIR.
Returns a cons of (article-dir . filepath)."
  (let* ((effective-topic (if (or (not topic) (string-empty-p topic))
                              dmg-read-later-default-topic
                            topic))
         (topic-dir (expand-file-name effective-topic base-dir))
         (clean-title (replace-regexp-in-string "[^[:alnum:]]+" "-" title))
         (short-title (if (> (length clean-title) 50)
                          (substring clean-title 0 50)
                        clean-title))
         (timestamp (format-time-string "%y%m%d-%H%M_"))
         (base-name (concat timestamp short-title))
         (article-dir (expand-file-name base-name topic-dir))
         (filename (concat base-name ".org"))
         (filepath (expand-file-name filename article-dir)))
    (cons article-dir filepath)))

(defun dmg-url-for-later--insert-summary-section (content)
  "Insert summary section into CONTENT if needed.
Returns modified content."
  (replace-regexp-in-string
   "^\\(\\* .*\n\\(?::PROPERTIES:.*\n\\(?:.*\n\\)*?:END:\n\\)?\\)"
   (concat "\\1\n** " dmg-read-later-summary-heading "\n\nGenerating summary...\n\n** Introduction\n\n")
   content
   nil nil 1))

(defun dmg-url-for-later--extract-image-urls (content)
  "Extract image URLs from org CONTENT.
Returns a list of image URLs that match allowed extensions."
  (let ((urls nil))
    (with-temp-buffer
      (insert content)
      (goto-char (point-min))
      ;; Match [[http(s)://...]] style links
      (while (re-search-forward "\\[\\[\\(https?://[^]]+\\)\\]\\]" nil t)
        (let ((url (match-string 1)))
          ;; Check if URL has allowed image extension
          (when (seq-some (lambda (ext)
                            (string-match-p (concat "\\." ext "\\(?:[?#].*\\)?$") url))
                          dmg-read-later-allowed-image-extensions)
            (push url urls)))))
    (nreverse urls)))

(defun dmg-url-for-later--clean-filename (url)
  "Extract and clean filename from URL.
Returns a clean filename or nil if filename is too messy."
  (when (string-match "/\\([^/?#]+\\)\\(?:[?#].*\\)?$" url)
    (let ((filename (match-string 1 url)))
      ;; Consider filename clean if it only contains alphanumeric, dash, underscore, and dot
      (when (string-match-p "^[a-zA-Z0-9._-]+$" filename)
        filename))))

(defun dmg-url-for-later--download-images (image-urls image-dir)
  "Download images from IMAGE-URLS to IMAGE-DIR.
Returns alist mapping old URL to new relative path.
Skips images larger than dmg-read-later-max-image-size."
  (let ((url-mapping nil)
        (counter 0))
    (dolist (url image-urls)
      ;; Check file size first using curl --head
      (let* ((size-output (with-output-to-string
                            (with-current-buffer standard-output
                              (call-process "curl" nil t nil
                                          "--silent" "--head" "--location" url))))
             (content-length (when (string-match "Content-Length:\\s-*\\([0-9]+\\)" size-output)
                              (string-to-number (match-string 1 size-output)))))

        (cond
         ;; Skip if too large
         ((and content-length (> content-length dmg-read-later-max-image-size))
          (message "Skipping large image (%s bytes): %s" content-length url))

         ;; Try to download
         (t
          (setq counter (1+ counter))
          (let* ((clean-name (dmg-url-for-later--clean-filename url))
                 (extension (when (string-match "\\.\\([a-zA-Z0-9]+\\)\\(?:[?#].*\\)?$" url)
                             (match-string 1 url)))
                 (filename (if clean-name
                              clean-name
                            (format "image-%03d.%s" counter (or extension "jpg"))))
                 (filepath (expand-file-name filename image-dir))
                 (download-success nil))

            ;; Download with curl
            (setq download-success
                  (zerop (call-process "curl" nil nil nil
                                      "--silent" "--location" "--output" filepath
                                      "--max-filesize" (number-to-string dmg-read-later-max-image-size)
                                      url)))

            (if download-success
                (progn
                  ;; Store mapping: old URL -> new relative path (same directory)
                  (push (cons url (concat "./" filename)) url-mapping)
                  (message "Downloaded image: %s" filename))
              (message "Failed to download image: %s" url)))))))
    url-mapping))

(defun dmg-url-for-later--replace-image-urls (content url-mapping)
  "Replace image URLs in CONTENT using URL-MAPPING.
URL-MAPPING is an alist of (old-url . new-path)."
  (let ((result content))
    (dolist (mapping url-mapping)
      (let ((old-url (car mapping))
            (new-path (cdr mapping)))
        (setq result (replace-regexp-in-string
                     (regexp-quote (concat "[[" old-url "]]"))
                     (concat "[[" new-path "]]")
                     result
                     t t))))
    result))

(defun dmg-url-for-later--write-file (filepath url domain title topic content template)
  "Write article file at FILEPATH with metadata and CONTENT."
  (let ((download-date (format-time-string "%Y-%m-%d %H:%M:%S")))
    (with-temp-file filepath
      (insert template)
      (insert "#+TITLE: " title "\n")
      (insert "#+URL: " url "\n")
      (insert "#+DOMAIN: " (or domain "unknown") "\n")
      (insert "#+DOWNLOADED: " download-date "\n")
      (insert "#+filetags: :" topic ":\n")
      (insert "\n")
      (insert content))))

(defun dmg-url-for-later (url topic summary-p)
  "Download URL as org file to read-later directory using readability-cli.
URL is the article URL to download.
TOPIC is the category/subdirectory (empty string uses default).
SUMMARY-P if non-nil, generates AI summary using gptel."
  (interactive
   (let* ((base-dir (expand-file-name "~/sync/roam/read-later/"))
          (subdirs (when (file-directory-p base-dir)
                     (seq-filter
                      (lambda (f)
                        (and (file-directory-p (expand-file-name f base-dir))
                             (not (string-prefix-p "." f))))
                      (directory-files base-dir))))
          (url-input (read-string "URL: "))
          (topic-input (completing-read
                        (format "Topic (default: %s): " dmg-read-later-default-topic)
                        subdirs nil nil))
          (summary-input (y-or-n-p "Generate summary with AI? ")))
     (list url-input topic-input summary-input)))

  ;; Check for required tools
  (unless (executable-find "readable")
    (error "readable not found. Install with: npm install -g readability-cli"))
  (unless (executable-find "pandoc")
    (error "pandoc not found. Please install pandoc"))

  ;; Fetch and convert article
  (let* ((base-dir (expand-file-name "~/sync/roam/read-later/"))
         (article (dmg-url-for-later--fetch-content url))
         (title (plist-get article :title))
         (content (plist-get article :content))
         (domain (when (string-match "https?://\\([^/]+\\)" url)
                   (match-string 1 url)))
         (filepath-parts (dmg-url-for-later--build-filepath title topic base-dir))
         (target-dir (car filepath-parts))
         (filepath (cdr filepath-parts))
         (effective-topic (if (or (not topic) (string-empty-p topic))
                              dmg-read-later-default-topic
                            topic)))

    ;; Create article directory
    (unless (file-directory-p target-dir)
      (make-directory target-dir t))

    ;; Download images if present
    (let* ((image-urls (dmg-url-for-later--extract-image-urls content))
           (url-mapping (when image-urls
                         (message "Downloading %d images..." (length image-urls))
                         (dmg-url-for-later--download-images image-urls target-dir))))

      ;; Replace image URLs with local paths if any images were downloaded
      (when url-mapping
        (setq content (dmg-url-for-later--replace-image-urls content url-mapping))
        (message "Downloaded %d images to %s" (length url-mapping) target-dir))

      ;; Insert summary section if requested
      (when summary-p
        (setq content (dmg-url-for-later--insert-summary-section content)))

      ;; Write file
      (dmg-url-for-later--write-file filepath url domain title effective-topic
                                     content dmg-read-later-template)

      ;; Generate summary asynchronously if requested
      (when summary-p
        (dmg-generate-summary-async filepath))

      (message "Saved to: %s" filepath))))

(defun dmg-generate-summary-async (filepath)
  "Generate AI summary for article at FILEPATH using gptel."
  (let* ((buf (find-file-noselect filepath))
         (summary-buf (generate-new-buffer "*gptel-summary*")))
    (with-current-buffer buf
      (goto-char (point-min))
      (if (re-search-forward (concat "^\\*\\* " (regexp-quote dmg-read-later-summary-heading) "\n\n") nil t)
          (let ((placeholder-start (point))
                (article-content (buffer-substring-no-properties
                                  (point-min)
                                  (point-max))))

            ;; Call gptel-request with callback to insert summary
            (gptel-request article-content
                           :system dmg-read-later-summary-prompt
                           :callback
                           (lambda (response info)
                             (if response
                                 (progn
                                   ;; Insert summary into target file
                                   (with-current-buffer buf
                                     (goto-char placeholder-start)
                                     ;; Delete placeholder
                                     (when (looking-at "Generating summary\\.\\.\\.\n\n")
                                       (delete-region (point) (match-end 0)))
                                     ;; Insert the AI response
                                     (insert response "\n\n")
                                     (save-buffer))
                                   (message "Summary generated for: %s" filepath)
                                   ;; Clean up temp buffer
                                   (when (buffer-live-p summary-buf)
                                     (kill-buffer summary-buf)))
                               ;; Error case
                               (progn
                                 (with-current-buffer buf
                                   (goto-char placeholder-start)
                                   (when (looking-at "Generating summary\\.\\.\\.\n\n")
                                     (delete-region (point) (match-end 0)))
                                   (insert "Summary generation failed\n")
                                   (save-buffer))
                                 (when (buffer-live-p summary-buf)
                                   (kill-buffer summary-buf))
                                 (message "Summary generation failed for: %s" filepath))))))
        (message "Could not find summary heading in: %s" filepath)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment