important, this should the first thing of the file
;;; -*- lexical-binding: t; -*-Download URL using readability-cli and save as org file to ~/sync/roam/read-later/
(defgroup dmg-read-later nil
"Customization options for the dmg read-later article capture workflow."
:group 'applications)
(defcustom dmg-read-later-template "#+STARTUP: showall\n"
"Template text inserted at the top of read-later files."
:type 'string
:group 'dmg-read-later)
(defcustom dmg-read-later-default-topic "general"
"Default topic or subdirectory used when none is specified."
:type 'string
:group 'dmg-read-later)
(defcustom dmg-read-later-summary-heading "Generated Summary"
"Heading text for the AI-generated summary section."
:type 'string
:group 'dmg-read-later)
(defcustom dmg-read-later-summary-prompt
"Summarize the main points of this article in an itemized form."
"Prompt used for gptel to generate article summaries."
:type 'string
:group 'dmg-read-later)
(defcustom dmg-read-later-allowed-image-extensions
'("jpg" "jpeg" "png" "gif" "webp" "svg")
"List of allowed image file extensions to download."
:type '(repeat string)
:group 'dmg-read-later)
(defcustom dmg-read-later-max-image-size (* 5 1024 1024)
"Maximum image file size in bytes."
:type 'integer
:group 'dmg-read-later)
(defun dmg-url-for-later--fetch-content (url)
"Fetch and convert URL content to org format.
Returns a plist with :title and :content keys."
(let ((title nil)
(content nil))
;; Extract title
(let ((title-output (with-output-to-string
(with-current-buffer standard-output
(unless (zerop (call-process "readable" nil t nil
url "--json" "--properties" "title"))
(error "Failed to fetch URL with readable"))))))
(if (string-match "\"title\":\\s-*\"\\([^\"]+\\)\"" title-output)
(setq title (match-string 1 title-output))
(setq title url)))
;; Download content
(setq content
(with-output-to-string
(with-current-buffer standard-output
(let ((readable-exit
(call-process "readable" nil '(t nil) nil url)))
(unless (zerop readable-exit)
(error "readable failed with exit code %d" readable-exit)))
(goto-char (point-min))
(let ((pandoc-exit
(call-process-region (point-min) (point-max)
"pandoc" t t nil
"-f" "html" "-t" "org")))
(unless (zerop pandoc-exit)
(error "pandoc failed with exit code %d" pandoc-exit))))))
(when (or (not content) (string-empty-p (string-trim content)))
(error "Downloaded content is empty for URL: %s" url))
;; Clean up content
(setq content (replace-regexp-in-string "<<[^>]+>>\n?" "" content))
(list :title title :content content)))
(defun dmg-url-for-later--build-filepath (title topic base-dir)
"Build filepath for article with TITLE and TOPIC in BASE-DIR.
Returns a cons of (article-dir . filepath)."
(let* ((effective-topic (if (or (not topic) (string-empty-p topic))
dmg-read-later-default-topic
topic))
(topic-dir (expand-file-name effective-topic base-dir))
(clean-title (replace-regexp-in-string "[^[:alnum:]]+" "-" title))
(short-title (if (> (length clean-title) 50)
(substring clean-title 0 50)
clean-title))
(timestamp (format-time-string "%y%m%d-%H%M_"))
(base-name (concat timestamp short-title))
(article-dir (expand-file-name base-name topic-dir))
(filename (concat base-name ".org"))
(filepath (expand-file-name filename article-dir)))
(cons article-dir filepath)))
(defun dmg-url-for-later--insert-summary-section (content)
"Insert summary section into CONTENT if needed.
Returns modified content."
(replace-regexp-in-string
"^\\(\\* .*\n\\(?::PROPERTIES:.*\n\\(?:.*\n\\)*?:END:\n\\)?\\)"
(concat "\\1\n** " dmg-read-later-summary-heading "\n\nGenerating summary...\n\n** Introduction\n\n")
content
nil nil 1))
(defun dmg-url-for-later--extract-image-urls (content)
"Extract image URLs from org CONTENT.
Returns a list of image URLs that match allowed extensions."
(let ((urls nil))
(with-temp-buffer
(insert content)
(goto-char (point-min))
;; Match [[http(s)://...]] style links
(while (re-search-forward "\\[\\[\\(https?://[^]]+\\)\\]\\]" nil t)
(let ((url (match-string 1)))
;; Check if URL has allowed image extension
(when (seq-some (lambda (ext)
(string-match-p (concat "\\." ext "\\(?:[?#].*\\)?$") url))
dmg-read-later-allowed-image-extensions)
(push url urls)))))
(nreverse urls)))
(defun dmg-url-for-later--clean-filename (url)
"Extract and clean filename from URL.
Returns a clean filename or nil if filename is too messy."
(when (string-match "/\\([^/?#]+\\)\\(?:[?#].*\\)?$" url)
(let ((filename (match-string 1 url)))
;; Consider filename clean if it only contains alphanumeric, dash, underscore, and dot
(when (string-match-p "^[a-zA-Z0-9._-]+$" filename)
filename))))
(defun dmg-url-for-later--download-images (image-urls image-dir)
"Download images from IMAGE-URLS to IMAGE-DIR.
Returns alist mapping old URL to new relative path.
Skips images larger than dmg-read-later-max-image-size."
(let ((url-mapping nil)
(counter 0))
(dolist (url image-urls)
;; Check file size first using curl --head
(let* ((size-output (with-output-to-string
(with-current-buffer standard-output
(call-process "curl" nil t nil
"--silent" "--head" "--location" url))))
(content-length (when (string-match "Content-Length:\\s-*\\([0-9]+\\)" size-output)
(string-to-number (match-string 1 size-output)))))
(cond
;; Skip if too large
((and content-length (> content-length dmg-read-later-max-image-size))
(message "Skipping large image (%s bytes): %s" content-length url))
;; Try to download
(t
(setq counter (1+ counter))
(let* ((clean-name (dmg-url-for-later--clean-filename url))
(extension (when (string-match "\\.\\([a-zA-Z0-9]+\\)\\(?:[?#].*\\)?$" url)
(match-string 1 url)))
(filename (if clean-name
clean-name
(format "image-%03d.%s" counter (or extension "jpg"))))
(filepath (expand-file-name filename image-dir))
(download-success nil))
;; Download with curl
(setq download-success
(zerop (call-process "curl" nil nil nil
"--silent" "--location" "--output" filepath
"--max-filesize" (number-to-string dmg-read-later-max-image-size)
url)))
(if download-success
(progn
;; Store mapping: old URL -> new relative path (same directory)
(push (cons url (concat "./" filename)) url-mapping)
(message "Downloaded image: %s" filename))
(message "Failed to download image: %s" url)))))))
url-mapping))
(defun dmg-url-for-later--replace-image-urls (content url-mapping)
"Replace image URLs in CONTENT using URL-MAPPING.
URL-MAPPING is an alist of (old-url . new-path)."
(let ((result content))
(dolist (mapping url-mapping)
(let ((old-url (car mapping))
(new-path (cdr mapping)))
(setq result (replace-regexp-in-string
(regexp-quote (concat "[[" old-url "]]"))
(concat "[[" new-path "]]")
result
t t))))
result))
(defun dmg-url-for-later--write-file (filepath url domain title topic content template)
"Write article file at FILEPATH with metadata and CONTENT."
(let ((download-date (format-time-string "%Y-%m-%d %H:%M:%S")))
(with-temp-file filepath
(insert template)
(insert "#+TITLE: " title "\n")
(insert "#+URL: " url "\n")
(insert "#+DOMAIN: " (or domain "unknown") "\n")
(insert "#+DOWNLOADED: " download-date "\n")
(insert "#+filetags: :" topic ":\n")
(insert "\n")
(insert content))))
(defun dmg-url-for-later (url topic summary-p)
"Download URL as org file to read-later directory using readability-cli.
URL is the article URL to download.
TOPIC is the category/subdirectory (empty string uses default).
SUMMARY-P if non-nil, generates AI summary using gptel."
(interactive
(let* ((base-dir (expand-file-name "~/sync/roam/read-later/"))
(subdirs (when (file-directory-p base-dir)
(seq-filter
(lambda (f)
(and (file-directory-p (expand-file-name f base-dir))
(not (string-prefix-p "." f))))
(directory-files base-dir))))
(url-input (read-string "URL: "))
(topic-input (completing-read
(format "Topic (default: %s): " dmg-read-later-default-topic)
subdirs nil nil))
(summary-input (y-or-n-p "Generate summary with AI? ")))
(list url-input topic-input summary-input)))
;; Check for required tools
(unless (executable-find "readable")
(error "readable not found. Install with: npm install -g readability-cli"))
(unless (executable-find "pandoc")
(error "pandoc not found. Please install pandoc"))
;; Fetch and convert article
(let* ((base-dir (expand-file-name "~/sync/roam/read-later/"))
(article (dmg-url-for-later--fetch-content url))
(title (plist-get article :title))
(content (plist-get article :content))
(domain (when (string-match "https?://\\([^/]+\\)" url)
(match-string 1 url)))
(filepath-parts (dmg-url-for-later--build-filepath title topic base-dir))
(target-dir (car filepath-parts))
(filepath (cdr filepath-parts))
(effective-topic (if (or (not topic) (string-empty-p topic))
dmg-read-later-default-topic
topic)))
;; Create article directory
(unless (file-directory-p target-dir)
(make-directory target-dir t))
;; Download images if present
(let* ((image-urls (dmg-url-for-later--extract-image-urls content))
(url-mapping (when image-urls
(message "Downloading %d images..." (length image-urls))
(dmg-url-for-later--download-images image-urls target-dir))))
;; Replace image URLs with local paths if any images were downloaded
(when url-mapping
(setq content (dmg-url-for-later--replace-image-urls content url-mapping))
(message "Downloaded %d images to %s" (length url-mapping) target-dir))
;; Insert summary section if requested
(when summary-p
(setq content (dmg-url-for-later--insert-summary-section content)))
;; Write file
(dmg-url-for-later--write-file filepath url domain title effective-topic
content dmg-read-later-template)
;; Generate summary asynchronously if requested
(when summary-p
(dmg-generate-summary-async filepath))
(message "Saved to: %s" filepath))))
(defun dmg-generate-summary-async (filepath)
"Generate AI summary for article at FILEPATH using gptel."
(let* ((buf (find-file-noselect filepath))
(summary-buf (generate-new-buffer "*gptel-summary*")))
(with-current-buffer buf
(goto-char (point-min))
(if (re-search-forward (concat "^\\*\\* " (regexp-quote dmg-read-later-summary-heading) "\n\n") nil t)
(let ((placeholder-start (point))
(article-content (buffer-substring-no-properties
(point-min)
(point-max))))
;; Call gptel-request with callback to insert summary
(gptel-request article-content
:system dmg-read-later-summary-prompt
:callback
(lambda (response info)
(if response
(progn
;; Insert summary into target file
(with-current-buffer buf
(goto-char placeholder-start)
;; Delete placeholder
(when (looking-at "Generating summary\\.\\.\\.\n\n")
(delete-region (point) (match-end 0)))
;; Insert the AI response
(insert response "\n\n")
(save-buffer))
(message "Summary generated for: %s" filepath)
;; Clean up temp buffer
(when (buffer-live-p summary-buf)
(kill-buffer summary-buf)))
;; Error case
(progn
(with-current-buffer buf
(goto-char placeholder-start)
(when (looking-at "Generating summary\\.\\.\\.\n\n")
(delete-region (point) (match-end 0)))
(insert "Summary generation failed\n")
(save-buffer))
(when (buffer-live-p summary-buf)
(kill-buffer summary-buf))
(message "Summary generation failed for: %s" filepath))))))
(message "Could not find summary heading in: %s" filepath)))))