Last active
November 20, 2025 14:24
-
-
Save dajare/afe80221f6ca4b8b2d309c2f5d2f92ab to your computer and use it in GitHub Desktop.
Convert `NETSCAPE-Bookmark-file-1` to plain text; bash script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env sh
# netscape-to-text.sh -- Convert Netscape (HTML) bookmarks to an indented
# plain-text list.
#
# POSIX sh compatible (no gawk extensions). 2 spaces per nesting level.
#
# Usage:
#   ./netscape-to-text.sh bookmarks.html > bookmarks.txt
#   cat bookmarks.html | ./netscape-to-text.sh
#
# Output (one line per item, indented by 2 spaces per open <DL>):
#   [Folder] NAME     for every <H3 ...>NAME</H3>
#   TITLE - URL       for every <A HREF="URL" ...>TITLE</A>
#                     (only TITLE or only URL if the other is missing)
#
# Exit status: non-zero if the input file cannot be read.

# Print a message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Read Netscape bookmark HTML on stdin and write the indented text list to
# stdout. Tag names are matched case-insensitively. This is a best-effort,
# line-oriented parser: each <H3> or <A> element must sit on a single line.
netscape_to_text() {
  # Stage 1: break adjacent tags ("><") onto separate lines so that, e.g.,
  #          "<DT><A ...>title</A>" yields a line starting at "<A".
  # Stage 2: track <DL> nesting depth and print folders/bookmarks.
  # Stage 3: decode the common HTML entities so the output really is plain
  #          text; "&amp;" goes last so "&amp;lt;" is decoded only once.
  sed -e 's/></>\
</g' | {
    # This group runs as a pipeline stage (subshell), so these variables do
    # not leak into the caller.
    depth=0
    while IFS= read -r line || [ -n "$line" ]; do
      # Trim leading/trailing whitespace with parameter expansion
      # (no extra process per line).
      line=${line#"${line%%[![:space:]]*}"}
      line=${line%"${line##*[![:space:]]}"}

      case $line in
        *"<"[Dd][Ll]">"*)
          depth=$((depth + 1))
          ;;
        *"</"[Dd][Ll]">"*)
          # Never go negative on unbalanced input.
          if [ "$depth" -gt 0 ]; then
            depth=$((depth - 1))
          fi
          ;;
        *"<"[Hh]3*"</"[Hh]"3>"*)
          folder=$(printf '%s\n' "$line" |
            sed -n 's/.*<[Hh]3[^>]*>\(.*\)<\/[Hh]3>.*/\1/p')
          if [ -n "$folder" ]; then
            printf '%*s[Folder] %s\n' "$((depth * 2))" '' "$folder"
          fi
          ;;
        *"<"[Aa]" "*"</"[Aa]">"*)
          # href="..." may appear anywhere among the attributes.
          href=$(printf '%s\n' "$line" |
            sed -n 's/.*<[Aa][^>]*[Hh][Rr][Ee][Ff]="\([^"]*\)"[^>]*>.*<\/[Aa].*/\1/p')
          title=$(printf '%s\n' "$line" |
            sed -n 's/.*<[Aa][^>]*>\([^<]*\)<\/[Aa].*/\1/p')
          # Fallback for single-quoted attribute values: href='...'
          if [ -z "$href" ]; then
            href=$(printf '%s\n' "$line" |
              sed -n 's/.*[Hh][Rr][Ee][Ff]='\''\([^'\'']*\)'\''.*<\/[Aa].*/\1/p')
          fi
          # Print "title - url" if both are present, otherwise whichever
          # exists; print nothing when neither could be extracted.
          if [ -n "$title" ] && [ -n "$href" ]; then
            printf '%*s%s - %s\n' "$((depth * 2))" '' "$title" "$href"
          elif [ -n "$title" ]; then
            printf '%*s%s\n' "$((depth * 2))" '' "$title"
          elif [ -n "$href" ]; then
            printf '%*s%s\n' "$((depth * 2))" '' "$href"
          fi
          ;;
        *)
          # Ignore every other line.
          ;;
      esac
    done
  } | sed -e 's/&lt;/</g' \
    -e 's/&gt;/>/g' \
    -e 's/&quot;/"/g' \
    -e "s/&#39;/'/g" \
    -e "s/&apos;/'/g" \
    -e 's/&amp;/\&/g'
}

# Entry point.
# Arguments: $1 - optional path to the bookmarks file; stdin is read when it
#                 is absent or empty.
main() {
  if [ -z "${1:-}" ]; then
    netscape_to_text
    return
  fi
  # Fail loudly instead of silently producing empty output.
  if [ ! -r "$1" ] || [ -d "$1" ]; then
    die "netscape-to-text: cannot read input file: $1"
  fi
  # Redirect rather than pass the name to sed, so a file name starting with
  # "-" is not mistaken for an option.
  netscape_to_text < "$1"
}

main "$@"
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Save gist to file, e.g.
`bookmarks-to-text.sh`, and make it executable. Run with:
`./bookmarks-to-text.sh bookmarks.html > bookmarks.txt`. N.b. This script was iterated with ChatGPT.