Last active
January 24, 2026 16:23
-
-
Save ruario/5dc38017ee10f344e1cff3fa2e44782e to your computer and use it in GitHub Desktop.
A fast, simple and small, streaming archiver
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # | |
| # A fast, simple and small, streaming archiver. | |
| # | |
| # Usage is similar to cpio but without path length limits, file size limits, | |
| # or complex metadata handling. It is intentionally minimal and only supports | |
| # regular files and symbolic links. It restores modification times and | |
| # permissions for regular files. | |
| # | |
| # Modes: | |
| # | |
| # -Archive files- | |
| # | |
| # a (or c) | |
| # • Reads file paths from stdin (one per line) | |
| # • Writes an archive to stdout | |
| # • Stores regular files, their mtimes, sizes and permissions | |
| # • Stores symbolic links | |
| # | |
| # Examples: | |
| # find directory | clump a > archive.clp | |
| # ls image.png image.jpg | clump a > images.clp | |
| # ls *.txt | clump a | gzip > textfiles.clp.gz | |
| # | |
| # Note: Unlike cpio or tar, you can just cat archives together or append | |
| # with a new `clump a >>` command and they are still valid archives. Better | |
| # yet if you compress them with a compression format that supports | |
| # concatenation itself (e.g. xz), then multiple compressed archives combined | |
| # (or appended) make one valid compressed archive. \o/ | |
| # | |
| # -Extract all files from the archive- | |
| # | |
| # e (or x) | |
| # • Reads an archive from stdin | |
| # • Recreates files and symlinks | |
| # • Restores modification times and permissions for regular files | |
| # | |
| # Example: | |
| # clump e < archive.clp | |
| # | |
| # -List the archive contents- | |
| # | |
| # l | |
| # • Reads an archive from stdin | |
| # • Prints <timestamp> <path> or <timestamp> <path> -> <target> | |
| # • For symlinks, the timestamp is the current time (no mtime stored) | |
| # | |
| # Example: | |
| # clump l < archive.clp | |
| # | |
| # | |
| # Archive entry format (every field except FILE-CONTENTS is NUL-terminated): | |
| # | |
| # Regular files: | |
| # F\0PATH\0MODIFICATION-TIME\0SIZE\0PERMISSIONS\0FILE-CONTENTS | |
| # | |
| # Symbolic links: | |
| # L\0PATH\0TARGET\0 | |
| # | |
| # Notes: | |
| # • Leading slashes are stripped from absolute paths, so entries are stored and extracted as relative paths. | |
| # • No special file types (devices, FIFOs, sockets) are supported. | |
| # • Directories are not recorded but created automatically when extracting. | |
| # • No random access: extraction is always the entire archive. | |
| # • This implementation will not work on macOS/BSD because it uses | |
| # GNU-specific switches for various tools. In addition, it relies on how | |
| # Linux handles piping, which differs slightly from what POSIX specifies. | |
| # The idea would still work if reimplemented in another language, such as perl or python. | |
# Strict mode: abort on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# The operating mode is the first command-line argument; it is left empty
# when no argument was given.
mode=${1:-}
#######################################
# Archive mode: turn a list of paths into an archive stream.
# Reads one path per line from stdin and writes one entry per symbolic link
# or regular file to stdout:
#   symbolic link: L\0PATH\0TARGET\0
#   regular file:  F\0PATH\0MTIME\0SIZE\0PERMISSIONS\0 followed by SIZE bytes
# Directories are skipped silently. Other non-regular files (FIFOs, devices,
# sockets) are skipped with a warning instead of being handed to `cat`,
# whose output for them would not match the size recorded by `stat`.
# Arguments: none
# Outputs:   archive on stdout, diagnostics on stderr
# Returns:   non-zero if a listed path is missing, unreadable, or cannot be
#            examined with stat
#######################################
archive() {
  local orig_path stored_path link_target mtime size perm

  # The `|| [[ -n ... ]]` part keeps a final path that has no trailing newline.
  while IFS= read -r orig_path || [[ -n "$orig_path" ]]; do
    [[ -n "$orig_path" ]] || continue

    # Remove every leading slash ("${var##/}" would only remove one), so the
    # stored path is always relative.
    stored_path=$orig_path
    while [[ "$stored_path" == /* ]]; do
      stored_path=${stored_path#/}
    done

    # Symlinks first (including symlinks to directories). They only need the
    # path and the target, each terminated by a NUL.
    if [[ -h "$orig_path" ]]; then
      link_target=$(readlink -- "$orig_path")
      printf '%s\0' L "$stored_path" "$link_target"
      continue
    fi

    # Real directories are not recorded (symlinks were handled above).
    if [[ -d "$orig_path" ]]; then
      continue
    fi

    if [[ ! -f "$orig_path" ]]; then
      if [[ -e "$orig_path" ]]; then
        printf "Skipping '%s': not a regular file or symbolic link.\n" \
          "$orig_path" >&2
        continue
      fi
      printf "Cannot archive '%s': no such file or not accessible.\n" \
        "$orig_path" >&2
      return 1
    fi

    # Check readability before emitting the header, so a failing `cat` cannot
    # leave a header without its contents in the stream.
    if [[ ! -r "$orig_path" ]]; then
      printf "Cannot archive '%s': file is not readable.\n" "$orig_path" >&2
      return 1
    fi

    if ! read -r mtime size perm < <(stat -c '%Y %s %a' -- "$orig_path"); then
      printf "Cannot archive '%s': stat failed.\n" "$orig_path" >&2
      return 1
    fi

    # Metadata, then the raw contents. No terminating NUL is needed after the
    # contents because the exact size is recorded in the header.
    printf '%s\0' F "$stored_path" "$mtime" "$size" "$perm"
    cat -- "$orig_path"
  done
}
#######################################
# Extract mode: recreate every entry of the archive read from stdin,
# relative to the current directory.
# Entry layout (each header field is NUL-terminated):
#   L PATH TARGET                      -> symbolic link
#   F PATH MTIME SIZE PERMISSIONS DATA -> regular file of SIZE bytes
# Parent directories are created as needed. Regular files get their
# modification time and permissions restored.
# NOTE: only leading slashes are removed from stored paths; ".." components
# and previously extracted symlinks are not filtered, so extract only
# archives you trust.
# Arguments: none
# Outputs:   files and symlinks on disk, diagnostics on stderr
# Returns:   exits with status 1 on an unknown entry type, a truncated
#            header, an empty path, or a SIZE that is not a plain decimal
#            number
#######################################
extract() {
  local type stored_path link_target mtime size perm

  # Each entry starts with its type: "F" = file, "L" = symlink. A failed
  # read here means there is nothing left.
  while IFS= read -r -d '' type; do
    if ! IFS= read -r -d '' stored_path; then
      echo "Unexpected end of archive. Archive corrupted?" >&2
      exit 1
    fi

    # Remove every leading slash ("${var##/}" would only remove one), so
    # nothing is written to an absolute location.
    while [[ "$stored_path" == /* ]]; do
      stored_path=${stored_path#/}
    done
    if [[ -z "$stored_path" ]]; then
      echo "Empty path in archive. Archive corrupted?" >&2
      exit 1
    fi

    case "$type" in
      L)
        # For symlinks we only need where the path points.
        if ! IFS= read -r -d '' link_target; then
          echo "Unexpected end of archive. Archive corrupted?" >&2
          exit 1
        fi
        mkdir -p -- "$(dirname -- "$stored_path")"
        ln -fsn -- "$link_target" "$stored_path"
        ;;
      F)
        # For files: modification time, size and permissions.
        if ! { IFS= read -r -d '' mtime \
          && IFS= read -r -d '' size \
          && IFS= read -r -d '' perm; }; then
          echo "Unexpected end of archive. Archive corrupted?" >&2
          exit 1
        fi
        # `head -c` gives a leading "-" and unit suffixes special meaning,
        # so accept plain decimal digits only.
        if [[ ! "$size" =~ ^[0-9]+$ ]]; then
          echo "Invalid size '$size' for '$stored_path'. Archive corrupted?" >&2
          exit 1
        fi
        # Make a directory to house the file.
        mkdir -p -- "$(dirname -- "$stored_path")"
        # The contents are exactly the next SIZE bytes of the stream.
        head -c "$size" > "$stored_path"
        # Restore the modification time, then the permissions. `--` keeps a
        # path that starts with "-" from being parsed as an option.
        touch -d "@$mtime" -- "$stored_path"
        chmod -- "$perm" "$stored_path"
        ;;
      *)
        echo "Unknown entry type '$type'. Archive corrupted?" >&2
        exit 1
        ;;
    esac
  done
}
#######################################
# List mode: print one line per entry of the archive read from stdin.
#   regular file:  <timestamp> <path>
#   symbolic link: <timestamp> <path> -> <target>
# The timestamp is formatted as YYYY-MM-DD HH:MM:SS. For files it is the
# stored modification time; symlinks have no stored time, so the current
# time is shown instead. Paths are printed exactly as stored.
# Arguments: none
# Outputs:   listing on stdout, diagnostics on stderr
# Returns:   exits with status 1 on an unknown entry type, a truncated
#            header, or a SIZE that is not a plain decimal number
#######################################
list() {
  local type stored_path link_target mtime size _perm ts

  # A failed read of the entry type means the archive is exhausted.
  while IFS= read -r -d '' type; do
    if ! IFS= read -r -d '' stored_path; then
      echo "Unexpected end of archive. Archive corrupted?" >&2
      exit 1
    fi

    case "$type" in
      L)
        # Read and display the symlink target.
        if ! IFS= read -r -d '' link_target; then
          echo "Unexpected end of archive. Archive corrupted?" >&2
          exit 1
        fi
        # No timestamp is stored for symlinks, so use the current time.
        ts="$(date '+%Y-%m-%d %H:%M:%S')"
        # Show the path and where it points, like a directory listing.
        printf '%s %s -> %s\n' "$ts" "$stored_path" "$link_target"
        ;;
      F)
        # Permissions are not shown, but the field must still be read to
        # reach the file contents that follow it.
        if ! { IFS= read -r -d '' mtime \
          && IFS= read -r -d '' size \
          && IFS= read -r -d '' _perm; }; then
          echo "Unexpected end of archive. Archive corrupted?" >&2
          exit 1
        fi
        # `head -c` gives a leading "-" and unit suffixes special meaning,
        # so accept plain decimal digits only.
        if [[ ! "$size" =~ ^[0-9]+$ ]]; then
          echo "Invalid size '$size' for '$stored_path'. Archive corrupted?" >&2
          exit 1
        fi
        # The contents are not needed, but they must be consumed to get to
        # the next entry (or the end of the archive).
        head -c "$size" > /dev/null
        ts="$(date -d "@$mtime" '+%Y-%m-%d %H:%M:%S')"
        printf '%s %s\n' "$ts" "$stored_path"
        ;;
      *)
        echo "Unknown entry type '$type'. Archive corrupted?" >&2
        exit 1
        ;;
    esac
  done
}
# Dispatch on the requested mode. Any other value, including no mode at all,
# prints a short usage summary and exits with status 1.
case "$mode" in
  a | c) archive ;;
  e | x) extract ;;
  l) list ;;
  *)
    # The usage text goes to stderr so that it cannot end up inside an
    # archive when stdout is redirected (e.g. `clump > archive.clp`).
    {
      echo "Usage:"
      echo " find directory | ${0##*/} a > archive.clp # Archive files"
      echo " ${0##*/} e < archive.clp # Extract archived files"
      echo " ${0##*/} l < archive.clp # List archive contents"
    } >&2
    exit 1
    ;;
esac
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # | |
| # Combine this stub and a clump archive to make a self-extracting archive. | |
| # | |
| # Example: | |
| # cat sfx.sh archive.clp > archive.sh | |
| # chmod +x archive.sh | |
set -euo pipefail

# Self-extraction: sed deletes everything from the first line of this script
# through the first line consisting solely of the word "exit" (the last line
# of this stub). What remains is the appended archive, which the loop below
# unpacks relative to the current directory.
# Entry layout (each header field is NUL-terminated):
#   L PATH TARGET                      -> symbolic link
#   F PATH MTIME SIZE PERMISSIONS DATA -> regular file of SIZE bytes
# The loop runs as the last stage of a pipeline, so its `exit 1` fails the
# pipeline and, through `set -e` and `pipefail`, the whole script.
sed '1,/^exit$/d' "$(readlink -f "$0")" | while IFS= read -r -d '' type; do
  IFS= read -r -d '' path || {
    echo "Archive corrupted?" >&2
    exit 1
  }
  # Remove every leading slash so nothing is written to an absolute location.
  while [[ "$path" == /* ]]; do
    path=${path#/}
  done
  case "$type" in
    L)
      IFS= read -r -d '' link_target || {
        echo "Archive corrupted?" >&2
        exit 1
      }
      mkdir -p -- "$(dirname -- "$path")"
      ln -fsn -- "$link_target" "$path"
      ;;
    F)
      {
        IFS= read -r -d '' mtime \
          && IFS= read -r -d '' size \
          && IFS= read -r -d '' perm
      } || {
        echo "Archive corrupted?" >&2
        exit 1
      }
      # `head -c` gives a leading "-" and unit suffixes special meaning, so
      # accept plain decimal digits only.
      [[ "$size" =~ ^[0-9]+$ ]] || {
        echo "Archive corrupted?" >&2
        exit 1
      }
      mkdir -p -- "$(dirname -- "$path")"
      # The contents are exactly the next SIZE bytes of the stream.
      head -c "$size" > "$path"
      touch -d "@$mtime" -- "$path"
      # `--` keeps a path that starts with "-" from being parsed as an option.
      chmod -- "$perm" "$path"
      ;;
    *)
      echo "Archive corrupted?" >&2
      exit 1
      ;;
  esac
done
exit
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment