Skip to content

Instantly share code, notes, and snippets.

@ruario
Last active January 24, 2026 16:23
Show Gist options
  • Select an option

  • Save ruario/5dc38017ee10f344e1cff3fa2e44782e to your computer and use it in GitHub Desktop.

Select an option

Save ruario/5dc38017ee10f344e1cff3fa2e44782e to your computer and use it in GitHub Desktop.
A fast, simple and small, streaming archiver
#!/usr/bin/env bash
#
# A fast, simple and small, streaming archiver.
#
# Usage is similar to cpio but without path length limits, file size limits,
# or complex metadata handling. It is intentionally minimal and only supports
# regular files and symbolic links. It restores modification times and
# permissions for regular files.
#
# Modes:
#
# -Archive files-
#
# a (or c)
# • Reads file paths from stdin (one per line)
# • Writes an archive to stdout
# • Stores regular files, their mtimes, sizes and permissions
# • Stores symbolic links
#
# Examples:
# find directory | clump a > archive.clp
# ls image.png image.jpg | clump a > images.clp
# ls *.txt | clump a | gzip > textfiles.clp.gz
#
# Note: Unlike cpio or tar, you can just cat archives together or append
# with a new `clump a >>` command and they are still valid archives. Better
# yet if you compress them with a compression format that supports
# concatenation itself (e.g. xz), then multiple compressed archives combined
# (or appended) make one valid compressed archive. \o/
#
# -Extract all files from the archive-
#
# e (or x)
# • Reads an archive from stdin
# • Recreates files and symlinks
# • Restores modification times and permissions for regular files
#
# Example:
# clump e < archive.clp
#
# -List the archive contents-
#
# l
# • Reads an archive from stdin
# • Prints <timestamp> <path> or <timestamp> <path> -> <target>
# • For symlinks, the timestamp is the current time (no mtime stored)
#
# Example:
# clump l < archive.clp
#
#
# Archive entry format (every metadata field is NUL-terminated; file contents
# are raw bytes whose length is given by SIZE, with no terminator):
#
# Regular files:
# F\0PATH\0MODIFICATION-TIME\0SIZE\0PERMISSIONS\0FILE-CONTENTS
#
# Symbolic links:
# L\0PATH\0TARGET\0
#
# Notes:
# • Leading slashes are stripped, so absolute paths become relative paths.
# • No special file types (devices, FIFOs, sockets) are supported.
# • Directories are not recorded but created automatically when extracting.
# • No random access: extraction is always the entire archive.
# • This implementation will not work on macOS/BSD because it uses
# GNU-specific switches for various tools. It also relies on how Linux
# handles piping, which differs slightly from POSIX. The idea would still
# work if reimplemented in another language, such as Perl or Python.
# Strict mode: abort on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail
# The first positional argument selects the mode; it is empty when omitted so
# that the dispatcher can fall through to the usage text.
mode="${1-}"
#######################################
# Write an archive of the listed paths to stdout.
# Input:   file paths on stdin, one per line (a final line without a trailing
#          newline is still processed).
# Outputs: archive entries on stdout:
#            symlink:      L\0PATH\0TARGET\0
#            regular file: F\0PATH\0MTIME\0SIZE\0PERMISSIONS\0CONTENTS
#          Diagnostics go to stderr.
# Returns: non-zero if stat fails for a path (e.g. the path does not exist).
#######################################
archive() {
  local orig_path stored_path link_target meta mtime size perm
  # "|| [ -n ... ]" keeps the last path when the input lacks a final newline.
  while IFS= read -r orig_path || [ -n "$orig_path" ]; do
    [ -z "$orig_path" ] && continue
    # Strip every leading slash so that the stored path is always relative.
    stored_path="$orig_path"
    while [[ "$stored_path" == /* ]]; do
      stored_path="${stored_path#/}"
    done
    # Symlinks first (including symlinks to directories)
    if [ -h "$orig_path" ]; then
      link_target="$(readlink -- "$orig_path")"
      # Symlinks only need path and destination, each ending with a NUL
      printf '%s\0' L "$stored_path" "$link_target"
      continue
    fi
    # Skip real directories (symlinks already handled above)
    [ -d "$orig_path" ] && continue
    # FIFOs, devices and sockets are not supported by the format. Reading them
    # with cat could block forever or emit data that does not match the
    # recorded size, so they are skipped. Missing paths are left to stat below
    # so that they are still reported as an error.
    if [ -e "$orig_path" ] && [ ! -f "$orig_path" ]; then
      printf 'Skipping unsupported special file: %s\n' "$orig_path" >&2
      continue
    fi
    # Regular file: print the metadata and then the file. There is no need to
    # end with a NUL since the exact size is recorded.
    meta="$(stat -c '%Y %s %a' -- "$orig_path")" || return 1
    read -r mtime size perm <<<"$meta"
    printf '%s\0' F "$stored_path" "$mtime" "$size" "$perm"
    cat -- "$orig_path"
  done
}
#######################################
# Extract every entry of the archive read from stdin into the current
# directory, creating parent directories as needed.
# Input:   archive stream on stdin (L and F entries).
# Outputs: files and symlinks on disk; diagnostics on stderr.
# Exits:   with status 1 on an unknown entry type or a truncated entry.
#######################################
extract() {
  local type stored_path link_target mtime size perm
  while true; do
    # Read the type "F = file" or "L = symLink", or end once there is
    # nothing left.
    if ! IFS= read -r -d '' type; then
      break
    fi
    if ! IFS= read -r -d '' stored_path; then
      echo "Unexpected end of archive. Archive truncated?" >&2
      exit 1
    fi
    # Strip every leading slash so nothing is extracted to an absolute path.
    while [[ "$stored_path" == /* ]]; do
      stored_path="${stored_path#/}"
    done
    if [ "$type" = "L" ]; then
      # For symlinks we only need where the path points
      if ! IFS= read -r -d '' link_target; then
        echo "Unexpected end of archive. Archive truncated?" >&2
        exit 1
      fi
      mkdir -p -- "$(dirname -- "$stored_path")"
      ln -fsn -- "$link_target" "$stored_path"
    elif [ "$type" = "F" ]; then
      # For files get modification time, size and permissions
      if ! { IFS= read -r -d '' mtime &&
        IFS= read -r -d '' size &&
        IFS= read -r -d '' perm; }; then
        echo "Unexpected end of archive. Archive truncated?" >&2
        exit 1
      fi
      # Make a directory to house the file
      mkdir -p -- "$(dirname -- "$stored_path")"
      # Replace an existing symlink rather than writing through it to
      # whatever it points at.
      if [ -h "$stored_path" ]; then
        rm -f -- "$stored_path"
      fi
      # Extract the file using its size (length)
      head -c "$size" > "$stored_path"
      # Restore the modification time
      touch -d "@$mtime" -- "$stored_path"
      # Restore the file permissions; "--" keeps a path that starts with "-"
      # from being parsed as an option.
      chmod -- "$perm" "$stored_path"
    else
      echo "Unknown entry type '$type'. Archive corrupted?" >&2
      exit 1
    fi
  done
}
#######################################
# Print one line per entry of the archive read from stdin.
# Outputs: "<timestamp> <path>" for regular files and
#          "<timestamp> <path> -> <target>" for symlinks, on stdout.
# Exits:   with status 1 on an unknown entry type.
#######################################
list() {
  while :; do
    # Stop once no further entry type can be read.
    IFS= read -r -d '' kind || break
    IFS= read -r -d '' entry_path
    case "$kind" in
      L)
        IFS= read -r -d '' target
        # No timestamp is stored for symlinks, so the current time is shown.
        stamp="$(date '+%Y-%m-%d %H:%M:%S')"
        printf '%s %s -> %s\n' "$stamp" "$entry_path" "$target"
        ;;
      F)
        IFS= read -r -d '' epoch
        IFS= read -r -d '' nbytes
        # The permissions field is not displayed, but it has to be read to
        # advance to the file contents.
        IFS= read -r -d '' _unused_perm
        # Discard the contents to reach the next entry (or end of input).
        head -c "$nbytes" >/dev/null
        stamp="$(date -d "@$epoch" '+%Y-%m-%d %H:%M:%S')"
        printf '%s %s\n' "$stamp" "$entry_path"
        ;;
      *)
        echo "Unknown entry type '$kind'. Archive corrupted?" >&2
        exit 1
        ;;
    esac
  done
}
# Run the function selected by the mode argument; anything else prints the
# usage text and exits with status 1.
if [[ "$mode" == a || "$mode" == c ]]; then
  archive
elif [[ "$mode" == e || "$mode" == x ]]; then
  extract
elif [[ "$mode" == l ]]; then
  list
else
  # If no options are given, offer some help
  self="${0##*/}"
  printf '%s\n' \
    "Usage:" \
    " find directory | $self a > archive.clp # Archive files" \
    " $self e < archive.clp # Extract archived files" \
    " $self l < archive.clp # List archive contents"
  exit 1
fi
#!/usr/bin/env bash
#
# Combine this stub and a clump archive to make a self-extracting archive.
#
# Example:
#   cat sfx.sh archive.clp > archive.sh
#   chmod +x archive.sh
#
# How it works: sed deletes everything from the first line of this file up to
# and including the line that consists solely of the word "exit" (the last
# line of this stub), so only the appended archive reaches the loop. No
# earlier line of this stub may consist of that bare word alone.
set -euo pipefail
sed '1,/^exit$/d' "$(readlink -f "$0")" | while true; do
  # Read the entry type, or stop once the archive is exhausted.
  if ! IFS= read -r -d '' type; then
    break
  fi
  IFS= read -r -d '' path
  # Strip every leading slash so nothing is extracted to an absolute path.
  while [[ "$path" == /* ]]; do
    path="${path#/}"
  done
  if [ "$type" = "L" ]; then
    IFS= read -r -d '' link_target
    mkdir -p -- "$(dirname -- "$path")"
    ln -fsn -- "$link_target" "$path"
  elif [ "$type" = "F" ]; then
    IFS= read -r -d '' mtime
    IFS= read -r -d '' size
    IFS= read -r -d '' perm
    mkdir -p -- "$(dirname -- "$path")"
    # Replace an existing symlink rather than writing through it.
    if [ -h "$path" ]; then
      rm -f -- "$path"
    fi
    # The contents are exactly $size bytes long; there is no terminator.
    head -c "$size" > "$path"
    touch -d "@$mtime" -- "$path"
    # "--" keeps a path that starts with "-" from being parsed as an option.
    chmod -- "$perm" "$path"
  else
    echo "Archive corrupted?" >&2
    exit 1
  fi
done
exit
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment