ruario/clump.sh

## clump.sh
#!/usr/bin/env bash
#
# A fast, simple and small, streaming archiver.
#
# Usage is similar to cpio but without path length limits, file size limits,
# or complex metadata handling. It is intentionally minimal and only supports
# regular files and symbolic links. It restores modification times and
# permissions for regular files.
#
# Modes:
#
#   -Archive files-
#
#   a (or c)
#     • Reads file paths from stdin (one per line)
#     • Writes an archive to stdout
#     • Stores regular files, their mtimes, sizes and permissions
#     • Stores symbolic links
#
#     Examples:
#         find directory | clump a > archive.clp
#         ls image.png image.jpg | clump a > images.clp
#         ls *.txt | clump a | gzip > textfiles.clp.gz
#
#   Note: Unlike cpio or tar, you can just cat archives together or append
#   with a new `clump a >>` command and they are still valid archives. Better
#   yet if you compress them with a compression format that supports
#   concatenation itself (e.g. xz), then multiple compressed archives combined
#   (or appended) make one valid compressed archive. \o/
#
#   -Extract all files from the archive-
#
#   e (or x)
#     • Reads an archive from stdin
#     • Recreates files and symlinks
#     • Restores modification times and permissions for regular files
#
#     Example:
#         clump e < archive.clp
#
#   -List the archive contents-
#
#   l
#     • Reads an archive from stdin
#     • Prints <timestamp> <path> or <timestamp> <path> -> <target>
#     • For symlinks, the timestamp is the current time (no mtime stored)
#
#     Example:
#         clump l < archive.clp
#
#
# Archive entry format (all NUL‑terminated fields):
#
#   Regular files:
#       F\0PATH\0MODIFICATION-TIME\0SIZE\0PERMISSIONS\0FILE-CONTENTS
#
#   Symbolic links:
#       L\0PATH\0TARGET\0
#
# Notes:
#   • Absolute paths are made relative: the leading "/" is stripped.
#   • No special file types (devices, FIFOs, sockets) are supported.
#   • Directories are not recorded but created automatically when extracting.
#   • No random access: extraction is always the entire archive.
#   • This implementation will not work on macOS/BSD due to GNU specific
#     switches for various tools. In addition it relies on how Linux handles
#     piping, which is slightly different than POSIX. The idea would work
#     however, if reimplemented in another language, such as perl or python.

# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Mode letter taken from the first argument; empty when none was given.
mode="${1:-}"

# Write an archive of the paths listed on stdin (one per line) to stdout.
#
# Symbolic links become L entries (path, target); regular files become F
# entries (path, mtime, size, permissions, then the raw contents).
# Directories are skipped silently. Any other file type (FIFO, device,
# socket) is skipped with a warning on stderr instead of being fed to cat.
#
# Returns: 0 on success, 1 if at least one listed path did not exist.
archive() {
  local orig_path stored_path link_target mtime size perm
  local rc=0

  # The `|| [ -n ... ]` keeps a final path that has no trailing newline.
  while IFS= read -r orig_path || [ -n "$orig_path" ]; do
    [ -z "$orig_path" ] && continue

    # Strip *every* leading slash: "${orig_path%%[!/]*}" is the leading run
    # of slashes, which is then removed as a prefix.
    stored_path="${orig_path#"${orig_path%%[!/]*}"}"
    # Nothing left means the path was the root directory itself.
    [ -z "$stored_path" ] && continue

    # Symlinks first (including symlinks to directories and dangling ones)
    if [ -h "$orig_path" ]; then
      link_target="$(readlink -- "$orig_path")"
      # Symlinks only need path and destination, each ending with a NUL
      printf '%s\0' L "$stored_path" "$link_target"
      continue
    fi

    # Skip real directories (symlinks already handled above)
    [ -d "$orig_path" ] && continue

    # Only regular files may reach cat: reading a FIFO would block and a
    # device could stream without end.
    if [ ! -f "$orig_path" ]; then
      if [ -e "$orig_path" ]; then
        echo "Skipping unsupported file type: $orig_path" >&2
      else
        echo "Skipping missing file: $orig_path" >&2
        rc=1
      fi
      continue
    fi

    # Regular file: print the metadata and then the contents. No trailing
    # NUL is needed because the exact size is recorded.
    read -r mtime size perm < <(stat -c '%Y %s %a' -- "$orig_path")
    printf '%s\0' F "$stored_path" "$mtime" "$size" "$perm"
    cat -- "$orig_path"
  done

  return "$rc"
}

# Extract every entry of the archive read from stdin into the current
# directory.
#
# Parent directories are created as needed. Regular files get their
# recorded modification time and permissions; symlinks are recreated with
# their recorded target. All leading slashes are removed from stored paths.
#
# Exits with status 1 and a message on stderr when the archive is truncated
# or contains an unknown entry type or invalid metadata.
#
# NOTE(review): paths containing ".." components, or paths that pass through
# a symlink created by an earlier entry, are not rejected here. Only extract
# archives from sources you trust.
extract() {
  local type stored_path link_target mtime size perm written
  local truncated='Unexpected end of archive. Archive truncated?'

  while true; do
    # Read the type "F = file" or "L = symLink", or end once there is
    # nothing left.
    if ! IFS= read -r -d '' type; then
      break
    fi

    case "$type" in
      F|L) ;;
      *)
        echo "Unknown entry type '$type'. Archive corrupted?" >&2
        exit 1
        ;;
    esac

    if ! IFS= read -r -d '' stored_path; then
      echo "$truncated" >&2
      exit 1
    fi
    # Strip every leading slash ("${stored_path%%[!/]*}" is the leading run
    # of slashes) so that nothing is written to an absolute location.
    stored_path="${stored_path#"${stored_path%%[!/]*}"}"
    if [ -z "$stored_path" ]; then
      echo "Empty path in archive. Archive corrupted?" >&2
      exit 1
    fi

    if [ "$type" = "L" ]; then
      # For symlinks we only need where the path points
      if ! IFS= read -r -d '' link_target; then
        echo "$truncated" >&2
        exit 1
      fi
      mkdir -p -- "$(dirname -- "$stored_path")"
      ln -fsn -- "$link_target" "$stored_path"
      continue
    fi

    # For files get modification time, size and permissions
    if ! { IFS= read -r -d '' mtime &&
           IFS= read -r -d '' size &&
           IFS= read -r -d '' perm; }; then
      echo "$truncated" >&2
      exit 1
    fi
    case "$size" in
      ''|*[!0-9]*)
        echo "Invalid size '$size' for '$stored_path'. Archive corrupted?" >&2
        exit 1
        ;;
    esac

    # Make a directory to house the file
    mkdir -p -- "$(dirname -- "$stored_path")"
    # Remove whatever is already there: a read-only file from an earlier
    # extraction could not be overwritten, and an existing symlink would
    # otherwise be written through.
    rm -f -- "$stored_path"
    # Extract the file using its size (length)
    head -c "$size" > "$stored_path"
    # head stops early at end of input; catch the resulting short file
    written="$(stat -c '%s' -- "$stored_path")"
    if [ "$written" -ne "$size" ]; then
      echo "$truncated" >&2
      exit 1
    fi
    # Restore the modification time
    touch -d "@$mtime" -- "$stored_path"
    # Restore the file permissions ("--" protects names starting with "-")
    chmod -- "$perm" "$stored_path"
  done
}

# Print one line per entry of the archive read from stdin:
#   <timestamp> <path>              for regular files (recorded mtime)
#   <timestamp> <path> -> <target>  for symlinks (current time, since no
#                                   mtime is stored for them)
#
# Exits with status 1 and a message on stderr when the archive is truncated
# or contains an unknown entry type or invalid metadata.
list() {
  local type stored_path link_target mtime size _perm ts
  local now=''
  local truncated='Unexpected end of archive. Archive truncated?'

  while true; do
    # End of input in front of an entry is the normal end of the archive.
    if ! IFS= read -r -d '' type; then
      break
    fi

    case "$type" in
      F|L) ;;
      *)
        echo "Unknown entry type '$type'. Archive corrupted?" >&2
        exit 1
        ;;
    esac

    if ! IFS= read -r -d '' stored_path; then
      echo "$truncated" >&2
      exit 1
    fi

    if [ "$type" = "L" ]; then
      # Read and display the symlink target
      if ! IFS= read -r -d '' link_target; then
        echo "$truncated" >&2
        exit 1
      fi
      # There is no timestamp for symlinks, so the current time is used;
      # it is looked up once and reused rather than forking date per link.
      [ -n "$now" ] || now="$(date '+%Y-%m-%d %H:%M:%S')"
      # Show the path and where it points like a directory listing
      printf '%s %s -> %s\n' "$now" "$stored_path" "$link_target"
      continue
    fi

    # Permissions must be read even though they are not shown, as that is
    # the only way to advance to the file contents.
    if ! { IFS= read -r -d '' mtime &&
           IFS= read -r -d '' size &&
           IFS= read -r -d '' _perm; }; then
      echo "$truncated" >&2
      exit 1
    fi
    case "$size" in
      ''|*[!0-9]*)
        echo "Invalid size '$size' for '$stored_path'. Archive corrupted?" >&2
        exit 1
        ;;
    esac

    # The contents are not needed, but they must be consumed to reach the
    # next entry (or the end of the archive).
    head -c "$size" >/dev/null

    # Now we can print the listing
    if ! ts="$(date -d "@$mtime" '+%Y-%m-%d %H:%M:%S' 2>/dev/null)"; then
      echo "Invalid modification time '$mtime'. Archive corrupted?" >&2
      exit 1
    fi
    printf '%s %s\n' "$ts" "$stored_path"
  done
}

# Run the function that belongs to the requested mode letter. Any other
# value (including no argument at all) prints a short usage summary and
# exits with status 1.
if [ "$mode" = a ] || [ "$mode" = c ]; then
  archive
elif [ "$mode" = e ] || [ "$mode" = x ]; then
  extract
elif [ "$mode" = l ]; then
  list
else
  # Name the script was invoked as, without any directory part
  self="${0##*/}"
  printf '%s\n' \
    "Usage:" \
    "  find directory | $self a > archive.clp # Archive files" \
    "  $self e < archive.clp # Extract archived files" \
    "  $self l < archive.clp # List archive contents"
  exit 1
fi

## sfx.sh
#!/usr/bin/env bash
#
# Combine this stub and a clump archive to make a self-extracting archive.
#
# Example:
#     cat sfx.sh archive.clp > archive.sh
#     chmod +x archive.sh

set -euo pipefail

# Report a damaged or truncated payload and stop.
corrupt() {
  echo "Archive corrupted?" >&2
  exit 1
}

# Everything after the first line that consists solely of the word "exit" is
# the appended clump archive: sed drops the stub and streams the payload into
# the loop. No earlier line of this stub may consist solely of that word.
#
# NOTE(review): paths containing ".." components, or paths that pass through
# a symlink created by an earlier entry, are not rejected. Only run
# self-extracting archives from sources you trust.
sed '1,/^exit$/d' "$(readlink -f "$0")" | while true; do
  # Entry type: F = regular file, L = symbolic link. Running out of input
  # here is the normal end of the payload.
  if ! IFS= read -r -d '' type; then
    break
  fi
  case "$type" in
    F|L) ;;
    *) corrupt ;;
  esac

  IFS= read -r -d '' path || corrupt
  # Strip every leading slash ("${path%%[!/]*}" is the leading run of
  # slashes) so that nothing is written to an absolute location.
  path="${path#"${path%%[!/]*}"}"
  [ -n "$path" ] || corrupt

  if [ "$type" = "L" ]; then
    IFS= read -r -d '' link_target || corrupt
    mkdir -p -- "$(dirname -- "$path")"
    ln -fsn -- "$link_target" "$path"
  else
    IFS= read -r -d '' mtime || corrupt
    IFS= read -r -d '' size || corrupt
    IFS= read -r -d '' perm || corrupt
    case "$size" in
      ''|*[!0-9]*) corrupt ;;
    esac
    mkdir -p -- "$(dirname -- "$path")"
    # Replace rather than overwrite: a read-only file could not be
    # rewritten and an existing symlink would be written through.
    rm -f -- "$path"
    head -c "$size" > "$path"
    # head stops early at end of input; catch the resulting short file
    [ "$(stat -c '%s' -- "$path")" -eq "$size" ] || corrupt
    touch -d "@$mtime" -- "$path"
    # "--" protects file names that start with "-"
    chmod -- "$perm" "$path"
  fi
done
exit
	#!/usr/bin/env bash
	#
	# A fast, simple and small, streaming archiver.
	#
	# Usage is similar to cpio but without path length limits, file size limits,
	# or complex metadata handling. It is intentionally minimal and only supports
	# regular files and symbolic links. It restores modification times and
	# permissions for regular files.
	#
	# Modes:
	#
	# -Archive files-
	#
	# a (or c)
	# • Reads file paths from stdin (one per line)
	# • Writes an archive to stdout
	# • Stores regular files, their mtimes, sizes and permissions
	# • Stores symbolic links
	#
	# Examples:
	# find directory \| clump a > archive.clp
	# ls image.png image.jpg \| clump a > images.clp
	# ls *.txt \| clump a \| gzip > textfiles.clp.gz
	#
	# Note: Unlike cpio or tar, you can just cat archives together or append
	# with a new `clump a >>` command and they are still valid archives. Better
	# yet if you compress them with a compression format that supports
	# concatenation itself (e.g. xz), then multiple compressed archives combined
	# (or appended) make one valid compressed archive. \o/
	#
	# -Extract all files from the archive-
	#
	# e (or x)
	# • Reads an archive from stdin
	# • Recreates files and symlinks
	# • Restores modification times and permissions for regular files
	#
	# Example:
	# clump e < archive.clp
	#
	# -List the archive contents-
	#
	# l
	# • Reads an archive from stdin
	# • Prints <timestamp> <path> or <timestamp> <path> -> <target>
	# • For symlinks, the timestamp is the current time (no mtime stored)
	#
	# Example:
	# clump l < archive.clp
	#
	#
	# Archive entry format (all NUL‑terminated fields):
	#
	# Regular files:
	# F\0PATH\0MODIFICATION-TIME\0SIZE\0PERMISSIONS\0FILE-CONTENTS
	#
	# Symbolic links:
	# L\0PATH\0TARGET\0
	#
	# Notes:
	# • Absolute paths are stripped.
	# • No special file types (devices, FIFOs, sockets) are supported.
	# • Directories are not recorded but created automatically when extracting.
	# • No random access: extraction is always the entire archive.
	# • This implementation will not work on macOS/BSD due to GNU specific
	# switches for various tools. In addition it relies on how Linux handles
	# piping, which is slightly different than POSIX. The idea would work
	# however, if reimplemented in another language, such as perl or python.

	# Strict mode: abort on any failing command (-e), on expansion of an unset
	# variable (-u), and when any stage of a pipeline fails (pipefail).
	set -euo pipefail

	# Mode letter taken from the first argument; empty when none was given.
	mode="${1:-}"

	# Write an archive of the paths listed on stdin (one per line) to stdout.
	#
	# Symbolic links become L entries (path, target); regular files become F
	# entries (path, mtime, size, permissions, then the raw contents).
	# Directories are skipped silently. Any other file type (FIFO, device,
	# socket) is skipped with a warning on stderr instead of being fed to cat.
	#
	# Returns: 0 on success, 1 if at least one listed path did not exist.
	archive() {
	  local orig_path stored_path link_target mtime size perm
	  local rc=0

	  # The `|| [ -n ... ]` keeps a final path that has no trailing newline.
	  while IFS= read -r orig_path || [ -n "$orig_path" ]; do
	    [ -z "$orig_path" ] && continue

	    # Strip *every* leading slash: "${orig_path%%[!/]*}" is the leading
	    # run of slashes, which is then removed as a prefix.
	    stored_path="${orig_path#"${orig_path%%[!/]*}"}"
	    # Nothing left means the path was the root directory itself.
	    [ -z "$stored_path" ] && continue

	    # Symlinks first (including symlinks to directories and dangling ones)
	    if [ -h "$orig_path" ]; then
	      link_target="$(readlink -- "$orig_path")"
	      # Symlinks only need path and destination, each ending with a NUL
	      printf '%s\0' L "$stored_path" "$link_target"
	      continue
	    fi

	    # Skip real directories (symlinks already handled above)
	    [ -d "$orig_path" ] && continue

	    # Only regular files may reach cat: reading a FIFO would block and a
	    # device could stream without end.
	    if [ ! -f "$orig_path" ]; then
	      if [ -e "$orig_path" ]; then
	        echo "Skipping unsupported file type: $orig_path" >&2
	      else
	        echo "Skipping missing file: $orig_path" >&2
	        rc=1
	      fi
	      continue
	    fi

	    # Regular file: print the metadata and then the contents. No trailing
	    # NUL is needed because the exact size is recorded.
	    read -r mtime size perm < <(stat -c '%Y %s %a' -- "$orig_path")
	    printf '%s\0' F "$stored_path" "$mtime" "$size" "$perm"
	    cat -- "$orig_path"
	  done

	  return "$rc"
	}

	# Extract every entry of the archive read from stdin into the current
	# directory.
	#
	# Parent directories are created as needed. Regular files get their
	# recorded modification time and permissions; symlinks are recreated with
	# their recorded target. All leading slashes are removed from stored paths.
	#
	# Exits with status 1 and a message on stderr when the archive is truncated
	# or contains an unknown entry type or invalid metadata.
	#
	# NOTE(review): paths containing ".." components, or paths that pass
	# through a symlink created by an earlier entry, are not rejected here.
	# Only extract archives from sources you trust.
	extract() {
	  local type stored_path link_target mtime size perm written
	  local truncated='Unexpected end of archive. Archive truncated?'

	  while true; do
	    # Read the type "F = file" or "L = symLink", or end once there is
	    # nothing left.
	    if ! IFS= read -r -d '' type; then
	      break
	    fi

	    case "$type" in
	      F|L) ;;
	      *)
	        echo "Unknown entry type '$type'. Archive corrupted?" >&2
	        exit 1
	        ;;
	    esac

	    if ! IFS= read -r -d '' stored_path; then
	      echo "$truncated" >&2
	      exit 1
	    fi
	    # Strip every leading slash ("${stored_path%%[!/]*}" is the leading
	    # run of slashes) so that nothing is written to an absolute location.
	    stored_path="${stored_path#"${stored_path%%[!/]*}"}"
	    if [ -z "$stored_path" ]; then
	      echo "Empty path in archive. Archive corrupted?" >&2
	      exit 1
	    fi

	    if [ "$type" = "L" ]; then
	      # For symlinks we only need where the path points
	      if ! IFS= read -r -d '' link_target; then
	        echo "$truncated" >&2
	        exit 1
	      fi
	      mkdir -p -- "$(dirname -- "$stored_path")"
	      ln -fsn -- "$link_target" "$stored_path"
	      continue
	    fi

	    # For files get modification time, size and permissions
	    if ! { IFS= read -r -d '' mtime &&
	           IFS= read -r -d '' size &&
	           IFS= read -r -d '' perm; }; then
	      echo "$truncated" >&2
	      exit 1
	    fi
	    case "$size" in
	      ''|*[!0-9]*)
	        echo "Invalid size '$size' for '$stored_path'. Archive corrupted?" >&2
	        exit 1
	        ;;
	    esac

	    # Make a directory to house the file
	    mkdir -p -- "$(dirname -- "$stored_path")"
	    # Remove whatever is already there: a read-only file from an earlier
	    # extraction could not be overwritten, and an existing symlink would
	    # otherwise be written through.
	    rm -f -- "$stored_path"
	    # Extract the file using its size (length)
	    head -c "$size" > "$stored_path"
	    # head stops early at end of input; catch the resulting short file
	    written="$(stat -c '%s' -- "$stored_path")"
	    if [ "$written" -ne "$size" ]; then
	      echo "$truncated" >&2
	      exit 1
	    fi
	    # Restore the modification time
	    touch -d "@$mtime" -- "$stored_path"
	    # Restore the file permissions ("--" protects names starting with "-")
	    chmod -- "$perm" "$stored_path"
	  done
	}

	# Print one line per entry of the archive read from stdin:
	#   <timestamp> <path>              for regular files (recorded mtime)
	#   <timestamp> <path> -> <target>  for symlinks (current time, since no
	#                                   mtime is stored for them)
	#
	# Exits with status 1 and a message on stderr when the archive is truncated
	# or contains an unknown entry type or invalid metadata.
	list() {
	  local type stored_path link_target mtime size _perm ts
	  local now=''
	  local truncated='Unexpected end of archive. Archive truncated?'

	  while true; do
	    # End of input in front of an entry is the normal end of the archive.
	    if ! IFS= read -r -d '' type; then
	      break
	    fi

	    case "$type" in
	      F|L) ;;
	      *)
	        echo "Unknown entry type '$type'. Archive corrupted?" >&2
	        exit 1
	        ;;
	    esac

	    if ! IFS= read -r -d '' stored_path; then
	      echo "$truncated" >&2
	      exit 1
	    fi

	    if [ "$type" = "L" ]; then
	      # Read and display the symlink target
	      if ! IFS= read -r -d '' link_target; then
	        echo "$truncated" >&2
	        exit 1
	      fi
	      # There is no timestamp for symlinks, so the current time is used;
	      # it is looked up once and reused rather than forking date per link.
	      [ -n "$now" ] || now="$(date '+%Y-%m-%d %H:%M:%S')"
	      # Show the path and where it points like a directory listing
	      printf '%s %s -> %s\n' "$now" "$stored_path" "$link_target"
	      continue
	    fi

	    # Permissions must be read even though they are not shown, as that is
	    # the only way to advance to the file contents.
	    if ! { IFS= read -r -d '' mtime &&
	           IFS= read -r -d '' size &&
	           IFS= read -r -d '' _perm; }; then
	      echo "$truncated" >&2
	      exit 1
	    fi
	    case "$size" in
	      ''|*[!0-9]*)
	        echo "Invalid size '$size' for '$stored_path'. Archive corrupted?" >&2
	        exit 1
	        ;;
	    esac

	    # The contents are not needed, but they must be consumed to reach the
	    # next entry (or the end of the archive).
	    head -c "$size" >/dev/null

	    # Now we can print the listing
	    if ! ts="$(date -d "@$mtime" '+%Y-%m-%d %H:%M:%S' 2>/dev/null)"; then
	      echo "Invalid modification time '$mtime'. Archive corrupted?" >&2
	      exit 1
	    fi
	    printf '%s %s\n' "$ts" "$stored_path"
	  done
	}

	# Run the function that belongs to the requested mode letter. The
	# alternatives must be separated by an unescaped "|": a backslash-escaped
	# one turns the pattern into the literal string "a|c", which no mode
	# letter can ever match.
	case "$mode" in
	  a|c) archive ;;
	  e|x) extract ;;
	  l) list ;;
	  *)
	    # If no options are given, offer some help
	    echo "Usage:"
	    echo " find directory | ${0##*/} a > archive.clp # Archive files"
	    echo " ${0##*/} e < archive.clp # Extract archived files"
	    echo " ${0##*/} l < archive.clp # List archive contents"
	    exit 1
	    ;;
	esac
	#!/usr/bin/env bash
	#
	# Combine this stub and a clump archive to make a self-extracting archive.
	#
	# Example:
	# cat sfx.sh archive.clp > archive.sh
	# chmod +x archive.sh

	set -euo pipefail

	# Report a damaged or truncated payload and stop.
	corrupt() {
	  echo "Archive corrupted?" >&2
	  exit 1
	}

	# sed drops everything up to the first line that consists solely of the
	# word "exit" and streams what follows (the appended clump archive) into
	# the loop. The "|" must be a real, unescaped pipe.
	#
	# NOTE(review): paths containing ".." components, or paths that pass
	# through a symlink created by an earlier entry, are not rejected. Only
	# run self-extracting archives from sources you trust.
	sed '1,/^exit$/d' "$(readlink -f "$0")" | while true; do
	  # Entry type: F = regular file, L = symbolic link. Running out of input
	  # here is the normal end of the payload.
	  if ! IFS= read -r -d '' type; then
	    break
	  fi
	  case "$type" in
	    F|L) ;;
	    *) corrupt ;;
	  esac

	  IFS= read -r -d '' path || corrupt
	  # Strip every leading slash ("${path%%[!/]*}" is the leading run of
	  # slashes) so that nothing is written to an absolute location.
	  path="${path#"${path%%[!/]*}"}"
	  [ -n "$path" ] || corrupt

	  if [ "$type" = "L" ]; then
	    IFS= read -r -d '' link_target || corrupt
	    mkdir -p -- "$(dirname -- "$path")"
	    ln -fsn -- "$link_target" "$path"
	  else
	    IFS= read -r -d '' mtime || corrupt
	    IFS= read -r -d '' size || corrupt
	    IFS= read -r -d '' perm || corrupt
	    case "$size" in
	      ''|*[!0-9]*) corrupt ;;
	    esac
	    mkdir -p -- "$(dirname -- "$path")"
	    # Replace rather than overwrite: a read-only file could not be
	    # rewritten and an existing symlink would be written through.
	    rm -f -- "$path"
	    head -c "$size" > "$path"
	    # head stops early at end of input; catch the resulting short file
	    [ "$(stat -c '%s' -- "$path")" -eq "$size" ] || corrupt
	    touch -d "@$mtime" -- "$path"
	    # "--" protects file names that start with "-"
	    chmod -- "$perm" "$path"
	  fi
	done
	exit