Skip to content

Instantly share code, notes, and snippets.

@ifthenelse
Created January 1, 2026 10:30
Show Gist options
  • Select an option

  • Save ifthenelse/38768bfe71ca5323b05ace17abbe1a59 to your computer and use it in GitHub Desktop.

Select an option

Save ifthenelse/38768bfe71ca5323b05ace17abbe1a59 to your computer and use it in GitHub Desktop.
ZSH script that scans a target path with ncdu, exports the full scan as JSON, then extracts the TOP N largest directories by *disk usage* (ncdu "dsize", not apparent size). Requires ncdu and jq
#!/usr/bin/env zsh
# Strict mode: abort on any command failure (-e), on expansion of unset
# variables (-u), and let a failure anywhere in a pipeline fail the pipeline.
set -euo pipefail
# -----------------------------------------------------------------------------
# ncdu-top-dirs.zsh
#
# Scans a target path with ncdu, exports the full scan as JSON, then extracts
# the TOP N largest directories by *disk usage* (ncdu "dsize", not apparent size).
#
# Requirements:
# - ncdu (any version with -e -o export support)
# - jq (1.5+)
#
# Notes on ncdu JSON:
# - "dsize" = disk usage (allocated on disk) — this is what we sort by
# - "asize" = apparent size (included for reference only)
#
# Supported ncdu export formats:
# - Compact array nodes: ["dirname", {dsize, asize, ...}, [children...]]
# - Object nodes: {name: "...", dsize: ..., items: [...]}
# - Root at .[3], .[3][0], or nested — discovered dynamically
# -----------------------------------------------------------------------------
# print_help — write the usage/description text for this script to stdout.
# Takes no arguments and does not exit; callers decide the exit code.
print_help() {
# Quoted 'EOF' delimiter: the help text is emitted literally (no $ expansion).
cat <<'EOF'
Usage:
ncdu-top-dirs.zsh [-p <path>] [-n <count>] [-h]
Options:
-p <path> Start path to scan (default: "/")
-n <count> Number of top directories to export (default: 50)
-h Show this help and exit
Outputs (timestamped, in ~/Downloads):
- Full ncdu export JSON: ncdu-export-<timestamp>.json
- Top directories JSON list: ncdu-topdirs-<timestamp>.json
- Top directories TSV: ncdu-topdirs-<timestamp>.tsv
TSV columns:
dsize_GB asize_GB dsize_bytes asize_bytes path
Examples:
Scan root, top 50:
ncdu-top-dirs.zsh
Scan home, top 100:
ncdu-top-dirs.zsh -p "$HOME" -n 100
EOF
}
# -----------------------------------------------------------------------------
# Dependency checks — fail fast if any required tool is missing
# -----------------------------------------------------------------------------
# require_cmd <name> — return 0 if <name> resolves in PATH; otherwise print a
# diagnostic on stderr and terminate the script with status 127 (the
# conventional "command not found" code).
require_cmd() {
  local cmd="$1"
  # Guard clause: nothing to do when the tool is available.
  command -v "$cmd" >/dev/null 2>&1 && return 0
  echo "Error: required dependency '$cmd' not found in PATH." >&2
  exit 127
}
# Verify external tools up front so we fail before any scanning work starts.
require_cmd ncdu
require_cmd jq
# -----------------------------------------------------------------------------
# Defaults
# -----------------------------------------------------------------------------
START_PATH="/" # scan root; override with -p
TOP_N=50 # number of top directories to report; override with -n
# -----------------------------------------------------------------------------
# Parse command-line options
#
# -p <path>   start path to scan
# -n <count>  number of top directories to export
# -h          print help and exit 0
#
# Fix: on option errors the usage text previously went to stdout while the
# error message went to stderr; when stdout is redirected (e.g. piping the
# TSV later), the pipe received help text and the user saw only the bare
# error. Usage now follows the error onto stderr.
# -----------------------------------------------------------------------------
while getopts ":p:n:h" opt; do
  case "$opt" in
    p) START_PATH="$OPTARG" ;;
    n) TOP_N="$OPTARG" ;;
    h) print_help; exit 0 ;;  # explicit -h: help belongs on stdout
    \?)
      echo "Error: unknown option -$OPTARG" >&2
      print_help >&2
      exit 2
      ;;
    :)
      echo "Error: option -$OPTARG requires an argument." >&2
      print_help >&2
      exit 2
      ;;
  esac
done
# -----------------------------------------------------------------------------
# Validate inputs
# -----------------------------------------------------------------------------
if [[ ! -d "$START_PATH" ]]; then
  echo "Error: start path is not a directory: $START_PATH" >&2
  exit 2
fi
# Fix: the regex was quoted ('^[0-9]+$'). zsh happens to treat a quoted =~
# pattern as a regex, but bash matches a quoted RHS literally, which would
# reject every numeric count if this script were ever run under bash.
# The unquoted form behaves identically in both shells.
if ! [[ "$TOP_N" =~ ^[0-9]+$ ]] || [[ "$TOP_N" -le 0 ]]; then
  echo "Error: -n <count> must be a positive integer (got: $TOP_N)" >&2
  exit 2
fi
# -----------------------------------------------------------------------------
# Output filenames (timestamped)
# -----------------------------------------------------------------------------
TS="$(date '+%Y%m%d-%H%M%S')"
OUT_DIR="$HOME/Downloads"
FULL_JSON="$OUT_DIR/ncdu-export-$TS.json"
TOP_JSON="$OUT_DIR/ncdu-topdirs-$TS.json"
TOP_TSV="$OUT_DIR/ncdu-topdirs-$TS.tsv"
echo "Scanning: $START_PATH"
echo "Exporting full ncdu JSON to: $FULL_JSON"
# -e: include extended info (dsize, asize, mtime, etc.)
# -o: write export to file
ncdu -e -o "$FULL_JSON" "$START_PATH"
echo "Extracting TOP $TOP_N directories by disk usage (dsize)..."
echo "Writing JSON to: $TOP_JSON"
echo "Writing TSV to: $TOP_TSV"
# -----------------------------------------------------------------------------
# jq filter: robust extraction of top directories by disk usage
#
# DESIGN NOTES:
#
# 1. ROOT DISCOVERY (find_root):
# ncdu exports vary across versions. Three known formats at .[3]:
#
# FORMAT A (ncdu 1.x compact): .[3] = ["name", {metadata}, [children]]
# Direct compact-array node with name, metadata, children
#
# FORMAT B (ncdu 1.x object): .[3] = {name: "...", items: [...]}
# Direct object node with name and items array
#
# FORMAT C (ncdu 2.x flat): .[3] = [{root_meta}, {entry1}, [children1], {entry2}, [children2], ...]
# Flat alternating list where:
# .[3][0] = root directory metadata object
# .[3][1], .[3][3], .[3][5], ... = entry metadata objects
# .[3][2], .[3][4], .[3][6], ... = corresponding children arrays
#
# We detect the format dynamically and normalize to a tree structure.
#
# 2. NODE TYPE DETECTION:
# Three representations must be supported:
# - Compact array: ["name", {metadata}, [children]]
# - Object with items: {name: "...", dsize: ..., items: [...]}
# - Object metadata: {name: "...", dsize: ..., asize: ...} (ncdu 2.x)
#
# Directories are identified by having children (non-empty items/children array).
#
# 3. DISK USAGE SEMANTICS:
# - Sorting is ALWAYS by dsize (disk usage = allocated blocks)
# - asize (apparent size) is included for reference but never used for sorting
# - Only directories (nodes with children) are included in output
#
# 4. RECURSION (collect_dirs):
# We walk the tree depth-first, accumulating path prefixes.
# Each directory node yields one record; we then recurse into children.
# -----------------------------------------------------------------------------
# NOTE(review): everything between the single quotes below is ONE jq program
# held in a shell variable; the '#' lines inside it are jq comments (part of
# the string's value, ignored by jq). The '\'' sequences close/escape/reopen
# the shell quote so apostrophes survive. $N is injected by the caller via
# jq --argjson N and drives the final .[:$N] truncation.
JQ_FILTER='
# ============================================================================
# Format detection and normalization (ncdu 2.x flat list support)
# ============================================================================
# Convert ncdu 2.x flat depth-first list into tree structure
# Input at root level: [{root_meta}, child1, child2, ...]
# where each child is either:
# - {file_meta} (a standalone object for files)
# - [{dir_meta}, subchild1, subchild2, ...] (a flat list for directories)
def normalize_flat_list:
if type == "array" and length >= 1 and (.[0] | type) == "object" then
# First element is directory metadata
.[0] as $dir_meta
| {
meta: $dir_meta,
children: (
# Process children (everything after first element)
.[1:] | map(
if type == "object" then
# This is a file (no children)
{meta: ., children: []}
elif type == "array" and length >= 1 and (.[0] | type) == "object" then
# This is a directory (flat list) - recursively normalize it
normalize_flat_list
else
# Unexpected - skip or return empty
empty
end
)
)
}
else
# Not a flat list, return as-is
.
end;
# ============================================================================
# Node accessors: abstract over all three formats
# ============================================================================
# Get the name of a node
def node_name:
if type == "array" and length >= 1 and (.[0] | type) == "string" then
# Format A: compact array ["name", ...]
.[0]
elif type == "object" then
if has("name") then
# Format B/C: object with name field
.name
elif has("meta") and (.meta | type) == "object" and (.meta | has("name")) then
# Normalized flat list node
.meta.name
else
null
end
else
null
end;
# Get metadata object (contains dsize, asize, mtime, etc.)
def node_meta:
if type == "array" and length >= 2 and (.[1] | type) == "object" then
# Format A: compact array [name, {metadata}, ...]
.[1]
elif type == "object" then
if has("meta") then
# Normalized flat list node
.meta
else
# Format B/C: object node, metadata is on the node itself
.
end
else
{}
end;
# Get children array (empty for files, non-empty for directories)
def node_children:
if type == "array" and length >= 3 and (.[2] | type) == "array" then
# Format A: compact array [name, meta, [children]]
.[2]
elif type == "object" then
if has("children") and (.children | type) == "array" then
# Normalized flat list node
.children
elif has("items") and (.items | type) == "array" then
# Format B: object with items array
.items
else
[]
end
else
[]
end;
# ============================================================================
# Validation predicates
# ============================================================================
# Check if this looks like a valid ncdu filesystem node
def is_valid_node:
if type == "array" then
length >= 2 and (.[0] | type) == "string" and (.[1] | type) == "object"
elif type == "object" then
has("name") or has("meta")
else
false
end;
# Check if this node is a directory (has children)
def is_dir_node:
is_valid_node and (node_children | length) > 0;
# ============================================================================
# Root discovery: find the filesystem tree root dynamically
# ============================================================================
# Detect if this is ncdu 2.x flat list format
def is_flat_list:
(type == "array")
and (length >= 3)
and ((.[0] | type) == "object")
and ((.[0] | type) == "object" and (.[0] | has("name")))
and ((.[1] | type) == "object")
and ((.[2] | type) == "array");
# Recursively search for a valid node starting from a candidate
def find_root_recursive:
if is_flat_list then
# ncdu 2.x flat list - normalize it
normalize_flat_list
elif is_valid_node then
# Already a valid node
.
elif type == "array" and length > 0 then
# Try first element if current is just a wrapper array
.[0] | find_root_recursive
else
null
end;
# Entry point: locate the filesystem root in the ncdu export
# ncdu exports are arrays where index 3 typically contains the tree
def find_root:
if type == "array" and length > 3 then
.[3] | find_root_recursive
elif type == "array" and length > 0 then
# Fallback: search from beginning
. | find_root_recursive
elif is_valid_node then
.
else
null
end;
# ============================================================================
# Directory collection: recursively gather all directories
# ============================================================================
# Recursively collect all directories with their full paths and metadata
# Arguments: prefix (string) — the path prefix built so far
# Returns: {dirs: [...], total_dsize: N, total_asize: N}
def collect_dirs(prefix):
(node_name // "") as $name
| (if prefix == "" then $name else prefix + "/" + $name end) as $path
| node_meta as $meta
| node_children as $children
# Start with this node'\''s own size (0 for directories, non-zero for files)
| ($meta.dsize // 0) as $own_dsize
| ($meta.asize // 0) as $own_asize
# Recursively collect from children and sum their sizes
| ($children | map(
if is_valid_node then
collect_dirs($path)
else
empty
end
)) as $child_results
# Sum up all children'\''s total sizes
| ($child_results | map(.total_dsize) | add // 0) as $children_dsize
| ($child_results | map(.total_asize) | add // 0) as $children_asize
# Total for this node
| ($own_dsize + $children_dsize) as $total_dsize
| ($own_asize + $children_asize) as $total_asize
# Collect all directory records from children
| ($child_results | map(.dirs) | add // []) as $child_dirs
# Only include this node if it is a directory (has children)
| if ($children | length) > 0 then
{
dirs: ([{
path: $path,
dsize: $total_dsize,
asize: $total_asize,
mtime: ($meta.mtime // null)
}] + $child_dirs),
total_dsize: $total_dsize,
total_asize: $total_asize
}
else
# This is a file - contribute size but no dir record
{
dirs: $child_dirs,
total_dsize: $total_dsize,
total_asize: $total_asize
}
end;
# ============================================================================
# Main pipeline
# ============================================================================
find_root as $root
| if $root == null then
# Could not find valid root — output empty array (will trigger error in shell)
[]
else
$root
| collect_dirs("")
| .dirs # Extract the dirs array from the result
| sort_by(.dsize)
| reverse
| .[:$N]
end
'
# Run the extraction.
# --argjson N passes the (already validated) TOP_N count into the jq program
# as the variable $N, used by its final .[:$N] slice.
jq --argjson N "$TOP_N" "$JQ_FILTER" "$FULL_JSON" > "$TOP_JSON"
# -----------------------------------------------------------------------------
# Validate output — fail loudly if extraction yielded no directories
# -----------------------------------------------------------------------------
# An empty top-list means the jq filter found no directory nodes at all
# (or the export could not be parsed); treat that as a hard error.
DIR_COUNT=$(jq 'length' "$TOP_JSON" 2>/dev/null || echo "0")
if [[ "$DIR_COUNT" -eq 0 ]]; then
  # The whole diagnostic goes to stderr via one grouped redirection.
  # Unquoted here-doc delimiters let $FULL_JSON and the $(wc -c ...)
  # substitution expand inside the text.
  {
    cat <<DIAG_HEAD

========================================================================
ERROR: Extracted zero directories from ncdu export.
========================================================================

This may indicate:
 1. The scanned path contains only files (no subdirectories)
 2. An unsupported ncdu export format
 3. The ncdu export is corrupt or empty

Diagnostic info:
 Export file: $FULL_JSON
 File size: $(wc -c < "$FULL_JSON" | tr -d ' ') bytes

First 500 chars of export:
DIAG_HEAD
    head -c 500 "$FULL_JSON"
    cat <<DIAG_TAIL


To debug, run: jq '.[3]' "$FULL_JSON" | head -c 1000
========================================================================
DIAG_TAIL
  } >&2
  exit 1
fi
# -----------------------------------------------------------------------------
# Generate human-readable TSV
# Columns: dsize_GB asize_GB dsize_bytes asize_bytes path
#
# Fix: the header line used `echo "dsize_GB\t..."`, which relies on zsh's
# echo expanding backslash escapes — bash's echo would emit literal "\t".
# printf expands \t identically in every POSIX shell, so the header is now
# unambiguous regardless of which shell runs the script.
# -----------------------------------------------------------------------------
{
  printf 'dsize_GB\tasize_GB\tdsize_bytes\tasize_bytes\tpath\n'
  # GB columns are truncated (floor) to 2 decimal places; byte columns and
  # the path are emitted verbatim from the top-dirs JSON.
  jq -r '
    .[]
    | [
        ((.dsize / 1073741824 * 100 | floor) / 100), # dsize in GB (2 decimal places)
        ((.asize / 1073741824 * 100 | floor) / 100), # asize in GB (2 decimal places)
        .dsize,
        .asize,
        .path
      ]
    | @tsv
  ' "$TOP_JSON"
} > "$TOP_TSV"
# -----------------------------------------------------------------------------
# Summary
# -----------------------------------------------------------------------------
# Final report: one here-doc instead of a cascade of echo calls; the
# emitted bytes are identical.
cat <<SUMMARY

Done. Extracted $DIR_COUNT directories.

Output files:
 Full export: $FULL_JSON
 Top JSON: $TOP_JSON
 Top TSV: $TOP_TSV
SUMMARY
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment