Skip to content

Instantly share code, notes, and snippets.

@ultranity
Created September 29, 2025 13:48
Show Gist options
  • Select an option

  • Save ultranity/c97cf7a020997adb65886399bbb3d7a6 to your computer and use it in GitHub Desktop.

Select an option

Save ultranity/c97cf7a020997adb65886399bbb3d7a6 to your computer and use it in GitHub Desktop.
alipay miniapp unpack 支付宝新版0.data小程序解包
#!/usr/bin/env python3
"""
Alipay mini-program 0.data unpacker.
Observation: the provided 0.data is a ustar TAR archive (uncompressed).
This script safely lists or extracts its contents.
Usage examples:
- List: python unpack.py --list 0.data
- Extract: python unpack.py 0.data -o 0.data.out
- Overwrite existing output dir: python unpack.py 0.data -o out --overwrite
"""
import argparse
import os
import sys
import tarfile
from pathlib import Path, PurePosixPath
import shutil
import io
from typing import Optional, Tuple, Iterable, List
def is_within_directory(base_directory: Path, target_path: Path) -> bool:
    """Return True if target_path is base_directory itself or lies beneath it.

    Both paths are resolved first (symlinks followed, ``..`` collapsed), so
    neither has to exist on disk.  Containment is then decided component by
    component with ``Path.relative_to`` rather than with a string-prefix
    test: the prefix test rejected every target when the base was a
    filesystem root, because ``"/" + os.sep`` is ``"//"``.

    Args:
        base_directory: Directory that is supposed to contain the target.
        target_path: Path to check.

    Returns:
        True when the target equals the base or is located inside it.
    """
    try:
        base = base_directory.resolve(strict=False)
        target = target_path.resolve(strict=False)
    except (OSError, RuntimeError, ValueError):
        # Resolution failed; fall back to plain absolute paths so the check
        # still produces an answer instead of raising.
        base = base_directory.absolute()
        target = target_path.absolute()
    try:
        target.relative_to(base)
    except ValueError:
        # relative_to raises when target is not located under base.
        return False
    return True
def safe_members(members, out_dir: Path):
    """Yield only the directory and regular-file members, with sanitized names.

    Every yielded member has its ``name`` rewritten in place to a relative
    POSIX path from which empty, root (``/``), ``.`` and ``..`` components
    have been removed.  Members of any other type (symlinks, hardlinks,
    devices, fifos, ...) are dropped, as are members whose cleaned path is
    empty or whose destination does not pass ``is_within_directory``.
    """
    dropped_segments = {"", "/", ".", ".."}
    for entry in members:
        # Anything that is neither a directory nor a regular file is ignored.
        if not (entry.isdir() or entry.isreg()):
            continue
        kept = [
            segment
            for segment in PurePosixPath(entry.name).parts
            if segment not in dropped_segments
        ]
        if not kept:
            continue
        relative = Path(*kept)
        # Final containment check on the path the member would be written to.
        if not is_within_directory(out_dir, out_dir / relative):
            continue
        entry.name = str(relative.as_posix())
        yield entry
def _member_kind(member: tarfile.TarInfo) -> str:
    """Return a short printable label for the entry type of a tar member."""
    if member.isdir():
        return "dir"
    if member.isreg():
        return "file"
    if member.issym():
        return "symlink"
    if member.islnk():
        return "hardlink"
    if member.ischr():
        return "chardev"
    if member.isblk():
        return "blkdev"
    if member.isfifo():
        return "fifo"
    # Unknown type flag: TarInfo.type is a bytes value, which cannot take a
    # width/alignment format spec, so turn it into text first.
    raw = member.type
    return raw.decode("ascii", "replace") if isinstance(raw, bytes) else str(raw)


def list_tar(tar: tarfile.TarFile) -> None:
    """Print a simple listing of tar contents to stdout.

    One header block is printed, followed by one line per member holding its
    type label, size and name.  Members that are neither directories nor
    regular files get a textual label instead of the raw ``bytes`` type
    flag; formatting that flag with ``:<8`` raised ``TypeError`` and aborted
    the listing.

    Args:
        tar: An open archive to enumerate.
    """
    # Prefer the underlying file's name; an in-memory archive has none, in
    # which case an empty name is printed rather than the text "None".
    archive_name = (
        getattr(tar.fileobj, "name", None) or getattr(tar, "name", None) or ""
    )
    print(f"Archive: {archive_name}")
    print("Type Size Path")
    print("----- ---------- ------------------------------")
    for m in tar.getmembers():
        print(f"{_member_kind(m):<8}{m.size:>10} {m.name}")
def _find_aligned_header(blob: bytes, magic: bytes) -> Optional[int]:
    """Return the offset of the first 512-aligned header carrying magic, or None.

    The magic field sits 257 bytes into a tar header, so every occurrence of
    magic at position ``j`` is a header candidate starting at ``j - 257``.
    A candidate is accepted only when that start is non-negative and a
    multiple of 512 counted from the beginning of blob.
    """
    pos = blob.find(magic)
    while pos != -1:
        start = pos - 257
        if start >= 0 and start % 512 == 0:
            return start
        pos = blob.find(magic, pos + 1)
    return None


def open_tar_with_offset(
    src_path: Path, manual_offset: Optional[int] = None
) -> Tuple[tarfile.TarFile, int]:
    """Open a TAR that may start at a non-zero offset inside src_path.

    Strategy:
      1. With no manual offset, a file that ``tarfile`` recognises directly
         is opened as is and offset 0 is reported.
      2. With a manual offset, the bytes from that offset on are opened as
         an uncompressed tar; the offset is trusted, not verified by scan.
      3. Otherwise the file is scanned for a 512-aligned header, first with
         the ``b"ustar\\x0000"`` magic and, failing that, with plain
         ``b"ustar"``.

    In cases 2 and 3 the whole file is read into memory and the returned
    archive is backed by an in-memory buffer.

    Args:
        src_path: Path of the container file.
        manual_offset: Byte offset at which the tar is known to start, or
            None to auto-detect.

    Returns:
        A ``(tar, offset)`` tuple; the caller is responsible for closing tar.

    Raises:
        ValueError: manual_offset is negative or not smaller than the file size.
        tarfile.ReadError: no header was found, or the data at the chosen
            offset is not a readable tar.
    """
    # Fast path: the file is a tar from its first byte.
    if manual_offset is None and tarfile.is_tarfile(src_path):
        return tarfile.open(src_path, mode="r:*"), 0

    data = Path(src_path).read_bytes()

    if manual_offset is not None:
        if manual_offset < 0 or manual_offset >= len(data):
            raise ValueError("Invalid --offset value")
        buf = io.BytesIO(memoryview(data)[manual_offset:])
        try:
            return tarfile.open(fileobj=buf, mode="r:"), manual_offset
        except tarfile.ReadError as exc:
            # Chain the original error so the root cause stays in the traceback.
            raise tarfile.ReadError(
                f"Failed to read TAR at offset {manual_offset}: {exc}"
            ) from exc

    # Two passes on purpose: an aligned full-magic match anywhere in the file
    # wins over an earlier match of the bare "ustar" prefix.
    found_offset = _find_aligned_header(data, b"ustar\x0000")
    if found_offset is None:
        found_offset = _find_aligned_header(data, b"ustar")
    if found_offset is None:
        raise tarfile.ReadError("Could not locate TAR header (ustar magic) in file")

    buf = io.BytesIO(memoryview(data)[found_offset:])
    return tarfile.open(fileobj=buf, mode="r:"), found_offset
def extract_tar(
    src_path: Path, out_dir: Path, overwrite: bool, manual_offset: Optional[int] = None
) -> int:
    """Extract the archive in src_path into out_dir and return the TAR offset.

    Only the members let through by ``safe_members`` are written.  File
    contents are copied out manually; directories are created as needed.
    An existing out_dir is accepted only when overwrite is set, and it is
    written into as is (nothing in it is deleted beforehand).

    Raises:
        FileExistsError: out_dir exists and overwrite is false, or out_dir
            exists and is a regular file.
    """
    already_there = out_dir.exists()
    if already_there and not overwrite:
        raise FileExistsError(f"Output directory already exists: {out_dir}")
    if already_there and out_dir.is_file():
        raise FileExistsError(f"Output path exists and is a file: {out_dir}")
    out_dir.mkdir(parents=True, exist_ok=True)

    archive, detected_offset = open_tar_with_offset(src_path, manual_offset)
    try:
        for entry in safe_members(archive.getmembers(), out_dir):
            destination = out_dir / PurePosixPath(entry.name).as_posix()
            if entry.isdir():
                destination.mkdir(parents=True, exist_ok=True)
                continue
            # Regular file: make sure its parent directory exists first.
            destination.parent.mkdir(parents=True, exist_ok=True)
            stream = archive.extractfile(entry)
            if stream is not None:
                with stream as src, open(destination, "wb") as dst:
                    shutil.copyfileobj(src, dst)
            else:
                # No data stream available: leave an empty file behind.
                destination.touch(exist_ok=True)
    finally:
        archive.close()
    return detected_offset
def find_inner_tars(out_dir: Path) -> Iterable[Path]:
    """Lazily yield every file below out_dir that is named like a tar archive.

    A file matches when its lower-cased name ends in ``.tar``, ``.tgz`` or
    ``.tar.gz``; only the name is inspected, never the content.
    """
    tar_suffixes = (".tar", ".tgz", ".tar.gz")
    for folder, _subdirs, filenames in os.walk(out_dir):
        folder_path = Path(folder)
        yield from (
            folder_path / filename
            for filename in filenames
            if filename.lower().endswith(tar_suffixes)
        )
def expand_inner_tars(
    out_dir: Path, remove_inner: bool = False
) -> List[Tuple[Path, Path]]:
    """Extract every tar-named file found below out_dir next to itself.

    Each archive ``<base>.tar`` / ``<base>.tgz`` / ``<base>.tar.gz`` is
    unpacked into a sibling directory ``<base>.out``, writing only the
    members let through by ``safe_members``.  Files that match by name but
    cannot be read as tar are skipped.

    Args:
        out_dir: Root directory to search.
        remove_inner: Delete each archive after it was expanded successfully.

    Returns:
        A list of ``(archive_path, extraction_dir)`` pairs, one per archive
        that was expanded.
    """
    results: List[Tuple[Path, Path]] = []
    # Snapshot the candidates before touching the tree: extraction creates
    # files and directories below out_dir, and deletion removes some, which
    # should not influence a directory walk that is still in progress.
    for tar_path in list(find_inner_tars(out_dir)):
        name = tar_path.name
        lowered = name.lower()
        base = name
        # Longest suffix first so ".tar.gz" is stripped as a whole.
        for suffix in (".tar.gz", ".tgz", ".tar"):
            if lowered.endswith(suffix):
                base = name[: -len(suffix)]
                break
        target = tar_path.parent / f"{base}.out"
        try:
            with tarfile.open(tar_path, mode="r:*") as inner:
                # Create the output directory only once the archive opened,
                # so a false match does not leave an empty "<base>.out".
                target.mkdir(parents=True, exist_ok=True)
                for member in safe_members(inner.getmembers(), target):
                    dst_path = target / PurePosixPath(member.name).as_posix()
                    if member.isdir():
                        dst_path.mkdir(parents=True, exist_ok=True)
                        continue
                    dst_path.parent.mkdir(parents=True, exist_ok=True)
                    extracted = inner.extractfile(member)
                    if extracted is None:
                        dst_path.touch(exist_ok=True)
                        continue
                    with extracted as src, open(dst_path, "wb") as dst:
                        shutil.copyfileobj(src, dst)
        except tarfile.TarError:
            # Skip non-tar files erroneously matched by name.
            continue
        results.append((tar_path, target))
        if remove_inner:
            # The archive is closed at this point.  Removal stays
            # best-effort, but a failure is reported instead of hidden.
            try:
                tar_path.unlink()
            except OSError as exc:
                print(f"Warning: could not remove {tar_path}: {exc}", file=sys.stderr)
    return results
def main(argv=None) -> int:
    """Command-line entry point; returns the process exit status.

    Exit codes: 0 on success, 1 when the input path is missing or is not a
    file, 2 when the archive cannot be opened, 3 when extraction fails.
    """
    parser = argparse.ArgumentParser(
        description="Unpack Alipay mini-program 0.data (ustar TAR)"
    )
    add = parser.add_argument
    add("input", nargs="?", default="0.data", help="Path to 0.data (TAR) file")
    add("-o", "--out-dir", default=None, help="Output directory (default: <input>.out)")
    add("--list", action="store_true", help="List archive contents without extracting")
    add("--overwrite", action="store_true", help="Overwrite existing output directory")
    add("--offset", type=int, default=None, help="Force TAR start offset if non-zero")
    add(
        "--expand-inner",
        action="store_true",
        help="Expand nested .tar/.tar.gz inside output directory",
    )
    add(
        "--remove-inner",
        action="store_true",
        help="Remove inner tar files after expansion",
    )
    args = parser.parse_args(argv)

    # Validate the input path before touching anything else.
    src_path = Path(args.input)
    problem = None
    if not src_path.exists():
        problem = f"Input not found: {src_path}"
    elif not src_path.is_file():
        problem = f"Input is not a file: {src_path}"
    if problem is not None:
        print(problem, file=sys.stderr)
        return 1

    if args.out_dir:
        out_dir = Path(args.out_dir)
    else:
        out_dir = Path(str(src_path) + ".out")

    # Open once up front: this distinguishes "cannot open" (exit 2) from a
    # later extraction failure (exit 3) and serves the --list mode.
    try:
        tar, offset = open_tar_with_offset(src_path, args.offset)
    except Exception as exc:
        print(f"Failed to open archive: {exc}", file=sys.stderr)
        return 2
    try:
        if args.list:
            print(f"Detected TAR offset: {offset}")
            list_tar(tar)
            return 0
    finally:
        # Extraction re-opens the archive itself, so this handle is always
        # released here.
        tar.close()

    try:
        offset = extract_tar(
            src_path, out_dir, overwrite=args.overwrite, manual_offset=args.offset
        )
    except Exception as exc:
        print(f"Extraction failed: {exc}", file=sys.stderr)
        return 3
    print(f"Detected TAR offset: {offset}")
    print(f"Extracted to: {out_dir}")

    if args.expand_inner:
        expanded = expand_inner_tars(out_dir, remove_inner=args.remove_inner)
        if expanded:
            print("Expanded inner tar files:")
            for inner_src, inner_dst in expanded:
                print(f" {inner_src} -> {inner_dst}")
    return 0
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment