Created
September 29, 2025 13:48
-
-
Save ultranity/c97cf7a020997adb65886399bbb3d7a6 to your computer and use it in GitHub Desktop.
alipay miniapp unpack 支付宝新版0.data小程序解包
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Alipay mini-program 0.data unpacker. | |
| Observation: the provided 0.data is a ustar TAR archive (uncompressed). | |
| This script safely lists or extracts its contents. | |
| Usage examples: | |
| - List: python unpack.py --list 0.data | |
| - Extract: python unpack.py 0.data -o 0.data.out | |
| - Overwrite existing output dir: python unpack.py 0.data -o out --overwrite | |
| """ | |
| import argparse | |
| import os | |
| import sys | |
| import tarfile | |
| from pathlib import Path, PurePosixPath | |
| import shutil | |
| import io | |
| from typing import Optional, Tuple, Iterable, List | |
def is_within_directory(base_directory: Path, target_path: Path) -> bool:
    """Return True if target_path is base_directory itself or lies inside it.

    Both paths are resolved first (symlinks followed, ``..`` collapsed), then
    compared component by component. A component-wise comparison is used
    instead of a string prefix test so that a base that is a filesystem root
    (whose string form already ends with the separator) is handled correctly.

    Args:
        base_directory: Directory that is expected to contain the target.
        target_path: Path to test; it does not have to exist.

    Returns:
        True when the resolved target equals the resolved base or is a
        descendant of it, False otherwise.
    """
    try:
        base = base_directory.resolve(strict=False)
        target = target_path.resolve(strict=False)
    except (OSError, RuntimeError):
        # Resolution failed (for example a symlink loop). Fall back to a
        # purely lexical comparison; abspath() collapses ".." segments so
        # "base/../x" is not mistaken for a child of base.
        base = Path(os.path.abspath(base_directory))
        target = Path(os.path.abspath(target_path))
    try:
        target.relative_to(base)
    except ValueError:
        return False
    return True
def safe_members(members, out_dir: Path):
    """Yield only the tar members that are safe to write below out_dir.

    A member is kept when it is a directory or a regular file and its
    sanitized path stays inside out_dir. Root markers, "." and ".." segments
    are dropped from the member path. Symlinks, hardlinks, devices, fifos and
    members whose path becomes empty are skipped.

    Note: each yielded member has its ``name`` attribute rewritten in place to
    the sanitized relative POSIX path.
    """
    dropped_segments = {"", "/", ".", ".."}
    for entry in members:
        kept = [
            segment
            for segment in PurePosixPath(entry.name).parts
            if segment not in dropped_segments
        ]
        if not kept:
            # Nothing left after sanitizing (e.g. "/", "." or "../..").
            continue
        if not (entry.isdir() or entry.isreg()):
            # Links and special files are never extracted.
            continue
        relative = Path(*kept)
        if not is_within_directory(out_dir, out_dir / relative):
            continue
        entry.name = relative.as_posix()
        yield entry
def _member_kind(member: tarfile.TarInfo) -> str:
    """Return a short printable label describing the member's type."""
    if member.isdir():
        return "dir"
    if member.isreg():
        return "file"
    if member.issym():
        return "symlink"
    if member.islnk():
        return "hardlink"
    if member.isdev():
        return "device"
    # TarInfo.type is a bytes type flag; bytes cannot be used with a
    # "<8"-style format spec, so turn it into text before printing.
    raw = member.type
    return raw.decode("ascii", "replace") if isinstance(raw, bytes) else str(raw)


def list_tar(tar: tarfile.TarFile) -> None:
    """Print a simple listing of tar contents.

    One header block is printed, followed by one line per member with its
    type label, size and name. Members that are neither directories nor
    regular files (symlinks, hardlinks, devices, ...) get a text label too.

    Args:
        tar: An archive opened for reading.
    """
    # In-memory archives have no file name; print an empty name, not "None".
    source = getattr(tar.fileobj, "name", "") or getattr(tar, "name", "") or ""
    print(f"Archive: {source}")
    print("Type Size Path")
    print("----- ---------- ------------------------------")
    for member in tar.getmembers():
        size = getattr(member, "size", 0)
        print(f"{_member_kind(member):<8}{size:>10} {member.name}")
def _find_tar_offset(data: bytes) -> Optional[int]:
    """Return the offset of the first valid, 512-aligned TAR header, or None."""
    magic_offset = 257  # position of the magic field inside a header block
    block_size = 512
    view = memoryview(data)
    # Pass 1 looks for the POSIX magic + version ("ustar\0" "00"); pass 2
    # accepts any "ustar" so GNU-style headers are found as well.
    for magic in (b"ustar\x0000", b"ustar"):
        hit = data.find(magic)
        while hit != -1:
            start = hit - magic_offset
            if start >= 0 and start % block_size == 0:
                header = bytes(view[start:start + block_size])
                try:
                    # Parses the block and verifies its checksum, so a stray
                    # "ustar" string in unrelated data is not mistaken for
                    # the start of the archive.
                    tarfile.TarInfo.frombuf(
                        header, tarfile.ENCODING, "surrogateescape"
                    )
                except tarfile.HeaderError:
                    pass
                else:
                    return start
            hit = data.find(magic, hit + 1)
    return None


def open_tar_with_offset(
    src_path: Path, manual_offset: Optional[int] = None
) -> Tuple[tarfile.TarFile, int]:
    """Open a TAR archive that may start at a non-zero offset in src_path.

    When no offset is given and the file is recognised as a tar archive it is
    opened directly. Otherwise the whole file is read into memory and either
    the caller-supplied offset is used, or the data is scanned for a valid
    ustar header on a 512-byte boundary.

    Args:
        src_path: Path of the container file.
        manual_offset: Byte offset at which the archive starts; None means
            auto-detect.

    Returns:
        A ``(tar, offset)`` tuple. The caller must close ``tar``.

    Raises:
        ValueError: manual_offset is negative or not smaller than the file size.
        tarfile.ReadError: no archive could be read at the given or detected
            offset, or no TAR header was found.
    """
    # Fast path: the file itself is a tar archive.
    if manual_offset is None and tarfile.is_tarfile(src_path):
        return tarfile.open(src_path, mode="r:*"), 0

    data = Path(src_path).read_bytes()

    # A caller-supplied offset is trusted as is.
    if manual_offset is not None:
        if manual_offset < 0 or manual_offset >= len(data):
            raise ValueError("Invalid --offset value")
        buf = io.BytesIO(memoryview(data)[manual_offset:])
        try:
            return tarfile.open(fileobj=buf, mode="r:"), manual_offset
        except tarfile.ReadError as exc:
            raise tarfile.ReadError(
                f"Failed to read TAR at offset {manual_offset}: {exc}"
            ) from exc

    found_offset = _find_tar_offset(data)
    if found_offset is None:
        raise tarfile.ReadError("Could not locate TAR header (ustar magic) in file")
    buf = io.BytesIO(memoryview(data)[found_offset:])
    return tarfile.open(fileobj=buf, mode="r:"), found_offset
def extract_tar(
    src_path: Path, out_dir: Path, overwrite: bool, manual_offset: Optional[int] = None
) -> int:
    """Extract the archive found in src_path into out_dir.

    Only the members yielded by safe_members() are written: directories are
    created and regular files are copied out.

    Args:
        src_path: Container file holding the TAR archive.
        out_dir: Destination directory; created if missing.
        overwrite: Allow writing into an already existing out_dir.
        manual_offset: Forced archive start offset, or None to auto-detect.

    Returns:
        The byte offset at which the archive was opened.

    Raises:
        FileExistsError: out_dir exists and overwrite is False, or out_dir
            exists and is a regular file.
    """
    already_there = out_dir.exists()
    if already_there and not overwrite:
        raise FileExistsError(f"Output directory already exists: {out_dir}")
    if already_there and out_dir.is_file():
        raise FileExistsError(f"Output path exists and is a file: {out_dir}")
    out_dir.mkdir(parents=True, exist_ok=True)

    archive, offset = open_tar_with_offset(src_path, manual_offset)
    with archive:
        for item in safe_members(archive.getmembers(), out_dir):
            destination = out_dir.joinpath(PurePosixPath(item.name).as_posix())
            if item.isdir():
                destination.mkdir(parents=True, exist_ok=True)
                continue

            # Regular file: make sure its folder exists, then copy the data.
            destination.parent.mkdir(parents=True, exist_ok=True)
            stream = archive.extractfile(item)
            if stream is None:
                # No data stream available: leave an empty file behind.
                destination.touch(exist_ok=True)
                continue
            with stream as reader, open(destination, "wb") as writer:
                shutil.copyfileobj(reader, writer)
    return offset
def find_inner_tars(out_dir: Path) -> Iterable[Path]:
    """Yield every file below out_dir named like a tar archive.

    A file matches when its name ends with ".tar", ".tgz" or ".tar.gz",
    compared case-insensitively. Only the name is inspected, not the content.
    """
    archive_suffixes = (".tar", ".tgz", ".tar.gz")
    for folder, _subdirs, filenames in os.walk(out_dir):
        for filename in filenames:
            if filename.lower().endswith(archive_suffixes):
                yield Path(folder) / filename
def expand_inner_tars(
    out_dir: Path, remove_inner: bool = False
) -> List[Tuple[Path, Path]]:
    """Extract every nested tar archive found below out_dir.

    Each archive ``<name>.tar`` / ``.tgz`` / ``.tar.gz`` is unpacked into a
    sibling directory ``<name>.out`` using the same member filtering as the
    main extraction (safe_members()).

    Args:
        out_dir: Directory tree to search for nested archives.
        remove_inner: Delete each archive after it was expanded successfully.

    Returns:
        A list of ``(archive_path, output_directory)`` pairs, one per archive
        that was expanded. Files that merely look like archives by name but
        cannot be read as tar are skipped.
    """
    results: List[Tuple[Path, Path]] = []
    # Snapshot the candidates first so that directories created or filled
    # during extraction cannot influence the directory walk in progress.
    for tar_path in list(find_inner_tars(out_dir)):
        # Strip the archive suffix to derive the output directory name.
        name = tar_path.name
        lowered = name.lower()
        base = name
        for suffix in (".tar.gz", ".tgz", ".tar"):
            if lowered.endswith(suffix):
                base = name[: -len(suffix)]
                break
        target = tar_path.parent / f"{base}.out"

        try:
            with tarfile.open(tar_path, mode="r:*") as inner:
                # Create the directory only once the file is known to be a
                # readable archive, so mis-named files leave no empty
                # "<name>.out" directory behind.
                target.mkdir(parents=True, exist_ok=True)
                for member in safe_members(inner.getmembers(), target):
                    dst_path = target / PurePosixPath(member.name).as_posix()
                    if member.isdir():
                        dst_path.mkdir(parents=True, exist_ok=True)
                        continue
                    dst_path.parent.mkdir(parents=True, exist_ok=True)
                    extracted = inner.extractfile(member)
                    if extracted is None:
                        dst_path.touch(exist_ok=True)
                        continue
                    with extracted as src, open(dst_path, "wb") as dst:
                        shutil.copyfileobj(src, dst)
        except tarfile.TarError:
            # Skip non-tar files erroneously matched by name.
            continue

        results.append((tar_path, target))
        if remove_inner:
            try:
                tar_path.unlink()
            except OSError:
                # Removal is best effort; the expansion itself succeeded.
                pass
    return results
def main(argv=None) -> int:
    """Command-line entry point.

    Parses the arguments, then either lists the archive or extracts it and
    optionally expands nested tar files.

    Args:
        argv: Argument list to parse; None lets argparse read sys.argv.

    Returns:
        Process exit code: 0 on success, 1 when the input is missing or not
        a file, 2 when the archive cannot be opened, 3 when extraction fails.
    """
    parser = argparse.ArgumentParser(
        description="Unpack Alipay mini-program 0.data (ustar TAR)"
    )
    parser.add_argument(
        "input", nargs="?", default="0.data", help="Path to 0.data (TAR) file"
    )
    parser.add_argument(
        "-o", "--out-dir", default=None, help="Output directory (default: <input>.out)"
    )
    parser.add_argument(
        "--list", action="store_true", help="List archive contents without extracting"
    )
    parser.add_argument(
        "--overwrite", action="store_true", help="Overwrite existing output directory"
    )
    parser.add_argument(
        "--offset", type=int, default=None, help="Force TAR start offset if non-zero"
    )
    parser.add_argument(
        "--expand-inner",
        action="store_true",
        help="Expand nested .tar/.tar.gz inside output directory",
    )
    parser.add_argument(
        "--remove-inner",
        action="store_true",
        help="Remove inner tar files after expansion",
    )
    options = parser.parse_args(argv)

    source = Path(options.input)
    if not source.exists():
        print(f"Input not found: {source}", file=sys.stderr)
        return 1
    if not source.is_file():
        print(f"Input is not a file: {source}", file=sys.stderr)
        return 1

    # Without -o the output goes next to the input as "<input>.out".
    if options.out_dir:
        destination = Path(options.out_dir)
    else:
        destination = Path(str(source) + ".out")

    # Probe the archive first so an unreadable input is reported on its own.
    try:
        archive, detected = open_tar_with_offset(source, options.offset)
    except Exception as exc:
        print(f"Failed to open archive: {exc}", file=sys.stderr)
        return 2

    # The probe handle is only needed for listing; it is closed either way.
    with archive:
        if options.list:
            print(f"Detected TAR offset: {detected}")
            list_tar(archive)
            return 0

    try:
        detected = extract_tar(
            source, destination, overwrite=options.overwrite, manual_offset=options.offset
        )
    except Exception as exc:
        print(f"Extraction failed: {exc}", file=sys.stderr)
        return 3
    print(f"Detected TAR offset: {detected}")
    print(f"Extracted to: {destination}")

    if not options.expand_inner:
        return 0
    expanded = expand_inner_tars(destination, remove_inner=options.remove_inner)
    if expanded:
        print("Expanded inner tar files:")
        for inner_path, inner_out in expanded:
            print(f" {inner_path} -> {inner_out}")
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment