huynhbaoan/gist:63baf304353b1eb7ef330b72607238ef

## gistfile1.txt
#!/usr/bin/env python3
import argparse
import gzip
import re
from pathlib import Path

# Frame marker embedded in every QR payload: "###", a 4-digit frame number, "###".
HEADER_RE = re.compile(r"###(\d{4})###")


def _parse_frames(text):
    """Split whitespace-free scan text into per-frame payloads.

    Returns a tuple ``(frames, header_count, duplicates, conflicts)``:
    ``frames`` maps frame number -> payload (the first occurrence wins),
    ``header_count`` is the number of headers seen, ``duplicates`` counts the
    repeated headers that were skipped, and ``conflicts`` lists the frame
    numbers whose repeated scan carried a different payload than the kept one.
    """
    matches = list(HEADER_RE.finditer(text))
    frames = {}
    duplicates = 0
    conflicts = []

    for idx, m in enumerate(matches):
        number = int(m.group(1))
        # A payload runs from the end of its header to the next header (or EOF).
        end = matches[idx + 1].start() if idx + 1 < len(matches) else len(text)
        payload = text[m.end():end]

        if number in frames:
            duplicates += 1
            if frames[number] != payload and number not in conflicts:
                conflicts.append(number)
            continue
        frames[number] = payload

    return frames, len(matches), duplicates, conflicts


def _decode_base64(b64):
    """Decode the merged base64 text, tolerating stray characters and lost padding.

    Raises:
        SystemExit: if the text is not decodable even after clean-up.
    """
    import base64
    import binascii

    # b64decode(validate=False) silently skips characters outside the base64
    # alphabet, so they must not be counted when working out the padding.
    cleaned = re.sub(r"[^A-Za-z0-9+/=]", "", b64)
    dropped = len(b64) - len(cleaned)
    if dropped:
        print(f"⚠️ Ignored {dropped} non-base64 character(s) in the merged payload")

    # Add base64 padding if needed (some transports may drop it)
    pad = -len(cleaned) % 4
    if pad:
        cleaned += "=" * pad
        print(f"Added base64 padding: {pad} '='")

    try:
        return base64.b64decode(cleaned, validate=False)
    except binascii.Error as exc:
        raise SystemExit(
            f"Failed to base64-decode the merged payload (corrupted or truncated frame?). Error: {exc}"
        ) from None


def main():
    """Merge scanned QR frames into one base64 stream, decode and decompress it.

    Reads the file given by ``--in``, strips all whitespace, splits the text on
    ``###NNNN###`` headers, concatenates the payloads in frame order, writes
    the base64-decoded bytes to ``--out-bin`` and, for ``--format gzip``, the
    decompressed text to ``--out-text``.

    Raises:
        SystemExit: if no headers are found, frames are missing, ``--total``
            is not positive, or base64/gzip decoding fails.
    """
    ap = argparse.ArgumentParser(description="Merge QR frames (###0000###...) into base64, decode, and decompress.")
    ap.add_argument("--in", dest="infile", required=True, help="Text file containing pasted QR scan outputs")
    ap.add_argument("--out-bin", default="recovered.bin", help="Output compressed binary (e.g., .gz or .zst) after base64 decode")
    ap.add_argument("--out-text", default="recovered.txt", help="Output decompressed text (for gzip only; for zstd, decompress separately)")
    ap.add_argument("--total", type=int, default=None, help="Expected total frames (e.g., 204). If set, checks missing.")
    ap.add_argument("--format", choices=["gzip", "raw"], default="gzip",
                    help="Decompression format: gzip (default) or raw (no decompress). For zstd use raw then zstd -d.")
    args = ap.parse_args()

    # A non-positive total would silently produce an empty output file.
    if args.total is not None and args.total < 1:
        ap.error("--total must be a positive integer")

    raw = Path(args.infile).read_text(encoding="utf-8", errors="strict")

    # Remove ALL whitespace; this neutralizes Qrox+ newline+space behavior.
    raw_no_ws = re.sub(r"\s+", "", raw)

    # Parse frames: ###NNNN###PAYLOAD...
    frames, header_count, duplicates, conflicts = _parse_frames(raw_no_ws)
    if not frames:
        raise SystemExit("No frame headers found (pattern ###0000###). Check your input file.")

    min_n = min(frames)
    max_n = max(frames)

    print(f"Found headers: {header_count} (raw), unique frames: {len(frames)}, duplicates skipped: {duplicates}")
    print(f"Frame range: {min_n:04d}..{max_n:04d}")
    if conflicts:
        print(f"⚠️ Duplicate scan(s) with a different payload (first scan kept): "
              f"{', '.join(f'{i:04d}' for i in conflicts[:20])}")

    expected_total = args.total
    if expected_total is None and min_n == 0:
        # infer expected total as max+1 (common) if starts at 0000
        expected_total = max_n + 1

    if expected_total is not None:
        missing = [i for i in range(expected_total) if i not in frames]
        extra = [i for i in frames if i >= expected_total]
        if missing:
            print(f"❌ Missing {len(missing)} frame(s): {', '.join(f'{i:04d}' for i in missing[:50])}"
                  + (" ..." if len(missing) > 50 else ""))
            raise SystemExit("Stop: missing frames. Rescan only those numbers and append, then rerun.")
        if extra:
            print(f"⚠️ Extra frame ids outside 0..{expected_total-1}: {extra[:20]}")

        order = range(expected_total)
    else:
        # Numbering does not start at 0000 and no --total was given, so
        # completeness cannot be enforced; at least report the visible holes.
        order = sorted(frames)
        gaps = [i for i in range(min_n, max_n + 1) if i not in frames]
        if gaps:
            print(f"⚠️ Gap(s) inside the frame range, output is probably incomplete: "
                  f"{', '.join(f'{i:04d}' for i in gaps[:50])}"
                  + (" ..." if len(gaps) > 50 else ""))

    data = _decode_base64("".join(frames[i] for i in order))
    Path(args.out_bin).write_bytes(data)
    print(f"Wrote decoded binary: {args.out_bin} ({len(data)} bytes)")

    if args.format == "gzip":
        try:
            # errors="replace" keeps recovery going even if the text is not clean UTF-8.
            text = gzip.decompress(data).decode("utf-8", errors="replace")
        except Exception as e:
            raise SystemExit(f"Failed to gzip-decompress. If you used zstd, rerun with --format raw. Error: {e}")
        Path(args.out_text).write_text(text, encoding="utf-8")
        print(f"✅ Wrote decompressed text: {args.out_text}")
    else:
        print("Done (raw). Decompress manually if needed (e.g., zstd -d).")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
import argparse
import gzip
import re
from pathlib import Path

# Frame marker embedded in every QR payload: "###", a 4-digit frame number, "###".
HEADER_RE = re.compile(r"###(\d{4})###")


def _parse_frames(text):
    """Split whitespace-free scan text into per-frame payloads.

    Returns a tuple ``(frames, header_count, duplicates, conflicts)``:
    ``frames`` maps frame number -> payload (the first occurrence wins),
    ``header_count`` is the number of headers seen, ``duplicates`` counts the
    repeated headers that were skipped, and ``conflicts`` lists the frame
    numbers whose repeated scan carried a different payload than the kept one.
    """
    matches = list(HEADER_RE.finditer(text))
    frames = {}
    duplicates = 0
    conflicts = []

    for idx, m in enumerate(matches):
        number = int(m.group(1))
        # A payload runs from the end of its header to the next header (or EOF).
        end = matches[idx + 1].start() if idx + 1 < len(matches) else len(text)
        payload = text[m.end():end]

        if number in frames:
            duplicates += 1
            if frames[number] != payload and number not in conflicts:
                conflicts.append(number)
            continue
        frames[number] = payload

    return frames, len(matches), duplicates, conflicts


def _decode_base64(b64):
    """Decode the merged base64 text, tolerating stray characters and lost padding.

    Raises:
        SystemExit: if the text is not decodable even after clean-up.
    """
    import base64
    import binascii

    # b64decode(validate=False) silently skips characters outside the base64
    # alphabet, so they must not be counted when working out the padding.
    cleaned = re.sub(r"[^A-Za-z0-9+/=]", "", b64)
    dropped = len(b64) - len(cleaned)
    if dropped:
        print(f"⚠️ Ignored {dropped} non-base64 character(s) in the merged payload")

    # Add base64 padding if needed (some transports may drop it)
    pad = -len(cleaned) % 4
    if pad:
        cleaned += "=" * pad
        print(f"Added base64 padding: {pad} '='")

    try:
        return base64.b64decode(cleaned, validate=False)
    except binascii.Error as exc:
        raise SystemExit(
            f"Failed to base64-decode the merged payload (corrupted or truncated frame?). Error: {exc}"
        ) from None


def main():
    """Merge scanned QR frames into one base64 stream, decode and decompress it.

    Reads the file given by ``--in``, strips all whitespace, splits the text on
    ``###NNNN###`` headers, concatenates the payloads in frame order, writes
    the base64-decoded bytes to ``--out-bin`` and, for ``--format gzip``, the
    decompressed text to ``--out-text``.

    Raises:
        SystemExit: if no headers are found, frames are missing, ``--total``
            is not positive, or base64/gzip decoding fails.
    """
    ap = argparse.ArgumentParser(description="Merge QR frames (###0000###...) into base64, decode, and decompress.")
    ap.add_argument("--in", dest="infile", required=True, help="Text file containing pasted QR scan outputs")
    ap.add_argument("--out-bin", default="recovered.bin", help="Output compressed binary (e.g., .gz or .zst) after base64 decode")
    ap.add_argument("--out-text", default="recovered.txt", help="Output decompressed text (for gzip only; for zstd, decompress separately)")
    ap.add_argument("--total", type=int, default=None, help="Expected total frames (e.g., 204). If set, checks missing.")
    ap.add_argument("--format", choices=["gzip", "raw"], default="gzip",
                    help="Decompression format: gzip (default) or raw (no decompress). For zstd use raw then zstd -d.")
    args = ap.parse_args()

    # A non-positive total would silently produce an empty output file.
    if args.total is not None and args.total < 1:
        ap.error("--total must be a positive integer")

    raw = Path(args.infile).read_text(encoding="utf-8", errors="strict")

    # Remove ALL whitespace; this neutralizes Qrox+ newline+space behavior.
    raw_no_ws = re.sub(r"\s+", "", raw)

    # Parse frames: ###NNNN###PAYLOAD...
    frames, header_count, duplicates, conflicts = _parse_frames(raw_no_ws)
    if not frames:
        raise SystemExit("No frame headers found (pattern ###0000###). Check your input file.")

    min_n = min(frames)
    max_n = max(frames)

    print(f"Found headers: {header_count} (raw), unique frames: {len(frames)}, duplicates skipped: {duplicates}")
    print(f"Frame range: {min_n:04d}..{max_n:04d}")
    if conflicts:
        print(f"⚠️ Duplicate scan(s) with a different payload (first scan kept): "
              f"{', '.join(f'{i:04d}' for i in conflicts[:20])}")

    expected_total = args.total
    if expected_total is None and min_n == 0:
        # infer expected total as max+1 (common) if starts at 0000
        expected_total = max_n + 1

    if expected_total is not None:
        missing = [i for i in range(expected_total) if i not in frames]
        extra = [i for i in frames if i >= expected_total]
        if missing:
            print(f"❌ Missing {len(missing)} frame(s): {', '.join(f'{i:04d}' for i in missing[:50])}"
                  + (" ..." if len(missing) > 50 else ""))
            raise SystemExit("Stop: missing frames. Rescan only those numbers and append, then rerun.")
        if extra:
            print(f"⚠️ Extra frame ids outside 0..{expected_total-1}: {extra[:20]}")

        order = range(expected_total)
    else:
        # Numbering does not start at 0000 and no --total was given, so
        # completeness cannot be enforced; at least report the visible holes.
        order = sorted(frames)
        gaps = [i for i in range(min_n, max_n + 1) if i not in frames]
        if gaps:
            print(f"⚠️ Gap(s) inside the frame range, output is probably incomplete: "
                  f"{', '.join(f'{i:04d}' for i in gaps[:50])}"
                  + (" ..." if len(gaps) > 50 else ""))

    data = _decode_base64("".join(frames[i] for i in order))
    Path(args.out_bin).write_bytes(data)
    print(f"Wrote decoded binary: {args.out_bin} ({len(data)} bytes)")

    if args.format == "gzip":
        try:
            # errors="replace" keeps recovery going even if the text is not clean UTF-8.
            text = gzip.decompress(data).decode("utf-8", errors="replace")
        except Exception as e:
            raise SystemExit(f"Failed to gzip-decompress. If you used zstd, rerun with --format raw. Error: {e}")
        Path(args.out_text).write_text(text, encoding="utf-8")
        print(f"✅ Wrote decompressed text: {args.out_text}")
    else:
        print("Done (raw). Decompress manually if needed (e.g., zstd -d).")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
No results found