Last active
February 18, 2026 22:15
-
-
Save SomeKitten/fb2151b1a6757f90aebeae53efac2867 to your computer and use it in GitHub Desktop.
Scripts to extract/pack files from/to a BG4 archive. BG4 archives are used in some of the Mario & Luigi games. Make sure to modify the paths at the top of each script as necessary! Files need to be decompressed/recompressed using Backwards LZ77. I found Kuriimu2 to work well for Backwards LZ77.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct
import json
import os

# MODIFY THESE
archive_file = "./BAI.dat"
out_dir = "./BAI_CONTENTS/"
entries_json = "./entries.json"
# MODIFY THESE

HEADER_MAGIC = b"BG4\0"
# Known archive versions: 1.4/1.5 (SSDX) and 3.5 (BIS DX).
KNOWN_VERSIONS = (0x0104, 0x0105, 0x0305)


def extract_bg4(archive_file, out_dir, entries_json):
    """Extract every file from a BG4 archive.

    Layout (little-endian), as read below:
      header : magic "BG4\\0", u16 version, u16 entry count, u32 meta
               section size, u16 derived entry count, u16 multiplier
      entries: per file, u32 data offset, u32 data size, u32 name hash,
               u16 name offset (relative to the byte after the entry table)
      names  : NUL-terminated ASCII strings
      data   : raw file contents at each entry's absolute data offset

    Writes each contained file into out_dir and dumps the entry table
    (including resolved names) to entries_json. Returns the entry list.
    Raises ValueError on a bad magic or unknown version.
    """
    os.makedirs(out_dir, exist_ok=True)

    with open(archive_file, "rb") as f:
        # Header
        if f.read(4) != HEADER_MAGIC:
            raise ValueError("Invalid header magic")
        (version, file_entry_count, meta_sec_size,
         file_entry_count_derived,
         file_entry_count_multiplier) = struct.unpack("<HHIHH", f.read(12))
        if version not in KNOWN_VERSIONS:
            raise ValueError("Invalid header version")

        print("Header:")
        print("  File entry count: %d" % file_entry_count)
        print("  Meta section size: %d" % meta_sec_size)
        print("  Derived file entry count: %d" % file_entry_count_derived)
        print("  File entry count multiplier: %d" % file_entry_count_multiplier)

        # File entries. The top bit of offset/size appears to be a flag
        # (the pack script sets 0x80000000 on the offset), so mask it off.
        entries = []
        for _ in range(file_entry_count):
            entry = {"offset": f.tell()}
            file_offset, file_size, name_hash, name_offset = \
                struct.unpack("<IIIH", f.read(14))
            entry["file_offset"] = file_offset & 0x7FFFFFFF
            entry["file_size"] = file_size & 0x7FFFFFFF
            entry["name_hash"] = name_hash
            entry["name_offset"] = name_offset
            entries.append(entry)

        # File names: name_offset is relative to the end of the entry table.
        names_origin = f.tell()
        for entry in entries:
            f.seek(names_origin + entry["name_offset"])
            chars = []
            while True:
                c = f.read(1)
                # Stop at NUL; also stop at EOF so a truncated
                # archive cannot loop forever.
                if c == b"\0" or c == b"":
                    break
                chars.append(c)
            entry["name"] = b"".join(chars).decode("ascii")

        # Extract files
        for entry in entries:
            f.seek(entry["file_offset"])
            data = f.read(entry["file_size"])
            with open(os.path.join(out_dir, entry["name"]), "wb") as out:
                out.write(data)

    with open(entries_json, "w") as out:
        json.dump(entries, out, indent=4)
    return entries


if __name__ == "__main__":
    extract_bg4(archive_file, out_dir, entries_json)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct
import json
import os

# MODIFY THESE
# data for new archive
in_dir = "./BAI_RECOMPRESSED/"
out_archive_file = "./BAI.dat_RECOMPRESS"
# data obtained from the original archive and bg4_extract script
entries_json = "./entries.json"
original_archive_file = "./BAI.dat"
# MODIFY THESE


def pack_bg4(in_dir, out_archive_file, entries_json, original_archive_file):
    """Rebuild a BG4 archive from loose files.

    Copies the metadata section (header + entry table + name table) of the
    original archive verbatim, appends each file listed in entries_json from
    in_dir, and patches every entry's data offset/size in place.  The name
    hashes and name offsets are left as copied from the original.

    entries_json must be the file produced by the extract script: each entry
    needs "offset" (position of its record in the archive) and "name".
    """
    with open(entries_json, "r") as f:
        entries = json.load(f)

    with open(original_archive_file, "rb") as f:
        # Meta section size is a u32 at offset 8 (see the extract script);
        # it doubles as the offset where file data begins.
        f.seek(8)
        meta_sec_size = struct.unpack("<I", f.read(4))[0]
        f.seek(0)
        meta = f.read(meta_sec_size)

    with open(out_archive_file, "wb") as f:
        f.write(meta)
        for entry in entries:
            file_offset = f.tell()
            with open(os.path.join(in_dir, entry["name"]), "rb") as g:
                f.write(g.read())
            file_size = f.tell() - file_offset
            end = f.tell()
            # Patch this entry's record inside the copied meta section.
            # The 0x80000000 flag matches what the original archives carry
            # (the extract script masks it off when reading).
            f.seek(entry["offset"])
            f.write(struct.pack("<II", file_offset | 0x80000000, file_size))
            f.seek(end)


if __name__ == "__main__":
    pack_bg4(in_dir, out_archive_file, entries_json, original_archive_file)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import shutil
import os


def move(src, dest):
    """Move every "*.out" file from src into dest, dropping the ".out" suffix.

    dest (including missing parents) is created if needed.  Non-".out" files
    in src are left untouched.  Paths are joined with os.path.join, so src
    and dest work with or without a trailing separator.
    """
    os.makedirs(dest, exist_ok=True)
    for name in os.listdir(src):
        if name.endswith(".out"):
            shutil.move(os.path.join(src, name), os.path.join(dest, name[:-4]))
# MODIFY THIS (src, dest)
# Collect the decompressed *.out files into the target directory,
# stripping the ".out" suffix so names match the original archive entries.
move("./BAI_CONTENTS/", "./BAI_DECOMPRESSED/")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment