Last active
September 13, 2024 16:46
-
-
Save Yanis002/22b56716e4d6e37c83e035d0bd321a46 to your computer and use it in GitHub Desktop.
ST10 string table parser/editor for the Wii Virtual Console emulator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import hashlib | |
| from pathlib import Path | |
| from dataclasses import dataclass | |
| # Methods of StringTable the user can use: | |
| # - append() to add an element | |
| # - insert() to insert an element | |
| # - remove() to remove an element | |
| # - edit() to edit the string of an element | |
| # - clear() to reset the table | |
| # - to_bytes() to get the bytes to write | |
| # - from_file() to get a table from an existing file | |
# the size of the header in bytes; the entry table starts right after it
HEADER_SIZE = 0x20
# the maximum length (in characters) of a line vc can draw,
# used as the chunk size when newlines are inserted automatically
LINE_LENGTH = 60
# the table id for the error message (always the same value)
TID_ERRORS = 0x2842C987
# the total length of the encoding name field (the name is padded with '\0' up to this length)
ENCODING_NAME_LENGTH = 18
# the size of an entry in bytes, shouldn't change
ENTRY_SIZE = 0x10
class String(str):
    """A ``str`` subclass that can serialize itself the way the string table expects."""

    def to_bytes(self, encoding: str = "utf-8", add_extras: bool = False, auto_newlines: bool = False):
        """Encode the string and return the result as ``bytes``.

        Args:
            encoding: name of the codec used to encode the text.
            add_extras: when true, append the ``0x00`` end-of-string byte
                (only if the encoded data holds no ``0x00`` yet) and then pad
                with ``0xBB`` bytes until the length is a multiple of 4.
            auto_newlines: when true, insert a newline after every
                ``LINE_LENGTH`` characters before encoding.
        """
        text = str(self)

        if auto_newlines:
            # cut the text into LINE_LENGTH-sized pieces and glue them back with newlines
            pieces = [text[start:start + LINE_LENGTH] for start in range(0, len(text), LINE_LENGTH)]
            text = "\n".join(pieces)

        encoded = bytearray(text.encode(encoding))
        if not add_extras:
            return bytes(encoded)

        # end-of-string marker, skipped when the data already contains one
        if 0x00 not in encoded:
            encoded.append(0x00)

        # alignment bytes: however many are missing to reach the next multiple of 4
        encoded.extend(b"\xBB" * (-len(encoded) % 4))
        return bytes(encoded)
@dataclass
class STEntry:
    """One record of the string table's entry list (16 bytes once serialized)."""

    nStringID: int  # unique identifier, new values are derived from a md5 hash of the string
    nTextOffset1: int  # offset to the string
    nTextOffset2: int  # same as above
    nTextSize1: int  # size of the string (not counting the '\0' char)
    nTextSize2: int  # same as above

    def to_bytes(self):
        """Serialize the entry as big-endian integers: three 4-byte fields, then two 2-byte fields."""
        layout = (
            (self.nStringID, 4),
            (self.nTextOffset1, 4),
            (self.nTextOffset2, 4),
            (self.nTextSize1, 2),
            (self.nTextSize2, 2),
        )
        return b"".join(value.to_bytes(width, byteorder="big") for value, width in layout)
class STHeader:
    """Header of the string table; it also owns the list of entries that follows it."""

    def __init__(
        self,
        magic: String = String("ST10"),  # the "version" of the format
        eTableID: int = TID_ERRORS,  # unique identifier of the error table (should stay the same)
        nEntries: int = 0,  # the number of entries of the table
        szEncoding: String = String("utf-8" + "\x00" * (ENCODING_NAME_LENGTH - 5)),  # the name of the encoding used
        code: String = String("en"),  # the language, the original tool called it "code" according to left-over config files
        nSizeEntry: int = ENTRY_SIZE,  # the size of an entry
        unk1F: int = 0xC0,  # unknown, seems to stay at 0xC0, always
    ):
        """Store the header fields and start with an empty entry list."""
        self.entries: list[STEntry] = []
        self.magic, self.eTableID, self.nEntries = magic, eTableID, nEntries
        self.szEncoding, self.code = szEncoding, code
        self.nSizeEntry, self.unk1F = nSizeEntry, unk1F

    def validate(self):
        """Raise ``ValueError`` when the header holds no entry."""
        if not self.entries:
            raise ValueError("ERROR: No entries found.")

    def to_bytes(self, encoding: str = "utf-8"):
        """Serialize the header fields, immediately followed by every entry.

        Integers are written big-endian (4 bytes for the table id, 2 for the
        entry count, 1 each for the entry size and the unknown byte); the
        text fields are encoded with ``encoding`` and written without any
        terminator or padding.
        """
        parts = [
            self.magic.to_bytes(encoding),
            self.eTableID.to_bytes(4, byteorder="big"),
            self.nEntries.to_bytes(2, byteorder="big"),
            self.szEncoding.to_bytes(encoding),
            self.code.to_bytes(encoding),
            self.nSizeEntry.to_bytes(1, byteorder="big"),
            self.unk1F.to_bytes(1, byteorder="big"),
        ]
        # the entry records come right after the fixed-size fields
        parts += [record.to_bytes() for record in self.entries]
        return b"".join(parts)
class StringTable:
    """Defines the string table, following the ST10 format.

    The table keeps two parallel lists: ``header.entries`` (one ``STEntry``
    per string) and ``szStrings`` (the strings themselves). Every mutating
    method calls ``update()`` so the entry count, offsets and sizes always
    describe the current strings. Entry IDs are assigned when an element is
    added and are left untouched by ``edit()``.
    """

    def __init__(self, strings: list[str | String] | None = None):
        """Create a table, optionally pre-filled with ``strings`` (in order)."""
        self.header: STHeader = STHeader()
        self.szStrings: list[String] = []

        for string in strings or ():
            self.append(string)

    @staticmethod
    def _as_string(value: str | String) -> String:
        """Return ``value`` as a ``String`` (wrapping it when it is a plain ``str``)."""
        return value if isinstance(value, String) else String(value)

    def get_entries_offset(self):
        """Return the file offset of the first entry (right after the header)."""
        return HEADER_SIZE

    def get_strings_offset(self):
        """Return the file offset of the first string (right after the last entry)."""
        return self.header.nEntries * self.header.nSizeEntry + HEADER_SIZE

    def get_encoding(self):
        """Return the codec used for the strings, chosen from the header's language code."""
        return "shift-jis" if self.header.code == "jp" else "utf-8"

    def get_new_id(self, string: str):
        """Derive an entry ID from the md5 hash of ``string``.

        A warning is printed for every existing entry that already uses the
        same ID; the ID is returned regardless.
        """
        new_id = int(hashlib.md5(string.encode(self.get_encoding())).hexdigest(), 16) % 10**8

        for entry in self.header.entries:
            if entry.nStringID == new_id:
                print(f"WARNING: this ID already exists! ('0x{new_id:08X}')")

        return new_id

    def get_offset(self, prev_offset: int, index: int, auto_newlines: bool = False):
        """Return the offset of the string at ``index``.

        Args:
            prev_offset: offset of the previous string (ignored for index 0).
            index: position of the string in the table.
            auto_newlines: measure the previous string as it would be written
                with automatic newlines enabled.
        """
        # the new offset is the offset of the previous entry + the size of the previous string
        # assuming it's not the first entry, else it's simply the offset of the start of the strings
        if index > 0:
            previous = self.szStrings[index - 1]
            return prev_offset + len(previous.to_bytes(self.get_encoding(), True, auto_newlines))
        return self.get_strings_offset()

    def update(self, auto_newlines: bool = False):
        """Recompute the entry count and every entry's offsets and sizes.

        Args:
            auto_newlines: must match the value later given to
                ``String.to_bytes`` when the strings are written, so the
                offsets and sizes describe the bytes that really end up in
                the output.
        """
        encoding = self.get_encoding()
        self.header.nEntries = len(self.header.entries)

        # strings are stored back to back, so a running offset is enough
        offset = self.get_strings_offset()
        for string, entry in zip(self.szStrings, self.header.entries):
            entry.nTextOffset1 = entry.nTextOffset2 = offset
            # size of the encoded text, without the '\0' char and the padding
            # NOTE(review): measured in bytes (was characters) - assumes the format stores byte sizes
            entry.nTextSize1 = entry.nTextSize2 = len(string.to_bytes(encoding, False, auto_newlines))
            offset += len(string.to_bytes(encoding, True, auto_newlines))

    def append(self, string: str | String):
        """Add ``string`` at the end of the table."""
        string = self._as_string(string)

        # offsets and sizes are placeholders, update() fills in the real values
        self.header.entries.append(STEntry(self.get_new_id(string), 0, 0, 0, 0))
        self.szStrings.append(string)
        self.update()

    def insert(self, index: int, string: str | String):
        """Insert ``string`` before position ``index`` (same semantics as ``list.insert``)."""
        string = self._as_string(string)

        # offsets and sizes are placeholders, update() fills in the real values
        self.header.entries.insert(index, STEntry(self.get_new_id(string), 0, 0, 0, 0))
        self.szStrings.insert(index, string)
        self.update()

    def remove(self, index: int):
        """Remove the element at ``index``; raises ``IndexError`` when it does not exist."""
        self.header.entries.pop(index)
        self.szStrings.pop(index)
        self.update()

    def edit(self, index: int, new_string: str | String):
        """Replace the string at ``index``; the entry keeps its existing ID."""
        self.szStrings[index] = self._as_string(new_string)
        self.update()

    def clear(self):
        """Remove every entry and string from the table."""
        self.header.nEntries = 0
        self.header.entries.clear()
        self.szStrings.clear()

    def validate(self):
        """Raise ``ValueError`` when the table cannot be written.

        That is: no entries, no strings, or a different number of each.
        """
        self.header.validate()

        if not self.szStrings:
            raise ValueError("ERROR: No strings found.")

        # zip() in update() would silently ignore the extra elements otherwise
        if len(self.szStrings) != len(self.header.entries):
            raise ValueError("ERROR: The number of strings doesn't match the number of entries.")

    def to_bytes(self, auto_newlines: bool = False):
        """Return the whole table (header, entries, strings) as ``bytes``.

        Args:
            auto_newlines: insert a newline every ``LINE_LENGTH`` characters
                in each written string.

        Raises:
            ValueError: when ``validate()`` rejects the table.
        """
        self.validate()
        # offsets and sizes must describe the strings exactly as they get written below
        self.update(auto_newlines)

        encoding = self.get_encoding()
        chunks = [self.header.to_bytes(encoding)]
        chunks.extend(string.to_bytes(encoding, True, auto_newlines) for string in self.szStrings)
        return b"".join(chunks)

    @staticmethod
    def from_file(path: Path):
        """Parse an existing ST10 file and return the matching ``StringTable``.

        Each string is read from the offset stored in its entry up to the
        ``'\\0'`` terminator; entries with a size of 0 yield an empty string.

        Raises:
            ValueError: when the file is not an ST10 table, or when the entry
                table or a string offset points outside of the file.
        """
        data = path.read_bytes()

        if len(data) < HEADER_SIZE or data[0x00:0x04] != b"ST10":
            raise ValueError("ERROR: This file is not compatible.")

        def read_int(start: int, size: int) -> int:
            return int.from_bytes(data[start:start + size], byteorder="big")

        new_table = StringTable()
        new_table.header = STHeader(
            String(data[0x00:0x04].decode()),
            read_int(0x04, 4),
            read_int(0x08, 2),
            String(data[0x0A:0x1C].decode()),
            String(data[0x1C:0x1E].decode()),
            read_int(0x1E, 1),
            read_int(0x1F, 1),
        )

        # every record needs ENTRY_SIZE bytes and the whole entry table must fit in the file
        entry_size = new_table.header.nSizeEntry
        if entry_size < ENTRY_SIZE or new_table.get_strings_offset() > len(data):
            raise ValueError("ERROR: The entry table is truncated or malformed.")

        for i in range(new_table.header.nEntries):
            offset = i * entry_size + new_table.get_entries_offset()
            new_table.header.entries.append(
                STEntry(
                    read_int(offset + 0x00, 4),
                    read_int(offset + 0x04, 4),
                    read_int(offset + 0x08, 4),
                    read_int(offset + 0x0C, 2),
                    read_int(offset + 0x0E, 2),
                )
            )

        encoding = new_table.get_encoding()
        for entry in new_table.header.entries:
            if entry.nTextSize1 == 0:
                new_table.szStrings.append(String(""))
                continue

            start = entry.nTextOffset1
            if start >= len(data):
                raise ValueError("ERROR: A string offset points outside of the file.")

            # the string runs until its '\0' terminator (or the end of the file if it's missing)
            end = data.find(b"\x00", start)
            if end == -1:
                end = len(data)
            new_table.szStrings.append(String(data[start:end].decode(encoding)))

        return new_table
if __name__ == "__main__":
    # example 1: build a table from scratch and write it to "new_table.bin"
    fresh_table = StringTable(["abc", "123", "def", "456"])
    fresh_path = Path("new_table.bin").resolve()
    with fresh_path.open("wb") as out_file:
        out_file.write(fresh_table.to_bytes())

    # example 2: load an existing table, change its second string,
    # then write the result (with automatic newlines) to a new file
    source_path = Path("Errors_VC64ErrorStrings_en.bin").resolve()
    error_strings = StringTable.from_file(source_path)
    error_strings.edit(1, "Hello World!")
    target_path = Path("NEW_Errors_VC64ErrorStrings_en.bin").resolve()
    with target_path.open("wb") as out_file:
        out_file.write(error_strings.to_bytes(True))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment