# Yanis002/stringtable.py

## stringtable.py
#!/usr/bin/env python3

import hashlib

from pathlib import Path
from dataclasses import dataclass

# Methods of StringTable the user can use:
# - append() to add an element
# - insert() to insert an element
# - remove() to remove an element
# - edit() to edit the string of an element
# - clear() to reset the table
# - to_bytes() to get the bytes to write
# - from_file() to get a table from an existing file

# the size of the header
HEADER_SIZE = 0x20

# the maximum length of a line vc can draw
LINE_LENGTH = 60

# the table id for the error message (always the same value)
TID_ERRORS = 0x2842C987

# the total length of the encoding name
ENCODING_NAME_LENGTH = 18

# the size of an entry, shouldn't change
ENTRY_SIZE = 0x10

class String(str):
    """str subclass that can serialise itself for the string table"""

    def to_bytes(self, encoding: str = "utf-8", add_extras: bool = False, auto_newlines: bool = False):
        """Encode the string and return the resulting bytes.

        encoding: codec name used to encode the text
        add_extras: when True, append a 0x00 terminator (unless a 0x00 byte is
            already present) and pad with 0xBB up to a multiple of 4 bytes
        auto_newlines: when True, insert a newline after every LINE_LENGTH characters
        """

        text = str(self)

        if auto_newlines:
            # cut the text into LINE_LENGTH-sized pieces and glue them back with newlines
            pieces = [text[start:start + LINE_LENGTH] for start in range(0, len(text), LINE_LENGTH)]
            text = "\n".join(pieces)

        encoded = bytearray(text.encode(encoding))

        if not add_extras:
            return bytes(encoded)

        # end-of-string marker, only added when no 0x00 byte exists anywhere in the data
        if 0x00 not in encoded:
            encoded.append(0x00)

        # 0xBB filler up to the next multiple of 4 (nothing added when already aligned)
        encoded.extend(b"\xBB" * (-len(encoded) % 4))

        return bytes(encoded)


@dataclass
class STEntry:
    """One record of the entry table that follows the header"""

    nStringID: int # unique identifier (new ones are derived from an md5 hash of the string)
    nTextOffset1: int # offset to the string
    nTextOffset2: int # duplicate of nTextOffset1
    nTextSize1: int # size of the string, '\0' terminator excluded
    nTextSize2: int # duplicate of nTextSize1

    def to_bytes(self):
        """Return the entry as big-endian bytes: three 4-byte fields, then two 2-byte fields."""
        layout = (
            (self.nStringID, 4),
            (self.nTextOffset1, 4),
            (self.nTextOffset2, 4),
            (self.nTextSize1, 2),
            (self.nTextSize2, 2),
        )
        return b"".join(value.to_bytes(width, byteorder="big") for value, width in layout)


class STHeader:
    """Header of the string table; it also holds the list of entries"""

    def __init__(
            self,
            magic: String = String("ST10"), # "version" tag of the format
            eTableID: int = TID_ERRORS, # identifier of the error table (should stay the same)
            nEntries: int = 0, # how many entries the table has
            szEncoding: String = String("utf-8".ljust(ENCODING_NAME_LENGTH, "\x00")), # encoding name, NUL-padded
            code: String = String("en"), # language ("code" in the original tool's left-over config files)
            nSizeEntry: int = ENTRY_SIZE, # size of one entry
            unk1F: int = 0xC0, # unknown, seems to always be 0xC0
    ):
        self.entries: list[STEntry] = []
        self.magic, self.eTableID, self.nEntries = magic, eTableID, nEntries
        self.szEncoding, self.code = szEncoding, code
        self.nSizeEntry, self.unk1F = nSizeEntry, unk1F

    def validate(self):
        """Raise ValueError when the header has no entries."""
        if not self.entries:
            raise ValueError("ERROR: No entries found.")

    def to_bytes(self, encoding: str = "utf-8"):
        """Return the header fields followed by every entry, as bytes.

        Text fields are encoded with `encoding`; integer fields are big-endian.
        """
        pieces = [
            self.magic.to_bytes(encoding),
            self.eTableID.to_bytes(4, byteorder="big"),
            self.nEntries.to_bytes(2, byteorder="big"),
            self.szEncoding.to_bytes(encoding),
            self.code.to_bytes(encoding),
            self.nSizeEntry.to_bytes(1, byteorder="big"),
            self.unk1F.to_bytes(1, byteorder="big"),
        ]
        # the entry table comes right after the fixed header fields
        pieces.extend(entry.to_bytes() for entry in self.entries)
        return b"".join(pieces)


class StringTable:
    """Defines the string table, following the ST10 format

    Layout produced by to_bytes(): the header (HEADER_SIZE bytes), then
    header.nEntries entries of header.nSizeEntry bytes each, then the strings,
    each one NUL-terminated and padded with 0xBB to a multiple of 4 bytes.
    Entry offsets are counted from the start of the file.
    """

    def __init__(self, strings: list[str | String] | None = None):
        """Create a table with a default header, optionally pre-filled with `strings`."""
        self.header: STHeader = STHeader()
        self.szStrings: list[String] = []

        # add everything first, then compute the layout a single time
        for string in strings or ():
            self._add(len(self.szStrings), string)
        self.update()

    def get_entries_offset(self):
        """Return the file offset of the first entry (right after the header)."""
        return HEADER_SIZE

    def get_strings_offset(self):
        """Return the file offset of the first string (right after the entry table)."""
        return self.header.nEntries * self.header.nSizeEntry + HEADER_SIZE

    def get_encoding(self):
        """Return the codec used for the strings: shift-jis for the "jp" code, utf-8 otherwise."""
        return "shift-jis" if self.header.code == "jp" else "utf-8"

    def get_new_id(self, string: str):
        """Derive an ID from the md5 hash of `string` and warn if an entry already uses it."""
        new_id = int(hashlib.md5(string.encode(self.get_encoding())).hexdigest(), 16) % 10**8
        for entry in self.header.entries:
            if entry.nStringID == new_id:
                print(f"WARNING: this ID already exists! ('0x{new_id:08X}')")
        return new_id

    def get_offset(self, prev_offset: int, index: int):
        """Return the offset of entry `index`, given the offset of the entry before it.

        Kept for callers that use it directly; update() no longer needs it.
        It does not account for newlines added by auto_newlines.
        """
        # the new offset is the offset of the previous entry + the size of the previous string
        # assuming it's not the first entry, else it's simply the offset of the start of the strings
        if index > 0:
            return prev_offset + len(self.szStrings[index - 1].to_bytes(self.get_encoding(), True))
        return self.get_strings_offset()

    @staticmethod
    def _as_written(string: String, auto_newlines: bool) -> String:
        """Return `string` exactly as it will be serialised (same wrapping rule as String.to_bytes)."""
        if not auto_newlines:
            return string
        return String("\n".join(string[i:i + LINE_LENGTH] for i in range(0, len(string), LINE_LENGTH)))

    def _add(self, index: int, string: str | String):
        """Insert `string` and a matching entry at `index`; the caller must call update() afterwards."""
        if not isinstance(string, String):
            string = String(string)

        # offsets are placeholders, update() fills in the real values
        entry = STEntry(self.get_new_id(string), 0, 0, len(string), len(string))
        self.header.entries.insert(index, entry)
        self.szStrings.insert(index, string)

    def update(self, auto_newlines: bool = False):
        """Recompute header.nEntries and every entry's offsets and sizes.

        auto_newlines: lay the table out for strings written with automatic
            newlines, so the offsets match what to_bytes(True) emits.
        """
        self.header.nEntries = len(self.header.entries)

        encoding = self.get_encoding()
        offset = self.get_strings_offset()

        for string, entry in zip(self.szStrings, self.header.entries):
            written = self._as_written(string, auto_newlines)
            entry.nTextOffset1 = entry.nTextOffset2 = offset
            # NOTE(review): sizes are counted in characters, as before; confirm whether
            # the format expects a byte count for multi-byte text
            entry.nTextSize1 = entry.nTextSize2 = len(written)
            # the next string starts after this one's terminator and padding
            offset += len(written.to_bytes(encoding, True))

    def append(self, string: str | String):
        """Add `string` at the end of the table."""
        self._add(len(self.szStrings), string)
        # a new entry grows the entry table, which shifts every string offset
        self.update()

    def insert(self, index: int, string: str | String):
        """Insert `string` before position `index` (same index rules as list.insert)."""
        self._add(index, string)
        self.update()

    def remove(self, index: int):
        """Remove the string and the entry at `index`."""
        self.header.entries.pop(index)
        self.szStrings.pop(index)
        self.update()

    def edit(self, index: int, new_string: str | String):
        """Replace the string at `index`; the entry keeps its existing ID."""
        if not isinstance(new_string, String):
            new_string = String(new_string)

        self.szStrings[index] = new_string
        self.update()

    def clear(self):
        """Remove every entry and every string."""
        self.header.nEntries = 0
        self.header.entries.clear()
        self.szStrings.clear()

    def validate(self):
        """Raise ValueError when the table has no entries or no strings."""
        self.header.validate()

        if not self.szStrings:
            raise ValueError("ERROR: No strings found.")

    def to_bytes(self, auto_newlines: bool = False):
        """Return the whole table (header, entries, strings) as bytes.

        auto_newlines: insert a newline after every LINE_LENGTH characters of
            each string; entry offsets and sizes are computed for the wrapped text.
        Raises ValueError when the table is empty.
        """
        self.validate()
        # entries must describe exactly the bytes written below
        self.update(auto_newlines)

        encoding = self.get_encoding()
        parts = [self.header.to_bytes(encoding)]
        parts.extend(
            self._as_written(string, auto_newlines).to_bytes(encoding, True)
            for string in self.szStrings
        )
        return b"".join(parts)

    @staticmethod
    def from_file(path: Path):
        """Load a table from an existing ST10 file.

        Each string is read at the offset recorded in its entry, up to the NUL
        terminator. Raises ValueError when the file is not a usable ST10 table.
        """
        data = Path(path).read_bytes()

        # check the magic on the raw bytes so a foreign file fails with a clear error
        if len(data) < HEADER_SIZE or data[0x00:0x04] != b"ST10":
            raise ValueError("ERROR: This file is not compatible.")

        new_table = StringTable()

        new_table.header = STHeader(
            String(data[0x00:0x04].decode()),
            int.from_bytes(data[0x04:0x08], byteorder="big"),
            int.from_bytes(data[0x08:0x0A], byteorder="big"),
            String(data[0x0A:0x1C].decode()),
            String(data[0x1C:0x1E].decode()),
            int.from_bytes(data[0x1E:0x1F], byteorder="big"),
            int.from_bytes(data[0x1F:0x20], byteorder="big"),
        )
        header = new_table.header

        strings_offset = new_table.get_strings_offset()
        if header.nSizeEntry < ENTRY_SIZE or strings_offset > len(data):
            raise ValueError("ERROR: The entry table is invalid or truncated.")

        for i in range(header.nEntries):
            offset = i * header.nSizeEntry + new_table.get_entries_offset()

            header.entries.append(
                STEntry(
                    int.from_bytes(data[offset + 0x00:offset + 0x04], byteorder="big"),
                    int.from_bytes(data[offset + 0x04:offset + 0x08], byteorder="big"),
                    int.from_bytes(data[offset + 0x08:offset + 0x0C], byteorder="big"),
                    int.from_bytes(data[offset + 0x0C:offset + 0x0E], byteorder="big"),
                    int.from_bytes(data[offset + 0x0E:offset + 0x10], byteorder="big"),
                )
            )

        encoding = new_table.get_encoding()
        for entry in header.entries:
            # an entry with no text needs no lookup
            if entry.nTextSize1 == 0:
                new_table.szStrings.append(String(""))
                continue

            start = entry.nTextOffset1
            if not strings_offset <= start < len(data):
                raise ValueError(f"ERROR: Invalid string offset (0x{start:08X}).")

            # 0xBB padding only ever follows the terminator, so it is never part of the text
            end = data.find(b"\x00", start)
            if end == -1:
                end = len(data)

            new_table.szStrings.append(String(data[start:end].decode(encoding)))

        return new_table

if __name__ == "__main__":
    # example 1: build a table from scratch and write it next to the script's working directory
    fresh_table = StringTable(["abc", "123", "def", "456"])
    fresh_path = Path("new_table.bin").resolve()
    with fresh_path.open("wb") as out_file:
        out_file.write(fresh_table.to_bytes())

    # example 2: load an existing table, replace its second string and save it under a new name
    source_path = Path("Errors_VC64ErrorStrings_en.bin").resolve()
    error_table = StringTable.from_file(source_path)
    error_table.edit(1, "Hello World!")
    target_path = Path("NEW_Errors_VC64ErrorStrings_en.bin").resolve()
    with target_path.open("wb") as out_file:
        # True enables the automatic newlines
        out_file.write(error_table.to_bytes(True))
	#!/usr/bin/env python3

	import hashlib

	from pathlib import Path
	from dataclasses import dataclass

	# Methods of StringTable the user can use:
	# - append() to add an element
	# - insert() to insert an element
	# - remove() to remove an element
	# - edit() to edit the string of an element
	# - clear() to reset the table
	# - to_bytes() to get the bytes to write
	# - from_file() to get a table from an existing file

	# the size of the header
	HEADER_SIZE = 0x20

	# the maximum length of a line vc can draw
	LINE_LENGTH = 60

	# the table id for the error message (always the same value)
	TID_ERRORS = 0x2842C987

	# the total length of the encoding name
	ENCODING_NAME_LENGTH = 18

	# the size of an entry, shouldn't change
	ENTRY_SIZE = 0x10

	class String(str):
	"""Custom str class to handle bytes conversion"""

	def to_bytes(self, encoding: str = "utf-8", add_extras: bool = False, auto_newlines: bool = False):
	"""convert strings to bytes (aligned to 0x4)"""

	# add a newline every N characters, N being the value of LINE_LENGTH
	if auto_newlines:
	self = "\n".join(self[i:i + LINE_LENGTH] for i in range(0, len(self), LINE_LENGTH))

	out_bytes = bytearray(self, encoding)

	if add_extras:
	# add the end-of-string char
	if 0x00 not in out_bytes:
	out_bytes.append(0x00)

	# add alignment bytes if necessary
	while len(bytes(out_bytes)) % 4:
	out_bytes.append(0xBB)

	return bytes(out_bytes)


	@dataclass
	class STEntry:
	"""Defines an entry of the string table"""

	nStringID: int # unique identifier, new values are a md5 hash of the string
	nTextOffset1: int # offset to the string
	nTextOffset2: int # same as above
	nTextSize1: int # size of the string (not counting the '\0' char)
	nTextSize2: int # same as above

	def to_bytes(self):
	output = bytearray()
	output.extend(self.nStringID.to_bytes(4, byteorder="big"))
	output.extend(self.nTextOffset1.to_bytes(4, byteorder="big"))
	output.extend(self.nTextOffset2.to_bytes(4, byteorder="big"))
	output.extend(self.nTextSize1.to_bytes(2, byteorder="big"))
	output.extend(self.nTextSize2.to_bytes(2, byteorder="big"))
	return bytes(output)


	class STHeader:
	"""Defines the header of the string table"""

	def __init__(
	self,
	magic: String = String("ST10"), # the "version" of the format
	eTableID: int = TID_ERRORS, # unique identifier of the error table (should stay the same)
	nEntries: int = int(), # the number of entries of the table
	szEncoding: String = String("utf-8" + "\x00" * (ENCODING_NAME_LENGTH - 5)), # the name of the encoding used
	code: String = String("en"), # the language, the original tool called it "code" according to left-over config files
	nSizeEntry: int = ENTRY_SIZE, # the size of an entry
	unk1F: int = 0xC0, # unknown, seems to stay at 0xC0, always
	):
	self.magic = magic
	self.eTableID = eTableID
	self.nEntries = nEntries
	self.szEncoding = szEncoding
	self.code = code
	self.nSizeEntry = nSizeEntry
	self.unk1F = unk1F
	self.entries: list[STEntry] = []

	def validate(self):
	if len(self.entries) == 0:
	raise ValueError("ERROR: No entries found.")

	def to_bytes(self, encoding: str = "utf-8"):
	output = bytearray()
	output.extend(self.magic.to_bytes(encoding))
	output.extend(self.eTableID.to_bytes(4, byteorder="big"))
	output.extend(self.nEntries.to_bytes(2, byteorder="big"))
	output.extend(self.szEncoding.to_bytes(encoding))
	output.extend(self.code.to_bytes(encoding))
	output.extend(self.nSizeEntry.to_bytes(1, byteorder="big"))
	output.extend(self.unk1F.to_bytes(1, byteorder="big"))
	for entry in self.entries:
	output.extend(entry.to_bytes())
	return bytes(output)


	class StringTable:
	"""Defines the string table, following the ST10 format"""

	def __init__(self, strings: list[str \| String] = list()):
	self.header: STHeader = STHeader()
	self.szStrings: list[String] = []

	for string in strings:
	if isinstance(string, str):
	string = String(string)
	self.append(string)

	def get_entries_offset(self):
	return HEADER_SIZE

	def get_strings_offset(self):
	return self.header.nEntries * self.header.nSizeEntry + HEADER_SIZE

	def get_encoding(self):
	return "shift-jis" if self.header.code == "jp" else "utf-8"

	def get_new_id(self, string: str):
	id = int(hashlib.md5(string.encode(self.get_encoding())).hexdigest(), 16) % 10**8
	for entry in self.header.entries:
	if entry.nStringID == id:
	print(f"WARNING: this ID already exists! ('0x{id:08X}')")
	return id

	def get_offset(self, prev_offset: int, index: int):
	# the new offset is the offset of the previous entry + the size of the previous string
	# assuming it's not the first entry, else it's simply the offset of the start of the strings
	if index > 0:
	return prev_offset + len(self.szStrings[index - 1].to_bytes(self.get_encoding(), True))
	else:
	return self.get_strings_offset()

	def update(self):
	self.header.nEntries = len(self.header.entries)

	for i, (string, entry) in enumerate(zip(self.szStrings, self.header.entries)):
	prev_offset = self.header.entries[i - 1].nTextOffset1 if i > 0 else 0
	entry.nTextOffset1 = entry.nTextOffset2 = self.get_offset(prev_offset, i)
	entry.nTextSize1 = entry.nTextSize2 = len(string)

	def append(self, string: str \| String):
	if isinstance(string, str):
	string = String(string)

	index = len(self.header.entries) - 1
	prev_offset = self.header.entries[index - 1].nTextOffset1 if index > 0 else 0

	self.header.entries.append(
	STEntry(
	self.get_new_id(string),
	self.get_offset(prev_offset, index),
	self.get_offset(prev_offset, index),
	len(string),
	len(string),
	)
	)

	self.szStrings.append(string)

	def insert(self, index: int, string: str \| String):
	if isinstance(string, str):
	string = String(string)

	prev_offset = self.header.entries[index - 1].nTextOffset1 if index > 0 else 0

	self.header.entries.insert(
	index,
	STEntry(
	self.get_new_id(string),
	self.get_offset(prev_offset, index),
	self.get_offset(prev_offset, index),
	len(string),
	len(string),
	)
	)

	self.szStrings.insert(index, string)
	self.update()

	def remove(self, index: int):
	self.header.entries.pop(index)
	self.szStrings.pop(index)
	self.update()

	def edit(self, index: int, new_string: str \| String):
	if isinstance(new_string, str):
	new_string = String(new_string)

	self.szStrings[index] = new_string
	self.update()

	def clear(self):
	self.header.nEntries = 0
	self.header.entries.clear()
	self.szStrings.clear()

	def validate(self):
	self.header.validate()

	if len(self.szStrings) == 0:
	raise ValueError("ERROR: No strings found.")

	def to_bytes(self, auto_newlines: bool = False):
	output = bytearray()

	self.validate()
	self.update()
	output.extend(self.header.to_bytes(self.get_encoding()))

	for string in self.szStrings:
	output.extend(string.to_bytes(self.get_encoding(), True, auto_newlines))

	return bytes(output)

	@staticmethod
	def from_file(path: Path):
	with path.open("rb") as file:
	data = file.read()

	new_table = StringTable()

	new_table.header = STHeader(
	String(data[0x00:0x04].decode()),
	int.from_bytes(data[0x04:0x08], byteorder="big"),
	int.from_bytes(data[0x08:0x0A], byteorder="big"),
	String(data[0x0A:0x1C].decode()),
	String(data[0x1C:0x1E].decode()),
	int.from_bytes(data[0x1E:0x1F], byteorder="big"),
	int.from_bytes(data[0x1F:0x20], byteorder="big"),
	)

	if new_table.header.magic != "ST10":
	raise ValueError("ERROR: This file is not compatible.")

	for i in range(new_table.header.nEntries):
	offset = i * new_table.header.nSizeEntry + new_table.get_entries_offset()

	new_table.header.entries.append(
	STEntry(
	int.from_bytes(data[offset + 0x00:offset + 0x04], byteorder="big"),
	int.from_bytes(data[offset + 0x04:offset + 0x08], byteorder="big"),
	int.from_bytes(data[offset + 0x08:offset + 0x0C], byteorder="big"),
	int.from_bytes(data[offset + 0x0C:offset + 0x0E], byteorder="big"),
	int.from_bytes(data[offset + 0x0E:offset + 0x10], byteorder="big"),
	)
	)
	assert len(new_table.header.entries) == new_table.header.nEntries

	j = 0
	str_bytes = bytes()
	for i, byte in enumerate(data[new_table.get_strings_offset():]):
	if j < len(new_table.header.entries) and new_table.header.entries[j].nTextSize1 == 0:
	new_table.szStrings.append(String(""))
	j += 1
	else:
	if byte == 0x00 and len(str_bytes) == 0:
	continue

	if byte == 0xBB and len(str_bytes) == 0:
	continue

	if byte == 0x00 or data[new_table.get_strings_offset() + i + 1] == 0xBB:
	if len(str_bytes) > 0:
	new_table.szStrings.append(String(str_bytes.decode(new_table.get_encoding())))
	j += 1
	str_bytes = bytes()
	continue

	str_bytes += byte.to_bytes(byteorder="big")
	assert len(new_table.szStrings) == new_table.header.nEntries

	return new_table

	if __name__ == "__main__":
	# new table example
	new_table = StringTable(["abc", "123", "def", "456"])
	with Path("new_table.bin").resolve().open("wb") as file:
	file.write(new_table.to_bytes())

	# existing table example
	Errors_VC64ErrorStrings_en = StringTable.from_file(Path("Errors_VC64ErrorStrings_en.bin").resolve())
	Errors_VC64ErrorStrings_en.edit(1, "Hello World!")
	with Path("NEW_Errors_VC64ErrorStrings_en.bin").resolve().open("wb") as file:
	file.write(Errors_VC64ErrorStrings_en.to_bytes(True))
No results found