Skip to content

Instantly share code, notes, and snippets.

@YOCKOW
Created September 26, 2025 07:09
Show Gist options
  • Select an option

  • Save YOCKOW/2af7a44937536aedea92f11d2c573ec7 to your computer and use it in GitHub Desktop.

Select an option

Save YOCKOW/2af7a44937536aedea92f11d2c573ec7 to your computer and use it in GitHub Desktop.
Sample script for SF-0033.
#!/usr/bin/python3
import re
from typing import List, Optional
import urllib.request as request
import xml.etree.ElementTree as ElemTree
# Charsets for which Swift constants are generated.
# `generateSwiftCode` keeps a registry record only when its representative name
# (the preferred MIME name, or the registered name when there is none) appears
# in this list verbatim, so spelling and letter case matter here.
REQUIRED_CHARSET_NAMES: List[str] = [
"UTF-8",
"US-ASCII",
"EUC-JP",
"ISO-8859-1",
"Shift_JIS",
"ISO-8859-2",
"UTF-16",
"windows-1251",
"windows-1252",
"windows-1253",
"windows-1254",
"windows-1250",
"ISO-2022-JP",
"macintosh",
"UTF-16BE",
"UTF-16LE",
"UTF-32",
"UTF-32BE",
"UTF-32LE",
]
# Location of the IANA character-sets registry in XML form.
CHARSETS_XML_URL = "https://www.iana.org/assignments/character-sets/character-sets.xml"

# Download the registry once, at module load. The `with` block closes the HTTP
# response as soon as the body has been read instead of leaving the connection
# open until garbage collection. `read()` yields the raw bytes of the document,
# which `ElemTree.fromstring` accepts directly.
with request.urlopen(request.Request(CHARSETS_XML_URL)) as _response:
    CHARSETS_XML_STRING = _response.read()

# Parsed document root and the XML namespace used to qualify element names.
CHARSETS_XML_ROOT = ElemTree.fromstring(CHARSETS_XML_STRING)
CHARSETS_XML_NS = "http://www.iana.org/assignments"

# Every <record> element that sits directly inside a <registry> child of the root.
CHARSETS_XML_RECORD_ELEMENTS = CHARSETS_XML_ROOT.findall(
    "./{%s}registry/{%s}record" % (CHARSETS_XML_NS, CHARSETS_XML_NS)
)

# One level of indentation in the generated Swift code.
SWIFT_CODE_INDENT = " "
class IANACharsetNameRecord:
    """A single `<record>` element of the charset registry, renderable as Swift.

    The record exposes its registered name, optional preferred MIME name and
    aliases, a camel-cased identifier derived from them, and the lines of a
    Swift `static let ... = IANACharset(...)` declaration.
    """

    def __init__(self, recordElem: ElemTree.Element):
        """Read the name, preferred MIME name and aliases out of `recordElem`.

        Args:
            recordElem: A `<record>` element whose children live in the
                `CHARSETS_XML_NS` namespace.

        Raises:
            ValueError: If the record has no `<name>` child or the name is empty.
        """
        name = recordElem.findtext(self._childPath("name"))
        if not name:
            # Fail with a clear message instead of an AttributeError on `None.text`.
            raise ValueError("IANA charset record has no <name> text")
        self._name: str = name
        # An absent or empty <preferred_alias> both mean "no preferred MIME name".
        self._preferredMIMEName: Optional[str] = (
            recordElem.findtext(self._childPath("preferred_alias")) or None
        )
        # Skip empty <alias/> elements: their text is None and would otherwise
        # be rendered as the bogus alias "None".
        self._aliases: List[str] = [
            aliasElem.text
            for aliasElem in recordElem.findall(self._childPath("alias"))
            if aliasElem.text is not None
        ]
        # Cache for `camelCasedName`; filled in on first access.
        self._camelCasedName: Optional[str] = None

    @staticmethod
    def _childPath(localName: str) -> str:
        """Return the ElementTree path of a direct child in the registry namespace."""
        return "./{%s}%s" % (CHARSETS_XML_NS, localName)

    @staticmethod
    def _swiftStringLiteral(string: Optional[str]) -> str:
        """Render `string` as a Swift string literal, or `nil` when it is None.

        Backslashes, double quotes and line-breaking/tab characters are escaped
        so that unusual registry text cannot produce invalid Swift source.
        """
        if string is None:
            return "nil"
        escaped = (
            string.replace("\\", "\\\\")  # must be first: later escapes add backslashes
            .replace('"', '\\"')
            .replace("\n", "\\n")
            .replace("\r", "\\r")
            .replace("\t", "\\t")
        )
        return f'"{escaped}"'

    @property
    def name(self) -> str:
        """The registered charset name (text of the `<name>` child)."""
        return self._name

    @property
    def preferredMIMEName(self) -> Optional[str]:
        """The preferred MIME name, or None when the record declares none."""
        return self._preferredMIMEName

    @property
    def representativeName(self) -> str:
        """The preferred MIME name when present, otherwise the registered name."""
        return self.preferredMIMEName or self.name

    @property
    def aliases(self) -> List[str]:
        """The non-empty alias texts of the record, in document order."""
        return self._aliases

    @property
    def camelCasedName(self) -> str:
        """A camel-cased identifier built from `representativeName`.

        The name is split at every character that is not an ASCII letter or
        digit. The first word is lower-cased; each later word is kept as-is
        when it consists only of upper-case letters and is otherwise passed
        through `str.capitalize()`. An underscore is inserted between two words
        when the earlier one ends with a digit and the later one starts with a
        digit, so that e.g. `ISO-8859-1` becomes `iso8859_1`. The result is
        computed once and cached.
        """
        if self._camelCasedName is not None:
            return self._camelCasedName
        pieces: List[str] = []
        previousWord = ""
        words = re.split(r"[^0-9A-Za-z]", self.representativeName)
        for index, word in enumerate(words):
            if index == 0:
                pieces.append(word.lower())
            else:
                # Keep adjacent numeric words apart ("8859" + "1" -> "8859_1").
                if re.search(r"[0-9]$", previousWord) and re.search(r"^[0-9]", word):
                    pieces.append("_")
                if re.fullmatch("[A-Z]+", word):
                    pieces.append(word)
                else:
                    pieces.append(word.capitalize())
            previousWord = word
        self._camelCasedName = "".join(pieces)
        return self._camelCasedName

    @property
    def swiftCodeLines(self) -> List[str]:
        """The lines of the Swift `static let` declaration for this record.

        The lines carry no outer indentation; nested lines are indented with
        multiples of `SWIFT_CODE_INDENT`. All names are emitted through
        `_swiftStringLiteral`, so they are escaped for Swift.
        """
        literal = self._swiftStringLiteral
        lines: List[str] = [
            f"/// IANA Charset `{self.representativeName}`.",
            f"static let {self.camelCasedName} = IANACharset(",
            f"{SWIFT_CODE_INDENT}preferredMIMEName: {literal(self.preferredMIMEName)},",
            f"{SWIFT_CODE_INDENT}name: {literal(self.name)},",
            f"{SWIFT_CODE_INDENT}aliases: [",
        ]
        lines.extend(
            f"{SWIFT_CODE_INDENT * 2}{literal(alias)}," for alias in self.aliases
        )
        lines.append(f"{SWIFT_CODE_INDENT}]")
        lines.append(")")
        return lines
def generateSwiftCode() -> str:
    """Build the Swift `extension IANACharset { ... }` source text.

    Walks the registry records in document order and keeps those whose
    representative name is listed in `REQUIRED_CHARSET_NAMES`. Each kept
    record contributes its Swift lines, indented by one `SWIFT_CODE_INDENT`,
    with a newline before and after the group.

    Returns:
        The complete extension source, ending with a newline after the
        closing brace.
    """
    chunks = ["extension IANACharset {"]
    for element in CHARSETS_XML_RECORD_ELEMENTS:
        record = IANACharsetNameRecord(element)
        if record.representativeName in REQUIRED_CHARSET_NAMES:
            indentedLines = [SWIFT_CODE_INDENT + line for line in record.swiftCodeLines]
            chunks.append("\n" + "\n".join(indentedLines) + "\n")
    chunks.append("}\n")
    return "".join(chunks)
# Script entry point: build the Swift source and write it to standard output.
print(generateSwiftCode())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment