Skip to content

Instantly share code, notes, and snippets.

@fcole90
Created May 30, 2020 09:07
Show Gist options
  • Select an option

  • Save fcole90/d2ad38cc691a5483af33177eed4dee24 to your computer and use it in GitHub Desktop.

Select an option

Save fcole90/d2ad38cc691a5483af33177eed4dee24 to your computer and use it in GitHub Desktop.
Convert simple inline LaTeX to Unicode
import re
# Lookup table from a LaTeX command (without its leading backslash) to the
# Unicode text that replaces it. Keys are matched verbatim, so commands that
# take an argument (e.g. "mathbb{R}") are stored together with that argument.
# Values that contain invisible combining characters (variation selectors,
# combining solidus) are written as escapes so they survive copy/paste.
__SYMBOLS_DICTIONARY__ = {
    # Greek Letters
    "Gamma": "Γ",
    "Delta": "Δ",
    "Lambda": "Λ",
    "Phi": "Φ",
    "Pi": "Π",
    "Psi": "Ψ",
    "Sigma": "Σ",
    "Theta": "Θ",
    "Upsilon": "Υ",
    "Xi": "Ξ",
    "Omega": "Ω",
    "alpha": "α",
    "beta": "β",
    "gamma": "γ",
    "delta": "δ",
    "epsilon": "ϵ",
    "zeta": "ζ",
    "eta": "η",
    "theta": "θ",
    "iota": "ι",
    "kappa": "κ",
    "lambda": "λ",
    "mu": "μ",
    "nu": "ν",
    "xi": "ξ",
    "pi": "π",
    "rho": "ρ",
    "sigma": "σ",
    "tau": "τ",
    "upsilon": "υ",
    "phi": "ϕ",
    "chi": "χ",
    "psi": "ψ",
    "omega": "ω",
    "digamma": "ϝ",
    "varepsilon": "ε",
    "varkappa": "ϰ",
    "varphi": "φ",
    "varpi": "ϖ",
    "varrho": "ϱ",
    "varsigma": "ς",
    "vartheta": "ϑ",
    # Other alphabetic characters
    "aleph": "ℵ",
    "beth": "ℶ",
    "daleth": "ℸ",
    "gimel": "ℷ",
    "complement": "∁",
    "ell": "ℓ",
    "eth": "ð",
    "hbar": "ℏ",
    "hslash": "ℏ",
    "mho": "℧",
    "partial": "∂",
    "wp": "℘",
    "circledS": "Ⓢ",
    "Bbbk": "\U0001d55c",  # double-struck small k (was a plain ASCII "k")
    "Finv": "Ⅎ",
    "Game": "⅁",
    "Im": "ℑ",  # black-letter capital I (was a plain ASCII "J")
    "Re": "ℜ",  # black-letter capital R (was a plain ASCII "R")
    # Other simple symbols
    "#": "#",
    "angle": "∠",
    "backprime": "‵",
    "bigstar": "★",
    "blacklozenge": "⧫",
    "blacksquare": "■",
    "blacktriangle": "▲",
    "blacktriangledown": "▼",
    "bot": "⊥",
    "clubsuit": "♣",
    "diagdown": "╲",
    "diagup": "╱",
    "diamondsuit": "♢",
    "emptyset": "∅",
    "exists": "∃",
    "flat": "♭",
    "forall": "∀",
    "heartsuit": "♡",
    "infty": "∞",
    "lozenge": "◊",
    "measuredangle": "∡",
    "nabla": "∇",
    "natural": "♮",
    "neg": "¬",
    "lnot": "¬",
    "nexists": "∄",
    "prime": "′",
    "S": "§",
    "sharp": "♯",
    "spadesuit": "♠",
    "sphericalangle": "∢",
    # The scraped table row read "\square p"; the command itself is \square.
    "square": "□",
    "surd": "√",
    "top": "⊤",
    "triangle": "△",
    "triangledown": "▽",
    "varnothing": "∅",
    # Accumulation operators: sum, integral, union, etc
    "int": "∫",
    "oint": "∮",
    "bigcap": "⋂",
    "bigcup": "⋃",
    "bigodot": "⨀",
    "bigoplus": "⨁",
    "bigotimes": "⨂",
    "bigsqcup": "⨆",
    "biguplus": "⨄",
    "bigvee": "⋁",
    "bigwedge": "⋀",
    "coprod": "∐",
    "prod": "∏",  # n-ary product (was the double-struck capital pi)
    "sum": "∑",  # n-ary summation (was the double-struck summation)
    "smallint": "∫",
    # Binary operators
    "amalg": "⨿",
    "ast": "∗",
    "barwedge": "⊼",
    "bigcirc": "◯",
    "bigtriangledown": "▽",
    "bigtriangleup": "△",
    "boxdot": "⊡",
    "boxminus": "⊟",
    "boxplus": "⊞",
    "boxtimes": "⊠",
    "bullet": "∙",
    "cap": "∩",
    "Cap": "⋒",
    "doublecap": "⋒",
    "cdot": "⋅",
    "centerdot": "⋅",
    "circ": "∘",
    "circledast": "⊛",
    "circledcirc": "⊚",
    "circleddash": "⊝",
    "cup": "∪",
    "Cup": "⋓",
    "doublecup": "⋓",
    "curlyvee": "⋎",
    "curlywedge": "⋏",
    "dagger": "†",
    "ddagger": "‡",
    "diamond": "⋄",
    "div": "÷",
    "divideontimes": "⋇",
    "dotplus": "∔",
    "doublebarwedge": "⩞",
    "gtrdot": "⋗",
    "intercal": "⊺",
    "leftthreetimes": "⋋",
    "lessdot": "⋖",
    "ltimes": "⋉",
    "mp": "∓",
    "odot": "⊙",
    "ominus": "⊖",
    "oplus": "⊕",
    "oslash": "⊘",
    "otimes": "⊗",
    "pm": "±",
    "rightthreetimes": "⋌",
    "rtimes": "⋊",
    "setminus": "∖",
    "smallsetminus": "∖",
    "sqcap": "⊓",
    "sqcup": "⊔",
    "star": "⋆",
    "times": "×",
    "triangleleft": "◃",
    "triangleright": "▹",
    "uplus": "⊎",
    "vee": "∨",
    "lor": "∨",
    "veebar": "⊻",
    "wedge": "∧",
    "land": "∧",
    "wr": "≀",
    # Relation / comparison
    "approx": "≈",
    "approxeq": "≊",
    "asymp": "≍",
    "backsim": "∽",
    "backsimeq": "⋍",
    "bumpeq": "≏",
    "Bumpeq": "≎",
    "circeq": "≗",
    "cong": "≅",
    "curlyeqprec": "⋞",
    "curlyeqsucc": "⋟",
    "doteq": "≐",
    "doteqdot": "≑",
    "Doteq": "≑",
    "eqcirc": "≖",
    "eqsim": "≂",
    "eqslantgtr": "⪖",
    "eqslantless": "⪕",
    "equiv": "≡",
    "fallingdotseq": "≒",
    "geq": "≥",
    "ge": "≥",
    "geqq": "≧",
    "geqslant": "⩾",
    "gg": "≫",
    "ggg": "⋙",
    "gggtr": "⋙",
    "gnapprox": "⪊",
    "gneq": "⪈",
    "gneqq": "≩",
    "gnsim": "⋧",
    "gtrapprox": "⪆",
    "gtreqless": "⋛",
    "gtreqqless": "⪌",
    "gtrless": "≷",
    "gtrsim": "≳",
    "gvertneqq": "\u2269\ufe00",  # ≩ + variation selector-1
    "leq": "≤",
    "le": "≤",
    "leqq": "≦",
    "leqslant": "⩽",
    "lessapprox": "⪅",
    "lesseqgtr": "⋚",
    "lesseqqgtr": "⪋",
    "lessgtr": "≶",
    "lesssim": "≲",
    "ll": "≪",
    "lll": "⋘",
    "llless": "⋘",
    "lnapprox": "⪉",
    "lneq": "⪇",
    "lneqq": "≨",
    "lnsim": "⋦",
    "lvertneqq": "\u2268\ufe00",  # ≨ + variation selector-1
    "ncong": "≆",
    "neq": "≠",
    "ne": "≠",
    "ngeq": "≱",
    "ngeqq": "\u2267\u0338",  # ≧ + combining long solidus overlay
    "ngeqslant": "\u2a7e\u0338",  # ⩾ + combining long solidus overlay
    "ngtr": "≯",
    "nleq": "≰",
    "nleqq": "\u2266\u0338",  # ≦ + combining long solidus overlay
    "nleqslant": "\u2a7d\u0338",  # ⩽ + combining long solidus overlay
    "nless": "≮",
    "nprec": "⊀",
    "npreceq": "⋠",
    "nsim": "≁",
    "nsucc": "⊁",
    "nsucceq": "⋡",
    "prec": "≺",
    "precapprox": "⪷",
    "preccurlyeq": "≼",
    "preceq": "⪯",
    "precnapprox": "⪹",
    "precneqq": "⪵",
    "precnsim": "⋨",
    "precsim": "≾",
    "risingdotseq": "≓",
    "sim": "∼",
    "simeq": "≃",
    "succ": "≻",
    "succapprox": "⪸",
    "succcurlyeq": "≽",
    "succeq": "⪰",
    "succnapprox": "⪺",
    "succneqq": "⪶",
    "succnsim": "⋩",
    "succsim": "≿",
    "thickapprox": "≈",
    "thicksim": "∼",
    "triangleq": "≜",
    # Arrows
    "circlearrowleft": "↺",
    "circlearrowright": "↻",
    "curvearrowleft": "↶",
    "curvearrowright": "↷",
    "downdownarrows": "⇊",
    "downharpoonleft": "⇃",
    "downharpoonright": "⇂",
    "hookleftarrow": "↩",
    "hookrightarrow": "↪",
    "leftarrow": "←",
    "gets": "←",
    "Leftarrow": "⇐",
    "leftarrowtail": "↢",
    "leftharpoondown": "↽",
    "leftharpoonup": "↼",
    "leftleftarrows": "⇇",
    "leftrightarrow": "↔",
    "Leftrightarrow": "⇔",
    "leftrightarrows": "⇆",
    "leftrightharpoons": "⇋",
    "leftrightsquigarrow": "↭",
    "Lleftarrow": "⇚",
    "longleftarrow": "⟵",
    "Longleftarrow": "⟸",
    "longleftrightarrow": "⟷",
    "Longleftrightarrow": "⟺",
    "longmapsto": "⟼",
    "longrightarrow": "⟶",
    "Longrightarrow": "⟹",
    "implies": "⟹",
    "looparrowleft": "↫",
    "looparrowright": "↬",
    "Lsh": "↰",
    "mapsto": "↦",
    "multimap": "⊸",
    "nLeftarrow": "⇍",
    "nLeftrightarrow": "⇎",
    "nRightarrow": "⇏",
    "nearrow": "↗",
    "nleftarrow": "↚",
    "nleftrightarrow": "↮",
    "nrightarrow": "↛",
    "nwarrow": "↖",
    "rightarrow": "→",
    "to": "→",
    "Rightarrow": "⇒",
    "rightarrowtail": "↣",
    "rightharpoondown": "⇁",
    "rightharpoonup": "⇀",
    "rightleftarrows": "⇄",
    "rightleftharpoons": "⇌",
    "rightrightarrows": "⇉",
    "rightsquigarrow": "⇝",
    "Rrightarrow": "⇛",
    "Rsh": "↱",
    "searrow": "↘",
    "swarrow": "↙",
    "twoheadleftarrow": "↞",
    "twoheadrightarrow": "↠",
    "upharpoonleft": "↿",
    "upharpoonright": "↾",
    "restriction": "↾",
    "upuparrows": "⇈",
    "uparrow": "↑",
    "Uparrow": "⇑",
    "downarrow": "↓",
    "Downarrow": "⇓",
    "updownarrow": "↕",
    "Updownarrow": "⇕",
    # Other relation symbols
    "backepsilon": "∍",
    "because": "∵",
    "between": "≬",
    "blacktriangleleft": "◀",
    "blacktriangleright": "▶",
    "bowtie": "⋈",
    "dashv": "⊣",
    "frown": "⌢",
    "in": "∈",
    "mid": "∣",
    "models": "⊨",
    "ni": "∋",
    "owns": "∋",
    "nmid": "∤",
    "notin": "∉",
    "nparallel": "∦",
    "nshortmid": "\u2224",  # ∤ (no dedicated short form in Unicode)
    "nshortparallel": "\u2226",  # ∦ (no dedicated short form in Unicode)
    "nsubseteq": "⊈",
    "nsubseteqq": "\u2ac5\u0338",  # ⫅ + combining long solidus overlay
    "nsupseteq": "⊉",
    "nsupseteqq": "\u2ac6\u0338",  # ⫆ + combining long solidus overlay
    "ntriangleleft": "⋪",
    "ntrianglelefteq": "⋬",
    "ntriangleright": "⋫",
    "ntrianglerighteq": "⋭",
    "nvdash": "⊬",
    "nVdash": "⊮",
    "nvDash": "⊭",
    "nVDash": "⊯",
    "parallel": "∥",
    "perp": "⊥",
    "pitchfork": "⋔",
    "propto": "∝",
    "shortmid": "∣",
    "shortparallel": "∥",
    "smallfrown": "⌢",
    "smallsmile": "⌣",
    "smile": "⌣",
    "sqsubset": "⊏",
    "sqsubseteq": "⊑",
    "sqsupset": "⊐",
    "sqsupseteq": "⊒",
    "subset": "⊂",
    "Subset": "⋐",
    "subseteq": "⊆",
    "subseteqq": "⫅",
    "subsetneq": "⊊",
    "subsetneqq": "⫋",
    "supset": "⊃",
    "Supset": "⋑",
    "supseteq": "⊇",
    "supseteqq": "⫆",
    "supsetneq": "⊋",
    "supsetneqq": "⫌",
    "therefore": "∴",
    "trianglelefteq": "⊴",
    "trianglerighteq": "⊵",
    "varpropto": "∝",
    "varsubsetneq": "\u228a\ufe00",  # ⊊ + variation selector-1
    "varsubsetneqq": "\u2acb\ufe00",  # ⫋ + variation selector-1
    "varsupsetneq": "\u228b\ufe00",  # ⊋ + variation selector-1
    "varsupsetneqq": "\u2acc\ufe00",  # ⫌ + variation selector-1
    "vartriangle": "△",
    "vartriangleleft": "⊲",
    "vartriangleright": "⊳",
    "vdash": "⊢",
    "Vdash": "⊩",
    "vDash": "⊨",
    "Vvdash": "⊪",
    "vert": "|",
    "Vert": "‖",  # double vertical line (was the "parallel to" relation)
    # Special fonts
    "mathbb{C}": "ℂ",
    "mathbb{H}": "ℍ",
    "mathbb{N}": "ℕ",
    "mathbb{P}": "ℙ",
    "mathbb{Q}": "ℚ",
    "mathbb{R}": "ℝ",
    "mathbb{Z}": "ℤ",
}
class LatexCommandNotFound(KeyError):
    """Raised when a LaTeX command has no Unicode replacement.

    Subclasses ``KeyError`` so ``except KeyError`` handlers also catch it.

    Attributes:
        command: The command that could not be resolved, as passed in.
    """

    def __init__(self, command):
        self.command = command
        super().__init__(
            f'Command not found: "{command}". '
            'See all available commands with "get_commands()"'
        )

    def __str__(self):
        # KeyError.__str__ returns repr() of a single argument, which would
        # wrap the whole message in quotes and escape the ones inside it.
        return self.args[0]
def get_commands():
    """Return the keys view of the symbol table, i.e. every known command."""
    known_commands = __SYMBOLS_DICTIONARY__.keys()
    return known_commands
def get_converter(symbol="\\"):
    """Build a function that replaces LaTeX commands in text with Unicode.

    Args:
        symbol: Prefix that introduces a command (a backslash by default).

    Returns:
        A function ``convert(txt)`` returning *txt* with every
        ``<symbol><command>`` occurrence replaced through ``symbol_replace``.
        Text where the prefix is not followed by a letter (e.g. ``\\|``) is
        left untouched. ``convert`` propagates the ``KeyError`` subclass
        raised by ``symbol_replace`` for commands that are not known.
    """
    # A LaTeX control word is made of letters only, so a digit or underscore
    # right after the name (``\alpha_1``, ``\sigma2``) is not part of it.
    # At most one ``{...}`` group is captured, for keys such as ``mathbb{R}``.
    pattern = re.compile(re.escape(symbol) + r"([A-Za-z]+)(\{\w+\})?")

    def convert(txt):
        def replace(match):
            name = match.group(1)
            argument = match.group(2) or ""
            try:
                return symbol_replace(name + argument)
            except KeyError:
                if not argument:
                    raise
            # No entry for "name{arg}": convert the bare command and keep the
            # braces as they were (e.g. ``\sum{x}``).
            return symbol_replace(name) + argument

        return pattern.sub(replace, txt)

    return convert
def symbol_replace(symbol):
    """Return the Unicode replacement for a single LaTeX command.

    Args:
        symbol: Command name without its prefix, e.g. ``"alpha"`` or
            ``"mathbb{R}"``.

    Raises:
        LatexCommandNotFound: If *symbol* is not a key of the symbol table.
    """
    try:
        return __SYMBOLS_DICTIONARY__[symbol]
    except KeyError:
        # The plain KeyError carries nothing the custom error does not, so
        # drop it from the traceback instead of chaining it implicitly.
        raise LatexCommandNotFound(symbol) from None
# https://latex.wikia.org/wiki/List_of_LaTeX_symbols
def __parse_text_from_wikia__(text):
    """Print a dict literal built from rows of the symbol list linked above.

    Each line of *text* is expected to be ``<symbol> <command>``, where
    ``<command>`` starts with a one-character prefix and may list several
    aliases separated by `` or`` (e.g. ``∨ \\vee or \\lor``). Lines shorter
    than three characters are skipped.

    Args:
        text: Newline-separated rows to parse.

    Raises:
        ValueError: If a processed line contains no space separator.
    """
    print("{")
    for line in text.split("\n"):
        if len(line) < 3:
            continue
        # Split on the first space only: the command part may itself contain
        # spaces when it lists aliases ("\vee or \lor").
        sym, command = line.split(" ", 1)
        for alias in command.split(" or"):
            # [1:] drops the one-character command prefix.
            print(f' "{alias.strip()[1:]}": "{sym}",')
    print("}")
if __name__ == '__main__':
    # Demo: convert one sample formula and print the result.
    convert = get_converter("\\")
    print(convert(r"\forall x \in \mathbb{N} \| x = 5 * 3 \sigma"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment