Created
January 1, 2023 21:41
-
-
Save queercat/3e56736a27ead18fa3abe0d9d861f3f0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using import struct | |
| using import Array | |
# One lexical token produced by the lexer.
struct Token
    # text of the token; defaults to the empty string
    value : string = ""
    # token kind; lex fills this with one of the T_* constants
    kind : i8
    # intended to be the source index where the token begins
    # (lex computes it as cursor + 1)
    position : i32
# Source text plus its length, read through an externally held cursor.
# The cursor convention used by the methods: `cursor` is the index of the
# last character already consumed, so the next unread one is at cursor + 1.
struct InputStream
    # the text being read; defaults to the empty string
    source : string = ""
    # number of characters in `source`, supplied by whoever builds the stream
    length : i32

    # Return the character one past `cursor`; `cursor` is not assigned.
    fn peek (self cursor)
        let ahead = (cursor + 1)
        self.source @ ahead

    # Add one to `cursor` (assigned in place) and return the character that
    # the updated cursor indexes.
    fn next (self cursor)
        cursor += 1
        self.source @ cursor

    # True once there is no character left after `cursor`.
    fn atEnd (self cursor)
        cursor >= (self.length - 1)
# Build a Token from its three fields, given in declaration order:
# value, kind, position.
fn create-token (value kind position)
    Token value kind position
# Report a lexing problem: prints `msg`, then `cursor`, each with its own
# print call. Nothing is raised here, so the caller carries on afterwards.
# NOTE(review): this name presumably shadows a core `error` - confirm that
# is intended.
fn error (msg cursor)
    (print msg)
    (print cursor)
# Token kinds, stored in Token.kind.
let T_NUMBER = 0:i8
let T_STRING = 1:i8
let T_ARRAY_START = 2:i8
let T_ARRAY_END = 3:i8
let T_OBJECT_START = 4:i8
let T_OBJECT_END = 5:i8
let T_BOOL = 6:i8
let T_NULL = 7:i8
let T_COMMA = 8:i8
let T_EOF = 9:i8
# Matching rule only: the characters that may begin a number differ from
# the ones that may continue it.
# see https://www.json.org/img/number.png
let T_NUMBER_START = 10:i8
# Matching rule only: the whitespace characters.
# see https://www.json.org/img/whitespace.png
let T_WHITESPACE = 11:i8
# The value 12 is not assigned to any kind.
let T_COLON = 13:i8
# Tell whether `character` (a one-character string) is accepted by `rule`,
# where `rule` is one of the T_* constants. Any rule not listed below
# yields false.
# NOTE(review): "." is accepted as the first character of a number, which
# the JSON number grammar does not appear to allow - confirm this leniency
# is wanted.
fn match (character rule)
    # the ten decimal digits, shared by both number rules
    let digit? =
        (
            character == "0" or
            character == "1" or
            character == "2" or
            character == "3" or
            character == "4" or
            character == "5" or
            character == "6" or
            character == "7" or
            character == "8" or
            character == "9"
        )
    if (rule == T_NUMBER_START)
        digit? or character == "." or character == "-"
    elseif (rule == T_NUMBER)
        (
            digit? or
            character == "." or
            character == "-" or
            character == "+" or
            character == "e" or
            character == "E"
        )
    elseif (rule == T_STRING)
        character == "\""
    elseif (rule == T_ARRAY_START)
        character == "["
    elseif (rule == T_ARRAY_END)
        character == "]"
    elseif (rule == T_OBJECT_START)
        character == "{"
    elseif (rule == T_OBJECT_END)
        character == "}"
    elseif (rule == T_COMMA)
        character == ","
    elseif (rule == T_COLON)
        character == ":"
    elseif (rule == T_WHITESPACE)
        (
            character == " " or
            character == "\n" or
            character == "\r" or
            character == "\t"
        )
    else
        false
# Split the JSON text `source` into tokens.
#
# Returns an (Array Token). Each token carries its text, one of the T_*
# kinds, and the source index of its first character. Whitespace emits
# nothing. A character that starts no known token is reported through
# `error` and skipped. String values are kept raw: the surrounding quotes
# are dropped and escape sequences are not decoded.
#
# Limitation: the literals true, false and null are not recognised, so each
# of their letters is reported as an unknown token.
fn lex (source)
    let stream =
        InputStream
            source
            countof source
    local tokens = ((Array Token))
    # The cursor sits one character before the next unread one, hence -1.
    local cursor = -1
    while (not ('atEnd stream cursor))
        let c = (('peek stream cursor) as string)
        # index of `c` in the source
        let position = (cursor + 1)
        if (match c T_NUMBER_START)
            local value = ("" as string)
            value ..= c
            # Look one character past `c` and only step while it still
            # belongs to the number. The cursor then stops just before the
            # last number character, so the shared step at the bottom of
            # the loop consumes that character and not the delimiter after
            # it. The atEnd test keeps the lookahead inside the source.
            while ((not ('atEnd stream (cursor + 1))) and (match (('peek stream (cursor + 1)) as string) T_NUMBER))
                ('next stream cursor)
                value ..= (('peek stream cursor) as string)
            'append tokens
                Token
                    value
                    T_NUMBER
                    position
        elseif (match c T_STRING)
            # consume the opening quote
            ('next stream cursor)
            local value = ("" as string)
            while ((not ('atEnd stream cursor)) and ((('peek stream cursor) as string) != "\""))
                let ch = (('peek stream cursor) as string)
                value ..= ch
                ('next stream cursor)
                # copy the character after a backslash verbatim so that an
                # escaped quote does not end the string
                if ((ch == "\\") and (not ('atEnd stream cursor)))
                    value ..= (('peek stream cursor) as string)
                    ('next stream cursor)
            # running out of input here means no closing quote was found
            if ('atEnd stream cursor)
                error "unterminated string" position
            'append tokens
                Token
                    value
                    T_STRING
                    position
        elseif (match c T_ARRAY_START)
            'append tokens (Token c T_ARRAY_START position)
        elseif (match c T_ARRAY_END)
            'append tokens (Token c T_ARRAY_END position)
        elseif (match c T_OBJECT_START)
            'append tokens (Token c T_OBJECT_START position)
        elseif (match c T_OBJECT_END)
            'append tokens (Token c T_OBJECT_END position)
        elseif (match c T_COMMA)
            'append tokens (Token c T_COMMA position)
        elseif (match c T_COLON)
            'append tokens (Token c T_COLON position)
        elseif (match c T_WHITESPACE)
            # whitespace only separates tokens
        else
            # report the index of the offending character itself
            error "unknown token found in production" position
        # Every branch leaves the cursor just before the last character it
        # handled; this step consumes that character.
        ('next stream cursor)
    return tokens
# Lex `source` and print every token: its value first, then its position,
# each with a separate print call. No parse tree is built yet.
fn parse (source)
    let lexed = (lex source)
    for tok in lexed
        (print tok.value)
        (print tok.position)
# Demo run when the file is loaded: feeds parse a small JSON document
# containing newlines, tabs, a nested array and an object.
parse "{\n\t\"emp_details\": [\n\t\t{\n\t\t\t\"emp_name\": \"Shubham\"}]}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment