Created
January 21, 2026 13:45
-
-
Save prenaissance/6775fe1b52b4324fb1677d6185e0769a to your computer and use it in GitHub Desktop.
Typescript Nested Array parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| type Input = { | |
| text: string; | |
| index: number; | |
| }; | |
| type ParseResult<T> = | |
| | { success: true; value: T; remaining: Input } | |
| | { success: false }; | |
| // biome-ignore lint/correctness/noUnusedVariables: unused | |
| type InferParser<U> = U extends P<infer T> ? T : never; | |
| class P<T> { | |
| constructor(private parseFn: (input: Input) => ParseResult<T>) {} | |
| parse(inputText: string): ParseResult<T> { | |
| const input: Input = { text: inputText, index: 0 }; | |
| return this.parseFn(input); | |
| } | |
| private run(input: Input): ParseResult<T> { | |
| return this.parseFn(input); | |
| } | |
| static char<C extends string>(c: C): P<C> { | |
| return new P<C>((input) => { | |
| if (input.index < input.text.length && input.text[input.index] === c) { | |
| return { | |
| success: true, | |
| value: c, | |
| remaining: { text: input.text, index: input.index + 1 }, | |
| }; | |
| } | |
| return { success: false }; | |
| }); | |
| } | |
| static dictionary(str: string): P<string> { | |
| const set = new Set([...str]); | |
| return new P<string>((input) => { | |
| for (const word of set) { | |
| if (input.text.slice(input.index, input.index + word.length) === word) { | |
| return { | |
| success: true, | |
| value: word, | |
| remaining: { text: input.text, index: input.index + word.length }, | |
| }; | |
| } | |
| } | |
| return { success: false }; | |
| }); | |
| } | |
| static digit(): P<string> { | |
| return P.dictionary("0123456789"); | |
| } | |
| static string<S extends string>(str: S): P<S> { | |
| return new P<S>((input) => { | |
| if (input.text.slice(input.index, input.index + str.length) === str) { | |
| return { | |
| success: true, | |
| value: str, | |
| remaining: { text: input.text, index: input.index + str.length }, | |
| }; | |
| } | |
| return { success: false }; | |
| }); | |
| } | |
| static whitespace(): P<string> { | |
| return P.dictionary(" \t\n\r"); | |
| } | |
| static regex(regex: RegExp): P<string> { | |
| return new P<string>((input) => { | |
| const match = regex.exec(input.text.slice(input.index)); | |
| if (match && match.index === 0) { | |
| const matchedString = match[0]; | |
| return { | |
| success: true, | |
| value: matchedString, | |
| remaining: { | |
| text: input.text, | |
| index: input.index + matchedString.length, | |
| }, | |
| }; | |
| } | |
| return { success: false }; | |
| }); | |
| } | |
| static seq<T1, T2>(parsers: [P<T1>, P<T2>]): P<[T1, T2]>; | |
| static seq<T1, T2, T3>(parsers: [P<T1>, P<T2>, P<T3>]): P<[T1, T2, T3]>; | |
| static seq<T1, T2, T3, T4>( | |
| parsers: [P<T1>, P<T2>, P<T3>, P<T4>], | |
| ): P<[T1, T2, T3, T4]>; | |
| static seq<T1, T2, T3, T4, T5>( | |
| parsers: [P<T1>, P<T2>, P<T3>, P<T4>, P<T5>], | |
| ): P<[T1, T2, T3, T4, T5]>; | |
| static seq(parsers: P<unknown>[]): P<unknown[]> { | |
| return new P<unknown[]>((input) => { | |
| const values: unknown[] = []; | |
| let currentInput = input; | |
| for (const parser of parsers) { | |
| const result = parser.run(currentInput); | |
| if (!result.success) { | |
| return { success: false }; | |
| } | |
| values.push(result.value); | |
| currentInput = result.remaining; | |
| } | |
| return { success: true, value: values, remaining: currentInput }; | |
| }); | |
| } | |
| static lazy<T>(thunk: () => P<T>): P<T> { | |
| return new P<T>((input) => { | |
| return thunk().run(input); | |
| }); | |
| } | |
| or<U>(other: P<U>): P<T | U> { | |
| return new P<T | U>((input) => { | |
| const result = this.run(input); | |
| if (result.success) { | |
| return result; | |
| } | |
| return other.run(input); | |
| }); | |
| } | |
| map<U>(fn: (value: T) => U): P<U> { | |
| return new P<U>((input) => { | |
| const result = this.run(input); | |
| if (result.success) { | |
| return { | |
| success: true, | |
| value: fn(result.value), | |
| remaining: result.remaining, | |
| }; | |
| } | |
| return { success: false }; | |
| }); | |
| } | |
| many(): P<T[]> { | |
| return new P<T[]>((input) => { | |
| const values: T[] = []; | |
| let currentInput = input; | |
| while (true) { | |
| const result = this.run(currentInput); | |
| if (!result.success) { | |
| break; | |
| } | |
| values.push(result.value); | |
| currentInput = result.remaining; | |
| } | |
| return { success: true, value: values, remaining: currentInput }; | |
| }); | |
| } | |
| many1(): P<T[]> { | |
| return new P<T[]>((input) => { | |
| const result = this.run(input); | |
| if (!result.success) { | |
| return { success: false }; | |
| } | |
| // P::many parsers always succeed | |
| const manyResult = this.many().run(result.remaining) as ParseResult< | |
| T[] | |
| > & { success: true }; | |
| return { | |
| success: true, | |
| value: [result.value, ...manyResult.value], | |
| remaining: manyResult.remaining, | |
| }; | |
| }); | |
| } | |
| sepBy<U>(separator: P<U>): P<T[]> { | |
| return new P<T[]>((input) => { | |
| const firstResult = this.run(input); | |
| if (!firstResult.success) { | |
| return { success: true, value: [], remaining: input }; | |
| } | |
| const othersParser = P.seq([separator, this]) | |
| .map(([, value]) => value) | |
| .many(); | |
| const othersResult = othersParser.run( | |
| firstResult.remaining, | |
| ) as ParseResult<T[]> & { success: true }; | |
| return { | |
| success: true, | |
| value: [firstResult.value, ...othersResult.value], | |
| remaining: othersResult.remaining, | |
| }; | |
| }); | |
| } | |
| between(boundary: P<unknown>): P<T>; | |
| between(left: P<unknown>, right: P<unknown>): P<T>; | |
| between(left: P<unknown>, right?: P<unknown>): P<T> { | |
| const rightBoundary = right ?? left; | |
| return P.seq([left, this, rightBoundary]).map(([, value]) => value); | |
| } | |
| } | |
| const zero = P.char("0"); | |
| const nonZero = P.dictionary("123456789"); | |
| const integer = P.seq([nonZero, P.digit().many()]) | |
| .map(([first, rest]) => first + rest.join("")) | |
| .or(zero); | |
| const integerToken = integer.map((value) => ({ | |
| type: "IntLiteral" as const, | |
| value: value, | |
| })); | |
| const float = P.seq([integer, P.char("."), P.digit().many1()]).map( | |
| ([intPart, dot, fracPart]) => intPart + dot + fracPart.join(""), | |
| ); | |
| const floatToken = float.map((value) => ({ | |
| type: "FloatLiteral" as const, | |
| value: value, | |
| })); | |
| const numberParser = floatToken.or(integerToken); | |
| const result = numberParser.parse("123.0014"); | |
| if (result.success) { | |
| console.log(result.value); | |
| } | |
| const intArrayElement = integer.map(Number.parseInt); | |
| const arraySeparator = P.char(",").between(P.whitespace().many()); | |
| type NestedElement = number | NestedElement[]; | |
| const intArrayParser: P<NestedElement[]> = intArrayElement | |
| .or(P.lazy(() => intArrayParser)) | |
| .sepBy(arraySeparator) | |
| .between(P.whitespace().many()) | |
| .between(P.char("["), P.char("]")); | |
| const arrayResult = intArrayParser.parse("[ 1, 22 , [15, [6,7]], 3, 4,5]"); | |
| if (arrayResult.success) { | |
| console.log(arrayResult.value); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment