Created
March 14, 2026 16:29
-
-
Save richytong/87fbdc19e4725f7f4f1ec7e77cae1ce0 to your computer and use it in GitHub Desktop.
XML parser coded by Claude.ai (Anthropic)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| 'use strict' | |
const XML = {}

/**
 * Parse an XML document into plain JavaScript values.
 *
 * Default mode returns `{ [rootName]: value }`, where an element's value is:
 *   - `''` for an empty element without attributes;
 *   - an object of its attributes for an empty element with attributes;
 *   - a string for a text-only element (a `String` object carrying the
 *     attributes as properties when the element also has attributes);
 *   - an object keyed by child element name for element-only content, with
 *     the element's attributes merged in; a name that occurs more than once
 *     (including an attribute that shares a child's name) maps to an array of
 *     the values in document order;
 *   - an array of strings and `{ [childName]: value }` objects for mixed
 *     content; the element's own attributes are not part of that array.
 *
 * AST mode (`options.ast === true`) returns the raw root node
 * `{ $name, ...attributes, $children }`, plus `$preamble` (the attributes of
 * the XML declaration) when a declaration is present.
 *
 * Text is trimmed and whitespace-only text is dropped. Comments are skipped.
 * Entity references are left undecoded; CDATA sections, DOCTYPE declarations
 * and processing instructions other than the XML declaration are not
 * supported. Input that holds no root element yields `{}` (or
 * `{ $preamble }` in AST mode when a declaration is present).
 *
 * @param {string} input - XML source text.
 * @param {{ ast?: boolean }} [options] - Pass `{ ast: true }` for AST mode.
 * @returns {object} The converted document, or the AST root node in AST mode.
 * @throws {SyntaxError} On malformed, mismatched or truncated markup, or on
 *   content after the root element.
 */
XML.parse = function parse(input, options = {}) {
  const astMode = options.ast === true

  // Hoisted so the scanning loops do not rebuild a regex per character.
  const WHITESPACE = /\s/
  const ATTR_NAME_END = /[\s=>\/]/
  const TAG_NAME_END = /[\s>\/]/

  // Cursor into `input`, shared by every helper below.
  let pos = 0

  // Own-property test that ignores Object.prototype members such as
  // `toString` or `constructor`, which the `in` operator would report.
  function hasOwn(target, key) {
    return Object.prototype.hasOwnProperty.call(target, key)
  }

  // Create `key` as an ordinary own data property. Plain assignment would run
  // the `__proto__` setter for that name and change (or ignore) the prototype
  // instead of storing the value.
  function setOwn(target, key, value) {
    Object.defineProperty(target, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    })
  }

  // Advance the cursor past any run of whitespace.
  function skipWhitespace() {
    while (pos < input.length && WHITESPACE.test(input[pos])) pos++
  }

  // Advance past the comment starting at the cursor (which is on `<!--`).
  function skipComment() {
    const end = input.indexOf('-->', pos + 4)
    if (end === -1) throw new SyntaxError('Unterminated comment')
    pos = end + 3
  }

  // Skip whitespace and comments between top-level constructs.
  function skipMisc() {
    skipWhitespace()
    while (input.startsWith('<!--', pos)) {
      skipComment()
      skipWhitespace()
    }
  }

  // Parse the XML declaration at the cursor and return its attributes.
  function parsePreamble() {
    pos += 5 // skip '<?xml'
    const attrs = parseAttributes()
    // A declaration cut off before '?>' used to be treated as complete.
    if (input[pos] !== '?' || input[pos + 1] !== '>') {
      throw new SyntaxError('Malformed XML declaration')
    }
    pos += 2
    return attrs
  }

  // Parse `name="value"` pairs until '>', '/', '?' or a character that cannot
  // start an attribute name; the cursor is left on that terminator.
  function parseAttributes() {
    const attrs = {}
    while (pos < input.length) {
      skipWhitespace()
      const ch = input[pos]
      if (ch === '>' || ch === '/' || ch === '?') break

      const nameStart = pos
      while (pos < input.length && !ATTR_NAME_END.test(input[pos])) pos++
      const name = input.slice(nameStart, pos)
      if (!name) break

      skipWhitespace()
      if (input[pos] !== '=') throw new SyntaxError('Expected = for attribute')
      pos++ // skip '='
      skipWhitespace()

      const quote = input[pos]
      if (quote !== '"' && quote !== "'") {
        throw new SyntaxError('Expected quote for attribute value')
      }
      pos++ // skip opening quote

      const valueEnd = input.indexOf(quote, pos)
      if (valueEnd === -1) throw new SyntaxError('Unterminated attribute value')
      setOwn(attrs, name, input.slice(pos, valueEnd))
      pos = valueEnd + 1 // skip closing quote
    }
    return attrs
  }

  // Parse the element starting at the cursor (which must be on '<') and
  // return its AST node `{ $name, ...attributes, $children }`.
  function parseElement() {
    if (input[pos] !== '<') throw new SyntaxError('Malformed tag')
    pos++ // skip '<'

    const nameStart = pos
    while (pos < input.length && !TAG_NAME_END.test(input[pos])) pos++
    const name = input.slice(nameStart, pos)
    if (!name) throw new SyntaxError('Malformed tag')

    const attrs = parseAttributes()

    if (input[pos] === '/') {
      // self-closing tag
      pos++
      if (input[pos] !== '>') throw new SyntaxError('Malformed tag')
      pos++
      return { $name: name, ...attrs, $children: [] }
    }
    if (input[pos] !== '>') throw new SyntaxError('Malformed tag')
    pos++ // skip '>'

    const children = []
    // Raw text is buffered so that text on either side of a comment becomes
    // one child; it is trimmed and emitted when the next tag is reached.
    let pendingText = ''
    const flushText = () => {
      const trimmed = pendingText.trim()
      if (trimmed) children.push(trimmed)
      pendingText = ''
    }

    while (pos < input.length) {
      if (input[pos] !== '<') {
        const nextTag = input.indexOf('<', pos)
        const textEnd = nextTag === -1 ? input.length : nextTag
        pendingText += input.slice(pos, textEnd)
        pos = textEnd
        continue
      }

      if (input.startsWith('<!--', pos)) {
        skipComment()
        continue
      }

      if (input[pos + 1] === '/') {
        pos += 2 // skip '</'
        const closeEnd = input.indexOf('>', pos)
        if (closeEnd === -1) {
          throw new SyntaxError(`Unterminated closing tag for <${name}>`)
        }
        const closeName = input.slice(pos, closeEnd).trim()
        pos = closeEnd + 1
        if (closeName !== name) {
          throw new SyntaxError(`Mismatched </${closeName}> \u2013 expected </${name}>`)
        }
        flushText()
        return { $name: name, ...attrs, $children: children }
      }

      flushText()
      children.push(parseElement())
    }

    // Input ended before the closing tag; this used to be accepted silently.
    throw new SyntaxError(`Unclosed element <${name}>`)
  }

  // Convert an AST node into its default-mode value (see the cases listed in
  // the documentation of `parse`).
  function nodeValue(node) {
    // `$name` is extracted only to keep it out of `attrs`.
    const { $name, $children, ...attrs } = node
    const hasAttrs = Object.keys(attrs).length > 0
    const children = $children // already trimmed/filtered during parse

    if (children.length === 0) {
      return hasAttrs ? attrs : ''
    }

    if (children.length === 1 && typeof children[0] === 'string') {
      if (!hasAttrs) return children[0]
      // A String object is used on purpose: it is the only way to return the
      // text and still expose the attributes as properties.
      const boxed = new String(children[0])
      for (const [key, value] of Object.entries(attrs)) setOwn(boxed, key, value)
      return boxed
    }

    const allElements = children.every(child => typeof child !== 'string')
    if (allElements) {
      const grouped = { ...attrs }
      // Names already turned into a list of repeated values. Tracked
      // explicitly because a single mixed-content child is itself an array
      // and must not be mistaken for such a list.
      const repeated = new Set()
      for (const child of children) {
        const key = child.$name
        const value = nodeValue(child)
        if (!hasOwn(grouped, key)) {
          setOwn(grouped, key, value)
        } else if (repeated.has(key)) {
          grouped[key].push(value)
        } else {
          setOwn(grouped, key, [grouped[key], value])
          repeated.add(key)
        }
      }
      return grouped
    }

    // mixed content: text nodes and element nodes interleaved
    return children.map(child =>
      typeof child === 'string' ? child : { [child.$name]: nodeValue(child) }
    )
  }

  // Parse the whole document: optional declaration, one root element, and
  // nothing but whitespace or comments around them.
  function run() {
    skipWhitespace()
    let preamble = null
    if (input.startsWith('<?xml', pos)) {
      preamble = parsePreamble()
    }
    skipMisc()

    if (pos >= input.length) {
      return astMode && preamble ? { $preamble: preamble } : {}
    }

    const node = parseElement()
    skipMisc()
    if (pos < input.length) throw new SyntaxError('Extra content')

    if (astMode) {
      if (preamble) node.$preamble = preamble
      return node
    }

    const result = {}
    setOwn(result, node.$name, nodeValue(node))
    return result
  }

  return run()
}
| module.exports = XML |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment