Skip to content

Instantly share code, notes, and snippets.

@richytong
Created March 14, 2026 16:29
Show Gist options
  • Select an option

  • Save richytong/87fbdc19e4725f7f4f1ec7e77cae1ce0 to your computer and use it in GitHub Desktop.

Select an option

Save richytong/87fbdc19e4725f7f4f1ec7e77cae1ce0 to your computer and use it in GitHub Desktop.
XML parser coded by Claude.ai (Anthropic)
'use strict'

const XML = {}

/**
 * Parse an XML document string into a JavaScript value.
 *
 * Default mode returns `{ [rootName]: value }`, where an element's value is:
 *   - `''` for an element with no attributes and no children;
 *   - an object of its attributes when it has attributes but no children;
 *   - a primitive string when it only contains text;
 *   - a `String` object carrying the attributes as own properties when it
 *     contains text and has attributes;
 *   - an object keyed by child element name (plus attributes) when it only
 *     contains elements; a name that occurs more than once maps to an array;
 *   - an array of strings and `{ [name]: value }` objects for mixed content.
 *
 * With `options.ast === true` the raw tree is returned instead: each node is
 * `{ $name, ...attributes, $children }`, and the root additionally carries
 * `$preamble` (the attributes of the `<?xml ...?>` declaration) when present.
 *
 * Text is trimmed and whitespace-only text is dropped. Comments are skipped.
 * Entity references (`&amp;` etc.) are NOT decoded. CDATA sections, DOCTYPE
 * declarations and processing instructions other than the leading `<?xml`
 * declaration are not supported.
 *
 * @param {string} input - XML source text.
 * @param {{ ast?: boolean }} [options] - pass `{ ast: true }` for the raw tree.
 * @returns {object} the converted document; `{}` when there is no root element.
 * @throws {SyntaxError} on malformed, unterminated or mismatched markup, or on
 *   content after the root element.
 * @throws {TypeError} when a text-only element has an attribute whose name is
 *   a read-only property of String objects (e.g. `length`), because that
 *   attribute cannot be attached to the returned `String`.
 */
XML.parse = function parse(input, options = {}) {
  const astMode = options.ast === true

  // Built once per parse instead of once per scanned character.
  const whitespace = /\s/
  const attrNameEnd = /[\s=>/]/
  const tagNameEnd = /[\s>/]/
  const hasOwn = Object.prototype.hasOwnProperty

  let pos = 0

  // Define `key` as a plain own data property. Unlike `target[key] = value`,
  // this never goes through the inherited `__proto__` setter, so names taken
  // from the document cannot change an object's prototype.
  function setOwn(target, key, value) {
    Object.defineProperty(target, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    })
  }

  function skipWhitespace() {
    while (pos < input.length && whitespace.test(input[pos])) pos++
  }

  // Advance past the comment starting at `pos` (which must point at '<!--').
  function skipComment() {
    // Search from pos + 4 so the dashes of the opener are not reused.
    const end = input.indexOf('-->', pos + 4)
    if (end === -1) throw new SyntaxError('Unterminated comment')
    pos = end + 3
  }

  // Skip any run of whitespace and comments (used around the root element).
  function skipMisc() {
    skipWhitespace()
    while (input.startsWith('<!--', pos)) {
      skipComment()
      skipWhitespace()
    }
  }

  // Parse the '<?xml ... ?>' declaration at `pos` and return its attributes.
  function parsePreamble() {
    pos += 5 // skip '<?xml'
    const attrs = parseAttributes()
    if (input[pos] !== '?' || input[pos + 1] !== '>') {
      throw new SyntaxError('Malformed XML declaration')
    }
    pos += 2 // skip '?>'
    return attrs
  }

  // Parse `name="value"` pairs until '>', '/', '?' or something that is not
  // an attribute; the caller validates whatever character stopped the scan.
  function parseAttributes() {
    const attrs = {}
    while (pos < input.length) {
      skipWhitespace()
      const ch = input[pos]
      if (ch === '>' || ch === '/' || ch === '?') break

      const nameStart = pos
      while (pos < input.length && !attrNameEnd.test(input[pos])) pos++
      const name = input.slice(nameStart, pos)
      if (!name) break

      skipWhitespace()
      if (input[pos] !== '=') throw new SyntaxError('Expected = for attribute')
      pos++ // skip '='
      skipWhitespace()

      const quote = input[pos]
      if (quote !== '"' && quote !== "'") {
        throw new SyntaxError('Expected quote for attribute value')
      }
      pos++ // skip opening quote
      const valueEnd = input.indexOf(quote, pos)
      if (valueEnd === -1) throw new SyntaxError('Unterminated attribute value')
      setOwn(attrs, name, input.slice(pos, valueEnd))
      pos = valueEnd + 1 // skip closing quote
    }
    return attrs
  }

  // Parse the element starting at `pos` (recursively, including children)
  // and return its AST node.
  function parseElement() {
    if (input[pos] !== '<') throw new SyntaxError('Malformed tag')
    pos++ // skip '<'
    const nameStart = pos
    while (pos < input.length && !tagNameEnd.test(input[pos])) pos++
    const name = input.slice(nameStart, pos)
    if (!name) throw new SyntaxError('Malformed tag')

    const attrs = parseAttributes()
    if (input[pos] === '/') {
      pos++ // skip '/'
      if (input[pos] !== '>') throw new SyntaxError('Malformed tag')
      pos++ // skip '>'
      return { $name: name, ...attrs, $children: [] }
    }
    if (input[pos] !== '>') throw new SyntaxError('Malformed tag')
    pos++ // skip '>'

    const children = []
    // Text is buffered so that a comment in the middle of character data
    // does not split it into two separate text children.
    let text = ''
    const flushText = () => {
      const trimmed = text.trim()
      if (trimmed) children.push(trimmed)
      text = ''
    }

    while (pos < input.length) {
      if (input[pos] !== '<') {
        let next = input.indexOf('<', pos)
        if (next === -1) next = input.length
        text += input.slice(pos, next)
        pos = next
        continue
      }
      if (input.startsWith('<!--', pos)) {
        skipComment()
        continue
      }
      flushText()
      if (input[pos + 1] !== '/') {
        children.push(parseElement())
        continue
      }

      // closing tag
      pos += 2 // skip '</'
      const closeEnd = input.indexOf('>', pos)
      if (closeEnd === -1) {
        throw new SyntaxError(`Unterminated closing tag for <${name}>`)
      }
      const closeName = input.slice(pos, closeEnd).trim()
      pos = closeEnd + 1 // skip '>'
      if (closeName !== name) {
        throw new SyntaxError(`Mismatched </${closeName}> \u2013 expected </${name}>`)
      }
      return { $name: name, ...attrs, $children: children }
    }

    // Input ended before the matching closing tag was seen.
    throw new SyntaxError(`Unclosed element <${name}>`)
  }

  // Convert an AST node into its compact value (see the JSDoc on parse).
  function nodeValue(node) {
    // `$name` is destructured only to keep it out of `attrs`.
    const { $name, $children, ...attrs } = node
    const attrEntries = Object.entries(attrs)
    const hasAttrs = attrEntries.length > 0

    if ($children.length === 0) {
      return hasAttrs ? attrs : ''
    }

    if ($children.length === 1 && typeof $children[0] === 'string') {
      if (!hasAttrs) return $children[0]
      // A String object is used on purpose: it still behaves like the text
      // while being able to carry the attributes as properties.
      const s = new String($children[0])
      for (const [k, v] of attrEntries) setOwn(s, k, v)
      return s
    }

    if ($children.every(c => typeof c !== 'string')) {
      const obj = attrs // already a fresh object from the rest destructuring
      // Keys already converted into "repeated element" arrays. Tracking them
      // explicitly keeps a first value that is itself an array (mixed
      // content) from being mistaken for an accumulator.
      const repeated = new Set()
      for (const child of $children) {
        const key = child.$name
        const value = nodeValue(child)
        if (!hasOwn.call(obj, key)) {
          // Own-property check: names such as `toString` or `constructor`
          // exist on every object through the prototype chain.
          setOwn(obj, key, value)
        } else if (repeated.has(key)) {
          obj[key].push(value)
        } else {
          repeated.add(key)
          setOwn(obj, key, [obj[key], value])
        }
      }
      return obj
    }

    // mixed content: text nodes and element nodes interleaved
    return $children.map(c =>
      typeof c === 'string' ? c : { [c.$name]: nodeValue(c) }
    )
  }

  skipWhitespace()
  let preamble = null
  if (input.startsWith('<?xml', pos)) {
    preamble = parsePreamble()
  }
  skipMisc()

  if (pos >= input.length) {
    return astMode && preamble ? { $preamble: preamble } : {}
  }

  const root = parseElement()
  skipMisc()
  if (pos < input.length) throw new SyntaxError('Extra content')

  if (astMode) {
    if (preamble) root.$preamble = preamble
    return root
  }
  // Computed key: defines an own property even for a root named `__proto__`.
  return { [root.$name]: nodeValue(root) }
}
// Expose the XML namespace object (with its `parse` function) as this CommonJS module's export.
module.exports = XML
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment