Skip to content

Instantly share code, notes, and snippets.

@moleike
Last active March 11, 2026 15:02
Show Gist options
  • Select an option

  • Save moleike/d765d6be3d338a5f6d989d8d679cadde to your computer and use it in GitHub Desktop.

Select an option

Save moleike/d765d6be3d338a5f6d989d8d679cadde to your computer and use it in GitHub Desktop.
//> using scala 3.3.1
//> using files staged.scala unstaged.scala
//> using dependency org.scala-lang::scala3-staging:3.3.1 org.typelevel::cats-core:2.13.0
//> using jmh
package benchmarks
import org.openjdk.jmh.annotations.*
import java.util.concurrent.TimeUnit
import scala.quoted.staging.*
import org.openjdk.jmh.infra.Blackhole
import java.nio.file.{Files, Paths}
import java.nio.charset.StandardCharsets
def loadJsonFile(path: String): String =
val bytes = Files.readAllBytes(Paths.get(path))
new String(bytes, StandardCharsets.UTF_8)
@State(Scope.Benchmark)
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MICROSECONDS)
class ParserBench:
@Param(Array("1MB.json", "5MB.json"))
var fileName: String = _
var input: String = _
var stagedParse: String => Option[staged3.Json] = _
@Setup
def setup(): Unit =
given compiler: Compiler = Compiler.make(getClass.getClassLoader)
input = loadJsonFile(fileName)
stagedParse = staged3.Parser.compile(staged3.json)
//@Benchmark
//def baseline(blackhole: Blackhole): Unit =
// var i = 0; while(i < input.length) { blackhole.consume(input.charAt(i)); i += 1 }
@Benchmark
def benchStaged(blackhole: Blackhole): Unit =
blackhole.consume(stagedParse(input))
@Benchmark
def benchUnstaged(blackhole: Blackhole): Unit =
blackhole.consume(unstaged.json.parse(input))
//> using scala 3.3.1
//> using dependency org.scala-lang::scala3-staging:3.3.1
package staged
import scala.quoted.*
import scala.quoted.staging.*
import scala.util.control.TailCalls.{tailcall, done, TailRec}
type Res = TailRec[Option[(Any, Int)]]
type Succ[A] = (Expr[A], Expr[Int]) => Expr[Res]
type Fail = () => Expr[Res]
case class Parser[A: Type](
generate: (Expr[String], Expr[Int], Succ[A], Fail) => Expr[Res]
):
def ~[B: Type](that: Parser[B])(using Quotes): Parser[(A, B)] =
Parser: (in, off, succ, fail) =>
this.generate(in, off,
(a, off1) => that.generate(in, off1,
(b, off2) => succ('{ ($a, $b) }, off2),
fail
),
fail
)
def |(that: Parser[A]): Parser[A] =
Parser: (in, off, succ, fail) =>
this.generate(in, off, succ, () => that.generate(in, off, succ, fail))
inline def map[B: Type](inline f: A => B)(using Quotes): Parser[B] =
Parser: (in, off, succ, fail) =>
this.generate(in, off,
(a, nextOff) => succ(Expr.betaReduce('{ f($a) }), nextOff),
fail
)
object Parser:
inline def pure[A: Type](inline value: A)(using Quotes): Parser[A] =
Parser: (in, off, succ, fail) =>
succ('{ value }, off)
inline def satisfy(inline f: Char => Boolean)(using Quotes): Parser[Char] =
Parser: (in, off, succ, fail) =>
'{
if ($off < $in.length && ${Expr.betaReduce('{ f($in.charAt($off)) })})
${ succ('{$in.charAt($off)}, '{$off + 1}) }
else
${ fail() }
}
inline def char(inline c: Char)(using Quotes): Parser[Char] =
satisfy(_ == c)
def whitespace(using Quotes): Parser[Unit] =
satisfy(_.isWhitespace).many.map(_ => ())
def token[A: Type](p: Parser[A])(using Quotes): Parser[A] =
whitespace *> p <* whitespace
def string(s: String)(using Quotes): Parser[String] =
Parser: (in, off, succ, fail) =>
'{
if ($in.startsWith(${ Expr(s) }, $off))
${ succ(Expr(s), '{$off + ${ Expr(s.length) }}) }
else
${ fail() }
}
def fix[A: Type](f: Parser[A] => Parser[A])(using Quotes): Parser[A] =
Parser: (in, off, succ, fail) =>
'{
def rec(currOff: Int): Res = tailcall {
${
val stub = Parser[A]: (sIn, sOff, sSucc, sFail) =>
'{ tailcall(rec($sOff)).flatMap {
case Some((res, next)) =>
tailcall { ${ sSucc('{res.asInstanceOf[A]}, '{next}) } }
case None =>
tailcall { ${ sFail() } }
}}
f(stub).generate(in, 'currOff,
(a, next) => '{ done(Some(($a, $next))) },
() => '{ done(None) })
}
}
rec($off).flatMap {
case Some((res, next)) => ${ succ('{res.asInstanceOf[A]}, '{next}) }
case None => ${ fail() }
}
}
inline def compile[T](build: Quotes ?=> Parser[T])(using Compiler): String => Option[T] =
staging.run { (q: Quotes) ?=>
'{ (in: String) =>
${
val parser = build
parser.generate(
'in,
'{0},
(res, outOff) => '{ done(Some(($res, $outOff))) },
() => '{ done(None) }
)
}.result match {
case Some((res, _)) => Some(res.asInstanceOf[T])
case None => None
}
}
}
import Parser.*
extension [A: Type](p: Parser[A])(using Quotes)
def *>[B: Type](other: Parser[B]): Parser[B] =
(p ~ other).map { case (a, b) => b }
def <*[B: Type](other: Parser[B]): Parser[A] =
(p ~ other).map { case (a, b) => a }
def many: Parser[List[A]] =
Parser: (in, off, succ, fail) =>
'{
val buf = scala.collection.mutable.ListBuffer[A]()
def loop(currentOff: Int): Res = tailcall {
${ p.generate(in, 'currentOff,
(a, next) => '{
buf += $a
loop($next)
},
() => succ('{buf.toList}, 'currentOff)
)}
}
loop($off)
}
def many1: Parser[List[A]] =
(p ~ p.many).map(_ :: _)
def sepBy[B: Type](sep: Parser[B]): Parser[List[A]] =
(p ~ (sep *> p).many).map(_ :: _) | Parser.pure(Nil)
def optional: Parser[Option[A]] =
Parser: (in, off, succ, fail) =>
p.generate(in, off,
(a, next) => succ('{ Some($a) }, next),
() => succ('{ None }, off)
)
def slice: Parser[String] =
Parser: (in, off, succ, fail) =>
p.generate(in, off,
(_, next) => succ('{ $in.substring($off, $next) }, next),
fail
)
inline def between(inline `open`: Char, inline close: Char): Parser[A] =
token(char(`open`)) *> p <* token(char(close))
enum Json:
case JObject(fields: Map[String, Json])
case JArray(items: List[Json])
case JString(value: String)
case JBoolean(value: Boolean)
case JNumber(value: Double)
case JNull
inline def quotedString(using Quotes): Parser[String] =
satisfy(_ != '"').many.between('"', '"').slice
inline def digit(using Quotes): Parser[Char] =
satisfy(c => c >= '0' && c <= '9')
inline def json(using Quotes): Parser[Json] = fix: self =>
val jNull = string("null").map(_ => Json.JNull)
val jBool =
string("true").map(_ => Json.JBoolean(true)) | string("false").map(_ =>
Json.JBoolean(false)
)
val jStr = quotedString.map(Json.JString(_))
val jNum = (char('-').optional ~ digit.many1 ~ (char('.') ~ digit.many1).optional)
.slice.map(_.toDouble).map(Json.JNumber(_))
val jArr =
self.sepBy(token(char(','))).between('[', ']').map(Json.JArray(_))
val field = (quotedString <* token(char(':'))) ~ self
val jObj = field
.sepBy(token(char(',')))
.between('{', '}')
.map(fs => Json.JObject(fs.toMap))
jNull | jBool | jNum | jStr | jArr | jObj
enum Sexp:
case Sym(name: String)
case Seq(items: List[Sexp])
def letter(using Quotes): Parser[Char] =
Parser.satisfy(c => (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
def symbol(using Quotes): Parser[String] =
letter.many1.map(_.mkString)
inline def sexp(using Quotes): Parser[Sexp] = Parser.fix: self =>
symbol.map(Sexp.Sym(_))
| token(self).many.between('(', ')').map(Sexp.Seq(_))
@main def runParser3(): Unit =
given compiler: Compiler = Compiler.make(getClass.getClassLoader)
val jsonParser = Parser.compile(json)
import java.nio.file.{Files, Paths}
import java.nio.charset.StandardCharsets
def loadJsonFile(path: String): String =
val bytes = Files.readAllBytes(Paths.get(path))
new String(bytes, StandardCharsets.UTF_8)
val input = loadJsonFile("25MB.json")
jsonParser(input) match
case Some(json) => println(s"Success!")
case None => println("Failed to parse :(")
val sexpParser = Parser.compile(sexp)
val input2 = """(foo bar (baz (quux) ()))"""
//val deepInput = "(" * 5000 + "target" + ")" * 5000
//val result2 = sexpParser(deepInput)
//def measureDepth(s: Sexp, currentDepth: Int = 0): Int = s match {
// case Sexp.Sym(_) => currentDepth
// case Sexp.Seq(items) =>
// // Since your test case is perfectly nested,
// // we just look at the first item.
// items.headOption match {
// case Some(inner) => measureDepth(inner, currentDepth + 1)
// case None => currentDepth
// }
//}
//result2 match {
// case Some(sexp) =>
// println(s"Success! Total nesting depth: ${measureDepth(sexp)}")
// case None =>
// println("Failed to parse")
//}
//> using scala 3.3.1
//> using dependency org.scala-lang::scala3-staging:3.3.1 org.typelevel::cats-core:2.13.0
package unstaged
import cats.{Alternative, Defer, Eval}
import cats.syntax.all.*
case class Parser[A](run: (String, Int) => Eval[Option[(A, Int)]]):
def parse(input: String): Option[A] =
run(input, 0).value.map(_._1)
object Parser:
// The Defer instance handles the "Knot" for recursion
given Defer[Parser] with
def defer[A](p: => Parser[A]): Parser[A] =
Parser((in, offset) => Eval.defer(p.run(in, offset)))
given Alternative[Parser] with
def pure[A](x: A): Parser[A] =
Parser((_, off) => Eval.now(Some((x, off))))
def empty[A]: Parser[A] =
Parser((_, _) => Eval.now(None))
def combineK[A](x: Parser[A], y: Parser[A]): Parser[A] =
Parser((in, off) => x.run(in, off).flatMap {
case None => y.run(in, off)
case s => Eval.now(s)
})
def ap[A, B](ff: Parser[A => B])(fa: Parser[A]): Parser[B] =
Parser((in, off) => ff.run(in, off).flatMap {
case Some((f, next)) => fa.run(in, next).map(_.map((a, res) => (f(a), res)))
case None => Eval.now(None)
})
def satisfy(p: Char => Boolean): Parser[Char] =
Parser((in, off) => Eval.later {
Option.when(off < in.length && p(in(off)))((in(off), off + 1))
})
def whitespace: Parser[Unit] = Parser.satisfy(_.isWhitespace).many.void
def token[A](p: Parser[A]): Parser[A] = whitespace *> p <* whitespace
def char(c: Char): Parser[Char] = Parser.satisfy(_ == c)
def string(s: String): Parser[String] =
Parser: (in, off) =>
Eval.later {
Option.when(in.startsWith(s, off))((s, off + s.length))
}
import Parser.*
extension [A](p: Parser[A])(using alt: Alternative[Parser], d: Defer[Parser])
def many: Parser[List[A]] =
d.defer(alt.combineK(p.many1, alt.pure(Nil)))
def many1: Parser[List[A]] =
d.defer(p.map2(p.many)(_ :: _))
def |(other: => Parser[A]): Parser[A] =
alt.combineK(p, other)
def ~[B](other: => Parser[B]): Parser[(A, B)] =
Parser: (in, off) =>
p.run(in, off).flatMap:
case Some((a, rem)) =>
// We wrap 'other' in Eval.defer to stay stack-safe
Eval.defer(other.run(in, rem)).map(_.map((b, finalRem) => ((a, b), finalRem)))
case None =>
Eval.now(None)
def sepBy(sep: Parser[?]): Parser[List[A]] =
sepBy1(sep) | alt.pure(List.empty[A])
def sepBy1(sep: Parser[?]): Parser[List[A]] =
(p ~ (sep *> p).many).map(_ :: _)
def between(`open`: Char, close: Char): Parser[A] =
char(`open`) *> token(p) <* char(close)
def optional: Parser[Option[A]] =
Parser: (in, off) =>
p.run(in, off).map:
case Some((a, rem)) => Some(Some(a), rem)
case None => Some(None, off)
def slice: Parser[String] =
Parser: (in, off) =>
p.run(in, off).map(_.map((_, rem) => (in.substring(off, rem), rem)))
enum Json:
case JObject(fields: Map[String, Json])
case JArray(items: List[Json])
case JString(value: String)
case JBoolean(value: Boolean)
case JNumber(value: Double)
case JNull
val quotedString: Parser[String] =
satisfy(_ != '"').many.between('"', '"').slice
val digit: Parser[Char] =
satisfy(c => c >= '0' && c <= '9')
val json: Parser[Json] = Defer[Parser].fix: self =>
val jNull = string("null").map(_ => Json.JNull)
val jBool =
string("true").map(_ => Json.JBoolean(true)) | string("false").map(_ =>
Json.JBoolean(false)
)
val jStr = quotedString.map(Json.JString(_))
val jNum = (char('-').optional ~ digit.many1 ~ (char('.') ~ digit.many1).optional)
.slice.map(_.toDouble).map(Json.JNumber(_))
val jArr =
self.sepBy(token(char(','))).between('[', ']').map(Json.JArray(_))
val field = (quotedString <* token(char(':'))) ~ self
val jObj = field
.sepBy(token(char(',')))
.between('{', '}')
.map(fs => Json.JObject(fs.toMap))
jNull | jBool | jNum | jStr | jArr | jObj
enum Sexp:
case Sym(name: String)
case Seq(items: List[Sexp])
val letter: Parser[Char] =
Parser.satisfy(c => (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
val symbol: Parser[String] =
letter.many1.map(_.mkString)
val sexp: Parser[Sexp] = Defer[Parser].fix: self =>
symbol.map(Sexp.Sym(_))
| token(self).many.between('(', ')').map(Sexp.Seq(_))
@main def runUnstaged(): Unit =
import java.nio.file.{Files, Paths}
import java.nio.charset.StandardCharsets
def loadJsonFile(path: String): String =
val bytes = Files.readAllBytes(Paths.get(path))
new String(bytes, StandardCharsets.UTF_8)
val input = loadJsonFile("1MB.json")
json.parse(input) match
case Some(json) => println(s"Success!")
case None => println("Failed to parse :(")
val input2 = """(foo bar (baz (quux) ()))"""
val result2 = sexp.parse(input2)
println(s"Result: $result2")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment