Last active
March 11, 2026 15:02
-
-
Save moleike/d765d6be3d338a5f6d989d8d679cadde to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //> using scala 3.3.1 | |
| //> using files staged.scala unstaged.scala | |
| //> using dependency org.scala-lang::scala3-staging:3.3.1 org.typelevel::cats-core:2.13.0 | |
| //> using jmh | |
| package benchmarks | |
| import org.openjdk.jmh.annotations.* | |
| import java.util.concurrent.TimeUnit | |
| import scala.quoted.staging.* | |
| import org.openjdk.jmh.infra.Blackhole | |
| import java.nio.file.{Files, Paths} | |
| import java.nio.charset.StandardCharsets | |
| def loadJsonFile(path: String): String = | |
| val bytes = Files.readAllBytes(Paths.get(path)) | |
| new String(bytes, StandardCharsets.UTF_8) | |
| @State(Scope.Benchmark) | |
| @BenchmarkMode(Array(Mode.AverageTime)) | |
| @OutputTimeUnit(TimeUnit.MICROSECONDS) | |
| class ParserBench: | |
| @Param(Array("1MB.json", "5MB.json")) | |
| var fileName: String = _ | |
| var input: String = _ | |
| var stagedParse: String => Option[staged3.Json] = _ | |
| @Setup | |
| def setup(): Unit = | |
| given compiler: Compiler = Compiler.make(getClass.getClassLoader) | |
| input = loadJsonFile(fileName) | |
| stagedParse = staged3.Parser.compile(staged3.json) | |
| //@Benchmark | |
| //def baseline(blackhole: Blackhole): Unit = | |
| // var i = 0; while(i < input.length) { blackhole.consume(input.charAt(i)); i += 1 } | |
| @Benchmark | |
| def benchStaged(blackhole: Blackhole): Unit = | |
| blackhole.consume(stagedParse(input)) | |
| @Benchmark | |
| def benchUnstaged(blackhole: Blackhole): Unit = | |
| blackhole.consume(unstaged.json.parse(input)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //> using scala 3.3.1 | |
| //> using dependency org.scala-lang::scala3-staging:3.3.1 | |
| package staged | |
| import scala.quoted.* | |
| import scala.quoted.staging.* | |
| import scala.util.control.TailCalls.{tailcall, done, TailRec} | |
| type Res = TailRec[Option[(Any, Int)]] | |
| type Succ[A] = (Expr[A], Expr[Int]) => Expr[Res] | |
| type Fail = () => Expr[Res] | |
| case class Parser[A: Type]( | |
| generate: (Expr[String], Expr[Int], Succ[A], Fail) => Expr[Res] | |
| ): | |
| def ~[B: Type](that: Parser[B])(using Quotes): Parser[(A, B)] = | |
| Parser: (in, off, succ, fail) => | |
| this.generate(in, off, | |
| (a, off1) => that.generate(in, off1, | |
| (b, off2) => succ('{ ($a, $b) }, off2), | |
| fail | |
| ), | |
| fail | |
| ) | |
| def |(that: Parser[A]): Parser[A] = | |
| Parser: (in, off, succ, fail) => | |
| this.generate(in, off, succ, () => that.generate(in, off, succ, fail)) | |
| inline def map[B: Type](inline f: A => B)(using Quotes): Parser[B] = | |
| Parser: (in, off, succ, fail) => | |
| this.generate(in, off, | |
| (a, nextOff) => succ(Expr.betaReduce('{ f($a) }), nextOff), | |
| fail | |
| ) | |
| object Parser: | |
| inline def pure[A: Type](inline value: A)(using Quotes): Parser[A] = | |
| Parser: (in, off, succ, fail) => | |
| succ('{ value }, off) | |
| inline def satisfy(inline f: Char => Boolean)(using Quotes): Parser[Char] = | |
| Parser: (in, off, succ, fail) => | |
| '{ | |
| if ($off < $in.length && ${Expr.betaReduce('{ f($in.charAt($off)) })}) | |
| ${ succ('{$in.charAt($off)}, '{$off + 1}) } | |
| else | |
| ${ fail() } | |
| } | |
| inline def char(inline c: Char)(using Quotes): Parser[Char] = | |
| satisfy(_ == c) | |
| def whitespace(using Quotes): Parser[Unit] = | |
| satisfy(_.isWhitespace).many.map(_ => ()) | |
| def token[A: Type](p: Parser[A])(using Quotes): Parser[A] = | |
| whitespace *> p <* whitespace | |
| def string(s: String)(using Quotes): Parser[String] = | |
| Parser: (in, off, succ, fail) => | |
| '{ | |
| if ($in.startsWith(${ Expr(s) }, $off)) | |
| ${ succ(Expr(s), '{$off + ${ Expr(s.length) }}) } | |
| else | |
| ${ fail() } | |
| } | |
| def fix[A: Type](f: Parser[A] => Parser[A])(using Quotes): Parser[A] = | |
| Parser: (in, off, succ, fail) => | |
| '{ | |
| def rec(currOff: Int): Res = tailcall { | |
| ${ | |
| val stub = Parser[A]: (sIn, sOff, sSucc, sFail) => | |
| '{ tailcall(rec($sOff)).flatMap { | |
| case Some((res, next)) => | |
| tailcall { ${ sSucc('{res.asInstanceOf[A]}, '{next}) } } | |
| case None => | |
| tailcall { ${ sFail() } } | |
| }} | |
| f(stub).generate(in, 'currOff, | |
| (a, next) => '{ done(Some(($a, $next))) }, | |
| () => '{ done(None) }) | |
| } | |
| } | |
| rec($off).flatMap { | |
| case Some((res, next)) => ${ succ('{res.asInstanceOf[A]}, '{next}) } | |
| case None => ${ fail() } | |
| } | |
| } | |
| inline def compile[T](build: Quotes ?=> Parser[T])(using Compiler): String => Option[T] = | |
| staging.run { (q: Quotes) ?=> | |
| '{ (in: String) => | |
| ${ | |
| val parser = build | |
| parser.generate( | |
| 'in, | |
| '{0}, | |
| (res, outOff) => '{ done(Some(($res, $outOff))) }, | |
| () => '{ done(None) } | |
| ) | |
| }.result match { | |
| case Some((res, _)) => Some(res.asInstanceOf[T]) | |
| case None => None | |
| } | |
| } | |
| } | |
| import Parser.* | |
| extension [A: Type](p: Parser[A])(using Quotes) | |
| def *>[B: Type](other: Parser[B]): Parser[B] = | |
| (p ~ other).map { case (a, b) => b } | |
| def <*[B: Type](other: Parser[B]): Parser[A] = | |
| (p ~ other).map { case (a, b) => a } | |
| def many: Parser[List[A]] = | |
| Parser: (in, off, succ, fail) => | |
| '{ | |
| val buf = scala.collection.mutable.ListBuffer[A]() | |
| def loop(currentOff: Int): Res = tailcall { | |
| ${ p.generate(in, 'currentOff, | |
| (a, next) => '{ | |
| buf += $a | |
| loop($next) | |
| }, | |
| () => succ('{buf.toList}, 'currentOff) | |
| )} | |
| } | |
| loop($off) | |
| } | |
| def many1: Parser[List[A]] = | |
| (p ~ p.many).map(_ :: _) | |
| def sepBy[B: Type](sep: Parser[B]): Parser[List[A]] = | |
| (p ~ (sep *> p).many).map(_ :: _) | Parser.pure(Nil) | |
| def optional: Parser[Option[A]] = | |
| Parser: (in, off, succ, fail) => | |
| p.generate(in, off, | |
| (a, next) => succ('{ Some($a) }, next), | |
| () => succ('{ None }, off) | |
| ) | |
| def slice: Parser[String] = | |
| Parser: (in, off, succ, fail) => | |
| p.generate(in, off, | |
| (_, next) => succ('{ $in.substring($off, $next) }, next), | |
| fail | |
| ) | |
| inline def between(inline `open`: Char, inline close: Char): Parser[A] = | |
| token(char(`open`)) *> p <* token(char(close)) | |
| enum Json: | |
| case JObject(fields: Map[String, Json]) | |
| case JArray(items: List[Json]) | |
| case JString(value: String) | |
| case JBoolean(value: Boolean) | |
| case JNumber(value: Double) | |
| case JNull | |
| inline def quotedString(using Quotes): Parser[String] = | |
| satisfy(_ != '"').many.between('"', '"').slice | |
| inline def digit(using Quotes): Parser[Char] = | |
| satisfy(c => c >= '0' && c <= '9') | |
| inline def json(using Quotes): Parser[Json] = fix: self => | |
| val jNull = string("null").map(_ => Json.JNull) | |
| val jBool = | |
| string("true").map(_ => Json.JBoolean(true)) | string("false").map(_ => | |
| Json.JBoolean(false) | |
| ) | |
| val jStr = quotedString.map(Json.JString(_)) | |
| val jNum = (char('-').optional ~ digit.many1 ~ (char('.') ~ digit.many1).optional) | |
| .slice.map(_.toDouble).map(Json.JNumber(_)) | |
| val jArr = | |
| self.sepBy(token(char(','))).between('[', ']').map(Json.JArray(_)) | |
| val field = (quotedString <* token(char(':'))) ~ self | |
| val jObj = field | |
| .sepBy(token(char(','))) | |
| .between('{', '}') | |
| .map(fs => Json.JObject(fs.toMap)) | |
| jNull | jBool | jNum | jStr | jArr | jObj | |
| enum Sexp: | |
| case Sym(name: String) | |
| case Seq(items: List[Sexp]) | |
| def letter(using Quotes): Parser[Char] = | |
| Parser.satisfy(c => (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) | |
| def symbol(using Quotes): Parser[String] = | |
| letter.many1.map(_.mkString) | |
| inline def sexp(using Quotes): Parser[Sexp] = Parser.fix: self => | |
| symbol.map(Sexp.Sym(_)) | |
| | token(self).many.between('(', ')').map(Sexp.Seq(_)) | |
| @main def runParser3(): Unit = | |
| given compiler: Compiler = Compiler.make(getClass.getClassLoader) | |
| val jsonParser = Parser.compile(json) | |
| import java.nio.file.{Files, Paths} | |
| import java.nio.charset.StandardCharsets | |
| def loadJsonFile(path: String): String = | |
| val bytes = Files.readAllBytes(Paths.get(path)) | |
| new String(bytes, StandardCharsets.UTF_8) | |
| val input = loadJsonFile("25MB.json") | |
| jsonParser(input) match | |
| case Some(json) => println(s"Success!") | |
| case None => println("Failed to parse :(") | |
| val sexpParser = Parser.compile(sexp) | |
| val input2 = """(foo bar (baz (quux) ()))""" | |
| //val deepInput = "(" * 5000 + "target" + ")" * 5000 | |
| //val result2 = sexpParser(deepInput) | |
| //def measureDepth(s: Sexp, currentDepth: Int = 0): Int = s match { | |
| // case Sexp.Sym(_) => currentDepth | |
| // case Sexp.Seq(items) => | |
| // // Since your test case is perfectly nested, | |
| // // we just look at the first item. | |
| // items.headOption match { | |
| // case Some(inner) => measureDepth(inner, currentDepth + 1) | |
| // case None => currentDepth | |
| // } | |
| //} | |
| //result2 match { | |
| // case Some(sexp) => | |
| // println(s"Success! Total nesting depth: ${measureDepth(sexp)}") | |
| // case None => | |
| // println("Failed to parse") | |
| //} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //> using scala 3.3.1 | |
| //> using dependency org.scala-lang::scala3-staging:3.3.1 org.typelevel::cats-core:2.13.0 | |
| package unstaged | |
| import cats.{Alternative, Defer, Eval} | |
| import cats.syntax.all.* | |
| case class Parser[A](run: (String, Int) => Eval[Option[(A, Int)]]): | |
| def parse(input: String): Option[A] = | |
| run(input, 0).value.map(_._1) | |
| object Parser: | |
| // The Defer instance handles the "Knot" for recursion | |
| given Defer[Parser] with | |
| def defer[A](p: => Parser[A]): Parser[A] = | |
| Parser((in, offset) => Eval.defer(p.run(in, offset))) | |
| given Alternative[Parser] with | |
| def pure[A](x: A): Parser[A] = | |
| Parser((_, off) => Eval.now(Some((x, off)))) | |
| def empty[A]: Parser[A] = | |
| Parser((_, _) => Eval.now(None)) | |
| def combineK[A](x: Parser[A], y: Parser[A]): Parser[A] = | |
| Parser((in, off) => x.run(in, off).flatMap { | |
| case None => y.run(in, off) | |
| case s => Eval.now(s) | |
| }) | |
| def ap[A, B](ff: Parser[A => B])(fa: Parser[A]): Parser[B] = | |
| Parser((in, off) => ff.run(in, off).flatMap { | |
| case Some((f, next)) => fa.run(in, next).map(_.map((a, res) => (f(a), res))) | |
| case None => Eval.now(None) | |
| }) | |
| def satisfy(p: Char => Boolean): Parser[Char] = | |
| Parser((in, off) => Eval.later { | |
| Option.when(off < in.length && p(in(off)))((in(off), off + 1)) | |
| }) | |
| def whitespace: Parser[Unit] = Parser.satisfy(_.isWhitespace).many.void | |
| def token[A](p: Parser[A]): Parser[A] = whitespace *> p <* whitespace | |
| def char(c: Char): Parser[Char] = Parser.satisfy(_ == c) | |
| def string(s: String): Parser[String] = | |
| Parser: (in, off) => | |
| Eval.later { | |
| Option.when(in.startsWith(s, off))((s, off + s.length)) | |
| } | |
| import Parser.* | |
| extension [A](p: Parser[A])(using alt: Alternative[Parser], d: Defer[Parser]) | |
| def many: Parser[List[A]] = | |
| d.defer(alt.combineK(p.many1, alt.pure(Nil))) | |
| def many1: Parser[List[A]] = | |
| d.defer(p.map2(p.many)(_ :: _)) | |
| def |(other: => Parser[A]): Parser[A] = | |
| alt.combineK(p, other) | |
| def ~[B](other: => Parser[B]): Parser[(A, B)] = | |
| Parser: (in, off) => | |
| p.run(in, off).flatMap: | |
| case Some((a, rem)) => | |
| // We wrap 'other' in Eval.defer to stay stack-safe | |
| Eval.defer(other.run(in, rem)).map(_.map((b, finalRem) => ((a, b), finalRem))) | |
| case None => | |
| Eval.now(None) | |
| def sepBy(sep: Parser[?]): Parser[List[A]] = | |
| sepBy1(sep) | alt.pure(List.empty[A]) | |
| def sepBy1(sep: Parser[?]): Parser[List[A]] = | |
| (p ~ (sep *> p).many).map(_ :: _) | |
| def between(`open`: Char, close: Char): Parser[A] = | |
| char(`open`) *> token(p) <* char(close) | |
| def optional: Parser[Option[A]] = | |
| Parser: (in, off) => | |
| p.run(in, off).map: | |
| case Some((a, rem)) => Some(Some(a), rem) | |
| case None => Some(None, off) | |
| def slice: Parser[String] = | |
| Parser: (in, off) => | |
| p.run(in, off).map(_.map((_, rem) => (in.substring(off, rem), rem))) | |
| enum Json: | |
| case JObject(fields: Map[String, Json]) | |
| case JArray(items: List[Json]) | |
| case JString(value: String) | |
| case JBoolean(value: Boolean) | |
| case JNumber(value: Double) | |
| case JNull | |
| val quotedString: Parser[String] = | |
| satisfy(_ != '"').many.between('"', '"').slice | |
| val digit: Parser[Char] = | |
| satisfy(c => c >= '0' && c <= '9') | |
| val json: Parser[Json] = Defer[Parser].fix: self => | |
| val jNull = string("null").map(_ => Json.JNull) | |
| val jBool = | |
| string("true").map(_ => Json.JBoolean(true)) | string("false").map(_ => | |
| Json.JBoolean(false) | |
| ) | |
| val jStr = quotedString.map(Json.JString(_)) | |
| val jNum = (char('-').optional ~ digit.many1 ~ (char('.') ~ digit.many1).optional) | |
| .slice.map(_.toDouble).map(Json.JNumber(_)) | |
| val jArr = | |
| self.sepBy(token(char(','))).between('[', ']').map(Json.JArray(_)) | |
| val field = (quotedString <* token(char(':'))) ~ self | |
| val jObj = field | |
| .sepBy(token(char(','))) | |
| .between('{', '}') | |
| .map(fs => Json.JObject(fs.toMap)) | |
| jNull | jBool | jNum | jStr | jArr | jObj | |
| enum Sexp: | |
| case Sym(name: String) | |
| case Seq(items: List[Sexp]) | |
| val letter: Parser[Char] = | |
| Parser.satisfy(c => (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) | |
| val symbol: Parser[String] = | |
| letter.many1.map(_.mkString) | |
| val sexp: Parser[Sexp] = Defer[Parser].fix: self => | |
| symbol.map(Sexp.Sym(_)) | |
| | token(self).many.between('(', ')').map(Sexp.Seq(_)) | |
| @main def runUnstaged(): Unit = | |
| import java.nio.file.{Files, Paths} | |
| import java.nio.charset.StandardCharsets | |
| def loadJsonFile(path: String): String = | |
| val bytes = Files.readAllBytes(Paths.get(path)) | |
| new String(bytes, StandardCharsets.UTF_8) | |
| val input = loadJsonFile("1MB.json") | |
| json.parse(input) match | |
| case Some(json) => println(s"Success!") | |
| case None => println("Failed to parse :(") | |
| val input2 = """(foo bar (baz (quux) ()))""" | |
| val result2 = sexp.parse(input2) | |
| println(s"Result: $result2") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment