Last active
August 4, 2025 14:12
-
-
Save StarSugar/62430df44ea6a23f7e2df16beba914b9 to your computer and use it in GitHub Desktop.
(Non-Pure) Functional Parsing Expression Grammar (PEG)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #| | |
| Copyright © 2025 Tsing | |
| Permission is hereby granted, free of charge, to any person obtaining a copy | |
| of this software and associated documentation files (the “Software”), to deal | |
| in the Software without restriction, including without limitation the rights | |
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| copies of the Software, and to permit persons to whom the Software is | |
| furnished to do so, subject to the following conditions: | |
| The above copyright notice and this permission notice shall be included in | |
| all copies or substantial portions of the Software. | |
| THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
| SOFTWARE. | |
| |# | |
| (defpackage :pegfn | |
| (:use :cl) | |
| (:export match-fail mf-pos ref litfn []fn .fn seqfn /fn ?fn *fn +fn !fn &fn | |
| match/parse) | |
| (:documentation | |
| "(Non-Pure) Functional Parsing Expression Grammar (PEG) | |
| You'd better learn the PEG firstly, before trying to read this docstring. | |
| INTRODUCTION | |
| For example, for the PEG that recognizes mathematical formulas, from wikipedia | |
| PEG page, | |
| Expr ← Sum | |
| Sum ← Product (('+' / '-') Product)* | |
| Product ← Power (('*' / '/') Power)* | |
| Power ← Value ('^' Power)? | |
| Value ← [0-9]+ / '(' Expr ')' | |
| can be represent in lisp like | |
| (defun ign (&rest _) | |
| ;; ignore all arguments | |
| (declare (ignore _)) | |
| (values)) | |
| (defvar expr t) (defvar sum t) (defvar product t) (defvar power t) | |
| (defvar value t) | |
| ;; the (ref var) below is used for recursion | |
| (setf expr (ref sum)) | |
| (setf sum (seqfn #'ign (ref product) (*fn #'ign (seqfn #'ign ([]fn #'ign \"+-\") (ref product))))) | |
| (setf product (seqfn #'ign (ref power) (*fn #'ign (seqfn #'ign ([]fn #'ign \"*/\") (ref power))))) | |
| (setf power (seqfn #'ign (ref value) (?fn #'ign (litfn #'ign \"^\") (ref power)))) | |
| (setf value (/fn #'ign (+fn #'ign ([]fn #'ign '(#\0 . #\9))) (seqfn #'ign (litfn #'ign \"(\") (ref value) (litfn #'ign \")\")))) | |
| Which can be used as: | |
| (match expr (make-string-input-stream \"123+456\")) | |
| You may notice that there are abundants of `ign', which are called `usefn', used | |
| to handler the matched results. | |
| The match results are passed to `usefn' functions as arguments, and the result | |
| of `usefn' are returned by the corresponding parsing function. | |
| For example, (litfn #'parse-integer \"123\") will try to match string \"123\" | |
| firstly, and return (parse-integer \"123\") if it is matched. | |
| There are also some functions pass multiple match results as multiple arguments | |
| to the `usefn', such as `seqfn'. | |
| It is easy to collect the `seqfn' results, there is an expample: | |
| (seqfn #'list (litfn #'values \"123\") (litfn #'values \"abc\")) ; | |
| this will try to match 123 then abc firstly, and collect them by list secondly, | |
| and finally, the result is (\"123\" \"abc\"). | |
| CONDITIONS | |
| Every function this package exported may signal a condition called match-fail. | |
| SEE ALSO | |
| see docstring of each functions.")) | |
| (in-package :pegfn) | |
| (defmacro handler-case* (expr clear &body body) | |
| "Like handler-case, but runs cleanup code before condition handling. Evaluates | |
| `expr' and executes `clear' when any condition (error or non-error) is signaled | |
| during evaluation. After cleanup, the condition is handling by the `body' | |
| handlers." | |
| (let ((var (gensym))) | |
| `(handler-case | |
| (progn | |
| (handler-case ,expr | |
| (t (,var) | |
| ,clear | |
| (signal ,var)))) | |
| ,@body))) | |
| (defmacro for/map ((var seq &optional (res-type 'list)) &body form) | |
| `(map ',res-type (lambda (,var) ,@form) ,seq)) | |
| (defun discard (&rest vals) | |
| (declare (ignore vals)) | |
| t) | |
| (define-condition match-fail () | |
| ((pos :accessor mf-pos | |
| :initarg :pos))) | |
| (defmacro ref (x) | |
| "use (ref var) to reference variable, this is useful for recursion" | |
| (let ((stream (make-symbol "STREAM"))) | |
| `(lambda (,stream) (funcall ,x ,stream)))) | |
| (defun litfn (usefn str) | |
| "litteral string, like \"\" and '' in original paper | |
| `usefn' consumes a string" | |
| (let ((len (length str))) | |
| (lambda (stream) | |
| (let ((p (file-position stream))) | |
| (labels ((read-chars () | |
| (let* ((buf (make-string len)) | |
| (readed (read-sequence buf stream))) | |
| (cond ((< readed len) | |
| (file-position stream p) | |
| (signal 'match-fail :pos p)) | |
| (t buf))))) | |
| (let ((readed (read-chars))) | |
| (if (string-equal str readed) | |
| (funcall usefn readed) | |
| (signal 'match-fail :pos p)))))))) | |
| (defun []fn (usefn &rest sets) | |
| "character class, like [] in original paper | |
| `usefn' consumes a character | |
| [abc] represent like ([]fn usefn #\a #\b #\c) or ([]fn usefn \"abc\") | |
| [a-z] represent like ([]fn usefn '(#\a . #\z)) | |
| " | |
| (let ((tb (make-hash-table))) | |
| (dolist (set sets) | |
| (etypecase set | |
| (character (setf (gethash set tb) t)) | |
| (string (for/map (ch set nil) | |
| (setf (gethash ch tb) t))) | |
| (cons (let* ((l (car set)) (r (cdr set)) | |
| (lc (char-code l)) (rc (char-code r))) | |
| (unless (and (characterp l) (characterp r) | |
| (char< l r)) | |
| (error "bad character class")) | |
| (loop for i from lc to rc do | |
| (setf (gethash (code-char i) tb) t)))))) | |
| (lambda (stream) | |
| (let ((p (file-position stream))) | |
| (handler-case* | |
| (let ((ch (peek-char nil stream))) | |
| (if (nth-value 1 (gethash ch tb)) | |
| (funcall usefn (read-char stream)) | |
| (signal 'match-fail :pos p))) | |
| (file-position stream p) | |
| (end-of-file () (signal 'match-fail :pos p))))))) | |
| (defun .fn (usefn) | |
| "any charater, like . in original paper | |
| `usefn' consumes a character" | |
| (lambda (stream) | |
| (handler-case (funcall usefn (read-char stream)) | |
| (end-of-file () (signal 'match-fail :pos (file-position stream)))))) | |
| (defun seqfn (usefn &rest fns) | |
| "sequence or grouping, e or (e) in original paper | |
| `usefn' consumes as many as arguments except `usefn' | |
| there should be at least subpattern function in seqfn" | |
| (when (= 0 (length fns)) | |
| (error "there should be at least one subpattern in seqfn")) | |
| (lambda (stream) | |
| (let ((p (file-position stream))) | |
| (handler-case* | |
| (apply usefn | |
| (for/map (f fns) | |
| (funcall f stream))) | |
| (file-position stream p))))) | |
| (defun /fn (usefn &rest fns) | |
| "prioritized choice, e1 / e2 in original paper, | |
| `usefn' consumes a value" | |
| (labels ((build (fns) | |
| (cond ((null fns) | |
| (lambda (stream) | |
| (signal 'match-fail :pos (file-position stream)))) | |
| (t | |
| (let ((f (car fns)) | |
| (f* (build (cdr fns)))) | |
| (lambda (stream) | |
| (let ((p (file-position stream))) | |
| (handler-case* | |
| (funcall f stream) | |
| (file-position stream p) | |
| (match-fail () (funcall f* stream)))))))))) | |
| (let ((f (build fns))) | |
| (lambda (stream) | |
| (funcall usefn (funcall f stream)))))) | |
| (defun ?fn (usefn &rest fns) | |
| "optional, compose with seq, (e)? in original paper language | |
| `usefn' consumes as many as arguments except `usefn' if matched, otherwise | |
| `usefn' consumes nothing" | |
| (let ((seqfn (apply #'seqfn usefn fns))) | |
| (lambda (stream) | |
| (let ((p (file-position stream))) | |
| (handler-case (funcall seqfn stream) | |
| (match-fail () | |
| (file-position stream p) | |
| (funcall usefn))))))) | |
| (defun *fn (usefn fn) | |
| "zero or more, e* in original paper | |
| `usefn' consumes all matched result, uncertain number of values" | |
| (lambda (stream) | |
| (loop named the-loop | |
| with res = (cons nil nil) | |
| for p = (file-position stream) | |
| as cur = res then (cdr cur) do | |
| (handler-case* | |
| (let ((val (funcall fn stream))) | |
| (setf (cdr cur) (cons val nil))) | |
| (progn | |
| (file-position stream p) | |
| (return-from the-loop (apply usefn (cdr res)))))))) | |
| (defun +fn (usefn fn) | |
| "one or more, e+ in original paper | |
| `usefn' consumes all matched result, uncertain number of values" | |
| (lambda (stream) | |
| (apply usefn (funcall (seqfn #'cons fn (*fn #'list fn)) stream)))) | |
| (defun !fn (&rest fns) | |
| "not predicate, !e in original paper, no `usefn'" | |
| (let ((seqfn (apply #'seqfn #'discard fns))) | |
| (lambda (stream) | |
| (prog ((p (file-position stream))) | |
| (unwind-protect | |
| (handler-case | |
| (progn | |
| (funcall seqfn stream) | |
| (go no-fail)) | |
| (match-fail () (return t))) | |
| (file-position stream p)) | |
| no-fail | |
| (signal 'match-fail :pos p))))) | |
| (defun &fn (&rest fns) | |
| "and predicate, &e in original paper, no `usefn'" | |
| (!fn (apply #'!fn fns))) | |
| (defun match/parse (stream rule &optional (errorp t)) | |
| "a simple wrapper for match and parse" | |
| (handler-case (funcall rule stream) | |
| (match-fail (e) | |
| (if errorp | |
| (error "match/parse fail at ~A" (mf-pos e))) | |
| (signal e)))) | |
| ;; tests | |
| #| | |
| (defvar expr t) (defvar sum t) (defvar product t) (defvar power t) | |
| (defvar value t) | |
| (defun ign (&rest _) | |
| ;; ignore all arguments | |
| (declare (ignore _)) | |
| (values)) | |
| (setf expr (ref sum)) | |
| (setf sum (seqfn (lambda (x ys) | |
| `(+ ,x ,@ys)) | |
| (ref product) | |
| (*fn #'list | |
| (seqfn (lambda (sign y) | |
| (if (equal sign #\-) | |
| `(- ,y) | |
| y)) | |
| ([]fn #'values "+-") | |
| (ref product))))) | |
| (setf product (seqfn (lambda (x ys) | |
| `(* ,x ,@ys)) | |
| (ref power) | |
| (*fn #'list | |
| (seqfn (lambda (sign y) | |
| (if (equal sign #\/) | |
| `(/ 1 ,y) | |
| y)) | |
| ([]fn #'values "*/") (ref power))))) | |
| (setf power (seqfn (lambda (base index) | |
| `(expt ,base ,index)) | |
| (ref value) | |
| (?fn (lambda (&rest xs) | |
| (if (null xs) | |
| 1 | |
| (second xs))) | |
| (litfn #'values "^") (ref power)))) | |
| (setf value (/fn #'values (+fn (lambda (&rest xs) | |
| (parse-integer (coerce xs 'string))) | |
| ([]fn #'values '(#\0 . #\9))) | |
| (seqfn (lambda (&rest xs) (second xs)) | |
| (litfn #'values "(") (ref expr) (litfn #'values ")")))) | |
| (match/parse (make-string-input-stream "123/2^(456+1)+2*2-3/2") | |
| expr) | |
| |# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment