truth-table/parse.lisp

377 lines
15 KiB
Common Lisp
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;; parse.lisp -- Proposition string parsing subroutines
;; Copyright (C) 2024 Alexander Rosenberg
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
(in-package :truth-table/base)
(defun whitespace-p (char)
"Return nil unless CHAR is whitespace."
(member char '(#\newline #\space) :test 'eq))
(defun paren-p (char)
"Return nil unless CHAR is a parenthesis."
(member char '(#\( #\))))
(defun delim-p (char)
"Return nil unless CHAR is either whitespace or a parenthesis."
(or (whitespace-p char) (paren-p char)))
(defun symbol-char-p (char)
"Return nil until CHAR is a valid character for use in proposition variable
names."
(or (alpha-char-p char) (eq char #\_) (digit-char-p char)))
(defun replace-in-string (str chars new-char)
"Replace all instances of any of CHARS in STR with NEW-CHAR."
(coerce (loop with lchars = (if (atom chars)
(list chars)
chars)
for char across str
when (member char lchars)
collect new-char
else
collect char)
'string))
(define-condition proposition-parse-error (error)
((position :initarg :position
:accessor parse-error-position
:initform nil)
(proposition :initarg :proposition
:accessor parse-error-proposition
:initform nil)
(message :initarg :message
:accessor parse-error-message
:initform nil))
(:report (lambda (con stream)
(with-slots (position proposition message) con
(format stream
"parse error~@[ at column ~d~]~@[: ~a~]~
~@[:~% ~a~@[~% ~a^~]~]"
(when position
(1+ position))
message
(when proposition
(replace-in-string proposition
'(#\newline #\return)
#\space))
(when position
(make-string position
:initial-element #\space))))))
(:documentation "Condition representing an error during parsing of a
proposition."))
(defparameter *operator-symbol-table*
'((open-paren "(")
(close-paren ")")
(and "/\\" "and" "&&" "&" "∧" ".")
(nand "nand" "↑" "⊼" "~&" "~&&" "!&" "!&&")
(or "\\/" "or" "||" "|" "∥" "+" "")
(nor "nor" "↓" "⊽" "~|" "~||" "!|" "!||")
(xor "xor" "⊕" "⊻" "↮" "≢" "^" "!=")
(not "¬" "~" "!" "not")
(implies "->" ">" "=>" "⇒" "⟹" "→" "⊃" "implies")
(converse "<-" "<" "<=" "←" "⇐" "⟸" "⊂" "converse")
(iff "<->" "<>" "<=>" "⇔" "↔" "≡" "iff" "=" "==" "xnor" "⊙"))
"Alist table of operator symbols and their possible string representations.")
(defun operator-symbol (oper-str)
"Return the symbol for OPER-STR, or nil if it is not a know operator."
(loop for (oper-sym . strs) in *operator-symbol-table*
when (member oper-str strs :test 'equalp)
do (return oper-sym)))
(defun operator-precedence (oper)
"Return the precedence for OPER."
(case oper
(not 1)
(and 2)
(implicit-and 2)
(nand 2)
(xor 3)
(or 4)
(nor 4)
(implies 5)
(converse 5)
(iff 6)
(open-paren most-positive-fixnum)
(t nil)))
(defun operator-argument-count (oper)
"Return the minimum number of arguments that OPER takes as the first value,
and the maximum number (or nil for infinity) as a second value."
(case oper
(and (values 2 nil))
(or (values 2 nil))
(xor (values 2 nil))
(not (values 1 1))
(implies (values 2 2))
(converse (values 2 2))
(iff (values 2 2))
(nand (values 2 nil))
(nor (values 2 2))
(open-paren (values 0 0))
(t (error "unknown operator: ~S" oper))))
(defun unary-p (oper)
"Return whether OPER is a unary operator or not."
(when oper
(= 1 (operator-argument-count oper))))
(defun interpret-operand (oper-str)
"Return a symbol representing OPER-STR, or the string itself if it represents
a variable."
(cond
((member oper-str '("t" "true" "" "1") :test 'equalp)
'true)
((member oper-str '("f" "false" "⊥" "0") :test 'equalp)
'false)
(t
(loop for char across oper-str
unless (symbol-char-p char)
do (return nil)
finally (return oper-str)))))
(defun string-first-char-safe (str)
"Return the first character of STR, or nil if it is empty."
(unless (zerop (length str))
(elt str 0)))
(defun next-symbol-token (str &key multi-char-names)
"Return the next token from STR that is not a paren. If MULTI-CHAR-NAMES is
non-nil, allow names to be more than one character long."
(loop with mode = (if (symbol-char-p (elt str 0))
'alpha
'sym)
for char across str
for chari from 0
while (or (and (eq mode 'alpha) (symbol-char-p char))
(and (eq mode 'sym) (not (symbol-char-p char))
(not (delim-p char))))
collect char into token
finally
(let ((str (coerce token 'string)))
(return
(string
(cond
;; single character unary operand
((let ((val (operator-symbol (string (first token)))))
(and (symbolp val) (unary-p val)))
(first token))
;; multi-char variable, constant (true or false), or operator
((or multi-char-names
(operator-symbol str)
(symbolp (interpret-operand str)))
str)
;; single letter variable (multi-char-names is off)
(t (first token))))))))
(defun next-token (str &key multi-char-names)
"Return a list of the next token in STR and how much whitespace it had."
(let ((whitespace-chars 0))
(loop for char across str
while (whitespace-p char) do
(setq whitespace-chars (1+ whitespace-chars)))
(setq str (subseq str whitespace-chars))
(let ((next-char (string-first-char-safe str)))
(cond
((not next-char)
(list nil whitespace-chars))
((paren-p next-char)
(list (string next-char) whitespace-chars))
(t
(let ((token (next-symbol-token str :multi-char-names multi-char-names)))
(list token whitespace-chars)))))))
(defmacro dotokens ((var pos-var str &optional (retval nil retvalp))
(&key multi-char-names &allow-other-keys)
&body body)
"Execute BODY once with VAR bound to each token in STR. Optionally, return
RETVAL. The position of each token will be stored in POS-VAR. If
MULTI-CHAR-NAMES is enabled, allow multiple characters in variable names."
(let ((stream-var (gensym))
(token-start-var (gensym))
(token-var (gensym))
(read-chars-var (gensym))
(whitespace-var (gensym)))
`(loop for ,stream-var = ,str then (subseq ,str ,read-chars-var)
for (,token-var ,whitespace-var)
= (next-token ,stream-var :multi-char-names ,multi-char-names)
for ,token-start-var = ,whitespace-var
then (+ ,read-chars-var ,whitespace-var)
for ,read-chars-var = (+ ,whitespace-var (length ,token-var))
then (+ ,read-chars-var ,whitespace-var (length ,token-var))
while ,token-var do
(let ((,var ,token-var)
(,pos-var ,token-start-var))
(declare (ignorable ,pos-var))
,@body)
finally
(return ,(when retvalp
retval)))))
(defun interpret-token (token)
"Return a list of the form (type value), where type is one of: `operator' or
`operand', and value is the tokens value, as returned by `operator-symbol' or
`interpret-operand'. If the token is of an unknown type, return a list of (nil
nil)."
(let ((operator-value (operator-symbol token)))
(if operator-value
(list 'operator operator-value)
(let ((operand-value (interpret-operand token)))
(if operand-value
(list 'operand operand-value)
(list nil nil))))))
(defun parse-proposition-string (prop-str &key (implicit-and t) multi-char-names)
"Parse PROP-STR, which is a proposition string.
The return value is the values set of the parsed string, and the list of all
found variables."
(let ((found-vars '())
(operators '())
(operands '())
(paren-operands (list 0)))
(labels ((peek-operator ()
(destructuring-bind (&optional value . pos) (car operators)
(values value pos)))
(pop-operator ()
(destructuring-bind (&optional value . pos) (pop operators)
(values value pos)))
(convert-implicit-and (this-name pos)
(multiple-value-bind (value top-pos) (peek-operator)
(when (eq value 'implicit-and)
(unless implicit-and
(cerror "Insert implicit AND operator"
'proposition-parse-error
:position pos
:proposition prop-str
:message
(format nil "expected binary operator, found ~a"
this-name)))
(pop-operator)
(push (cons 'and top-pos) operators))))
(apply-one-operator ()
(multiple-value-bind (oper pos) (pop-operator)
(when (not oper)
(error 'proposition-parse-error :message "no more operators"
:position pos
:proposition prop-str))
(let ((oper-args (operator-argument-count oper))
(cur-operands (list (pop operands))))
(when (not (car cur-operands))
(error 'proposition-parse-error
:position pos
:proposition prop-str
:message
(format nil "~A expects ~D arguments, found none"
oper oper-args)))
(when (= oper-args 2)
(push (pop operands) cur-operands)
(when (not (car cur-operands))
(error 'proposition-parse-error
:position pos
:proposition prop-str
:message
(format nil "~A expects ~D arguments, found 1"
oper oper-args))))
(push (cons oper cur-operands) operands))))
(apply-lower-precedent (prec)
(loop for top-oper = (peek-operator)
while (and top-oper
(<= (or (operator-precedence top-oper)
most-positive-fixnum)
prec)
(not (unary-p top-oper)))
do (apply-one-operator)))
(apply-all-unary ()
(loop while (unary-p (peek-operator))
do
(apply-one-operator)))
(push-operator (oper token-pos)
(apply-lower-precedent (operator-precedence oper))
(push (cons oper token-pos) operators)))
(dotokens (token-str token-pos prop-str)
(:multi-char-names multi-char-names)
(destructuring-bind (type value) (interpret-token token-str)
(cond
;; unknown token
((not type)
(error 'proposition-parse-error
:position token-pos
:proposition prop-str
:message "unknown token"))
;; operand
((eq type 'operand)
(convert-implicit-and "operand" token-pos)
(unless (member value '(true false))
(pushnew value found-vars :test 'equal))
(push value operands)
(incf (car paren-operands))
(apply-all-unary)
(convert-implicit-and "operand" token-pos)
(push-operator 'implicit-and token-pos))
;; open parenthesis
((eq value 'open-paren)
(convert-implicit-and "open parenthesis" token-pos)
(push (cons value token-pos) operators)
(push 0 paren-operands))
;; close parenthesis
((eq value 'close-paren)
(when (eq (peek-operator) 'implicit-and)
(pop-operator))
(loop while (not (eq (peek-operator) 'open-paren))
when (null operators) do
(error 'proposition-parse-error
:position token-pos
:proposition prop-str
:message "no matching open parenthesis")
do (apply-one-operator))
(when (zerop (pop paren-operands))
(error 'proposition-parse-error
;; open paren position
:position (cdr (pop operators))
:proposition prop-str
:message "empty parenthesis"))
;; remove open paren
(pop-operator)
(apply-all-unary)
(convert-implicit-and "close parenthesis" token-pos)
(push-operator 'implicit-and token-pos))
;; operator
(t
(if (eq (peek-operator) 'implicit-and)
(if (unary-p value)
(convert-implicit-and "unary operator" token-pos)
(pop-operator))
(unless (unary-p value)
(error 'proposition-parse-error
:position token-pos
:proposition prop-str
:message "expected operand, found operator")))
(push-operator value token-pos)))))
;; remove implicit-and
(when (eq 'implicit-and (peek-operator))
(pop-operator))
(loop for (top-oper . top-pos) = (car operators)
while top-oper
when (eq top-oper 'open-paren) do
(error 'proposition-parse-error
:message "no matching closing parenthesis"
:proposition prop-str
:position top-pos)
do
(apply-one-operator))
;; return variables in the order we found them
(values (car operands) (nreverse found-vars)))))