337 lines
13 KiB
Common Lisp
337 lines
13 KiB
Common Lisp
|
;; parse.lisp -- Proposition string parsing subroutines
|
|||
|
;; Copyright (C) 2024 Alexander Rosenberg
|
|||
|
;;
|
|||
|
;; This program is free software: you can redistribute it and/or modify
|
|||
|
;; it under the terms of the GNU General Public License as published by
|
|||
|
;; the Free Software Foundation, either version 3 of the License, or
|
|||
|
;; (at your option) any later version.
|
|||
|
;;
|
|||
|
;; This program is distributed in the hope that it will be useful,
|
|||
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|||
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|||
|
;; GNU General Public License for more details.
|
|||
|
;;
|
|||
|
;; You should have received a copy of the GNU General Public License
|
|||
|
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|||
|
(in-package :truth-table/base)
|
|||
|
|
|||
|
(defun whitespace-p (char)
  "Return nil unless CHAR is whitespace.

Space, newline, tab, carriage return, linefeed and page (form feed) all count
as whitespace, so propositions containing tabs or CRLF line endings tokenize
the same way as ones that only use spaces."
  ;; Characters must be compared with `eql' (the default test of `member');
  ;; `eq' is not guaranteed to be true even for two identical characters.
  (member char '(#\space #\newline #\tab #\return #\linefeed #\page)))
|
|||
|
|
|||
|
(defun paren-p (char)
  "Return non-nil if CHAR is an opening or closing parenthesis, nil otherwise."
  (member char
          '(#\( #\))
          :test #'eql))
|
|||
|
|
|||
|
(defun delim-p (char)
  "Return non-nil if CHAR delimits a token, i.e. it is whitespace or a
parenthesis.  Return nil otherwise."
  ;; A `cond' clause with no body returns the value of its test, so this yields
  ;; whatever the first successful predicate returned.
  (cond ((whitespace-p char))
        ((paren-p char))))
|
|||
|
|
|||
|
(defun symbol-char-p (char)
  "Return nil unless CHAR is a valid character for use in proposition variable
names.

Valid characters are alphabetic characters, decimal digits and the
underscore."
  ;; `char=' rather than `eq': identical characters are not required to be
  ;; `eq' to one another.
  (or (alpha-char-p char)
      (char= char #\_)
      (digit-char-p char)))
|
|||
|
|
|||
|
(defun replace-in-string (str chars new-char)
  "Return a copy of STR in which every character found in CHARS is replaced
with NEW-CHAR.

CHARS is either a list of characters or a single (atomic) character."
  (let ((targets (if (atom chars)
                     (list chars)
                     chars)))
    (map 'string
         (lambda (ch)
           (if (member ch targets)
               new-char
               ch))
         str)))
|
|||
|
|
|||
|
(define-condition proposition-parse-error (error)
  ;; POSITION is the zero-based index into PROPOSITION at which the problem was
  ;; noticed; MESSAGE is a short human readable description.  Every slot
  ;; defaults to nil and is simply left out of the report when nil.
  ((position :initform nil
             :initarg :position
             :accessor parse-error-position)
   (proposition :initform nil
                :initarg :proposition
                :accessor parse-error-proposition)
   (message :initform nil
            :initarg :message
            :accessor parse-error-message))
  (:documentation "Condition signaled when a proposition string cannot be
parsed.")
  (:report
   (lambda (err out)
     (let* ((pos (parse-error-position err))
            (prop (parse-error-proposition err))
            ;; columns are reported one-based
            (column (and pos (1+ pos)))
            ;; flatten line breaks so the proposition prints on a single line
            (shown (and prop
                        (replace-in-string prop
                                           '(#\newline #\return)
                                           #\space)))
            ;; padding that places the caret under the offending column
            (padding (and pos
                          (make-string pos :initial-element #\space))))
       (format out
               "parse error~@[ at column ~d~]~@[: ~a~]~
                ~@[:~% ~a~@[~% ~a^~]~]"
               column
               (parse-error-message err)
               shown
               padding)))))
|
|||
|
|
|||
|
(defparameter *operator-symbol-table*
  '(;; grouping
    (open-paren "(")
    (close-paren ")")
    ;; conjunction and its negation
    (and "/\\" "and" "&&" "&" "∧" ".")
    (nand "nand" "↑" "⊼")
    ;; disjunction and its negation
    (or "\\/" "or" "||" "|" "∥" "+" "∨")
    (nor "nor" "↓" "⊽")
    ;; exclusive or
    (xor "xor" "⊕" "⊻" "↮" "≢" "^" "!=")
    ;; negation
    (not "¬" "~" "!" "not")
    ;; conditionals
    (implies "->" ">" "=>" "⇒" "⟹" "→" "⊃" "implies")
    (converse "<-" "<" "<=" "←" "⇐" "⟸" "⊂" "converse")
    (iff "<->" "<>" "<=>" "⇔" "↔" "≡" "iff" "=" "==" "xnor" "⊙"))
  "Alist mapping each operator symbol to the list of strings that may be used
to write that operator in a proposition.")
|
|||
|
|
|||
|
(defun operator-symbol (oper-str)
  "Return the symbol for OPER-STR, or nil if it is not a known operator.

OPER-STR is looked up among the spellings in `*operator-symbol-table*' using
`equalp', so the comparison ignores case."
  (first (find-if (lambda (entry)
                    (member oper-str (rest entry) :test #'equalp))
                  *operator-symbol-table*)))
|
|||
|
|
|||
|
(defun operator-precedence (oper)
  "Return the precedence for OPER, or nil if OPER has none.

Smaller numbers bind more tightly.  `open-paren' is given
`most-positive-fixnum' so that it is never lower than any real operator."
  (case oper
    (not 1)
    ((and nand) 2)
    (xor 3)
    ((or nor) 4)
    ((implies converse) 5)
    (iff 6)
    (open-paren most-positive-fixnum)
    (otherwise nil)))
|
|||
|
|
|||
|
(defun interpret-operand (oper-str)
  "Interpret OPER-STR as an operand.

Return the symbol `true' or `false' if OPER-STR spells a truth constant
(compared ignoring case), OPER-STR itself if it consists solely of variable
name characters, and nil otherwise."
  (flet ((spelled-as-p (spellings)
           (member oper-str spellings :test #'equalp)))
    (cond
      ((spelled-as-p '("t" "true" "⊤" "1"))
       'true)
      ((spelled-as-p '("f" "false" "⊥" "0"))
       'false)
      ((every #'symbol-char-p oper-str)
       oper-str)
      (t
       nil))))
|
|||
|
|
|||
|
(defun string-first-char-safe (str)
  "Return the first character of STR.  If STR is empty, return nil instead of
signaling an error."
  (if (plusp (length str))
      (elt str 0)
      nil))
|
|||
|
|
|||
|
(defun next-symbol-token (str &key multi-char-names)
  "Return the next token from STR that is not a paren, or nil if STR does not
start with such a token (it is empty or starts with a delimiter).

STR is expected to start at the token, with no leading whitespace.  A token is
either a run of variable name characters (see `symbol-char-p') or a run of
other characters that are not delimiters (see `delim-p').

A run that is exactly a known operator or truth constant is always returned
whole.  Otherwise:
  - a run of name characters is returned whole if MULTI-CHAR-NAMES is non-nil,
    and cut down to its first character if not;
  - a run of other characters is cut down to its longest leading part that is
    a known operator or constant, or to its first character if there is none.
    This lets operators written back to back, as in \"a&~b\", be told apart."
  (let* ((alpha (and (plusp (length str))
                     (symbol-char-p (elt str 0))))
         ;; index one past the last character of the run
         (end (or (position-if (lambda (char)
                                 (if alpha
                                     (not (symbol-char-p char))
                                     (or (symbol-char-p char)
                                         (delim-p char))))
                               str)
                  (length str))))
    (flet ((known-p (candidate)
             ;; `interpret-operand' returns nil for anything that is not an
             ;; operand, and nil is itself a symbol, so it must be excluded
             ;; explicitly when looking for the constants `true'/`false'.
             (or (operator-symbol candidate)
                 (let ((operand (interpret-operand candidate)))
                   (and operand (symbolp operand))))))
      (let ((token (subseq str 0 end)))
        (cond
          ;; nothing to read
          ((zerop end)
           nil)
          ;; the whole run is an operator or a constant
          ((known-p token)
           token)
          ;; a variable name: defer to MULTI-CHAR-NAMES
          (alpha
           (if multi-char-names
               token
               (subseq token 0 1)))
          ;; a run of operator characters that is not one single operator
          (t
           (or (loop for len from (1- end) downto 1
                     for prefix = (subseq token 0 len)
                     when (known-p prefix)
                       return prefix)
               (subseq token 0 1))))))))
|
|||
|
|
|||
|
(defun next-token (str &key multi-char-names)
  "Return a list of two elements: the next token in STR and the number of
whitespace characters that preceded it.

The token is nil if nothing but whitespace is left in STR.  MULTI-CHAR-NAMES is
passed on to `next-symbol-token'."
  (let* ((skipped (or (position-if-not #'whitespace-p str)
                      (length str)))
         (remaining (subseq str skipped))
         (first-char (string-first-char-safe remaining)))
    (list (cond
            ((null first-char)
             nil)
            ((paren-p first-char)
             (string first-char))
            (t
             (next-symbol-token remaining
                                :multi-char-names multi-char-names)))
          skipped)))
|
|||
|
|
|||
|
(defmacro dotokens ((var pos-var str &optional (retval nil retvalp))
                    (&key multi-char-names &allow-other-keys)
                    &body body)
  "Execute BODY once with VAR bound to each token in STR. Optionally, return
RETVAL. The position of each token will be stored in POS-VAR. If
MULTI-CHAR-NAMES is enabled, allow multiple characters in variable names.

The STR and MULTI-CHAR-NAMES forms are each evaluated exactly once, before the
first token is read.  BODY runs inside a `loop', so `return' can be used to
stop early."
  (let ((input-var (gensym "INPUT"))
        (multi-var (gensym "MULTI-CHAR-NAMES"))
        (stream-var (gensym "STREAM"))
        (token-start-var (gensym "TOKEN-START"))
        (token-var (gensym "TOKEN"))
        (read-chars-var (gensym "READ-CHARS"))
        (whitespace-var (gensym "WHITESPACE")))
    `(loop with ,input-var = ,str
           with ,multi-var = ,multi-char-names
           ;; the not yet tokenized rest of the input
           for ,stream-var = ,input-var
             then (subseq ,input-var ,read-chars-var)
           for (,token-var ,whitespace-var)
             = (next-token ,stream-var :multi-char-names ,multi-var)
           ;; index of the first character of the current token
           for ,token-start-var = ,whitespace-var
             then (+ ,read-chars-var ,whitespace-var)
           ;; total number of characters consumed, including this token
           for ,read-chars-var = (+ ,whitespace-var (length ,token-var))
             then (+ ,read-chars-var ,whitespace-var (length ,token-var))
           while ,token-var do
             (let ((,var ,token-var)
                   (,pos-var ,token-start-var))
               (declare (ignorable ,pos-var))
               ,@body)
           finally
              (return ,(when retvalp
                         retval)))))
|
|||
|
|
|||
|
(defun interpret-token (token)
  "Classify TOKEN and return a list of the form (type value).

TYPE is `operator' when `operator-symbol' recognizes TOKEN and `operand' when
`interpret-operand' does; VALUE is whatever that function returned.  Operators
are tried first.  If neither recognizes TOKEN, the result is (nil nil)."
  (let (value)
    (cond
      ((setq value (operator-symbol token))
       (list 'operator value))
      ((setq value (interpret-operand token))
       (list 'operand value))
      (t
       (list nil nil)))))
|
|||
|
|
|||
|
(defun apply-one-operator (operator-stack operand-stack
                           &optional proposition position)
  "Pop one operator off OPERATOR-STACK and as many operands as it takes off
OPERAND-STACK, then push the resulting (operator operand...) form back onto the
operand stack.  Return the new operator stack and the new operand stack as two
values.

Signal a `proposition-parse-error', carrying PROPOSITION and POSITION, if there
is no operator left or if the operator does not have enough operands."
  (flet ((fail (message)
           (error 'proposition-parse-error
                  :message message
                  :position position
                  :proposition proposition)))
    (let ((operator (pop operator-stack)))
      (unless operator
        (fail "no more operators"))
      (let* ((arity (operator-argument-count operator))
             ;; the most recently pushed operand is the right-hand one
             (right (pop operand-stack)))
        (unless right
          (fail (format nil
                        "operator ~A expects ~D arguments, found none"
                        operator arity)))
        (if (= arity 2)
            (let ((left (pop operand-stack)))
              (unless left
                (fail (format nil "operator ~A expects ~D arguments, found 1"
                              operator arity)))
              (push (list operator left right) operand-stack))
            (push (list operator right) operand-stack))
        (values operator-stack operand-stack)))))
|
|||
|
|
|||
|
(defun apply-lower-precedent (prec operators operands
                              &optional proposition position)
  "Apply operators from the top of OPERATORS for as long as their precedence
number is less than or equal to PREC.  Return three values: the new operator
stack, the new operand stack and the number of operators that were applied.

An empty stack, or an operator without a precedence, stops the process.
PROPOSITION and POSITION are handed to `apply-one-operator' for error
reporting."
  (let ((applied 0))
    (loop
      (let ((top-prec (or (operator-precedence (first operators))
                          most-positive-fixnum)))
        (when (> top-prec prec)
          (return (values operators operands applied))))
      (multiple-value-setq (operators operands)
        (apply-one-operator operators operands
                            proposition position))
      (incf applied))))
|
|||
|
|
|||
|
(defun parse-proposition-string (str &key (implicit-and t) multi-char-names)
  "Parse STR, which is a proposition string.

Return two values: the parsed proposition, as nested lists of the form
(operator operand...), and the list of all variables found, in the order in
which they first appear.

If IMPLICIT-AND is non-nil (the default), an operand that directly follows
another operand is joined to it with an implicit `and'.  This applies no matter
whether the second operand is a variable or constant, a parenthesized group or
the operand of a prefix operator, so \"a (b | c)\" and \"a ~b\" are read as
\"a & (b | c)\" and \"a & ~b\".  If IMPLICIT-AND is nil, such input is an error.

MULTI-CHAR-NAMES is passed on to the tokenizer.

Signal a `proposition-parse-error' if STR is malformed."
  (let ((found-vars '())
        (operators '())
        (operands '())
        ;; positions of the entries of OPERATORS, kept in step with it
        (oper-poses '())
        (last-was-operand nil))
    (labels ((reduce-and-push (operator pos)
               ;; Apply every stacked operator that binds at least as tightly
               ;; as OPERATOR, then push OPERATOR itself.
               (multiple-value-bind (new-oper new-opan pop-count)
                   (apply-lower-precedent (operator-precedence operator)
                                          operators operands str pos)
                 (setq operators new-oper
                       operands new-opan)
                 (dotimes (i pop-count)
                   (pop oper-poses)))
               (push operator operators)
               (push pos oper-poses))
             (begin-operand (pos found)
               ;; Called when the token at POS starts a new operand.  Two
               ;; operands next to each other often means "and" implicitly.
               (when last-was-operand
                 (unless implicit-and
                   (error 'proposition-parse-error
                          :position pos
                          :proposition str
                          :message (format nil "expected operator, found ~a"
                                           found)))
                 (reduce-and-push 'and pos)
                 (setq last-was-operand nil))))
      (dotokens (token token-pos str)
          (:multi-char-names multi-char-names)
        (destructuring-bind (type value) (interpret-token token)
          (cond
            ;; unknown type
            ((not type)
             (error 'proposition-parse-error
                    :position token-pos
                    :proposition str
                    :message "unknown token"))
            ;; operand
            ((eq type 'operand)
             (begin-operand token-pos "operand")
             (unless (member value '(true false))
               (pushnew value found-vars :test 'equal))
             (push value operands)
             (setq last-was-operand t))
            ;; an open paren starts a new operand, but is not one itself
            ((eq value 'open-paren)
             (begin-operand token-pos "open parenthesis")
             (push value operators)
             (push token-pos oper-poses))
            ;; a close paren doesn't touch `last-was-operand'
            ((eq value 'close-paren)
             (loop while (not (eq (car operators) 'open-paren))
                   when (null operators) do
                     (error 'proposition-parse-error
                            :position token-pos
                            :proposition str
                            :message "no matching open parenthesis")
                   do
                      (setf (values operators operands)
                            (apply-one-operator operators operands
                                                str token-pos))
                      (pop oper-poses))
             ;; remove the open-paren
             (pop operators)
             (pop oper-poses))
            ;; prefix operator: it has no left operand, so nothing that is
            ;; already on the stack may be applied yet (otherwise "~ ~a" would
            ;; try to apply the first negation before its operand is read)
            ((= (operator-argument-count value) 1)
             (begin-operand token-pos "prefix operator")
             (push value operators)
             (push token-pos oper-poses)
             (setq last-was-operand nil))
            ;; binary operator
            (t
             (reduce-and-push value token-pos)
             (setq last-was-operand nil))))))
    ;; apply whatever is left once the input is exhausted
    (loop while operators do
      (let ((oper-pos (pop oper-poses)))
        (when (eq (car operators) 'open-paren)
          (error 'proposition-parse-error
                 :message "no matching closing parenthesis"
                 :proposition str
                 :position oper-pos))
        (setf (values operators operands)
              (apply-one-operator operators operands
                                  str oper-pos))))
    ;; return variables in the order we found them
    (values (car operands) (nreverse found-vars))))
|