truth-table/parse.lisp

461 lines
19 KiB
Common Lisp
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;; parse.lisp -- Proposition string parsing subroutines
;; Copyright (C) 2024 Alexander Rosenberg
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
(in-package :truth-table/base)
(defun whitespace-p (char)
"Return nil unless CHAR is whitespace."
(member char '(#\newline #\space) :test 'eq))
(defun paren-p (char)
"Return nil unless CHAR is a parenthesis."
(member char '(#\( #\))))
(defun delim-p (char)
"Return nil unless CHAR is either whitespace or a parenthesis."
(or (whitespace-p char) (paren-p char)))
(defun symbol-char-p (char)
"Return nil until CHAR is a valid character for use in proposition variable
names."
(or (alpha-char-p char) (eq char #\_) (digit-char-p char)))
(defun replace-in-string (str chars new-char)
"Replace all instances of any of CHARS in STR with NEW-CHAR."
(coerce (loop with lchars = (if (atom chars)
(list chars)
chars)
for char across str
when (member char lchars)
collect new-char
else
collect char)
'string))
(define-condition proposition-parse-error (error)
((position :initarg :position
:accessor parse-error-position
:initform nil)
(proposition :initarg :proposition
:accessor parse-error-proposition
:initform nil)
(message :initarg :message
:accessor parse-error-message
:initform nil))
(:report (lambda (con stream)
(with-slots (position proposition message) con
(format stream
"parse error~@[ at column ~d~]~@[: ~a~]~
~@[:~% ~a~@[~% ~a^~]~]"
(when position
(1+ position))
message
(when proposition
(replace-in-string proposition
'(#\newline #\return)
#\space))
(when position
(make-string position
:initial-element #\space))))))
(:documentation "Condition representing an error during parsing of a
proposition."))
(defparameter *operator-symbol-table*
'((open-paren "(")
(close-paren ")")
(and "/\\" "and" "&&" "&" "∧" ".")
(nand "nand" "↑" "⊼" "~&" "~&&" "!&" "!&&")
(or "\\/" "or" "||" "|" "∥" "+" "")
(nor "nor" "↓" "⊽" "~|" "~||" "!|" "!||")
(xor "xor" "⊕" "⊻" "↮" "≢" "^" "!=")
(not "¬" "~" "!" "not")
(implies "->" ">" "=>" "⇒" "⟹" "→" "⊃" "implies")
(converse "<-" "<" "<=" "←" "⇐" "⟸" "⊂" "converse")
(iff "<->" "<>" "<=>" "⇔" "↔" "≡" "iff" "=" "==" "xnor" "⊙"))
"Alist table of operator symbols and their possible string representations.")
(defun alpha-string-p (str)
"Return t if STR is only alphabetical characters."
(not (find-if-not 'alpha-char-p str)))
(defparameter *longest-non-alpha-operator*
(apply 'max (mapcar
(lambda (entry)
(apply 'max
(mapcar 'length
(remove-if 'alpha-string-p (cdr entry)))))
*operator-symbol-table*))
"The longest operator in `*operator-symbol-table*' such that `alpha-string-p'
returns t.")
(defparameter *operator-descriptions* ;; noindent 30
`((open-paren ("open parenthesis")
,(format nil "Used in combination with a close parenthesis to denote ~
some terms to be evaluated before the surrounding terms.")
(((and (or true true) false) . false)
((or true (and true false)) . true)))
(close-paren ("close parenthesis")
"Used to close a group started with an open parenthesis."
()) ;; no examples for closed paren
(and ("and" "conjunction")
,(format nil "Evaluate to true only if the expressions to the left and ~
right evaluate to true.")
(((and true true) . true)
((and true false) . false)))
(nand ("nand" "non-conjunction")
,(format nil "Evaluate to true unless the expressions to the left and ~
right are both true. This is the negation of the 'and' operator.")
(((nand true true) . false)
((nand true false) . true)
((nand false false) . true)))
(or ("or" "disjunction")
,(format nil "Evaluate to true if the expression to the left is true, the ~
expression to the right is true, or both the left and right expressions are ~
true.")
(((or true true) . true)
((or true false) . true)
((or false false) . false)))
(nor ("nor" "non-disjunction")
,(format nil "Evaluate to true if the expressions to the left and right ~
are both false. This is the negation of the 'or' operator.")
(((nor true true) . false)
((nor true false) . false)
((nor false false) . true)))
(xor ("exclusive or" "exclusive disjunction") ;; noindent 30
,(format nil "Evaluate to true if the expression to the left is true or ~
if the expression to the right is true, but not if both of them are true.")
(((xor true true) . false)
((xor true false) . true)))
(not ("not" "negation")
,(format nil "Evaluate to false if the expression to the left evaluates ~
to true, and evaluate to true if the expression to the left evaluates to ~
false. This is a unary operator (it only applies to the expression following ~
it).")
(((not true) . false)
((not false) . true)))
(implies ("implies" "conditional")
,(format nil "Evaluate to false if the expression to the left evaluates ~
to true and the expressions to the right evaluates to false. Otherwise, ~
evaluate to true.")
(((implies true true) . true)
((implies true false) . false)
((implies false true) . true)
((implies false false) . true)))
(converse ("converse")
,(format nil "Evaluate to false if the expression to the right evaluates ~
to true and the expression to the left evaluates to false. Otherwise, evaluate ~
to true. This is the 'implies' operator with its arguments flipped.")
(((implies true true) . true)
((implies true false) . true)
((implies false true) . false)
((implies false false) . true)))
(iff ("biconditional" "equivalent")
,(format nil "Evaluate to true if the expressions to the left and rigt ~
evaluate to the same value. That is, they are both true or both false.")
(((iff true true) . true)
((iff true false) . false)
((iff false false) . true))))
"Alist table of operator symbols and their descriptions. The format of this
list is SYMBOL NAMES DESCRIPTION (&rest (EXAMPLE LEFT . EXAMPLE RIGHT)). These
are useful for use in things like syntax explanation messages.")
(defun operator-symbol (oper-str)
"Return the symbol for OPER-STR, or nil if it is not a know operator."
(loop for (oper-sym . strs) in *operator-symbol-table*
when (member oper-str strs :test 'equalp)
do (return oper-sym)))
(defun operator-precedence (oper)
"Return the precedence for OPER."
(case oper
(not 1)
(and 2)
(implicit-and 2)
(nand 2)
(xor 3)
(or 4)
(nor 4)
(implies 5)
(converse 5)
(iff 6)
(open-paren most-positive-fixnum)
(t nil)))
(defun unary-p (oper)
"Return whether OPER is a unary operator or not."
(eq oper 'not))
(defparameter *operand-symbol-table*
'((true "t" "true" "" "1")
(false "f" "false" "⊥" "0"))
"Alist mapping operand symbols (true and false) to their textual
representations.")
(defun interpret-operand (oper-str)
"Return a symbol representing OPER-STR, or the string itself if it represents
a variable."
(dolist (entry *operand-symbol-table*)
(when (member oper-str (cdr entry) :test 'equalp)
(return-from interpret-operand (car entry))))
;; check if OPER-STR is a valid variable name
(if (or (zerop (length oper-str))
(find-if-not 'symbol-char-p oper-str))
nil
oper-str))
(defun string-first-char-safe (str)
"Return the first character of STR, or nil if it is empty."
(unless (zerop (length str))
(elt str 0)))
(defun try-find-operator-for-token (token)
"Return the operator symbol for TOKEN, if it is an operator. As a second
value, return the matched portion of TOKEN. If no match is found, return
(values nil nil)."
(loop for len downfrom (min *longest-non-alpha-operator* (length token)) to 1
for cur-test = (subseq token 0 len)
for oper-sym = (operator-symbol cur-test)
when oper-sym do
(return-from try-find-operator-for-token
(values oper-sym cur-test)))
(values nil nil))
(defun next-symbol-token (str &key multi-char-names)
"Return the next token from STR that is not a paren. If MULTI-CHAR-NAMES is
non-nil, allow names to be more than one character long."
(loop with mode = (if (symbol-char-p (elt str 0))
'alpha
'sym)
for char across str
for chari from 0
while (or (and (eq mode 'alpha) (symbol-char-p char))
(and (eq mode 'sym) (not (symbol-char-p char))
(not (delim-p char))))
collect char into token
finally
(let ((str (coerce token 'string)))
(return
(string
(cond
;; operator
((multiple-value-bind (sym match)
(try-find-operator-for-token str)
(declare (ignorable sym))
match))
;; multi-char variable, constant (true or false)
((or multi-char-names
(symbolp (interpret-operand str)))
str)
;; single letter variable (multi-char-names is off)
(t (first token))))))))
(defun next-token (str &key multi-char-names)
"Return a list of the next token in STR and how much whitespace it had."
(let ((whitespace-chars 0))
(loop for char across str
while (whitespace-p char) do
(setq whitespace-chars (1+ whitespace-chars)))
(setq str (subseq str whitespace-chars))
(let ((next-char (string-first-char-safe str)))
(cond
((not next-char)
(list nil whitespace-chars))
((paren-p next-char)
(list (string next-char) whitespace-chars))
(t
(let ((token (next-symbol-token str :multi-char-names multi-char-names)))
(list token whitespace-chars)))))))
(defmacro dotokens ((var pos-var str &optional (retval nil retvalp))
(&key multi-char-names &allow-other-keys)
&body body)
"Execute BODY once with VAR bound to each token in STR. Optionally, return
RETVAL. The position of each token will be stored in POS-VAR. If
MULTI-CHAR-NAMES is enabled, allow multiple characters in variable names."
(let ((stream-var (gensym))
(token-start-var (gensym))
(token-var (gensym))
(read-chars-var (gensym))
(whitespace-var (gensym)))
`(loop for ,stream-var = ,str then (subseq ,str ,read-chars-var)
for (,token-var ,whitespace-var)
= (next-token ,stream-var :multi-char-names ,multi-char-names)
for ,token-start-var = ,whitespace-var
then (+ ,read-chars-var ,whitespace-var)
for ,read-chars-var = (+ ,whitespace-var (length ,token-var))
then (+ ,read-chars-var ,whitespace-var (length ,token-var))
while ,token-var do
(let ((,var ,token-var)
(,pos-var ,token-start-var))
(declare (ignorable ,pos-var))
,@body)
finally
(return ,(when retvalp
retval)))))
(defun interpret-token (token)
"Return a list of the form (type value), where type is one of: `operator' or
`operand', and value is the tokens value, as returned by `operator-symbol' or
`interpret-operand'. If the token is of an unknown type, return a list of (nil
nil)."
(let ((operator-value (operator-symbol token)))
(if operator-value
(list 'operator operator-value)
(let ((operand-value (interpret-operand token)))
(if operand-value
(list 'operand operand-value)
(list nil nil))))))
(defun parse-proposition-string (prop-str &key (implicit-and t) multi-char-names)
"Parse PROP-STR, which is a proposition string.
The return value is the values set of the parsed string, and the list of all
found variables."
(let ((found-vars '())
(operators '())
(operands '())
(paren-operands (list 0)))
(labels ((peek-operator ()
(destructuring-bind (&optional value . pos) (car operators)
(values value pos)))
(pop-operator ()
(destructuring-bind (&optional value . pos) (pop operators)
(values value pos)))
(convert-implicit-and (this-name pos)
(multiple-value-bind (value top-pos) (peek-operator)
(when (eq value 'implicit-and)
(unless implicit-and
(cerror "Insert implicit AND operator"
'proposition-parse-error
:position pos
:proposition prop-str
:message
(format nil "expected binary operator, found ~a"
this-name)))
(pop-operator)
(push (cons 'and top-pos) operators))))
(apply-one-operator ()
(multiple-value-bind (oper pos) (pop-operator)
(when (not oper)
(error 'proposition-parse-error :message "no more operators"
:position pos
:proposition prop-str))
(let ((oper-args (if (unary-p oper) 1 2))
(cur-operands (list (pop operands))))
(when (not (car cur-operands))
(error 'proposition-parse-error
:position pos
:proposition prop-str
:message
(format nil "~A expects ~D arguments, found none"
oper oper-args)))
(when (= oper-args 2)
(push (pop operands) cur-operands)
(when (not (car cur-operands))
(error 'proposition-parse-error
:position pos
:proposition prop-str
:message
(format nil "~A expects ~D arguments, found 1"
oper oper-args))))
(push (cons oper cur-operands) operands))))
(apply-lower-precedent (prec)
(loop for top-oper = (peek-operator)
while (and top-oper
(<= (or (operator-precedence top-oper)
most-positive-fixnum)
prec)
(not (unary-p top-oper)))
do (apply-one-operator)))
(apply-all-unary ()
(loop while (unary-p (peek-operator))
do
(apply-one-operator)))
(push-operator (oper token-pos)
(apply-lower-precedent (operator-precedence oper))
(push (cons oper token-pos) operators)))
(dotokens (token-str token-pos prop-str)
(:multi-char-names multi-char-names)
(destructuring-bind (type value) (interpret-token token-str)
(cond
;; unknown token
((not type)
(error 'proposition-parse-error
:position token-pos
:proposition prop-str
:message "unknown token"))
;; operand
((eq type 'operand)
(convert-implicit-and "operand" token-pos)
(unless (member value '(true false))
(pushnew value found-vars :test 'equal))
(push value operands)
(incf (car paren-operands))
(apply-all-unary)
(convert-implicit-and "operand" token-pos)
(push-operator 'implicit-and token-pos))
;; open parenthesis
((eq value 'open-paren)
(convert-implicit-and "open parenthesis" token-pos)
(push (cons value token-pos) operators)
(push 0 paren-operands))
;; close parenthesis
((eq value 'close-paren)
(when (eq (peek-operator) 'implicit-and)
(pop-operator))
(loop while (not (eq (peek-operator) 'open-paren))
when (null operators) do
(error 'proposition-parse-error
:position token-pos
:proposition prop-str
:message "no matching open parenthesis")
do (apply-one-operator))
(when (zerop (pop paren-operands))
(error 'proposition-parse-error
;; open paren position
:position (cdr (pop operators))
:proposition prop-str
:message "empty parenthesis"))
;; remove open paren
(pop-operator)
(apply-all-unary)
(convert-implicit-and "close parenthesis" token-pos)
(push-operator 'implicit-and token-pos))
;; operator
(t
(if (eq (peek-operator) 'implicit-and)
(if (unary-p value)
(convert-implicit-and "unary operator" token-pos)
(pop-operator))
(unless (unary-p value)
(error 'proposition-parse-error
:position token-pos
:proposition prop-str
:message "expected operand, found operator")))
(push-operator value token-pos)))))
;; remove implicit-and
(when (eq 'implicit-and (peek-operator))
(pop-operator))
(loop for (top-oper . top-pos) = (car operators)
while top-oper
when (eq top-oper 'open-paren) do
(error 'proposition-parse-error
:message "no matching closing parenthesis"
:proposition prop-str
:position top-pos)
do
(apply-one-operator))
;; return variables in the order we found them
(values (car operands) (nreverse found-vars)))))