truth-table/parse.lisp

337 lines
13 KiB
Common Lisp
Raw Normal View History

2024-09-04 03:14:57 -07:00
;; parse.lisp -- Proposition string parsing subroutines
;; Copyright (C) 2024 Alexander Rosenberg
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
(in-package :truth-table/base)
(defun whitespace-p (char)
"Return nil unless CHAR is whitespace."
(member char '(#\newline #\space) :test 'eq))
(defun paren-p (char)
"Return nil unless CHAR is a parenthesis."
(member char '(#\( #\))))
(defun delim-p (char)
"Return nil unless CHAR is either whitespace or a parenthesis."
(or (whitespace-p char) (paren-p char)))
(defun symbol-char-p (char)
"Return nil until CHAR is a valid character for use in proposition variable
names."
(or (alpha-char-p char) (eq char #\_) (digit-char-p char)))
(defun replace-in-string (str chars new-char)
"Replace all instances of any of CHARS in STR with NEW-CHAR."
(coerce (loop with lchars = (if (atom chars)
(list chars)
chars)
for char across str
when (member char lchars)
collect new-char
else
collect char)
'string))
(define-condition proposition-parse-error (error)
((position :initarg :position
:accessor parse-error-position
:initform nil)
(proposition :initarg :proposition
:accessor parse-error-proposition
:initform nil)
(message :initarg :message
:accessor parse-error-message
:initform nil))
(:report (lambda (con stream)
(with-slots (position proposition message) con
(format stream
"parse error~@[ at column ~d~]~@[: ~a~]~
~@[:~% ~a~@[~% ~a^~]~]"
(when position
(1+ position))
message
(when proposition
(replace-in-string proposition
'(#\newline #\return)
#\space))
(when position
(make-string position
:initial-element #\space))))))
(:documentation "Condition representing an error during parsing of a
proposition."))
(defparameter *operator-symbol-table*
'((open-paren "(")
(close-paren ")")
(and "/\\" "and" "&&" "&" "∧" ".")
(nand "nand" "↑" "⊼")
(or "\\/" "or" "||" "|" "∥" "+" "")
(nor "nor" "↓" "⊽")
(xor "xor" "⊕" "⊻" "↮" "≢" "^" "!=")
(not "¬" "~" "!" "not")
(implies "->" ">" "=>" "⇒" "⟹" "→" "⊃" "implies")
(converse "<-" "<" "<=" "←" "⇐" "⟸" "⊂" "converse")
(iff "<->" "<>" "<=>" "⇔" "↔" "≡" "iff" "=" "==" "xnor" "⊙"))
"Alist table of operator symbols and their possible string representations.")
(defun operator-symbol (oper-str)
"Return the symbol for OPER-STR, or nil if it is not a know operator."
(loop for (oper-sym . strs) in *operator-symbol-table*
when (member oper-str strs :test 'equalp)
do (return oper-sym)))
(defun operator-precedence (oper)
"Return the precedence for OPER."
(case oper
(not 1)
(and 2)
(nand 2)
(xor 3)
(or 4)
(nor 4)
(implies 5)
(converse 5)
(iff 6)
(open-paren most-positive-fixnum)
(t nil)))
(defun interpret-operand (oper-str)
"Return a symbol representing OPER-STR, or the string itself if it represents
a variable."
(cond
((member oper-str '("t" "true" "" "1") :test 'equalp)
'true)
((member oper-str '("f" "false" "⊥" "0") :test 'equalp)
'false)
(t
(loop for char across oper-str
unless (symbol-char-p char)
do (return nil)
finally (return oper-str)))))
(defun string-first-char-safe (str)
"Return the first character of STR, or nil if it is empty."
(unless (zerop (length str))
(elt str 0)))
(defun next-symbol-token (str &key multi-char-names)
"Return the next token from STR that is not a paren. If MULTI-CHAR-NAMES is
non-nil, allow names to be more than one character long."
(loop with mode = (if (symbol-char-p (elt str 0))
'alpha
'sym)
for char across str
for chari from 0
while (or (and (eq mode 'alpha) (symbol-char-p char))
(and (eq mode 'sym) (not (symbol-char-p char))
(not (delim-p char))))
collect char into token
finally
(let ((str (coerce token 'string)))
(return
;; the multi-char token is an operator. its a variable, so defer
;; to multi-char-names
(if (or multi-char-names
(operator-symbol str)
(symbolp (interpret-operand str)))
str
(string (first token)))))))
(defun next-token (str &key multi-char-names)
"Return a list of the next token in STR and how much whitespace it had."
(let ((whitespace-chars 0))
(loop for char across str
while (whitespace-p char) do
(setq whitespace-chars (1+ whitespace-chars)))
(setq str (subseq str whitespace-chars))
(let ((next-char (string-first-char-safe str)))
(cond
((not next-char)
(list nil whitespace-chars))
((paren-p next-char)
(list (string next-char) whitespace-chars))
(t
(let ((token (next-symbol-token str :multi-char-names multi-char-names)))
(list token whitespace-chars)))))))
(defmacro dotokens ((var pos-var str &optional (retval nil retvalp))
(&key multi-char-names &allow-other-keys)
&body body)
"Execute BODY once with VAR bound to each token in STR. Optionally, return
RETVAL. The position of each token will be stored in POS-VAR. If
MULTI-CHAR-NAMES is enabled, allow multiple characters in variable names."
(let ((stream-var (gensym))
(token-start-var (gensym))
(token-var (gensym))
(read-chars-var (gensym))
(whitespace-var (gensym)))
`(loop for ,stream-var = ,str then (subseq ,str ,read-chars-var)
for (,token-var ,whitespace-var)
= (next-token ,stream-var :multi-char-names ,multi-char-names)
for ,token-start-var = ,whitespace-var
then (+ ,read-chars-var ,whitespace-var)
for ,read-chars-var = (+ ,whitespace-var (length ,token-var))
then (+ ,read-chars-var ,whitespace-var (length ,token-var))
while ,token-var do
(let ((,var ,token-var)
(,pos-var ,token-start-var))
(declare (ignorable ,pos-var))
,@body)
finally
(return ,(when retvalp
retval)))))
(defun interpret-token (token)
"Return a list of the form (type value), where type is one of: `operator' or
`operand', and value is the tokens value, as returned by `operator-symbol' or
`interpret-operand'. If the token is of an unknown type, return a list of (nil
nil)."
(let ((operator-value (operator-symbol token)))
(if operator-value
(list 'operator operator-value)
(let ((operand-value (interpret-operand token)))
(if operand-value
(list 'operand operand-value)
(list nil nil))))))
(defun apply-one-operator (operator-stack operand-stack
&optional proposition position)
"Apply the next operator from OPERATOR-STACK to its operands from
OPERAND-STACK, return the new state of both stacks as values."
(let* ((operator (pop operator-stack)))
(when (not operator)
(error 'proposition-parse-error :message "no more operators"
:position position :proposition proposition))
(let ((oper-args (operator-argument-count operator))
(cur-operands (list (pop operand-stack))))
(when (not (car cur-operands))
(error 'proposition-parse-error
:position position
:proposition proposition
:message (format nil
"operator ~A expects ~D arguments, found none"
operator oper-args)))
(when (= oper-args 2)
(push (pop operand-stack) cur-operands)
(when (not (car cur-operands))
(error 'proposition-parse-error
:position position
:proposition proposition
:message (format nil "operator ~A expects ~D arguments, found 1"
operator oper-args))))
(push (cons operator cur-operands) operand-stack)))
(values operator-stack operand-stack))
(defun apply-lower-precedent (prec operators operands
&optional proposition position)
"Apply all operators with lower precedent than PREC. Return the values of the
new operators and operands stack, as well as the number of operators removed."
(loop with pop-count = 0
while (<= (or (operator-precedence (car operators))
most-positive-fixnum)
prec)
do
(setf (values operators operands)
(apply-one-operator operators operands
proposition position)
pop-count (1+ pop-count))
finally (return (values operators operands pop-count))))
(defun parse-proposition-string (str &key (implicit-and t) multi-char-names)
"Parse STR, which is a proposition string.
The return value is the values set of the parsed string, and the list of all
found variables."
(let ((found-vars '())
(operators '())
(operands '())
(oper-poses '())
(last-was-operand nil))
(dotokens (token token-pos str)
(:multi-char-names multi-char-names)
(destructuring-bind (type value) (interpret-token token)
(cond
;; unknown type
((not type)
(error 'proposition-parse-error
:position token-pos
:proposition str
:message "unknown token"))
;; operand
((eq type 'operand)
(when last-was-operand
;; two operands next to each other often means "and" implicitly
(unless implicit-and
(error 'proposition-parse-error
:position token-pos
:proposition str
:message "expected operator, found operand"))
(multiple-value-bind (new-oper new-opan pop-count)
(apply-lower-precedent (operator-precedence 'and)
operators operands str token-pos)
(setq operators new-oper
operands new-opan)
(dotimes (i pop-count)
(pop oper-poses)))
(push 'and operators)
(push token-pos oper-poses))
(unless (member value '(true false))
(pushnew value found-vars :test 'equal))
(push value operands)
(setq last-was-operand t))
;; open and close paren don't touch `last-was-operand'
((eq value 'open-paren)
(push value operators)
(push token-pos oper-poses))
((eq value 'close-paren)
(loop while (not (eq (car operators) 'open-paren))
when (null operators) do
(error 'proposition-parse-error
:position token-pos
:proposition str
:message "no matching open parenthesis")
do
(setf (values operators operands)
(apply-one-operator operators operands
str token-pos))
(pop oper-poses))
;; remove the open-paren
(pop operators)
(pop oper-poses))
;; operator
(t
(multiple-value-bind (new-oper new-opan pop-count)
(apply-lower-precedent (operator-precedence value)
operators operands str token-pos)
(setq operators new-oper
operands new-opan)
(dotimes (i pop-count)
(pop oper-poses)))
(push value operators)
(push token-pos oper-poses)
(setq last-was-operand nil)))))
(loop while operators
for oper-pos = (pop oper-poses)
when (eq (car operators) 'open-paren) do
(error 'proposition-parse-error
:message "no matching closing parenthesis"
:proposition str
:position oper-pos)
do
(setf (values operators operands)
(apply-one-operator operators operands
str oper-pos)))
;; return variables in the order we found them
(values (car operands) (nreverse found-vars))))