Initial commit
This commit is contained in:
170
url-encode.lisp
Normal file
170
url-encode.lisp
Normal file
@ -0,0 +1,170 @@
|
||||
(in-package :cl-xdg-trash/url-encode)
|
||||
|
||||
(declaim (ftype (function (character) t) url-unreserved-character-p))
|
||||
(defun url-unreserved-character-p (char)
|
||||
"Return non-nil if CHAR is an unreserved character in a URL."
|
||||
(or (member char '(#\- #\_ #\~ #\.) :test #'eql)
|
||||
(let ((code (char-code char)))
|
||||
(or (and (>= code (char-code #\a))
|
||||
(<= code (char-code #\z)))
|
||||
(and (>= code (char-code #\A))
|
||||
(<= code (char-code #\Z)))))))
|
||||
|
||||
(deftype utf-8-code-point ()
|
||||
'(integer 0 #x10FFFF))
|
||||
|
||||
(deftype utf-8-middle-byte ()
|
||||
'(integer #b10000000 #b10111111))
|
||||
|
||||
(deftype utf-8-char ()
|
||||
'(or
|
||||
(cons (integer 0 #b1010111) null)
|
||||
|
||||
(cons (integer #b11000000 #b11011111) (cons utf-8-middle-byte null))
|
||||
|
||||
(cons (integer #b11100000 #b11101111)
|
||||
(cons utf-8-middle-byte (cons utf-8-middle-byte null)))
|
||||
|
||||
(cons (integer #b11110000 #b11110111)
|
||||
(cons utf-8-middle-byte (cons utf-8-middle-byte (cons utf-8-middle-byte null))))))
|
||||
|
||||
(declaim (ftype (function (character) list-of-code-points) utf-8-char))
|
||||
(defun utf-8-encode-char (char)
|
||||
"Encode CHAR, a character, to a list of bytes that make up its UTF-8
|
||||
representation."
|
||||
(let* ((code (char-code char))
|
||||
(length (integer-length code)))
|
||||
(cond
|
||||
((<= length 7) (list code))
|
||||
((<= length 11) (list (+ (ash code -6) #b11000000)
|
||||
(+ (logand code #b111111) #b10000000)))
|
||||
((<= length 16) (list (+ (ash code -12) #b11100000)
|
||||
(+ (logand (ash code -6) #b111111) #b10000000)
|
||||
(+ (logand code #b111111) #b10000000)))
|
||||
(t (list (+ (ash code -24) #b11110000)
|
||||
(+ (logand (ash code -12) #b111111) #b10000000)
|
||||
(+ (logand (ash code -6) #b111111) #b10000000)
|
||||
(+ (logand code #b111111) #b10000000))))))
|
||||
|
||||
(declaim (ftype (function (utf-8-char) character) utf-8-decode-char))
|
||||
(defun utf-8-decode-char (char)
|
||||
"Decode a list of up to 4 UTF-8 bytes to and return the character the
|
||||
represent."
|
||||
(check-type char utf-8-char)
|
||||
(destructuring-bind (&optional part1 part2 part3 part4) char
|
||||
(declare (type (or null (integer 0 #xFF)) part1 part2 part3 part4))
|
||||
(code-char
|
||||
(cond
|
||||
;; 4 byte
|
||||
(part4 (+ (ash (logand part1 #b111) 18)
|
||||
(ash (logand part2 #b111111) 12)
|
||||
(ash (logand part3 #b111111) 6)
|
||||
(logand part4 #b111111)))
|
||||
;; 3 bytes
|
||||
(part3 (+ (ash (logand part1 #b1111) 12)
|
||||
(ash (logand part2 #b111111) 6)
|
||||
(logand part3 #b111111)))
|
||||
;; 2 bytes
|
||||
(part2 (+ (ash (logand part1 #b11111) 6)
|
||||
(logand part2 #b111111)))
|
||||
;; 1 byte
|
||||
(t part1)))))
|
||||
|
||||
(declaim (ftype (function (character) string) escape-char-for-url))
|
||||
(defun escape-char-for-url (char)
|
||||
"Escape CHAR such that it is safe to include in a URL."
|
||||
(if (url-unreserved-character-p char)
|
||||
(string char)
|
||||
(format nil "~{%~X~}" (utf-8-encode-char char))))
|
||||
|
||||
(declaim (ftype (function (string) string) url-encode))
|
||||
(defun url-encode (string)
|
||||
"URL encode (percent escape) STRING."
|
||||
(format nil "~{~A~}" (map 'list 'escape-char-for-url string)))
|
||||
|
||||
(define-condition url-decode-error (error)
|
||||
((string :accessor url-decode-error-string
|
||||
:initarg :string
|
||||
:type string
|
||||
:documentation "The string which could not be decoded.")
|
||||
(index :accessor url-decode-error-index
|
||||
:initarg :index
|
||||
:type integer
|
||||
:documentation "The index of the problematic character."))
|
||||
(:report (lambda (condition stream)
|
||||
(format stream "Failed to URL decode ~S starting from index ~S."
|
||||
(url-decode-error-string condition)
|
||||
(url-decode-error-index condition)))))
|
||||
|
||||
(declaim (ftype (function (integer) integer)
|
||||
count-url-characters-from-first-byte))
|
||||
(defun count-url-characters-from-first-byte (first-byte)
|
||||
"Return the number of characters following FIRST-BYTE in a URL encoded
|
||||
string. FIRST-BYTE is not included. This does not work if FIRST-BYTE starts a
|
||||
one byte escape sequence."
|
||||
(case (ash first-byte -4)
|
||||
(#b1111 3)
|
||||
(#b1110 2)
|
||||
(t 1)))
|
||||
|
||||
(declaim (ftype (function ((integer 0 #xFF) string integer)
|
||||
(values character integer))
|
||||
parse-next-n-url-characters))
|
||||
(defun parse-next-n-url-characters (first-byte string start)
|
||||
"Prase the next escaped sequence from STRING starting at START which began
|
||||
with FIRST-BYTE. FIRST-BYTE should not be included."
|
||||
(let ((count (count-url-characters-from-first-byte first-byte)))
|
||||
(when (> 0 (- (length string) start (* count 3)))
|
||||
(error 'url-decode-error :string string :index start))
|
||||
(loop for time below count
|
||||
for i = start then (+ i 3)
|
||||
unless (eql #\% (aref string i))
|
||||
do (error 'url-decode-error :string string :index i)
|
||||
collect (handler-case
|
||||
(parse-integer string :start (1+ i) :end (+ i 3)
|
||||
:radix 16)
|
||||
(parse-error ()
|
||||
(error 'url-decode-error :string string :index (1+ i))))
|
||||
into out
|
||||
finally (return (values (handler-case
|
||||
(utf-8-decode-char (cons first-byte out))
|
||||
(type-error ()
|
||||
(error 'url-decode-error :string string
|
||||
:index i)))
|
||||
(* (1+ count) 3))))))
|
||||
|
||||
(declaim (ftype (function (string &key (:start integer))
|
||||
(values character integer))
|
||||
parse-url-encoded-char))
|
||||
(defun parse-url-encoded-char (string &key (start 0))
|
||||
"Parse the next URL encoded character in STRING, starting at START, and return
|
||||
it. This function assumes that the character at START is a percent sign."
|
||||
(when (> 0 (- (length string) start 3))
|
||||
(error 'url-decode-error :string string :index start))
|
||||
(let ((first-byte (handler-case
|
||||
(parse-integer string :start (1+ start) :end (+ start 3)
|
||||
:radix 16)
|
||||
(parse-error ()
|
||||
(error 'url-decode-error :string string
|
||||
:index (1+ start))))))
|
||||
(if (zerop (logand first-byte #b10000000))
|
||||
(values (code-char first-byte) 3)
|
||||
(parse-next-n-url-characters first-byte string (+ start 3)))))
|
||||
|
||||
(declaim (ftype (function (string &key (:start integer) (:end integer))
|
||||
string)
|
||||
url-decode))
|
||||
(defun url-decode (string &key (start 0) (end (length string)))
|
||||
"Decode STRING, which is percent escaped."
|
||||
(coerce (loop with index = start
|
||||
while (< index end)
|
||||
for char = (aref string index)
|
||||
if (eql char #\%)
|
||||
collect (multiple-value-bind (decoded skip-count)
|
||||
(parse-url-encoded-char string :start index)
|
||||
(incf index skip-count)
|
||||
decoded )
|
||||
else
|
||||
collect char
|
||||
and do (incf index))
|
||||
'string))
|
Reference in New Issue
Block a user