guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] branch master updated: Add "read" implementation in Sche


From: Andy Wingo
Subject: [Guile-commits] branch master updated: Add "read" implementation in Scheme
Date: Sat, 13 Feb 2021 16:36:32 -0500

This is an automated email from the git hooks/post-receive script.

wingo pushed a commit to branch master
in repository guile.

The following commit(s) were added to refs/heads/master by this push:
     new 40e4e3b  Add "read" implementation in Scheme
40e4e3b is described below

commit 40e4e3b2a40bc44bf72a9ae05ea686cbdaf79e05
Author: Andy Wingo <wingo@pobox.com>
AuthorDate: Sat Feb 13 22:22:33 2021 +0100

    Add "read" implementation in Scheme
    
    * module/Makefile.am (SOURCES): Add ice-9/read.
    * module/ice-9/read.scm: New file.  The idea is to move the compiler to
      use this "read", after proving that it does the same as C.  Then we
      can switch to read-syntax that returns syntax objects with source
      locations, allowing us to annotate any datum.
---
 module/Makefile.am    |   1 +
 module/ice-9/read.scm | 866 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 867 insertions(+)

diff --git a/module/Makefile.am b/module/Makefile.am
index 86d5401..516fb3a 100644
--- a/module/Makefile.am
+++ b/module/Makefile.am
@@ -146,6 +146,7 @@ SOURCES =                                   \
   ice-9/q.scm                                  \
   ice-9/r5rs.scm                               \
   ice-9/rdelim.scm                             \
+  ice-9/read.scm                               \
   ice-9/receive.scm                            \
   ice-9/regex.scm                              \
   ice-9/runq.scm                               \
diff --git a/module/ice-9/read.scm b/module/ice-9/read.scm
new file mode 100644
index 0000000..bf4ed2f
--- /dev/null
+++ b/module/ice-9/read.scm
@@ -0,0 +1,866 @@
+;;; Scheme reader
+;;; Copyright (C) 1995-1997,1999-2001,2003-2004,2006-2012,2014-2021
+;;;   Free Software Foundation, Inc.
+;;;
+;;; This library is free software: you can redistribute it and/or modify
+;;; it under the terms of the GNU Lesser General Public License as
+;;; published by the Free Software Foundation, either version 3 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; This library is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; Lesser General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this program.  If not, see
+;;; <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;;;
+;;; Implementation of Scheme's "read".
+;;;
+;;; Code:
+
+
+;; While porting read.c to Scheme, I found these expressions that result
+;; in undesirable behavior in the C reader.  Most all of them are also
+;; present in the Scheme reader.  Probably I should fix all of them, but
+;; I would first like to prove that the Scheme reader is good enough.
+;;
+;; (call-with-input-string "," read)
+;; (read-disable 'square-brackets), then (call-with-input-string "]" read)
+;; (call-with-input-string "(#tru1)" read) => '(#t ru1)
+;; (call-with-input-string "(#true1)" read) => '(#t 1)
+;; (call-with-input-string "(#fAlse)" read) => '(#f Alse)
+;; (call-with-input-string "(#f1 #f2 #f3)" read) => error reading array
+;; #:   foo
+;; #:#|what|#foo
+;; #@-(1 2 3) => #(1 2 3)
+;; (#*10101010102) => (#*1010101010 2)
+
+(define-module (ice-9 read)
+  #:use-module (srfi srfi-11)
+  #:use-module (ice-9 textual-ports)
+  #:use-module (rnrs bytevectors)
+  #:replace (read))
+
+(define read-hash-procedures
+  (fluid->parameter %read-hash-procedures))
+
+(define (read-hash-procedure ch)
+  (assq-ref (read-hash-procedures) ch))
+
+(define (read-hash-extend ch proc)
+  (let ((alist (read-hash-procedures)))
+    (read-hash-procedures
+     (if proc
+         (assq-set! alist ch proc)
+         (assq-remove! alist ch)))))
+
+(define bitfield:record-positions? 0)
+(define bitfield:case-insensitive? 2)
+(define bitfield:keyword-style 4)
+(define bitfield:r6rs-escapes? 6)
+(define bitfield:square-brackets? 8)
+(define bitfield:hungry-eol-escapes? 10)
+(define bitfield:curly-infix? 12)
+(define bitfield:r7rs-symbols? 14)
+(define read-option-bits 16)
+
+(define read-option-mask #b11)
+(define read-option-inherit #b11)
+(define read-options-inherit-all (1- (ash 1 read-option-bits)))
+
+(define keyword-style-hash-prefix 0)
+(define keyword-style-prefix 1)
+(define keyword-style-postfix 2)
+
+(define (compute-reader-options port)
+  (let ((options (read-options))
+        (port-options (or (%port-property port 'port-read-options)
+                          read-options-inherit-all)))
+    (define-syntax-rule (option field exp)
+      (let ((port-option (logand port-options (ash read-option-mask field))))
+        (if (= port-option (ash read-option-inherit field))
+            exp
+            port-option)))
+    (define (bool key field)
+      (option field
+              (if (memq key options) (ash 1 field) 0)))
+    (define (enum key values field)
+      (option field
+              (ash (assq-ref values (and=> (memq key options) cadr)) field)))
+    (logior (bool 'positions bitfield:record-positions?)
+            (bool 'case-insensitive bitfield:case-insensitive?)
+            (enum 'keyword-style '((#f . 0) (prefix . 1) (postfix . 2))
+                  bitfield:keyword-style)
+            (bool 'r6rs-hex-escapes bitfield:r6rs-escapes?)
+            (bool 'square-brackets bitfield:square-brackets?)
+            (bool 'hungry-eol-escapes bitfield:hungry-eol-escapes?)
+            (bool 'curly-infix bitfield:curly-infix?)
+            (bool 'r7rs-symbols bitfield:r7rs-symbols?))))
+
+(define (set-option options field new)
+  (logior new (logand options (lognot (ash #b11 field)))))
+
+(define (set-port-read-option! port field value)
+  (let ((options (or (%port-property port 'port-read-options)
+                     read-options-inherit-all))
+        (new (ash value field)))
+    (%set-port-property! port 'port-read-options
+                         (set-option options field new)
+                         )))
+
+(define* (read #:optional (port (current-input-port)))
+  ;; init read options
+  (define opts (compute-reader-options port))
+  (define (enabled? field)
+    (not (zero? (logand (ash 1 field) opts))))
+  (define (set-reader-option! field value)
+    (set! opts (set-option opts field value))
+    (set-port-read-option! port field value))
+  (define (record-positions?) (enabled? bitfield:record-positions?))
+  (define (case-insensitive?) (enabled? bitfield:case-insensitive?))
+  (define (keyword-style) (logand read-option-mask
+                                  (ash opts (- bitfield:keyword-style))))
+  (define (r6rs-escapes?) (enabled? bitfield:r6rs-escapes?))
+  (define (square-brackets?) (enabled? bitfield:square-brackets?))
+  (define (hungry-eol-escapes?) (enabled? bitfield:hungry-eol-escapes?))
+  (define (curly-infix?) (enabled? bitfield:curly-infix?))
+  (define (r7rs-symbols?) (enabled? bitfield:r7rs-symbols?))
+  (define neoteric 0)
+  (define (next) (get-char port))
+  (define (peek) (lookahead-char port))
+  (define filename (port-filename port))
+  (define (get-pos) (cons (port-line port) (port-column port)))
+
+  (define (annotate line column datum)
+    ;; FIXME: Return a syntax object instead, so we can avoid the
+    ;; srcprops side table.
+    (when (and (supports-source-properties? datum)
+               ;; Line or column can be invalid via set-port-column! or
+               ;; ungetting chars beyond start of line.
+               (<= 0 line)
+               (<= 1 column))
+      ;; We always capture the column after one char of lookahead;
+      ;; subtract off that lookahead value.
+      (set-source-properties! datum `((filename . ,filename)
+                                      (line . ,line)
+                                      (column . ,(1- column)))))
+    datum)
+
+  (define (input-error msg . args)
+    (apply error msg args))
+
+  (define (read-semicolon-comment)
+    (let ((ch (next)))
+      (cond
+       ((eof-object? ch) ch)
+       ((eqv? ch #\newline) (next))
+       (else (read-semicolon-comment)))))
+
+  (define-syntax-rule (take-until first pred)
+    (let ((acc (open-output-string)))
+      (put-char acc first)
+      (let lp ()
+        (let ((ch (peek)))
+          (cond
+           ((or (eof-object? ch)
+                (pred ch))
+            (get-output-string acc))
+           (else
+            (put-char acc ch)
+            (next)
+            (lp)))))))
+  (define-syntax-rule (take-while first pred)
+    (take-until first (lambda (ch) (not (pred ch)))))
+
+  (define (delimiter? ch)
+    (or (memv ch '(#\( #\) #\; #\"
+                   #\space #\return #\ff #\newline #\tab))
+        (and (memv ch '(#\[ #\])) (or (square-brackets?) (curly-infix?)))
+        (and (memv ch '(#\{ #\})) (curly-infix?))))
+
+  (define (read-token ch)
+    (take-until ch delimiter?))
+
+  (define (read-mixed-case-symbol ch)
+    (let* ((str (read-token ch))
+           (len (string-length str)))
+      (cond
+       ((and (eq? (keyword-style) keyword-style-postfix)
+             (> len 0) (eqv? #\: (string-ref str (1- len))))
+        (let ((str (substring str 0 (1- len))))
+          (symbol->keyword
+           (string->symbol
+            (if (case-insensitive?)
+                (string-downcase str)
+                str)))))
+       (else
+        (string->symbol
+         (if (case-insensitive?)
+             (string-downcase str)
+             str))))))
+
+  (define (read-parenthesized rdelim)
+    (define (finish-curly-infix ret)
+      ;; Perform syntactic transformations on {...} lists.
+      (define (extract-infix-list ls)
+        (and (pair? ls)
+             (let ((x (car ls))
+                   (ls (cdr ls)))
+               (and (pair? ls)
+                    (let ((op (car ls))
+                          (ls (cdr ls)))
+                      (if (null? ls)
+                          (list op x)
+                          (let ((tail (extract-infix-list ls)))
+                            (and tail
+                                 (equal? op (car tail))
+                                 (cons* op x (cdr tail))))))))))
+      (cond
+       ((or (not (eqv? rdelim #\}))) ret) ; Only on {...} lists.
+       ((null? ret) ret)                  ; {} => ()
+       ((null? (cdr ret)) (car ret))      ; {x} => x
+       ((null? (cddr ret)) ret)           ; {x y} => (x y)
+       ((extract-infix-list ret))   ; {x + y + ... + z} => (+ x y ... z)
+       (else (cons '$nfx$ ret))))   ; {x y . z} => ($nfx$ x y . z)
+    (define curly? (eqv? rdelim #\}))
+    (finish-curly-infix
+     (let lp ((ch (next-non-whitespace)))
+       (when (eof-object? ch)
+         (input-error "unexpected end of input while searching for " rdelim))
+       (cond
+        ((eqv? ch rdelim) '())
+        ((or (eqv? ch #\))
+             (and (eqv? ch #\]) (or (square-brackets?) (curly-infix?)))
+             (and (eqv? ch #\}) (curly-infix?)))
+         (input-error "mismatched close paren" ch))
+        (else
+         (let ((expr (read-expr ch)))
+           ;; Note that it is possible for scm_read_expression to
+           ;; return `.', but not as part of a dotted pair: as in
+           ;; #{.}#.  Indeed an example is here!
+           (if (and (eqv? ch #\.) (eq? expr '#{.}#))
+               (let* ((tail (read-expr (next-non-whitespace)))
+                      (close (next-non-whitespace)))
+                 (unless (eqv? close rdelim)
+                   (input-error "missing close paren" rdelim))
+                 tail)
+               (cons expr (lp (next-non-whitespace))))))))))
+
+  (define (hex-digit ch)
+    (case ch
+      ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
+       (- (char->integer ch) (char->integer #\0)))
+      ((#\a #\b #\c #\d #\e #\f)
+       (+ 10 (- (char->integer ch) (char->integer #\a))))
+      ((#\A #\B #\C #\D #\E #\F)
+       (+ 10 (- (char->integer ch) (char->integer #\A))))
+      (else #f)))
+    
+  (define (read-r6rs-hex-escape)
+    (let ((ch (next)))
+      (cond
+       ((hex-digit ch) =>
+        (lambda (res)
+          (let lp ((res res))
+            (let ((ch (next)))
+              (cond
+               ((hex-digit ch) => (lambda (digit) (lp (+ (* 16 res) digit))))
+               ((eqv? ch #\;) (integer->char res))
+               (else
+                (input-error "invalid character in escape sequence: ~S" 
ch)))))))
+       (else
+        (input-error "invalid character in escape sequence: ~S" ch)))))
+
+  (define (read-fixed-hex-escape len)
+    (let lp ((len len) (res 0))
+      (if (zero? len)
+          (integer->char res)
+          (let ((ch (next)))
+            (cond
+             ((hex-digit ch) =>
+              (lambda (digit)
+                (lp (1- len) (+ (* res 16) digit))))
+             (else
+              (input-error "invalid character in escape sequence: ~S" ch)))))))
+
+  (define (read-string rdelim)
+    (let ((acc (open-output-string)))
+      (let lp ()
+        (let ((ch (next)))
+          (cond
+           ((eof-object? ch)
+            (input-error "unexpected end of input while reading string"))
+           ((eqv? ch rdelim)
+            (get-output-string acc))
+           ((eqv? ch #\\)
+            (let ((ch (next)))
+              (when (eof-object? ch)
+                (input-error "unexpected end of input while reading string"))
+              (case ch
+                ((#\newline)
+                 (when (hungry-eol-escapes?)
+                   ;; Skip intraline whitespace before continuing.
+                   (let lp ()
+                     (let ((ch (peek)))
+                       (unless (or (eof-object? ch)
+                                   (eqv? ch #\tab)
+                                   (eq? (char-general-category ch) 'Zs))
+                         (next)
+                         (lp))))))
+                ;; Accept "\(" for use at the beginning of
+                ;; lines in multiline strings to avoid
+                ;; confusing emacs lisp modes.
+                ((#\| #\\ #\() (put-char acc ch))
+                ((#\0)         (put-char acc #\nul))
+                ((#\f)         (put-char acc #\ff))
+                ((#\n)         (put-char acc #\newline))
+                ((#\r)         (put-char acc #\return))
+                ((#\t)         (put-char acc #\tab))
+                ((#\a)         (put-char acc #\alarm))
+                ((#\v)         (put-char acc #\vtab))
+                ((#\b)         (put-char acc #\backspace))
+                ((#\x)
+                 (let ((ch (if (or (r6rs-escapes?) (eqv? rdelim #\|))
+                               (read-r6rs-hex-escape)
+                               (read-fixed-hex-escape 2))))
+                   (put-char acc ch)))
+                ((#\u)
+                 (put-char acc (read-fixed-hex-escape 4)))
+                ((#\U)
+                 (put-char acc (read-fixed-hex-escape 8)))
+                (else
+                 (unless (eqv? ch rdelim)
+                   (input-error "invalid character in escape sequence: ~S" ch))
+                 (put-char acc ch)))
+              (lp)))
+           (else
+            (put-char acc ch)
+            (lp)))))))
+
+  (define (read-character)
+    (let ((ch (next)))
+      (cond
+       ((eof-object? ch)
+        (input-error "unexpected end of input after #\\"))
+       (else
+        (let* ((tok (read-token ch))
+               (len (string-length tok)))
+          (define dotted-circle #\x25cc)
+          (define r5rs-charnames
+            '(("space" . #\x20) ("newline" . #\x0a)))
+          (define r6rs-charnames
+            '(("nul" . #\x00) ("alarm" . #\x07) ("backspace" . #\x08)
+              ("tab" . #\x09) ("linefeed" . #\x0a) ("vtab" . #\x0b)
+              ("page" . #\x0c) ("return" . #\x0d) ("esc" . #\x1b)
+              ("delete" . #\x7f)))
+          (define r7rs-charnames
+            '(("escape" . #\x1b)))
+          (define C0-control-charnames
+            '(("nul" . #\x00) ("soh" . #\x01) ("stx" . #\x02)
+              ("etx" . #\x03) ("eot" . #\x04) ("enq" . #\x05)
+              ("ack" . #\x06) ("bel" . #\x07) ("bs"  . #\x08)
+              ("ht"  . #\x09) ("lf"  . #\x0a) ("vt"  . #\x0b)
+              ("ff"  . #\x0c) ("cr"  . #\x0d) ("so"  . #\x0e)
+              ("si"  . #\x0f) ("dle" . #\x10) ("dc1" . #\x11)
+              ("dc2" . #\x12) ("dc3" . #\x13) ("dc4" . #\x14)
+              ("nak" . #\x15) ("syn" . #\x16) ("etb" . #\x17)
+              ("can" . #\x18) ("em"  . #\x19) ("sub" . #\x1a)
+              ("esc" . #\x1b) ("fs"  . #\x1c) ("gs"  . #\x1d)
+              ("rs"  . #\x1e) ("us"  . #\x1f) ("sp"  . #\x20)
+              ("del" . #\x7f)))
+          (define alt-charnames
+            '(("null" . #\x0) ("nl" . #\x0a) ("np" . #\x0c)))
+          ;; Although R6RS and R7RS charnames specified as being
+          ;; case-sensitive, Guile matches them case-insensitively, like
+          ;; other char names.
+          (define (named-char tok alist)
+            (let lp ((alist alist))
+              (and (pair? alist)
+                   (if (string-ci=? tok (caar alist))
+                       (cdar alist)
+                       (lp (cdr alist))))))
+          (cond
+           ((= len 1) ch)
+           ((and (= len 2) (eqv? (string-ref tok 1) dotted-circle))
+            ;; Ignore dotted circles, which may be used to keep
+            ;; combining characters from combining with the backslash in
+            ;; #\charname.
+            ch)
+           ((and (<= (char->integer #\0) (char->integer ch) (char->integer 
#\7))
+                 (string->number tok 8))
+            ;; Specifying a codepoint as an octal value.
+            => integer->char)
+           ((and (eqv? ch #\x) (> len 1)
+                 (string->number (substring tok 1) 16))
+            ;; Specifying a codepoint as an hexadecimal value.  Skip
+            ;; initial "x".
+            => integer->char)
+           ((named-char tok r5rs-charnames))
+           ((named-char tok r6rs-charnames))
+           ((named-char tok r7rs-charnames))
+           ((named-char tok C0-control-charnames))
+           ((named-char tok alt-charnames))
+           (else
+            (input-error "unknown character name ~a" tok))))))))
+
+  (define (read-vector)
+    (list->vector (read-parenthesized #\))))
+
+  (define (read-srfi-4-vector ch)
+    (read-array ch))
+
+  (define (maybe-read-boolean-tail tail)
+    (let ((len (string-length tail)))
+      (let lp ((i 0))
+        (or (= i len)
+            (let ((ch (peek)))
+              (and (not (eof-object? ch))
+                   (eqv? (char-downcase ch) (string-ref tail i))
+                   (or (begin
+                         (next)
+                         (lp (1+ i)))
+                       (begin
+                         (unget-char port ch)
+                         #f))))))))
+
+  (define (read-false-or-srfi-4-vector)
+    (let ((ch (peek)))
+      (if (or (eqv? ch #\3)
+              (eqv? ch #\6))
+          (read-srfi-4-vector #\f)
+          (begin
+            (maybe-read-boolean-tail "alse")
+            #f))))
+
+  (define (read-bytevector)
+    (define (expect ch)
+      (unless (eqv? (next) ch)
+        (input-error "invalid bytevector prefix" ch)))
+    (expect #\u)
+    (expect #\8)
+    (expect #\()
+    (u8-list->bytevector (read-parenthesized #\))))
+
+  ;; FIXME: We should require a terminating delimiter.
+  (define (read-bitvector)
+    (list->bitvector
+     (let lp ()
+       (let ((ch (peek)))
+         (case ch
+           ((#\0) (next) (cons #f (lp)))
+           ((#\1) (next) (cons #t (lp)))
+           (else '()))))))
+
+  (define (read-boolean ch)
+    ;; Historically, Guile hasn't required a delimiter after #f / #t.
+    ;; When the longer #false / #true forms were added, we kept this
+    ;; behavior.  It is terrible and we should change it!!
+    (case ch
+      ((#\t #\T)
+       (maybe-read-boolean-tail "rue")
+       #t)
+      (else
+       (maybe-read-boolean-tail "alse")
+       #f)))
+
+  (define (read-keyword)
+    (let ((ch (next-non-whitespace)))
+      (when (eof-object? ch)
+        (input-error "end of input while reading keyword"))
+      (let ((expr (read-expr ch)))
+        (unless (symbol? expr)
+          (input-error "keyword prefix #: not followed by a symbol: ~a"
+                       expr))
+        (symbol->keyword expr))))
+
+  (define (read-array ch)
+    (define (read-decimal-integer ch alt)
+      ;; This parser has problems but it's what Guile's read.c does.  Any
+      ;; fix should come later and to both of them.
+      (define (decimal-digit ch)
+        (and (not (eof-object? ch))
+             (let ((digit (- (char->integer ch) (char->integer #\0))))
+               (and (<= 0 digit 9) digit))))
+      (let-values (((sign ch) (if (eqv? ch #\-)
+                                  (values -1 (next))
+                                  (values 1 ch))))
+        (let lp ((ch ch) (res #f))
+          (cond
+           ((decimal-digit ch)
+            => (lambda (digit)
+                 (lp (next) (if res (+ (* 10 res) digit) digit))))
+           (else
+            (values ch (if res (* res sign) alt)))))))
+    (define (read-rank ch)
+      (let-values (((ch rank) (read-decimal-integer ch 1)))
+        (when (< rank 0)
+          (input-error "array rank must be non-negative"))
+        (when (eof-object? ch)
+          (input-error "unexpected end of input while reading array"))
+        (values ch rank)))
+    (define (read-tag ch)
+      (let lp ((ch ch) (chars '()))
+        (when (eof-object? ch)
+          (input-error "unexpected end of input while reading array"))
+        (if (memv ch '(#\( #\@ @\:))
+            (values ch
+                    (if (null? chars)
+                        #t
+                        (string->symbol (list->string (reverse chars)))))
+            (lp (next) (cons ch chars)))))
+    (define (read-dimension ch)
+      (let*-values (((ch lbnd) (if (eqv? ch #\@)
+                                   (read-decimal-integer (next) 0)
+                                   (values ch 0)))
+                    ((ch len) (if (eqv? ch #\:)
+                                  (read-decimal-integer (next) 0)
+                                  (values ch #f))))
+        (when (and len (< len 0))
+          (input-error "array length must be non-negative"))
+        (when (eof-object? ch)
+          (input-error "unexpected end of input while reading array"))
+        (values ch
+                (if len
+                    (if (zero? lbnd)
+                        len
+                        (list lbnd (+ lbnd (1- len))))
+                    lbnd))))
+    (define (read-shape ch alt)
+      (if (memv ch '(#\@ @\:))
+          (let*-values (((ch head) (read-dimension ch))
+                        ((ch tail) (read-shape ch '())))
+            (values ch (cons head tail)))
+          (values ch alt)))
+    (define (read-elements ch rank)
+      (unless (eqv? ch #\()
+        (input-error "missing '(' in vector or array literal"))
+      (let ((elts (read-parenthesized #\))))
+        (if (zero? rank)
+            (begin
+              ;; Handle special print syntax of rank zero arrays; see
+              ;; scm_i_print_array for a rationale.
+              (when (null? elts)
+                (input-error "too few elements in array literal, need 1"))
+              (unless (null? (cdr elts))
+                (input-error "too many elements in array literal, need 1"))
+              (car elts))
+            elts)))
+    (let*-values (((ch rank) (read-rank ch))
+                  ((ch tag) (read-tag ch))
+                  ((ch shape) (read-shape ch rank))
+                  ((elts) (read-elements ch rank)))
+      (when (and (pair? shape) (not (eqv? (length shape) rank)))
+        (input-error
+         "the number of shape specifications must match the array rank"))
+      (list->typed-array tag shape elts)))
+
+  (define (read-number-and-radix ch)
+    (let ((tok (string-append "#" (read-token ch))))
+      (or (string->number tok)
+          (input-error "unknown # object"))))
+
+  (define (read-extended-symbol)
+    (define (next-not-eof)
+      (let ((ch (next)))
+        (when (eof-object? ch)
+          (input-error "end of input while reading symbol"))
+        ch))
+    (string->symbol
+     (list->string
+      (let lp ((saw-brace? #f))
+        (let ((ch (next-not-eof)))
+          (cond
+           (saw-brace?
+            (if (eqv? ch #\#)
+                '()
+                (cons #\} (lp #f))))
+           ((eqv? ch #\})
+            (lp #t))
+           ((eqv? ch #\\)
+            ;; It used to be that print.c would print extended-read-syntax
+            ;; symbols with backslashes before "non-standard" chars, but
+            ;; this routine wouldn't do anything with those escapes.
+            ;; Bummer.  What we've done is to change print.c to output
+            ;; R6RS hex escapes for those characters, relying on the fact
+            ;; that the extended read syntax would never put a `\' before
+            ;; an `x'.  For now, we just ignore other instances of
+            ;; backslash in the string.
+            (let* ((ch (next-not-eof))
+                   (ch (if (eqv? ch #\x)
+                           (read-r6rs-hex-escape)
+                           ch)))
+              (cons ch (lp #f))))
+           (else
+            (cons ch (lp #f)))))))))
+
+  (define (read-nil)
+    ;; Have already read "#\n" -- now read "il".
+    (let ((id (read-mixed-case-symbol #\n)))
+      (unless (eq? id 'nil)
+        (input-error "unexpected input while reading #nil: ~a" id))
+      #nil))
+
+  (define (read-sharp)
+    (let* ((ch (next)))
+      (cond
+       ((eof-object? ch)
+        (input-error "unexpected end of input after #"))
+       ((read-hash-procedure ch)
+        => (lambda (proc) (proc ch)))
+       (else
+        (case ch
+          ((#\\) (read-character))
+          ((#\() (read-vector))
+          ((#\s #\u \c) (read-srfi-4-vector ch))
+          ((#\f) (read-false-or-srfi-4-vector))
+          ((#\v) (read-bytevector))
+          ((#\*) (read-bitvector))
+          ((#\t #\T #\F) (read-boolean ch))
+          ((#\:) (read-keyword))
+          ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\@)
+           (read-array ch))
+          ((#\i #\e #\b #\B #\o #\O #\d #\D #\x #\X #\I #\E)
+           (read-number-and-radix ch))
+          ((#\{) (read-extended-symbol))
+          ((#\') (list 'syntax (read-expr (next-non-whitespace))))
+          ((#\`) (list 'quasisyntax (read-expr (next-non-whitespace))))
+          ((#\,)
+           (if (eqv? #\@ (peek))
+               (begin
+                 (next)
+                 (list 'unsyntax-splicing (read-expr (next-non-whitespace))))
+               (list 'unsyntax (read-expr (next-non-whitespace)))))
+          ((#\n) (read-nil))
+          (else
+           (input-error "Unknown # object: ~S" ch)))))))
+
+  (define (read-number ch)
+    (let* ((str (read-token ch)))
+      (or (string->number str)
+          (string->symbol (if (case-insensitive?)
+                              (string-downcase str)
+                              str)))))
+
+  (define (read-expr* ch)
+    (case ch
+      ((#\{)
+       (cond
+        ((curly-infix?)
+         (set! neoteric (1+ neoteric))
+         (let ((expr (read-parenthesized #\})))
+           (set! neoteric (1- neoteric))
+           expr))
+        (else
+         (read-mixed-case-symbol ch))))
+      ((#\[)
+       (cond
+        ((square-brackets?)
+         (read-parenthesized #\]))
+        ((curly-infix?)
+         ;; The syntax of neoteric expressions requires that '[' be a
+         ;; delimiter when curly-infix is enabled, so it cannot be part
+         ;; of an unescaped symbol.  We might as well do something
+         ;; useful with it, so we adopt Kawa's convention:  [...] =>
+         ;; ($bracket-list$ ...)
+         ;; FIXME: source locations for this cons
+         (cons '$bracket-list$ (read-parenthesized #\])))
+        (else
+         (read-mixed-case-symbol ch))))
+      ((#\()
+       (read-parenthesized #\)))
+      ((#\")
+       (read-string ch))
+      ((#\|)
+       (if (r7rs-symbols?)
+           (string->symbol (read-string ch))
+           (read-mixed-case-symbol ch)))
+      ((#\')
+       (list 'quote (read-expr (next-non-whitespace))))
+      ((#\`)
+       (list 'quasiquote (read-expr (next-non-whitespace))))
+      ((#\,)
+       (cond
+        ((eqv? #\@ (peek))
+         (next)
+         (list 'unquote-splicing (read-expr (next-non-whitespace))))
+        (else
+         (list 'unquote (read-expr (next-non-whitespace))))))
+      ((#\#)
+       ;; FIXME: read-sharp should recur if we read a comment
+       (read-sharp))
+      ((#\))
+       (input-error "unexpected \")\""))
+      ((#\})
+       (if (curly-infix?)
+           (input-error "unexpected \"}\"")
+           (read-mixed-case-symbol ch)))
+      ((#\])
+       (if (square-brackets?)
+           (input-error "unexpected \"]\"")
+           (read-mixed-case-symbol ch)))
+      ((#f)
+       ;; EOF.
+       the-eof-object)
+      ((#\:)
+       (if (eq? (keyword-style) keyword-style-prefix)
+           ;; FIXME: Don't skip whitespace here.
+           (let ((ch (next-non-whitespace)))
+             (when (eof-object? ch)
+               (input-error "unexpected end of input while reading :keyword"))
+             (symbol->keyword (read-expr ch)))
+           (read-mixed-case-symbol ch)))
+      ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\+ #\- #\.)
+       (read-number ch))
+      (else
+       (read-mixed-case-symbol ch))))
+
+  (define (read-neoteric ch)
+    (let lp ((expr (read-expr ch)))
+      ;; 'expr' is the first component of the neoteric expression.  If
+      ;; the next character is '(', '[', or '{', (without any
+      ;; intervening whitespace), we use it to construct a new
+      ;; expression, and loop.  For example:
+      ;; f{n - 1}(x) => ((f (- n 1)) x).
+      (case (peek)
+        ((#\() ;; e(...) => (e ...)
+         (next)
+         (lp (cons expr (read-parenthesized #\)))))
+        ((#\[) ;; e[...] => ($bracket-apply$ e ...)
+         (next)
+         (lp (cons* '$bracket-apply$ expr (read-parenthesized #\]))))
+        ((#\{) ;; e{} => (e); e{...} => (e {...})
+         (next)
+         (let ((args (read-parenthesized #\})))
+           (lp (if (null? args)
+                   (list expr)
+                   (list expr args)))))
+        (else
+         expr))))
+
+  (define (read-expr ch)
+    (let ((line (port-line port))
+          (column (port-column port)))
+      (annotate
+       line
+       column
+       (if (zero? neoteric)
+           (read-expr* ch)
+           (read-neoteric ch)))))
+
+  (define (read-directive)
+    (let ((ch (next)))
+      (cond
+       ((eof-object? ch)
+        (input-error "unexpected end of input after #!"))
+       (else
+        (string->symbol
+         (take-while ch (lambda (ch)
+                          (or (eqv? ch #\-) (char-alphabetic? ch)))))))))
+
+  (define (skip-scsh-comment)
+    (let lp ((ch (next)))
+      (cond
+       ((eof-object? ch)
+        (input-error "unexpected end of input while looking for !#"))
+       ((eqv? ch #\!)
+        (let ((ch (next)))
+          (if (eqv? ch #\#)
+              (next)
+              (lp ch))))
+       (else
+        (lp (next))))))
+
+  (define (process-shebang)
+    ;; After having read #!, we complete either with #!r6rs,
+    ;; #!fold-case, #!no-fold-case, #!curly-infix,
+    ;; #!curly-infix-and-bracket-lists, or a SCSH block comment
+    ;; terminated by !#.
+    (let ((sym (read-directive)))
+      (cond
+       ((eq? sym 'r6rs)
+        (set-reader-option! bitfield:case-insensitive? 0)
+        (set-reader-option! bitfield:r6rs-escapes? 1)
+        (set-reader-option! bitfield:square-brackets? 1)
+        (set-reader-option! bitfield:keyword-style keyword-style-hash-prefix)
+        (set-reader-option! bitfield:hungry-eol-escapes? 1)
+        (next))
+       ((eq? sym 'fold-case)
+        (set-reader-option! bitfield:case-insensitive? 1)
+        (next))
+       ((eq? sym 'no-fold-case)
+        (set-reader-option! bitfield:case-insensitive? 0)
+        (next))
+       ((eq? sym 'curly-infix)
+        (set-reader-option! bitfield:curly-infix? 1)
+        (next))
+       ((eq? sym 'curly-infix-and-bracket-lists)
+        (set-reader-option! bitfield:curly-infix? 1)
+        (set-reader-option! bitfield:square-brackets? 0)
+        (next))
+       (else
+        (skip-scsh-comment)))))
+
+  (define (skip-eol-comment)
+    (let ((ch (next)))
+      (cond
+       ((eof-object? ch) ch)
+       ((eq? ch #\newline) (next))
+       (else (skip-eol-comment)))))
+
+  ;; Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
+  ;; nested.
+  (define (skip-r6rs-block-comment)
+    ;; We have read #|, now looking for |#.
+    (let ((ch (next)))
+      (when (eof-object? ch)
+        (input-error "unterminated `#| ... |#' comment"))
+      (cond
+       ((and (eqv? ch #\|) (eqv? (peek) #\#))
+        ;; Done.
+        (next)
+        (values))
+       ((and (eqv? ch #\#) (eqv? (peek) #\|))
+        ;; A nested comment.
+        (next)
+        (skip-r6rs-block-comment)
+        (skip-r6rs-block-comment))
+       (else
+        (skip-r6rs-block-comment)))))
+
+  (define (next-non-whitespace)
+    (let lp ((ch (next)))
+      (case ch
+        ((#\;)
+         (lp (skip-eol-comment)))
+        ((#\#)
+         (case (peek)
+           ((#\!)
+            (next)
+            (lp (process-shebang)))
+           ((#\;)
+            (next)
+            (let ((ch (next-non-whitespace)))
+              (when (eof-object? ch)
+                (input-error "no expression after #; comment"))
+              (read-expr ch))
+            (next-non-whitespace))
+           ((#\|)
+            (if (read-hash-procedure #\|)
+                ch
+                (begin
+                  (next)
+                  (skip-r6rs-block-comment)
+                  (next-non-whitespace))))
+           (else ch)))
+        ((#\space #\return #\ff #\newline #\tab)
+         (next-non-whitespace))
+        (else ch))))
+
+  (let ((ch (next-non-whitespace)))
+    (if (eof-object? ch)
+        ch
+        (read-expr ch))))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]