[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#48758: [PATCH] Elisp reader does not support non-decimal integers
From: |
Ivan Sokolov |
Subject: |
bug#48758: [PATCH] Elisp reader does not support non-decimal integers |
Date: |
Mon, 31 May 2021 15:04:42 +0300 |
This patch improves Elisp compatibility.
I added support for binary, octal, hexadecimal and arbitrary radix
integer literals as described in Elisp manual [1], except for the final
period. It should not take long to add support for the final period, but
in this patch I did not do this because Emacs itself does not support
final period for non-decimal integers.
[1]:
https://www.gnu.org/software/emacs/manual/html_node/elisp/Integer-Basics.html
diff --git a/module/language/elisp/lexer.scm b/module/language/elisp/lexer.scm
index 5a0e6b3ff..1066ed0c2 100644
--- a/module/language/elisp/lexer.scm
+++ b/module/language/elisp/lexer.scm
@@ -186,9 +186,22 @@
;;; against regular expressions to determine if it is possibly an
;;; integer or a float.
-(define integer-regex (make-regexp "^[+-]?[0-9]+\\.?$"))
-
-(define float-regex
+(define make-integer-regexp
+ (let ((alphabet "0123456789abcdefghijklmnopqrstuvwxyz"))
+ (lambda (radix)
+ (unless (<= 2 radix 36)
+ (error "invalid radix" radix))
+ (let ((pat (string-append "^[+-]?[" (string-take alphabet radix) "]+$")))
+ (make-regexp pat regexp/icase)))))
+
+(define get-integer-regexp
+ (let ((ht (make-hash-table)))
+ (hash-set! ht 10 (make-regexp "^[+-]?[0-9]+\\.?$"))
+ (lambda (radix)
+ (or (hash-ref ht radix)
+ (hash-set! ht radix (make-integer-regexp radix))))))
+
+(define float-regexp
(make-regexp
"^[+-]?([0-9]+\\.?[0-9]*|[0-9]*\\.?[0-9]+)(e[+-]?[0-9]+)?$"))
@@ -197,7 +210,9 @@
(define no-escape-punctuation (string->char-set "-+=*/_~!@$%^&:<>{}?."))
-(define (get-symbol-or-number port)
+(define (get-symbol-or-number port radix)
+ (define integer-regexp
+ (and=> radix get-integer-regexp))
(let iterate ((result-chars '())
(had-escape #f))
(let* ((c (read-char port))
@@ -207,10 +222,11 @@
(values
(cond
((and (not had-escape)
- (regexp-exec integer-regex result))
+ (regexp? integer-regexp)
+ (regexp-exec integer-regexp result))
'integer)
((and (not had-escape)
- (regexp-exec float-regex result))
+ (regexp-exec float-regexp result))
'float)
(else 'symbol))
result))))
@@ -228,11 +244,20 @@
(unread-char c port)
(finish))))))
+(define (string->integer str radix)
+ (let* ((num (string->number str radix))
+ (exact (and=> num inexact->exact)))
+ (unless (number? num)
+ (error "expected number" str))
+ (unless (integer? exact)
+ (error "expected integer" str))
+ exact))
+
;;; Parse a circular structure marker without the leading # (which was
;;; already read and recognized), that is, a number as identifier and
;;; then either = or #.
-(define (get-circular-marker port)
+(define (get-circular-marker-or-number port)
(call-with-values
(lambda ()
(let iterate ((result 0))
@@ -241,13 +266,20 @@
(let ((val (- (char->integer cur) (char->integer #\0))))
(iterate (+ (* result 10) val)))
(values result cur)))))
- (lambda (id type)
- (case type
- ((#\#) `(circular-ref . ,id))
- ((#\=) `(circular-def . ,id))
- (else (lexer-error port
- "invalid circular marker character"
- type))))))
+ (lambda (result last-ch)
+ (case last-ch
+ ((#\#) `(circular-ref . ,result))
+ ((#\=) `(circular-def . ,result))
+ ((#\r)
+ (call-with-values
+ (lambda ()
+ (get-symbol-or-number port result))
+ (lambda (type str)
+ (unless (eq? type 'integer)
+ (error "invalid integer read syntax" type str result))
+ `(integer . ,(string->integer str result)))))
+ (else
+ (lexer-error port "invalid circular marker character" last-ch))))))
;;; Main lexer routine, which is given a port and does look for the next
;;; token.
@@ -334,13 +366,21 @@
(case c
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
(unread-char c port)
- (let ((mark (get-circular-marker port)))
+ (let ((mark (get-circular-marker-or-number port)))
(return (car mark) (cdr mark))))
((#\')
(return 'function #f))
+ ((#\b #\o #\x)
+ (let ((radix (case c ((#\b) 2) ((#\o) 8) ((#\x) 16))))
+ (call-with-values
+ (lambda () (get-symbol-or-number port radix))
+ (lambda (type str)
+ (unless (eq? type 'integer)
+ (error "invalid integer read syntax" type str radix))
+ (return 'integer (string->integer str radix))))))
((#\:)
(call-with-values
- (lambda () (get-symbol-or-number port))
+ (lambda () (get-symbol-or-number port #f))
(lambda (type str)
(return 'symbol (make-symbol str))))))))
;; Parentheses and other special-meaning single characters.
@@ -363,7 +403,7 @@
(else
(unread-char c port)
(call-with-values
- (lambda () (get-symbol-or-number port))
+ (lambda () (get-symbol-or-number port 10))
(lambda (type str)
(case type
((symbol)
@@ -384,12 +424,7 @@
;; string->number returns an inexact real. Thus we need
;; a conversion here, but it should always result in an
;; integer!
- (return
- 'integer
- (let ((num (inexact->exact (string->number str))))
- (if (not (integer? num))
- (error "expected integer" str num))
- num)))
+ (return 'integer (string->integer str 10)))
((float)
(return 'float (let ((num (string->number str)))
(if (exact? num)
diff --git a/test-suite/tests/elisp-reader.test
b/test-suite/tests/elisp-reader.test
index 669c4d592..b84e4756f 100644
--- a/test-suite/tests/elisp-reader.test
+++ b/test-suite/tests/elisp-reader.test
@@ -75,21 +75,45 @@
(symbol . abc) (paren-open . #f) (symbol . def)
(paren-close . #f) (symbol . ghi) (symbol . .e5))))
- ; Here we make use of the property that exact/inexact numbers are not equal?
- ; even when they have the same numeric value!
- (pass-if "integers (decimal)"
- (equal? (lex-string "-1 1 1. +1 01234")
- '((integer . -1) (integer . 1) (integer . 1) (integer . 1)
- (integer . 1234))))
- (pass-if "integers (binary)"
- (equal? (lex-string "#b-1 #b1 #b+1 #b10011010010")
- '((integer . -1) (integer . 1) (integer . 1) (integer . 1234))))
- (pass-if "integers (octal)"
- (equal? (lex-string "#o-1 #o1 #o+1 #o2322")
- '((integer . -1) (integer . 1) (integer . 1) (integer . 1234))))
- (pass-if "integers (hexadecimal)"
- (equal? (lex-string "#x-1 #x1 #x+1 #x4d2")
- '((integer . -1) (integer . 1) (integer . 1) (integer . 1234))))
+ (with-test-prefix "integers"
+ ;; Here we make use of the property that exact/inexact numbers are not
equal?
+ ;; even when they have the same numeric value!
+
+ (with-test-prefix "decimal"
+ (pass-if-equal "simple" '((integer . 1)) (lex-string "1"))
+ (pass-if-equal "positive" '((integer . 1)) (lex-string "+1"))
+ (pass-if-equal "negative" '((integer . -1)) (lex-string "-1"))
+ (pass-if-equal "trailing dot" '((integer . 1)) (lex-string "1."))
+ (pass-if-equal "all digits"
+ '((integer . 123456789)) (lex-string "0123456789")))
+
+ (with-test-prefix "binary"
+ (pass-if-equal "simple" '((integer . 1)) (lex-string "#b1"))
+ (pass-if-equal "positive" '((integer . 1)) (lex-string "#b+1"))
+ (pass-if-equal "negative" '((integer . -1)) (lex-string "#b-1"))
+ (pass-if-equal "all digits"
+ '((integer . #b010101)) (lex-string "#b010101")))
+
+ (with-test-prefix "octal"
+ (pass-if-equal "simple" '((integer . 1)) (lex-string "#o1"))
+ (pass-if-equal "positive" '((integer . 1)) (lex-string "#o+1"))
+ (pass-if-equal "negative" '((integer . -1)) (lex-string "#o-1"))
+ (pass-if-equal "all digits"
+ '((integer . #o01234567)) (lex-string "#o01234567")))
+
+ (with-test-prefix "hexadecimal"
+ (pass-if-equal "simple" '((integer . 1)) (lex-string "#x1"))
+ (pass-if-equal "positive" '((integer . 1)) (lex-string "#x+1"))
+ (pass-if-equal "negative" '((integer . -1)) (lex-string "#x-1"))
+ (pass-if-equal "all digits"
+ '((integer . #x0123456789abcdef)) (lex-string "#x0123456789aBcDeF")))
+
+ (with-test-prefix "arbitrary radix"
+ (pass-if-equal "simple" '((integer . 44)) (lex-string "#24r1k"))
+ (pass-if-equal "positive" '((integer . 44)) (lex-string "#24r+1k"))
+ (pass-if-equal "negative" '((integer . -44)) (lex-string "#24r-1k"))
+ (pass-if-equal "max radix" '((integer . 35)) (lex-string "#36rz"))))
+
(pass-if "floats"
(equal? (lex-string "1500.0 15e2 15.e2 1.5e3 .15e4 -.345e-2")
'((float . 1500.0) (float . 1500.0) (float . 1500.0)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- bug#48758: [PATCH] Elisp reader does not support non-decimal integers,
Ivan Sokolov <=