bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#52209: 28.0.60; [PATCH] date-to-time fails on pure dates


From: Bob Rogers
Subject: bug#52209: 28.0.60; [PATCH] date-to-time fails on pure dates
Date: Tue, 28 Dec 2021 10:52:31 -0500

   From: Lars Ingebrigtsen <larsi@gnus.org>
   Date: Sun, 26 Dec 2021 12:31:07 +0100

   Better errors messages are possible without making many specific error
   symbols, though.

OK, I think I have a good solution that uses a single error symbol; let
me know what you think.  (Having never done much with Elisp conditions,
I was still thinking in terms of Common Lisp, so I had to go scratch my
head for a while.)

   I am currently working on broadening what the parser will accept,
though I think it is close to a usable state.  I am using the
documentation for the Perl Date::Parse module to see what it accepts,
and will then look at the corresponding Python and Ruby modules for
further ideas.  I am not planning to adopt everything I see, though; in
particular, I think it's a good idea for new code to stick to insisting
on four-digit years except when the caller has specified a format that
determines the month/day order.

                                        -- Bob

diff --git a/lisp/calendar/parse-date.el b/lisp/calendar/parse-date.el
new file mode 100644
index 0000000000..10bd939e91
--- /dev/null
+++ b/lisp/calendar/parse-date.el
@@ -0,0 +1,472 @@
+;;; parse-date.el --- parsing time/date strings -*- lexical-binding: t -*-
+
+;; Copyright (C) 2021 Free Software Foundation, Inc.
+
+;; Author: Bob Rogers <rogers@rgrjr.com>
+;; Keywords: util
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; 'parse-date' parses a time and/or date in a string and returns a
+;; list of values, just like `decode-time', where unspecified elements
+;; in the string are returned as nil (except unspecified DST is
+;; returned as -1).  `encode-time' may be applied on these values to
+;; obtain an internal time value.  If left to its own devices, it
+;; accepts a wide variety of formats, but can be told to insist on a
+;; particular date/time format.
+
+;; Historically, `parse-time-string' was used for this purpose, but it
+;; was focused on email date formats, and gradually but imperfectly
+;; extended to handle other formats.  'parse-date' is compatible in
+;; that it parses the same input formats and uses the same return
+;; value format, but is stricter in that it signals an error for
+;; tokens that `parse-time-string' would simply ignore.
+
+;;; TODO:
+;;
+;; * Add a euro-date format for DD/MM/YYYY ?
+;;
+
+;;; Code:
+
+(require 'cl-lib)
+(require 'iso8601)
+(require 'parse-time)
+
+(define-error 'date-parse-error "Date/time parse error" 'error)
+
+(defconst parse-date--ends-with-alpha-tz-re
+  (concat " \\(" (mapconcat #'car parse-time-zoneinfo "\\|") "\\)$")
+  "Recognize an alphanumeric timezone at the end of the string.")
+
+(defun parse-date--guess-rfc822-formats (date-string)
+  (let ((case-fold-search t))
+    (cond ((string-match "(" date-string) 'rfc2822)
+          ((string-match parse-date--ends-with-alpha-tz-re date-string)
+            ;; Alphabetic timezones are legacy syntax.
+            'rfc822)
+          ((string-match " [-+][0-9][0-9][0-9][0-9][ \t\n]*\\($\\}(\\)"
+                         date-string)
+            ;; Note that an ISO-8601 timezone has a colon in the middle
+            ;; and no preceding space.
+            'rfc2822)
+          (t nil))))
+
+(defun parse-date--guess-format (date-string)
+  (cond ((iso8601-valid-p date-string) 'iso-8601)
+        ((parse-date--guess-rfc822-formats date-string))
+        (t nil)))
+
+(defun parse-date--ignore-char? (char)
+  ;; Ignore whitespace and commas.
+  (or (eq char ?\ ) (eq char ?\t) (eq char ?\r) (eq char ?\n) (eq char ?,)))
+
+(defun parse-date--tokenize-string (string &optional strip-fws?)
+  "Turn STRING into tokens, separated only by whitespace and commas.
+Multiple commas are ignored.  Pure digit sequences are turned
+into integers.  If STRIP-FWS? is true, then folding whitespace as
+defined by RFC2822 (strictly, the CFWS production that also
+accepts comments) is stripped out by treating it like whitespace;
+if it's value is the symbol `first', we exit when we see the
+first '(' (per RFC2822), else we strip them all (per RFC822)."
+  (let ((index 0)
+       (end (length string))
+        (fws-eof? (eq strip-fws? 'first))
+       (list ()))
+    (when fws-eof?
+      ;; In order to stop on the first "(", we need to see it as
+      ;; non-whitespace.
+      (setq strip-fws? nil))
+    (cl-flet ((skip-ignored ()
+                ;; Skip ignored characters at index (the scan
+                ;; position).  Skip RFC822 comments in matched parens
+                ;; if strip-fws? is true, but do not complain about
+                ;; unterminated comments.
+                (let ((char nil)
+                      (nest 0))
+                  (while (and (< index end)
+                              (setq char (aref string index))
+                              (or (> nest 0)
+                                  (parse-date--ignore-char? char)
+                                  (and strip-fws? (eql char ?\())))
+                    (cl-incf index)
+                    ;; FWS bookkeeping.
+                    (cond ((not strip-fws?))
+                          ((and (eq char ?\\)
+                                (< (1+ index) end))
+                           ;; Move to the next char but don't check
+                           ;; it to see if it might be a paren.
+                            (cl-incf index))
+                          ((eq char ?\() (cl-incf nest))
+                          ((eq char ?\)) (cl-decf nest)))))))
+      (skip-ignored)           ;; Skip leading whitespace.
+      (while (and (< index end)
+                  (not (and fws-eof?
+                            (eq (aref string index) ?\())))
+        (let* ((start index)
+               (char (aref string index))
+               (all-digits (<= ?0 char ?9)))
+          ;; char is valid; look for more valid characters.
+          (when (and strip-fws?
+                     (eq char ?\\)
+                     (< (1+ index) end))
+            ;; Escaped character, which might be a "(".  If so, we are
+            ;; correct to include it in the token, even though the
+            ;; caller is sure to barf.  If not, we violate RFC2?822 by
+            ;; not removing the backslash, but no characters in valid
+            ;; RFC2?822 dates need escaping anyway, so it shouldn't
+            ;; matter that this is not done strictly correctly.  --
+            ;; rgr, 24-Dec-21.
+            (cl-incf index))
+          (while (and (< (cl-incf index) end)
+                      (setq char (aref string index))
+                      (not (or (parse-date--ignore-char? char)
+                               (and strip-fws?
+                                    (eq char ?\()))))
+            (unless (<= ?0 char ?9)
+              (setq all-digits nil))
+            (when (and strip-fws?
+                       (eq char ?\\)
+                       (< (1+ index) end))
+              ;; Escaped character, see above.
+              (cl-incf index)))
+          (push (if all-digits
+                    (cl-parse-integer string :start start :end index)
+                  (substring string start index))
+                list)
+          (skip-ignored)))
+      (nreverse list))))
+
+(defconst parse-date--slot-names
+  '(second minute hour day month year weekday dst zone)
+  "Names of return value slots, for better error messages
+See the decoded-time defstruct.")
+
+(defconst parse-date--slot-ranges
+  '((0 60) (0 59) (0 23) (1 31) (1 12) (1 9999))
+  "Numeric slot ranges, for bounds checking.
+Note that RFC2822 explicitly requires that seconds go up to 60,
+to allow for leap seconds (see Mills, D., 'Network Time
+Protocol', STD 12, RFC 1119, September 1989).")
+
+(defun parse-date--x822 (time-string obs-format?)
+  ;; Parse an RFC2822 or (if obs-format? is true) RFC822 date.  The
+  ;; strict syntax for the former is as follows:
+  ;;
+  ;;   [ day-of-week "," ] day FWS month-name FWS year FWS time [CFWS]
+  ;;
+  ;; where "time" is:
+  ;;
+  ;;   2DIGIT ":" 2DIGIT [ ":" 2DIGIT ] FWS ( "+" / "-" ) 4DIGIT
+  ;;
+  ;; RFC822 also accepts comments in random places (which is handled
+  ;; by parse-date--tokenize-string) and two-digit years.  We are
+  ;; somewhat more lax in what we accept (specifically, the hours
+  ;; don't have to be two digits, and the TZ and the comma after the
+  ;; DOW are optional), but we do insist that the items that are
+  ;; present do appear in this order.
+  (let ((tokens (parse-date--tokenize-string (downcase time-string)
+                                            (if obs-format? 'all 'first)))
+        (time (list nil nil nil nil nil nil nil -1 nil)))
+    (cl-labels ((set-matched-slot (slot index token)
+                  ;; Assign a slot value from match data if index is
+                  ;; non-nil, else from token, signalling an error if
+                  ;; it's already been assigned or is out of range.
+                  (let ((value (if index
+                                   (cl-parse-integer (match-string index 
token))
+                                 token))
+                        (range (nth slot parse-date--slot-ranges)))
+                    (unless (equal (nth slot time)
+                                   (if (= slot 7) -1 nil))
+                      (signal 'date-parse-error
+                              (list "Duplicate slot value"
+                                    (nth slot parse-date--slot-names) token)))
+                    (when (and range
+                               (not (<= (car range) value (cadr range))))
+                      (signal 'date-parse-error
+                              (list "Slot out of range"
+                                    (nth slot parse-date--slot-names)
+                                    token (car range) (cadr range))))
+                    (setf (nth slot time) value)))
+                (set-numeric (slot token)
+                  (unless (natnump token)
+                    (signal 'date-parse-error
+                              (list "Not a number"
+                                    (nth slot parse-date--slot-names) token)))
+                  (set-matched-slot slot nil token)))
+      ;; Check for weekday.
+      (let ((dow (assoc (car tokens) parse-time-weekdays)))
+        (when dow
+          ;; Day of the week.
+          (set-matched-slot 6 nil (cdr dow))
+          (pop tokens)))
+      ;; Day.
+      (set-numeric 3 (pop tokens))
+      ;; Alphabetic month.
+      (let* ((month (pop tokens))
+             (match (assoc month parse-time-months)))
+        (if match
+            (set-matched-slot 4 nil (cdr match))
+          (signal 'date-parse-error
+                  (list "Expected an alphabetic month" month))))
+      ;; Year.
+      (let ((year (pop tokens)))
+        ;; Check the year for the right number of digits.
+        (cond ((> year 1000)
+                (set-numeric 5 year))
+              ((or (not obs-format?)
+                   (>= year 100))
+                "Four digit years are required but found '%s'" year)
+              ((>= year 50)
+                ;; second half of the 20th century.
+                (set-numeric 5 (+ 1900 year)))
+              (t
+                ;; first half of the 21st century.
+                (set-numeric 5 (+ 2000 year)))))
+      ;; Time.
+      (let ((time (pop tokens)))
+        (cond ((or (null time) (natnump time))
+                (signal 'date-parse-error
+                        (list "Expected a time" time)))
+              ((string-match
+                "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\):\\([0-9][0-9]\\)$"
+                time)
+                (set-matched-slot 2 1 time)
+                (set-matched-slot 1 2 time)
+                (set-matched-slot 0 3 time))
+              ((string-match "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\)$" time)
+                ;; Time without seconds.
+                (set-matched-slot 2 1 time)
+                (set-matched-slot 1 2 time)
+                (set-matched-slot 0 nil 0))
+              (t
+                (signal 'date-parse-error
+                        (list "Expected a time" time)))))
+      ;; Timezone.
+      (let* ((zone (pop tokens))
+             (match (assoc zone parse-time-zoneinfo)))
+        (cond (match
+                (set-matched-slot 8 nil (cadr match))
+                (set-matched-slot 7 nil (caddr match)))
+              ((and (stringp zone)
+                    (string-match "^[-+][0-9][0-9][0-9][0-9]$" zone))
+                ;; Numeric time zone.
+                (set-matched-slot
+                  8 nil
+                  (* 60
+                     (+ (cl-parse-integer zone :start 3 :end 5)
+                        (* 60 (cl-parse-integer zone :start 1 :end 3)))
+                     (if (= (aref zone 0) ?-) -1 1))))
+              (zone
+                (signal 'date-parse-error
+                        (list "Expected a timezone" zone)))))
+      (when tokens
+        (signal 'date-parse-error
+                (list "Extra token(s)" (car tokens)))))
+    time))
+
+(defun parse-date--default (time-string two-digit-year?)
+  ;; Do the standard parsing thing.  This is mostly free form, in that
+  ;; tokens may appear in any order, but we expect to introduce some
+  ;; state dependence.
+  (let ((tokens (parse-date--tokenize-string (downcase time-string)))
+        (time (list nil nil nil nil nil nil nil -1 nil)))
+    (cl-flet ((set-matched-slot (slot index token)
+                ;; Assign a slot value from match data if index is
+                ;; non-nil, else from token, signalling an error if
+                ;; it's already been assigned or is out of range.
+                (let ((value (if index
+                                 (cl-parse-integer (match-string index token))
+                               token))
+                      (range (nth slot parse-date--slot-ranges)))
+                  (unless (equal (nth slot time)
+                                 (if (= slot 7) -1 nil))
+                    (signal 'date-parse-error
+                              (list "Duplicate slot value"
+                                    (nth slot parse-date--slot-names) token)))
+                  (when (and range
+                             (not (<= (car range) value (cadr range))))
+                    (signal 'date-parse-error
+                            (list "Slot out of range"
+                                         (nth slot parse-date--slot-names)
+                                         token (car range) (cadr range))))
+                  (setf (nth slot time) value))))
+      (while tokens
+        (let ((token (pop tokens))
+              (match nil))
+          (cond ((numberp token)
+                  ;; A bare number could be a month, day, or year.
+                  ;; The order of these tests matters greatly.
+                  (cond ((>= token 1000)
+                          (set-matched-slot 5 nil token))
+                        ((and (<= 1 token 31)
+                              (not (nth 3 time)))
+                          ;; Assume days come before months or years.
+                          (set-matched-slot 3 nil token))
+                        ((and (<= 1 token 12)
+                              (not (nth 4 time)))
+                          ;; Assume days come before years.
+                          (set-matched-slot 4 nil token))
+                        ((or (nth 5 time)
+                             (not two-digit-year?)
+                             (> token 100))
+                          (signal 'date-parse-error
+                                  (list "Unrecognized token" token)))
+                        ;; It's a two-digit year.
+                        ((>= token 50)
+                          ;; second half of the 20th century.
+                          (set-matched-slot 5 nil (+ 1900 token)))
+                        (t
+                          ;; first half of the 21st century.
+                          (set-matched-slot 5 nil (+ 2000 token)))))
+                ((setq match (assoc token parse-time-weekdays))
+                  (set-matched-slot 6 nil (cdr match)))
+                ((setq match (assoc token parse-time-months))
+                  (set-matched-slot 4 nil (cdr match)))
+                ((setq match (assoc token parse-time-zoneinfo))
+                  (set-matched-slot 8 nil (cadr match))
+                  (set-matched-slot 7 nil (caddr match)))
+                ((string-match "^[-+][0-9][0-9][0-9][0-9]$" token)
+                  ;; Numeric time zone.
+                  (set-matched-slot
+                    8 nil
+                    (* 60
+                       (+ (cl-parse-integer token :start 3 :end 5)
+                          (* 60 (cl-parse-integer token :start 1 :end 3)))
+                       (if (= (aref token 0) ?-) -1 1))))
+                ((string-match
+                  
"^\\([0-9][0-9][0-9][0-9]\\)[-/]\\([0-9][0-9]?\\)[-/]\\([0-9][0-9]?\\)$"
+                  token)
+                  ;; ISO-8601-style date (YYYY-MM-DD).
+                  (set-matched-slot 5 1 token)
+                  (set-matched-slot 4 2 token)
+                  (set-matched-slot 3 3 token))
+                ((string-match
+                  
"^\\([0-9][0-9]?\\)[-/]\\([0-9][0-9]?\\)[-/]\\([0-9][0-9][0-9][0-9]\\)$"
+                  token)
+                  ;; US date (MM-DD-YYYY), but we insist on four
+                  ;; digits for the year.
+                  (set-matched-slot 4 1 token)
+                  (set-matched-slot 3 2 token)
+                  (set-matched-slot 5 3 token))
+                ((string-match
+                  "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\):\\([0-9][0-9]\\)$"
+                  token)
+                  (set-matched-slot 2 1 token)
+                  (set-matched-slot 1 2 token)
+                  (set-matched-slot 0 3 token))
+                ((string-match "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\)$" token)
+                  ;; Time without seconds.
+                  (set-matched-slot 2 1 token)
+                  (set-matched-slot 1 2 token)
+                  (set-matched-slot 0 nil 0))
+                ((member token '("am" "pm"))
+                  (unless (nth 2 time)
+                    (signal 'date-parse-error
+                            (list "Missing time" token)))
+                  (unless (<= (nth 2 time) 12)
+                    (signal 'date-parse-error
+                            (list "Time already past noon" token)))
+                  (when (equal token "pm")
+                    (cl-incf (nth 2 time) 12)))
+                (t
+                  (signal 'date-parse-error
+                          (list "Unrecognized token" token)))))))
+    time))
+
+;;;###autoload
+(cl-defgeneric parse-date (time-string &optional format)
+  "Parse TIME-STRING according to FORMAT, returning a list.
+The FORMAT value is a symbol that may be one of the following:
+
+   iso-8601 => parse the string according to the ISO-8601
+standard.  See `parse-iso8601-time-string'.
+
+   rfc822 => parse an RFC822 (old email) date, which allows
+two-digit years and internal '()' comments.  In dates of the form
+'11 Jan 12', the 11 is assumed to be the day, and the 12 is
+assumed to mean 2012.  Be sure you really want this; the format
+is more limited than most human-supplied dates.
+
+   rfc2822 => parse an RFC2822 (new email) date, which allows
+only four-digit years.  Again, this is a fairly restricted
+format, with fields required to be in a specified order and
+representation.
+
+   us-date => parse a US-style date, of the form MM/DD/YYYY, but
+allowing two-digit years.  In dates of the form '01/11/12', the 1
+is the month, 11 is the day, and the 12 is assumed to mean 2012.
+
+   nil => like us-date with two-digit years disallowed.
+
+Anything else is treated as iso-8601 if it looks similar, else
+us-date with two-digit years disallowed.
+
+   * For all formats except iso-8601, parsing is case-insensitive.
+
+   * Commas and whitespace are ignored.
+
+   * In date specifications, either '/' or '-' may be used to
+separate components, but all three components must be given.
+
+   * A date that starts with four digits is YYYY-MM-DD, ISO-8601
+style, but a date that ends with four digits is MM-DD-YYYY [at
+least in us-date format].
+
+   * Two digit years, when allowed, are in the 1900's when
+between 50 and 99 inclusive and in the 2000's when between 0 and
+49 inclusive.
+
+A `date-parse-error' is signalled when time values are duplicated,
+unrecognized, or out of range.  No consistency checks between
+fields are done.  For instance, the weekday is not checked to see
+that it corresponds to the date, and parse-date complains about
+the 32nd of March (or any other month) but blithely accepts the
+29th of February in non-leap years -- or the 31st of February in
+any year.
+
+The result is a list of (SEC MIN HOUR DAY MON YEAR DOW DST TZ),
+which can be accessed as a decoded-time defstruct (q.v.),
+e.g. `decoded-time-year' to extract the year, and turned into an
+Emacs timestamp by `encode-time'.  The values returned are
+identical to those of `decode-time', but any unknown values other
+than DST are returned as nil, and an unknown DST value is
+returned as -1.")
+
+(cl-defmethod parse-date (time-string (_format (eql iso-8601)))
+  (iso8601-parse time-string))
+
+(cl-defmethod parse-date (time-string (_format (eql rfc2822)))
+  (parse-date--x822 time-string nil))
+
+(cl-defmethod parse-date (time-string (_format (eql rfc822)))
+  (parse-date--x822 time-string t))
+
+(cl-defmethod parse-date (time-string (_format (eql us-date)))
+  (parse-date--default time-string t))
+
+(cl-defmethod parse-date (time-string (_format (eql nil)))
+  (parse-date--default time-string nil))
+
+(cl-defmethod parse-date (time-string _format)
+  ;; Re-dispatch after guessing the format.
+  (parse-date time-string (parse-date--guess-format time-string)))
+
+(provide 'parse-date)
+
+;;; parse-date.el ends here
diff --git a/test/lisp/calendar/parse-date-tests.el 
b/test/lisp/calendar/parse-date-tests.el
new file mode 100644
index 0000000000..bd2b344d71
--- /dev/null
+++ b/test/lisp/calendar/parse-date-tests.el
@@ -0,0 +1,247 @@
+;;; parse-date-tests.el --- Test suite for parse-date.el  -*- 
lexical-binding:t -*-
+
+;; Copyright (C) 2016-2021 Free Software Foundation, Inc.
+
+;; Author: Lars Ingebrigtsen <larsi@gnus.org>
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;;; Code:
+
+(require 'ert)
+(require 'parse-date)
+
+(ert-deftest parse-date-tests ()
+  "Test basic parse-date functionality."
+
+  ;; Test tokenization.
+  (should (equal (parse-date--tokenize-string " ") '()))
+  (should (equal (parse-date--tokenize-string " a b") '("a" "b")))
+  (should (equal (parse-date--tokenize-string "a bbc dde") '("a" "bbc" "dde")))
+  (should (equal (parse-date--tokenize-string " , a 27 b,, c 14:32 ")
+                 '("a" 27 "b" "c" "14:32")))
+  ;; Some folding whitespace tests.
+  (should (equal (parse-date--tokenize-string " a b (end) c" 'first)
+                 '("a" "b")))
+  (should (equal (parse-date--tokenize-string "(quux)a (foo (bar)) b(baz)" t)
+                 '("a" "b")))
+  (should (equal (parse-date--tokenize-string "a b\\cde" 'all)
+                 ;; Strictly incorrect, but strictly unnecessary syntax.
+                 '("a" "b\\cde")))
+  (should (equal (parse-date--tokenize-string "a b\\ de" 'all)
+                 '("a" "b\\ de")))
+  (should (equal (parse-date--tokenize-string "a \\de \\(f" 'all)
+                 '("a" "\\de" "\\(f")))
+
+  ;; Start with some compatible RFC822 dates.
+  (dolist (format '(nil rfc822 rfc2822))
+    (should (equal (parse-date "Mon, 22 Feb 2016 19:35:42 +0100" format)
+                   '(42 35 19 22 2 2016 1 -1 3600)))
+    (should (equal (parse-date "22 Feb 2016 19:35:42 +0100" format)
+                   '(42 35 19 22 2 2016 nil -1 3600)))
+    (should (equal (parse-date "Mon, 22 February 2016 19:35:42 +0100" format)
+                   '(42 35 19 22 2 2016 1 -1 3600)))
+    (should (equal (parse-date "Mon, 22 feb 2016 19:35:42 +0100" format)
+                   '(42 35 19 22 2 2016 1 -1 3600)))
+    (should (equal (parse-date "Monday, 22 february 2016 19:35:42 +0100" 
format)
+                   '(42 35 19 22 2 2016 1 -1 3600)))
+    (should (equal (parse-date "Monday, 22 february 2016 19:35:42 PST" format)
+                   '(42 35 19 22 2 2016 1 nil -28800)))
+    (should (equal (parse-date "Friday, 21 Sep 2018 13:47:58 PDT" format)
+                   '(58 47 13 21 9 2018 5 t -25200)))
+    (should (equal (parse-date "Friday, 21 Sep 2018 13:47:58" format)
+                   '(58 47 13 21 9 2018 5 -1 nil))))
+  ;; These are not allowed by the default format.
+  (should (equal (parse-date "22 Feb 16 19:35:42 +0100" 'rfc822)
+                 '(42 35 19 22 2 2016 nil -1 3600)))
+  (should (equal (parse-date "22 Feb 96 19:35:42 +0100" 'rfc822)
+                 '(42 35 19 22 2 1996 nil -1 3600)))
+  ;; Try them again with comments.
+  (should (equal (parse-date "22 Feb (today) 16 19:35:42 +0100" 'rfc822)
+                 '(42 35 19 22 2 2016 nil -1 3600)))
+  (should (equal (parse-date "22 Feb 96 (long ago) 19:35:42 +0100" 'rfc822)
+                 '(42 35 19 22 2 1996 nil -1 3600)))
+  (should (equal (parse-date
+                  "Friday, 21 Sep(comment \\) with \\( parens)18 19:35:42"
+                  'rfc822)
+                 '(42 35 19 21 9 2018 5 -1 nil)))
+  (should (equal (parse-date
+                  "Friday, 21 Sep 18 19:35:42 (unterminated comment"
+                  'rfc822)
+                 '(42 35 19 21 9 2018 5 -1 nil)))
+
+  ;; Test some RFC822 error cases
+  (dolist (test '(("33 1 2022" ("Slot out of range" day 33 1 31))
+                  ("0 1 2022" ("Slot out of range" day 0 1 31))
+                  ("1 1 2020 2021" ("Expected an alphabetic month" 1))
+                  ("1 Jan 2020 2021" ("Expected a time" 2021))
+                  ("1 Jan 2020 20:21 2000" ("Expected a timezone" 2000))
+                  ("1 Jan 2020 20:21 +0200 33" ("Extra token(s)" 33))))
+    (should (equal (condition-case err (parse-date (car test) 'rfc822)
+                     (date-parse-error (cdr err)))
+                   (cadr test))))
+
+  ;; And these are not allowed by rfc822 because of missing time.
+  (should (equal (parse-date "Friday, 21 Sep 2018" nil)
+                 '(nil nil nil 21 9 2018 5 -1 nil)))
+  (should (equal (parse-date "22 Feb 2016 +0100" nil)
+                 '(nil nil nil 22 2 2016 nil -1 3600)))
+
+  ;; Test the default format with both hyphens and slashes in dates.
+  (dolist (case '(;; Month can be numeric if date uses hyphens/slashes.
+                  ("Friday, 2018-09-21" (nil nil nil 21 9 2018 5 -1 nil))
+                  ;; Year can come last if four digits.
+                  ("Friday, 9-21-2018" (nil nil nil 21 9 2018 5 -1 nil))
+                  ;; Day of week is optional
+                  ("2018-09-21" (nil nil nil 21 9 2018 nil -1 nil))
+                  ;; The order of date, time, etc., does not matter.
+                  ("13:47:58, +0100, 2018-09-21, Friday"
+                   (58 47 13 21 9 2018 5 -1 3600))
+                  ;; Month, day, or both, can be a single digit.
+                  ("Friday, 2018-9-08" (nil nil nil 8 9 2018 5 -1 nil))
+                  ("Friday, 2018-09-8" (nil nil nil 8 9 2018 5 -1 nil))
+                  ("Friday, 2018-9-8" (nil nil nil 8 9 2018 5 -1 nil))))
+    (let ((string (car case))
+          (expected (cadr case)))
+      ;; Test with hyphens.
+      (should (equal (parse-date string nil) expected))
+      (while (string-match "-" string)
+        (setq string (replace-match "/" t t string)))
+      ;; Test with slashes.
+      (should (equal (parse-date string nil) expected))))
+
+  ;; Time by itself is recognized as such.
+  (should (equal (parse-date "03:47:58" nil)
+                 '(58 47 3 nil nil nil nil -1 nil)))
+  ;; A leading zero for hours is optional.
+  (should (equal (parse-date "3:47:58" nil)
+                 '(58 47 3 nil nil nil nil -1 nil)))
+  ;; Missing seconds are assumed to be zero.
+  (should (equal (parse-date "3:47" nil)
+                 '(0 47 3 nil nil nil nil -1 nil)))
+  ;; AM/PM are understood (in any case combination).
+  (dolist (am '(am AM Am))
+    (should (equal (parse-date (format "3:47 %s" am) nil)
+                   '(0 47 3 nil nil nil nil -1 nil))))
+  (dolist (pm '(pm PM Pm))
+    (should (equal (parse-date (format "3:47 %s" pm) nil)
+                   '(0 47 15 nil nil nil nil -1 nil))))
+
+  ;; Ensure some cases fail.
+  (should-error (parse-date "22 Feb 196" 'us-date))
+  (should-error (parse-date "22 Feb 16 19:35:42" nil))
+  (should-error (parse-date "22 Feb 96 19:35:42" nil)) ;; two-digit year
+  (should-error (parse-date "2 Feb 2021 1996" nil))    ;; duplicate year
+
+  (dolist (test '(("22 Feb 196" 'us-date       ;; bad year
+                   ("Unrecognized token" 196))
+                  ("22 Feb 16 19:35:42" nil    ;; two-digit year
+                   ("Unrecognized token" 16))
+                  ("22 Feb 96 19:35:42" nil    ;; two-digit year
+                   ("Unrecognized token" 96))
+                  ("2 Feb 2021 1996" nil
+                   ("Duplicate slot value" year 1996))
+                  ("2020-1-1 2021" nil
+                   ("Duplicate slot value" year 2021))
+                  ("22 Feb 196" 'us-date
+                   ("Unrecognized token" 196))
+                  ("22 Feb 16 19:35:42" nil
+                   ("Unrecognized token" 16))
+                  ("22 Feb 96 19:35:42" nil
+                   ("Unrecognized token" 96))
+                  ("2 Feb 2021 1996" nil
+                   ("Duplicate slot value" year 1996))
+                  ("2020-1-1 30" nil
+                   ("Unrecognized token" 30))
+                  ("2020-1-1 12" nil
+                   ("Unrecognized token" 12))
+                  ("15:47 15:15" nil
+                   ("Duplicate slot value" hour "15:15"))
+                  ("2020-1-1 +0800 -0800" t
+                   ("Duplicate slot value" zone -28800))
+                  ("15:47 PM" nil
+                   ("Time already past noon" "pm"))
+                  ("15:47 AM" nil
+                   ("Time already past noon" "am"))
+                  ("2020-1-1 PM" nil
+                   ("Missing time" "pm"))
+                  ;; Range tests
+                  ("2021-12-32" nil
+                   ("Slot out of range" day "2021-12-32" 1 31))
+                  ("2021-12-0" nil
+                   ("Slot out of range" day "2021-12-0" 1 31))
+                  ("2021-13-3" nil
+                   ("Slot out of range" month "2021-13-3" 1 12))
+                  ("0000-12-3" nil
+                   ("Slot out of range" year "0000-12-3" 1 9999))
+                  ("20021 Dec 3" nil
+                   ("Slot out of range" year 20021 1 9999))
+                  ("24:21:14" nil
+                   ("Slot out of range" hour "24:21:14" 0 23))
+                  ("14:60:21" nil
+                   ("Slot out of range" minute "14:60:21" 0 59))
+                  ("14:21:61" nil
+                   ("Slot out of range" second "14:21:61" 0 60))))
+    (should (equal (condition-case err (parse-date (car test) (cadr test))
+                     (date-parse-error (cdr err)))
+                   (caddr test))))
+  (should (equal (parse-date "14:21:60" nil)   ;; a leap second!
+                 '(60 21 14 nil nil nil nil -1 nil)))
+
+  ;; Test ISO-8601 dates.
+  (dolist (format '(t iso-8601))
+    (should (equal (parse-date "1998-09-12T12:21:54-0200" format)
+                   '(54 21 12 12 9 1998 nil nil -7200)))
+    (should (equal (format-time-string
+                    "%Y-%m-%d %H:%M:%S"
+                    (encode-time
+                      (parse-date "1998-09-12T12:21:54-0230" format))
+                    t)
+                   "1998-09-12 14:51:54"))
+    (should (equal (format-time-string
+                    "%Y-%m-%d %H:%M:%S"
+                    (encode-time
+                      (parse-date "1998-09-12T12:21:54-02:00" format))
+                    t)
+                   "1998-09-12 14:21:54"))
+    (should (equal (format-time-string
+                    "%Y-%m-%d %H:%M:%S"
+                    (encode-time
+                      (parse-date "1998-09-12T12:21:54-02" format))
+                    t)
+                   "1998-09-12 14:21:54"))
+    (should (equal (format-time-string
+                    "%Y-%m-%d %H:%M:%S"
+                    (encode-time
+                      (parse-date "1998-09-12T12:21:54+0230" format))
+                    t)
+                   "1998-09-12 09:51:54"))
+    (should (equal (format-time-string
+                    "%Y-%m-%d %H:%M:%S"
+                    (encode-time
+                     (parse-date "1998-09-12T12:21:54+02" format))
+                    t)
+                   "1998-09-12 10:21:54"))
+    (should (equal (parse-date "1998-09-12T12:21:54Z" t)
+                   '(54 21 12 12 9 1998 nil nil 0)))
+    (should (equal (parse-date "1998-09-12T12:21:54" format)
+                   '(54 21 12 12 9 1998 nil -1 nil)))))
+
+(provide 'parse-date-tests)
+
+;;; parse-date-tests.el ends here

reply via email to

[Prev in Thread] Current Thread [Next in Thread]