emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] /srv/bzr/emacs/trunk r108807: * xml.el: Implement XML para


From: Chong Yidong
Subject: [Emacs-diffs] /srv/bzr/emacs/trunk r108807: * xml.el: Implement XML parameter entities.
Date: Sat, 30 Jun 2012 19:33:22 +0800
User-agent: Bazaar (2.5.0)

------------------------------------------------------------
revno: 108807
committer: Chong Yidong <address@hidden>
branch nick: trunk
timestamp: Sat 2012-06-30 19:33:22 +0800
message:
  * xml.el: Implement XML parameter entities.
  (xml-parameter-entity-alist): New variable.
  (xml-parse-region, xml-parse-fragment): Preserve previous values
  of xml-entity-alist and xml-parameter-entity-alist, so that
  repeated calls on different documents do not change them.
  (xml-parse-tag): Fix doctype regexp.
  (xml--entity-replacement-text): New function.
  (xml-parse-dtd): Use it.  Don't handle system entities; doing that
  properly requires url retrieval which is unimplemented.
  (xml-escape-string): Doc fix.
modified:
  lisp/ChangeLog
  lisp/xml.el
=== modified file 'lisp/ChangeLog'
--- a/lisp/ChangeLog    2012-06-30 02:00:08 +0000
+++ b/lisp/ChangeLog    2012-06-30 11:33:22 +0000
@@ -1,3 +1,16 @@
+2012-06-30  Chong Yidong  <address@hidden>
+
+       * xml.el: Implement XML parameter entities.
+       (xml-parameter-entity-alist): New variable.
+       (xml-parse-region, xml-parse-fragment): Preserve previous values
+       of xml-entity-alist and xml-parameter-entity-alist, so that
+       repeated calls on different documents do not change them.
+       (xml-parse-tag): Fix doctype regexp.
+       (xml--entity-replacement-text): New function.
+       (xml-parse-dtd): Use it.  Don't handle system entities; doing that
+       properly requires url retrieval which is unimplemented.
+       (xml-escape-string): Doc fix.
+
 2012-06-30  Stefan Monnier  <address@hidden>
 
        * emacs-lisp/cl-lib.el (cl-pushnew): Use macroexp-let2.

=== modified file 'lisp/xml.el'
--- a/lisp/xml.el       2012-06-24 15:06:24 +0000
+++ b/lisp/xml.el       2012-06-30 11:33:22 +0000
@@ -95,10 +95,13 @@
     ("apos" . "'")
     ("quot" . "\"")
     ("amp"  . "&"))
-  "The defined entities.  Entities are added to this when the DTD is parsed.")
+  "Alist of defined XML entities.")
+
+(defvar xml-parameter-entity-alist nil
+  "Alist of defined XML parametric entities.")
 
 (defvar xml-sub-parser nil
-  "Dynamically set this to a non-nil value if you want to parse an XML 
fragment.")
+  "Non-nil when the XML parser is parsing an XML fragment.")
 
 (defvar xml-validating-parser nil
   "Set to non-nil to get validity checking.")
@@ -308,6 +311,9 @@
   ;; specs DTRT.
   (with-syntax-table (standard-syntax-table)
     (let ((case-fold-search nil)       ; XML is case-sensitive.
+         ;; Prevent entity definitions from changing the defaults
+         (xml-entity-alist xml-entity-alist)
+         (xml-parameter-entity-alist xml-parameter-entity-alist)
          xml result dtd)
       (save-excursion
        (if buffer
@@ -366,6 +372,9 @@
 (defun xml-parse-fragment (&optional parse-dtd parse-ns)
   "Parse xml-like fragments."
   (let ((xml-sub-parser t)
+       ;; Prevent entity definitions from changing the defaults
+       (xml-entity-alist xml-entity-alist)
+       (xml-parameter-entity-alist xml-parameter-entity-alist)
        children)
     (while (not (eobp))
       (let ((bit (xml-parse-tag
@@ -413,7 +422,7 @@
         (buffer-substring-no-properties pos (match-beginning 0))
         (xml-parse-string))))
      ;;  DTD for the document
-     ((looking-at "<!DOCTYPE")
+     ((looking-at "<!DOCTYPE[ \t\n\r]")
       (let ((dtd (xml-parse-dtd parse-ns)))
        (skip-syntax-forward " ")
        (if xml-validating-parser
@@ -580,11 +589,11 @@
   ;;  Get the name of the document
   (looking-at xml-name-regexp)
   (let ((dtd (list (match-string-no-properties 0) 'dtd))
-       type element end-pos)
+       (xml-parameter-entity-alist xml-parameter-entity-alist))
     (goto-char (match-end 0))
+    (skip-syntax-forward " ")
 
-    (skip-syntax-forward " ")
-    ;; XML [75]
+    ;; External subset (XML [75])
     (cond ((looking-at "PUBLIC\\s-+")
           (goto-char (match-end 0))
           (unless (or (re-search-forward
@@ -607,119 +616,137 @@
             (error "XML: Missing System ID"))
           (push (list (match-string-no-properties 1) 'system) dtd)))
     (skip-syntax-forward " ")
-    (if (eq ?> (char-after))
-       (forward-char)
-      (if (not (eq (char-after) ?\[))
-         (error "XML: Bad DTD")
-       (forward-char)
-       ;;  Parse the rest of the DTD
-       ;;  Fixme: Deal with NOTATION, PIs.
-       (while (not (looking-at "\\s-*\\]"))
-         (skip-syntax-forward " ")
-         (cond
-
-          ;;  Translation of rule [45] of XML specifications
-          ((looking-at
-            "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
-
-           (setq element (match-string-no-properties 1)
-                 type    (match-string-no-properties 2))
-           (setq end-pos (match-end 0))
-
-           ;;  Translation of rule [46] of XML specifications
+
+    (if (eq (char-after) ?>)
+
+       ;; No internal subset
+       (forward-char)
+
+      ;; Internal subset (XML [28b])
+      (unless (eq (char-after) ?\[)
+       (error "XML: Bad DTD"))
+      (forward-char)
+
+      ;; Parse the rest of the DTD
+      ;; Fixme: Deal with NOTATION, PIs.
+      (while (not (looking-at "\\s-*\\]"))
+       (skip-syntax-forward " ")
+       (cond
+        ;; Element declaration [45]:
+        ((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
+         (let ((element (match-string-no-properties 1))
+               (type    (match-string-no-properties 2))
+               (end-pos (match-end 0)))
+           ;; Translation of rule [46] of XML specifications
            (cond
-            ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration
+            ((string-match "^EMPTY[ \t\n\r]*$" type)       ; empty declaration
              (setq type 'empty))
-            ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
+            ((string-match "^ANY[ \t\n\r]*$" type)         ; any type of 
contents
              (setq type 'any))
-            ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
+            ((string-match "^(\\(.*\\))[ \t\n\r]*$" type)  ; children ([47])
              (setq type (xml-parse-elem-type (match-string-no-properties 1 
type))))
-            ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
+            ((string-match "^%[^;]+;[ \t\n\r]*$" type)   ; substitution
              nil)
-            (t
-             (if xml-validating-parser
-                 (error "XML: (Validity) Invalid element type in the DTD"))))
+            (xml-validating-parser
+             (error "XML: (Validity) Invalid element type in the DTD")))
 
-           ;;  rule [45]: the element declaration must be unique
-           (if (and (assoc element dtd)
-                    xml-validating-parser)
-               (error "XML: (Validity) Element declarations must be unique in 
a DTD (<%s>)"
-                      element))
+           ;; rule [45]: the element declaration must be unique
+           (and (assoc element dtd)
+                xml-validating-parser
+                (error "XML: (Validity) DTD element declarations must be 
unique (<%s>)"
+                       element))
 
            ;;  Store the element in the DTD
            (push (list element type) dtd)
-           (goto-char end-pos))
-
-          ;; Translation of rule [52] of XML specifications
-          ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
-                               "\\)[ \t\n\r]*\\(" xml-att-def-re
-                               "\\)*[ \t\n\r]*>"))
-
-           ;; We don't do anything with ATTLIST currently
-           (goto-char (match-end 0)))
-
-          ((looking-at "<!--")
-           (search-forward "-->"))
-          ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re
-                               "\\)[ \t\n\r]*\\(" xml-entity-value-re
-                               "\\)[ \t\n\r]*>"))
-           (let ((name  (match-string-no-properties 1))
-                 (value (substring (match-string-no-properties 2) 1
-                                   (- (length (match-string-no-properties 2)) 
1))))
-             (goto-char (match-end 0))
-             (setq xml-entity-alist
-                   (append xml-entity-alist
-                           (list (cons name
-                                       (with-temp-buffer
-                                         (insert value)
-                                         (goto-char (point-min))
-                                         (xml-parse-fragment
-                                          xml-validating-parser
-                                          parse-ns))))))))
-          ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re
-                                   "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
-                                   "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
-               (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re
-                                   "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
-                                   "\"[- 
\r\na-zA-Z0-9'()+,./:=?;address@hidden""
-                                   "\\|'[- 
\r\na-zA-Z0-9()+,./:=?;address@hidden'"
-                                   "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
-                                   "[ \t\n\r]*>")))
-           (let ((name  (match-string-no-properties 1))
-                 (file  (substring (match-string-no-properties 2) 1
-                                   (- (length (match-string-no-properties 2)) 
1))))
-             (goto-char (match-end 0))
-             (setq xml-entity-alist
-                   (append xml-entity-alist
-                           (list (cons name (with-temp-buffer
-                                              (insert-file-contents file)
-                                              (goto-char (point-min))
-                                              (xml-parse-fragment
-                                               xml-validating-parser
-                                               parse-ns))))))))
-          ;; skip parameter entity declarations
-          ((or (looking-at (concat "<!ENTITY[ \t\n\r]+%[ \t\n\r]+\\(" 
xml-name-re
-                                   "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
-                                   "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
-               (looking-at (concat "<!ENTITY[ \t\n\r]+"
-                                   "%[ \t\n\r]+"
-                                   "\\(" xml-name-re "\\)[ \t\n\r]+"
-                                   "PUBLIC[ \t\n\r]+"
-                                   "\\(\"[- 
\r\na-zA-Z0-9'()+,./:=?;address@hidden""
-                                   "\\|'[- 
\r\na-zA-Z0-9()+,./:=?;address@hidden'\\)[ \t\n\r]+"
-                                   "\\(\"[^\"]+\"\\|'[^']+'\\)"
-                                   "[ \t\n\r]*>")))
-           (goto-char (match-end 0)))
-          ;; skip parameter entities
-          ((looking-at (concat "%" xml-name-re ";"))
-           (goto-char (match-end 0)))
-          (t
-           (when xml-validating-parser
-             (error "XML: (Validity) Invalid DTD item"))))))
+           (goto-char end-pos)))
+
+        ;; Attribute-list declaration [52] (currently unsupported):
+        ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
+                             "\\)[ \t\n\r]*\\(" xml-att-def-re
+                             "\\)*[ \t\n\r]*>"))
+         (goto-char (match-end 0)))
+
+        ;; Comments (skip to end):
+        ((looking-at "<!--")
+         (search-forward "-->"))
+
+        ;; Internal entity declarations:
+        ((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+                             xml-name-re "\\)[ \t\n\r]*\\("
+                             xml-entity-value-re "\\)[ \t\n\r]*>"))
+         (let* ((name (prog1 (match-string-no-properties 2)
+                        (goto-char (match-end 0))))
+                (alist (if (match-string 1)
+                           'xml-parameter-entity-alist
+                         'xml-entity-alist))
+                ;; Retrieve the deplacement text:
+                (value (xml--entity-replacement-text
+                        ;; Entity value, sans quotation marks:
+                        (substring (match-string-no-properties 3) 1 -1))))
+           ;; If the same entity is declared more than once, the
+           ;; first declaration is binding.
+           (unless (assoc name (symbol-value alist))
+             (set alist (cons (cons name value) (symbol-value alist))))))
+
+        ;; External entity declarations (currently unsupported):
+        ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+                                 xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
+                                 "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
+             (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+                                 xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
+                                 "\"[- \r\na-zA-Z0-9'()+,./:=?;address@hidden""
+                                 "\\|'[- 
\r\na-zA-Z0-9()+,./:=?;address@hidden'"
+                                 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
+                                 "[ \t\n\r]*>")))
+         (goto-char (match-end 0)))
+
+        ;; Parameter entity:
+        ((looking-at (concat "%\\(" xml-name-re "\\);"))
+         (goto-char (match-end 0))
+         (let* ((entity (match-string 1))
+                (end (point-marker))
+                (elt (assoc entity xml-parameter-entity-alist)))
+           (when elt
+             (replace-match (cdr elt) t t)
+             (goto-char end))))
+
+        ;; Anything else:
+        (xml-validating-parser
+         (error "XML: (Validity) Invalid DTD item"))))
+
       (if (looking-at "\\s-*]>")
          (goto-char (match-end 0))))
     (nreverse dtd)))
 
+(defun xml--entity-replacement-text (string)
+  "Return the replacement text for the entity value STRING.
+The replacement text is obtained by replacing character
+references and parameter-entity references."
+  (let ((ref-re (eval-when-compile
+                 (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([0-9a-fA-F]+\\)\\|%\\("
+                         xml-name-re "\\)\\);")))
+       children)
+    (while (string-match ref-re string)
+      (push (substring string 0 (match-beginning 0)) children)
+      (let ((remainder (substring string (match-end 0)))
+           ref val)
+       (cond ((setq ref (match-string 1 string))
+              ;; Decimal character reference
+              (setq val (decode-char 'ucs (string-to-number ref)))
+              (if val (push (string val) children)))
+             ;; Hexadecimal character reference
+             ((setq ref (match-string 2 string))
+              (setq val (decode-char 'ucs (string-to-number ref 16)))
+              (if val (push (string val) children)))
+             ;; Parameter entity reference
+             ((setq ref (match-string 3 string))
+              (setq val (assoc ref xml-parameter-entity-alist))
+              (if val
+                  (push (cdr val) children)
+                (push (concat "%" ref ";") children))))
+       (setq string remainder)))
+    (mapconcat 'identity (nreverse (cons string children)) "")))
+
 (defun xml-parse-elem-type (string)
   "Convert element type STRING into a Lisp structure."
 
@@ -864,15 +891,12 @@
 (defalias 'xml-print 'xml-debug-print)
 
 (defun xml-escape-string (string)
-  "Return the string with entity substitutions made from
-xml-entity-alist."
+  "Return STRING with entity substitutions made from `xml-entity-alist'."
   (mapconcat (lambda (byte)
                (let ((char (char-to-string byte)))
                  (if (rassoc char xml-entity-alist)
                      (concat "&" (car (rassoc char xml-entity-alist)) ";")
                    char)))
-             ;; This differs from the non-unicode branch.  Just
-             ;; grabbing the string works here.
              string ""))
 
 (defun xml-debug-print-internal (xml indent-string)


reply via email to

[Prev in Thread] Current Thread [Next in Thread]