[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] /srv/bzr/emacs/trunk r108807: * xml.el: Implement XML para
From: |
Chong Yidong |
Subject: |
[Emacs-diffs] /srv/bzr/emacs/trunk r108807: * xml.el: Implement XML parameter entities. |
Date: |
Sat, 30 Jun 2012 19:33:22 +0800 |
User-agent: |
Bazaar (2.5.0) |
------------------------------------------------------------
revno: 108807
committer: Chong Yidong <address@hidden>
branch nick: trunk
timestamp: Sat 2012-06-30 19:33:22 +0800
message:
* xml.el: Implement XML parameter entities.
(xml-parameter-entity-alist): New variable.
(xml-parse-region, xml-parse-fragment): Preserve previous values
of xml-entity-alist and xml-parameter-entity-alist, so that
repeated calls on different documents do not change them.
(xml-parse-tag): Fix doctype regexp.
(xml--entity-replacement-text): New function.
(xml-parse-dtd): Use it. Don't handle system entities; doing that
properly requires url retrieval which is unimplemented.
(xml-escape-string): Doc fix.
modified:
lisp/ChangeLog
lisp/xml.el
=== modified file 'lisp/ChangeLog'
--- a/lisp/ChangeLog 2012-06-30 02:00:08 +0000
+++ b/lisp/ChangeLog 2012-06-30 11:33:22 +0000
@@ -1,3 +1,16 @@
+2012-06-30 Chong Yidong <address@hidden>
+
+ * xml.el: Implement XML parameter entities.
+ (xml-parameter-entity-alist): New variable.
+ (xml-parse-region, xml-parse-fragment): Preserve previous values
+ of xml-entity-alist and xml-parameter-entity-alist, so that
+ repeated calls on different documents do not change them.
+ (xml-parse-tag): Fix doctype regexp.
+ (xml--entity-replacement-text): New function.
+ (xml-parse-dtd): Use it. Don't handle system entities; doing that
+ properly requires url retrieval which is unimplemented.
+ (xml-escape-string): Doc fix.
+
2012-06-30 Stefan Monnier <address@hidden>
* emacs-lisp/cl-lib.el (cl-pushnew): Use macroexp-let2.
=== modified file 'lisp/xml.el'
--- a/lisp/xml.el 2012-06-24 15:06:24 +0000
+++ b/lisp/xml.el 2012-06-30 11:33:22 +0000
@@ -95,10 +95,13 @@
("apos" . "'")
("quot" . "\"")
("amp" . "&"))
- "The defined entities. Entities are added to this when the DTD is parsed.")
+ "Alist of defined XML entities.")
+
+(defvar xml-parameter-entity-alist nil
+ "Alist of defined XML parametric entities.")
(defvar xml-sub-parser nil
- "Dynamically set this to a non-nil value if you want to parse an XML
fragment.")
+ "Non-nil when the XML parser is parsing an XML fragment.")
(defvar xml-validating-parser nil
"Set to non-nil to get validity checking.")
@@ -308,6 +311,9 @@
;; specs DTRT.
(with-syntax-table (standard-syntax-table)
(let ((case-fold-search nil) ; XML is case-sensitive.
+ ;; Prevent entity definitions from changing the defaults
+ (xml-entity-alist xml-entity-alist)
+ (xml-parameter-entity-alist xml-parameter-entity-alist)
xml result dtd)
(save-excursion
(if buffer
@@ -366,6 +372,9 @@
(defun xml-parse-fragment (&optional parse-dtd parse-ns)
"Parse xml-like fragments."
(let ((xml-sub-parser t)
+ ;; Prevent entity definitions from changing the defaults
+ (xml-entity-alist xml-entity-alist)
+ (xml-parameter-entity-alist xml-parameter-entity-alist)
children)
(while (not (eobp))
(let ((bit (xml-parse-tag
@@ -413,7 +422,7 @@
(buffer-substring-no-properties pos (match-beginning 0))
(xml-parse-string))))
;; DTD for the document
- ((looking-at "<!DOCTYPE")
+ ((looking-at "<!DOCTYPE[ \t\n\r]")
(let ((dtd (xml-parse-dtd parse-ns)))
(skip-syntax-forward " ")
(if xml-validating-parser
@@ -580,11 +589,11 @@
;; Get the name of the document
(looking-at xml-name-regexp)
(let ((dtd (list (match-string-no-properties 0) 'dtd))
- type element end-pos)
+ (xml-parameter-entity-alist xml-parameter-entity-alist))
(goto-char (match-end 0))
+ (skip-syntax-forward " ")
- (skip-syntax-forward " ")
- ;; XML [75]
+ ;; External subset (XML [75])
(cond ((looking-at "PUBLIC\\s-+")
(goto-char (match-end 0))
(unless (or (re-search-forward
@@ -607,119 +616,137 @@
(error "XML: Missing System ID"))
(push (list (match-string-no-properties 1) 'system) dtd)))
(skip-syntax-forward " ")
- (if (eq ?> (char-after))
- (forward-char)
- (if (not (eq (char-after) ?\[))
- (error "XML: Bad DTD")
- (forward-char)
- ;; Parse the rest of the DTD
- ;; Fixme: Deal with NOTATION, PIs.
- (while (not (looking-at "\\s-*\\]"))
- (skip-syntax-forward " ")
- (cond
-
- ;; Translation of rule [45] of XML specifications
- ((looking-at
- "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
-
- (setq element (match-string-no-properties 1)
- type (match-string-no-properties 2))
- (setq end-pos (match-end 0))
-
- ;; Translation of rule [46] of XML specifications
+
+ (if (eq (char-after) ?>)
+
+ ;; No internal subset
+ (forward-char)
+
+ ;; Internal subset (XML [28b])
+ (unless (eq (char-after) ?\[)
+ (error "XML: Bad DTD"))
+ (forward-char)
+
+ ;; Parse the rest of the DTD
+ ;; Fixme: Deal with NOTATION, PIs.
+ (while (not (looking-at "\\s-*\\]"))
+ (skip-syntax-forward " ")
+ (cond
+ ;; Element declaration [45]:
+ ((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
+ (let ((element (match-string-no-properties 1))
+ (type (match-string-no-properties 2))
+ (end-pos (match-end 0)))
+ ;; Translation of rule [46] of XML specifications
(cond
- ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration
+ ((string-match "^EMPTY[ \t\n\r]*$" type) ; empty declaration
(setq type 'empty))
- ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
+ ((string-match "^ANY[ \t\n\r]*$" type) ; any type of
contents
(setq type 'any))
- ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
+ ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ; children ([47])
(setq type (xml-parse-elem-type (match-string-no-properties 1
type))))
- ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
+ ((string-match "^%[^;]+;[ \t\n\r]*$" type) ; substitution
nil)
- (t
- (if xml-validating-parser
- (error "XML: (Validity) Invalid element type in the DTD"))))
+ (xml-validating-parser
+ (error "XML: (Validity) Invalid element type in the DTD")))
- ;; rule [45]: the element declaration must be unique
- (if (and (assoc element dtd)
- xml-validating-parser)
- (error "XML: (Validity) Element declarations must be unique in
a DTD (<%s>)"
- element))
+ ;; rule [45]: the element declaration must be unique
+ (and (assoc element dtd)
+ xml-validating-parser
+ (error "XML: (Validity) DTD element declarations must be
unique (<%s>)"
+ element))
;; Store the element in the DTD
(push (list element type) dtd)
- (goto-char end-pos))
-
- ;; Translation of rule [52] of XML specifications
- ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
- "\\)[ \t\n\r]*\\(" xml-att-def-re
- "\\)*[ \t\n\r]*>"))
-
- ;; We don't do anything with ATTLIST currently
- (goto-char (match-end 0)))
-
- ((looking-at "<!--")
- (search-forward "-->"))
- ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re
- "\\)[ \t\n\r]*\\(" xml-entity-value-re
- "\\)[ \t\n\r]*>"))
- (let ((name (match-string-no-properties 1))
- (value (substring (match-string-no-properties 2) 1
- (- (length (match-string-no-properties 2))
1))))
- (goto-char (match-end 0))
- (setq xml-entity-alist
- (append xml-entity-alist
- (list (cons name
- (with-temp-buffer
- (insert value)
- (goto-char (point-min))
- (xml-parse-fragment
- xml-validating-parser
- parse-ns))))))))
- ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re
- "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
- "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
- (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re
- "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
- "\"[-
\r\na-zA-Z0-9'()+,./:=?;address@hidden""
- "\\|'[-
\r\na-zA-Z0-9()+,./:=?;address@hidden'"
- "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
- "[ \t\n\r]*>")))
- (let ((name (match-string-no-properties 1))
- (file (substring (match-string-no-properties 2) 1
- (- (length (match-string-no-properties 2))
1))))
- (goto-char (match-end 0))
- (setq xml-entity-alist
- (append xml-entity-alist
- (list (cons name (with-temp-buffer
- (insert-file-contents file)
- (goto-char (point-min))
- (xml-parse-fragment
- xml-validating-parser
- parse-ns))))))))
- ;; skip parameter entity declarations
- ((or (looking-at (concat "<!ENTITY[ \t\n\r]+%[ \t\n\r]+\\("
xml-name-re
- "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
- "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
- (looking-at (concat "<!ENTITY[ \t\n\r]+"
- "%[ \t\n\r]+"
- "\\(" xml-name-re "\\)[ \t\n\r]+"
- "PUBLIC[ \t\n\r]+"
- "\\(\"[-
\r\na-zA-Z0-9'()+,./:=?;address@hidden""
- "\\|'[-
\r\na-zA-Z0-9()+,./:=?;address@hidden'\\)[ \t\n\r]+"
- "\\(\"[^\"]+\"\\|'[^']+'\\)"
- "[ \t\n\r]*>")))
- (goto-char (match-end 0)))
- ;; skip parameter entities
- ((looking-at (concat "%" xml-name-re ";"))
- (goto-char (match-end 0)))
- (t
- (when xml-validating-parser
- (error "XML: (Validity) Invalid DTD item"))))))
+ (goto-char end-pos)))
+
+ ;; Attribute-list declaration [52] (currently unsupported):
+ ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
+ "\\)[ \t\n\r]*\\(" xml-att-def-re
+ "\\)*[ \t\n\r]*>"))
+ (goto-char (match-end 0)))
+
+ ;; Comments (skip to end):
+ ((looking-at "<!--")
+ (search-forward "-->"))
+
+ ;; Internal entity declarations:
+ ((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+ xml-name-re "\\)[ \t\n\r]*\\("
+ xml-entity-value-re "\\)[ \t\n\r]*>"))
+ (let* ((name (prog1 (match-string-no-properties 2)
+ (goto-char (match-end 0))))
+ (alist (if (match-string 1)
+ 'xml-parameter-entity-alist
+ 'xml-entity-alist))
+ ;; Retrieve the deplacement text:
+ (value (xml--entity-replacement-text
+ ;; Entity value, sans quotation marks:
+ (substring (match-string-no-properties 3) 1 -1))))
+ ;; If the same entity is declared more than once, the
+ ;; first declaration is binding.
+ (unless (assoc name (symbol-value alist))
+ (set alist (cons (cons name value) (symbol-value alist))))))
+
+ ;; External entity declarations (currently unsupported):
+ ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+ xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
+ "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
+ (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+ xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
+ "\"[- \r\na-zA-Z0-9'()+,./:=?;address@hidden""
+ "\\|'[-
\r\na-zA-Z0-9()+,./:=?;address@hidden'"
+ "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
+ "[ \t\n\r]*>")))
+ (goto-char (match-end 0)))
+
+ ;; Parameter entity:
+ ((looking-at (concat "%\\(" xml-name-re "\\);"))
+ (goto-char (match-end 0))
+ (let* ((entity (match-string 1))
+ (end (point-marker))
+ (elt (assoc entity xml-parameter-entity-alist)))
+ (when elt
+ (replace-match (cdr elt) t t)
+ (goto-char end))))
+
+ ;; Anything else:
+ (xml-validating-parser
+ (error "XML: (Validity) Invalid DTD item"))))
+
(if (looking-at "\\s-*]>")
(goto-char (match-end 0))))
(nreverse dtd)))
+(defun xml--entity-replacement-text (string)
+ "Return the replacement text for the entity value STRING.
+The replacement text is obtained by replacing character
+references and parameter-entity references."
+ (let ((ref-re (eval-when-compile
+ (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([0-9a-fA-F]+\\)\\|%\\("
+ xml-name-re "\\)\\);")))
+ children)
+ (while (string-match ref-re string)
+ (push (substring string 0 (match-beginning 0)) children)
+ (let ((remainder (substring string (match-end 0)))
+ ref val)
+ (cond ((setq ref (match-string 1 string))
+ ;; Decimal character reference
+ (setq val (decode-char 'ucs (string-to-number ref)))
+ (if val (push (string val) children)))
+ ;; Hexadecimal character reference
+ ((setq ref (match-string 2 string))
+ (setq val (decode-char 'ucs (string-to-number ref 16)))
+ (if val (push (string val) children)))
+ ;; Parameter entity reference
+ ((setq ref (match-string 3 string))
+ (setq val (assoc ref xml-parameter-entity-alist))
+ (if val
+ (push (cdr val) children)
+ (push (concat "%" ref ";") children))))
+ (setq string remainder)))
+ (mapconcat 'identity (nreverse (cons string children)) "")))
+
(defun xml-parse-elem-type (string)
"Convert element type STRING into a Lisp structure."
@@ -864,15 +891,12 @@
(defalias 'xml-print 'xml-debug-print)
(defun xml-escape-string (string)
- "Return the string with entity substitutions made from
-xml-entity-alist."
+ "Return STRING with entity substitutions made from `xml-entity-alist'."
(mapconcat (lambda (byte)
(let ((char (char-to-string byte)))
(if (rassoc char xml-entity-alist)
(concat "&" (car (rassoc char xml-entity-alist)) ";")
char)))
- ;; This differs from the non-unicode branch. Just
- ;; grabbing the string works here.
string ""))
(defun xml-debug-print-internal (xml indent-string)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] /srv/bzr/emacs/trunk r108807: * xml.el: Implement XML parameter entities.,
Chong Yidong <=