;; 1: basic ;;
;;
A paragraph
_
_ ;; undo the changes: ;; ;; 8: an element that is sometimes empty, sometimes not ;; 8.1 ;;
first para
second para
fourth
this is important _ ;; ;; 9.2
text
) - Emits an error if the nesting of elements is wrong (regardless of `html-close-tag-enforce-xhtml'). This can be turned off with `html-close-tag-ignore-nesting' (but use of this is discouraged; it rarely fixes the problem) - If a tag is not configured to be EMPTY (i.e.
,
to be empty and use it like
) - If it exits with an error, point is left on the spot where the error was found (with one exception, if \"brokenEndComment\" is the first token) - comment-end tokens with whitespace between \"--\" and '>' are not supported (allowed by HTML4: section 3.2.4), because they aren't supported in xhtml " (interactive) (let ((pos (point)) (comment-or-CDATAsec nil) ;; endComment | endCDATAsection (tag-name) (tag-type) ;; start tag | end tag | empty-element tag .. (tags-closed ()) ;; list of (names of) end-tags (done nil)) (if (html-close-tag-in-comment-or-cdata-section-p) (error "Can't close tag when in a comment or CDATA section")) (while (and (not done) (re-search-backward ;;\1=startComment;\2=endComment;\3=startCDATAsection ;;\4=endCDATAsection;\5=brokenEndComment;\6=tag (concat "\\(\\)\\|\\(\\)" ;; comment according to HTML4 spec (section 3.2.4) ;; => not supported "\\|\\(--[ \t\r\n]*>\\)" ;; tag ;; '<' may not be used unescaped in xml (I assume ;; that this is true for html/sgml as well) "\\|\\(?[A-Za-z_:]\\)") nil t)) (cond ((match-string 1) ;; start of comment (if (null comment-or-CDATAsec) (error "%s: Comment not terminated." (what-line))) (if (equal comment-or-CDATAsec 'endComment) (setq comment-or-CDATAsec nil))) ((match-string 2) ;; end of comment (if (null comment-or-CDATAsec) (setq comment-or-CDATAsec 'endComment) (if (equal comment-or-CDATAsec 'endComment) (error "%s: Nested comments not allowed" (what-line))))) ((match-string 3) ;; start of CDATA section (xhtml) (if (null comment-or-CDATAsec) (error "%s: CDATA section not terminated" (what-line))) (if (equal comment-or-CDATAsec 'endCDATAsection) (setq comment-or-CDATAsec nil))) ((match-string 4) ;; end of CDATA section (xhtml) (if (null comment-or-CDATAsec) (setq comment-or-CDATAsec 'endCDATAsection) (if (equal comment-or-CDATAsec 'endCDATAsection) (error "%s: Nested CDATA-sections not allowed." (what-line))))) ((match-string 5) ;; broken end-comment token, i.e. 
"-- >" (error (concat "%s: Don't use whitespace between \"--\" and '>' " "because it's not supported by xhtml.") (what-line))) ((match-string 6) ;; tag ;; ignore if in a comment or CDATA section (if (null comment-or-CDATAsec) ;; find tag-name and tag-type ;; TODO: maybe this can be done with a regular expression ? ;; (something like sgml-start-tag-regexp) (progn (let ((start-pos (point)) (quote-token nil)) ;; "'" | '"' | nil (forward-char) ;; skip '<' (if (char-equal (char-after) ?/) (progn ;; end-tag (setq tag-type 'end-tag) (forward-char) ;; skip '/' (setq tag-name (thing-at-point 'word)) (if (null tag-name) (error "[Internal error]: Couldn't find end-tag-name: %s" (what-line)))) ;; either start-tag or empty-element tag (progn (setq tag-name (thing-at-point 'word)) (if (null tag-name) (error "[Internal error]: Couldn't find tag-name for start-tag: %s" (what-line))) ;; exit once unquoted '/' or '>' is found: ;; in HTML, unquoted attribute values may only contain ;; [A-Za-z0-9-.] (section 3.2.2 of the html4 spec); ;; in xhtml all attribute values are quoted. 
(while (not (and (or (char-equal (char-after) ?/) (char-equal (char-after) ?>)) (null quote-token))) (if (and (char-equal (char-after) ?\") (not (char-equal (char-before) ?\\))) ;; unescaped ?\" (cond ((null quote-token) ;; start of quoted content (setq quote-token ?\")) ;; end of quoted content ((char-equal quote-token ?\") (setq quote-token nil)) ;; quote-token == ?\' => part of quoted content ;; => ignore )) (if (and (char-equal (char-after) ?\') (not (char-equal (char-before) ?\\))) ;; unescaped ?\' (cond ((null quote-token) ;; start of quoted content (setq quote-token ?\')) ;; end of quoted content ((char-equal quote-token ?\') (setq quote-token nil)) ;; quote-token == ?\" => part of quoted content ;; => ignore )) (forward-char)) (cond ((char-equal (char-after) ?/) (setq tag-type 'empty-element-tag)) ((html-close-tag-empty-html-tag-p tag-name) (setq tag-type 'html-empty-tag)) ((member-ignore-case tag-name html-close-tag-extra-empty-tags) (setq tag-type 'html-configured-to-be-empty-tag)) (t (setq tag-type 'start-tag))) )) (goto-char start-pos)) ;; it's a tag (outside comment and CDATA section) ! (cond ((equal tag-type 'end-tag) ;; end-tag (push tag-name tags-closed)) ((equal tag-type 'html-configured-to-be-empty-tag) ;; the pop is needed to prevent "Invalid nesting" if an element ;; is configured to be empty but is closed anyway ;; note: no need to use car-safe here because nil counts as ;; a list ((car nil) is okay, (car "foo") is not) (if (equal (if tags-closed (downcase (car tags-closed)) nil) (downcase tag-name)) (pop tags-closed) (if html-close-tag-enforce-xhtml (error (concat "%s: xhtml: You should change <%s> to an " "empty-element tag (<%s />) or\n" "add an end-tag (<%s>Content%s>, " "recommended).") (what-line) tag-name (downcase tag-name) (downcase tag-name) (downcase tag-name))))) ((equal tag-type 'html-empty-tag) ;; HTML "empty" start-tag (i.e. 
) ;; these 4 lines would be used to prevent "Invalid nesting" if ;; an "empty" element is closed with no content, like ;; . The problem is that it would allow ;; things like text ;;(if (equal (if tags-closed ;; (downcase (car tags-closed)) ;; nil) (downcase tag-name)) ;; (pop tags-closed) (if html-close-tag-enforce-xhtml (error (concat "%s: xhtml: You should change <%s> to an " "empty-element tag (<%s />).") (what-line) tag-name (downcase tag-name)))) ((equal tag-type 'start-tag) ;; start-tag (if tags-closed ;; find a matching end-tag (if (and (not (string-equal (downcase (pop tags-closed)) (downcase tag-name))) (not html-close-tag-ignore-nesting)) (error (concat "%s: Invalid nesting of elements. " "The problem *may* be that <%s> may\n" "or may not be empty. Try to add \"%s\" to " "`html-close-tag-extra-empty-tags'.") (what-line) tag-name (downcase tag-name))) ;; tag-name is the candidate because there are no ;; unmatched end-tags (setq done t))))))))) (goto-char pos) (if done ;; TODO: indent (indent-for-tab-command); we need ;; (beginning-of-line) in order for this to work, and that's ;; not always wanted (insert "" tag-name ">") ;; exit was through re-search-backward==nil (message "html-close-tag: Nothing to do")) ))