emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] scratch/fontify-open-string 4a9b24e: Initial commit. Allow


From: Alan Mackenzie
Subject: [Emacs-diffs] scratch/fontify-open-string 4a9b24e: Initial commit. Allow wanted fontification of open string in any mode.
Date: Sun, 1 Jul 2018 08:11:21 -0400 (EDT)

branch: scratch/fontify-open-string
commit 4a9b24e1780c980d033b44f3c86133bbab691ebe
Author: Alan Mackenzie <address@hidden>
Commit: Alan Mackenzie <address@hidden>

    Initial commit.  Allow wanted fontification of open string in any mode.
    
    The wanted fontification is for the string face to end at the first 
unescaped
    newline.  This is achieved by a new syntax flag `s' on NL, which means
    "terminate any open string".
    
    src/syntax.c (SYNTAX_FLAGS_CLOSE_STRING, back_maybe_string): New functions.
    (Fstring_to_syntax, Finternal_describe_syntax_value, scan_lists)
    (scan_sexps_forward): Adapt to handle the `s' flag.
    
    lisp/font-lock.el (font-lock-warn-open-string): New defcustom.
    (font-lock-fontify-syntactically-region): Enhance to fontify " with
    warning-face.
    
    lisp/progmodes/sh-script.el (sh-mode-syntax-table): Add flag `s' to syntax
    entry for \n.
---
 lisp/font-lock.el           |  24 +++++-
 lisp/progmodes/sh-script.el |   2 +-
 src/syntax.c                | 193 +++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 208 insertions(+), 11 deletions(-)

diff --git a/lisp/font-lock.el b/lisp/font-lock.el
index be9fb4d..f2b7fef 100644
--- a/lisp/font-lock.el
+++ b/lisp/font-lock.el
@@ -287,6 +287,16 @@ If a number, only buffers greater than this size have 
fontification messages."
                 (integer :tag "size"))
   :group 'font-lock
   :version "24.1")
+
+(defcustom font-lock-warn-open-string t
+  "Fontify the opening quote of an unterminated string with warning face?
+This is done when this variable is non-nil.
+
+This works only when the syntax-table entry for newline contains the flag `s'
+\(see page \"xxx\" in the Elisp manual)."
+  :type 'boolean
+  :group 'font-lock
+  :version "27.1")
 
 
 ;; Originally these variable values were face names such as `bold' etc.
@@ -1597,18 +1607,30 @@ START should be at the beginning of a line."
              (replace-regexp-in-string "^ *" "" comment-end))))
         ;; Find the `start' state.
         (state (syntax-ppss start))
-        face beg)
+        face beg in-string s-c-start)
     (if loudly (message "Fontifying %s... (syntactically...)" (buffer-name)))
     ;;
     ;; Find each interesting place between here and `end'.
     (while
        (progn
          (when (or (nth 3 state) (nth 4 state))
+            (setq s-c-start (nth 8 state))
+            (setq in-string (nth 3 state))
            (setq face (funcall font-lock-syntactic-face-function state))
            (setq beg (max (nth 8 state) start))
            (setq state (parse-partial-sexp (point) end nil nil state
                                            'syntax-table))
            (when face (put-text-property beg (point) 'face face))
+;;;; NEW STOUGH, 2018-06-29
+            (put-text-property s-c-start (1+ s-c-start)
+                               'face
+                               (if (and font-lock-warn-open-string
+                                        in-string
+                                        (not (nth 3 state))
+                                        (not (eq in-string (char-before))))
+                                   'font-lock-warning-face
+                                 face))
+;;;; END OF NEW STOUGH
            (when (and (eq face 'font-lock-comment-face)
                        (or font-lock-comment-start-skip
                           comment-start-skip))
diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index aaa86b5..bf760e0 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -429,7 +429,7 @@ name symbol."
 (defvar sh-mode-syntax-table
   (sh-mode-syntax-table ()
        ?\# "<"
-       ?\n ">#"
+       ?\n ">#s"
        ?\" "\"\""
        ?\' "\"'"
        ?\` "\"`"
diff --git a/src/syntax.c b/src/syntax.c
index c5a4b03..b82b091 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -33,7 +33,7 @@ along with GNU Emacs.  If not, see 
<https://www.gnu.org/licenses/>.  */
 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
 
-/* Eight single-bit flags have the following meanings:
+/* Nine single-bit flags have the following meanings:
   1. This character is the first of a two-character comment-start sequence.
   2. This character is the second of a two-character comment-start sequence.
   3. This character is the first of a two-character comment-end sequence.
@@ -42,6 +42,7 @@ along with GNU Emacs.  If not, see 
<https://www.gnu.org/licenses/>.  */
   6. The char is part of a delimiter for comments of style "b".
   7. This character is part of a nestable comment sequence.
   8. The char is part of a delimiter for comments of style "c".
+  9. The char will close an open string (except one opened by a string-fence).
   Note that any two-character sequence whose first character has flag 1
   and whose second character has flag 2 will be interpreted as a comment start.
 
@@ -108,7 +109,11 @@ SYNTAX_FLAGS_COMMENT_NESTED (int flags)
 {
   return (flags >> 22) & 1;
 }
-
+static bool
+SYNTAX_FLAGS_CLOSE_STRING (int flags)
+{
+  return (flags >> 24) & 1;
+}
 /* FLAGS should be the flags of the main char of the comment marker, e.g.
    the second for comstart and the first for comend.  */
 static int
@@ -1206,6 +1211,10 @@ the value of a `syntax-table' text property.  */)
       case 'c':
        val |= 1 << 23;
        break;
+
+      case 's':
+        val |= 1 << 24;
+        break;
       }
 
   if (val < ASIZE (Vsyntax_code_object) && NILP (match))
@@ -1257,6 +1266,8 @@ c (on any of its chars) using this flag:
  p means CHAR is a prefix character for `backward-prefix-chars';
    such characters are treated as whitespace when they occur
    between expressions.
+ s means CHAR will terminate any open string (except one started by a
+   character with generic string fence syntax).
 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE)  */)
   (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
 {
@@ -1294,7 +1305,8 @@ DEFUN ("internal-describe-syntax-value", 
Finternal_describe_syntax_value,
   (Lisp_Object syntax)
 {
   int code, syntax_code;
-  bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
+  bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested,
+    strclose;
   char str[2];
   Lisp_Object first, match_lisp, value = syntax;
 
@@ -1335,6 +1347,7 @@ DEFUN ("internal-describe-syntax-value", 
Finternal_describe_syntax_value,
   comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
   comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
   comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
+  strclose = SYNTAX_FLAGS_CLOSE_STRING (syntax_code);
 
   if (Smax <= code)
     {
@@ -1368,6 +1381,8 @@ DEFUN ("internal-describe-syntax-value", 
Finternal_describe_syntax_value,
     insert ("c", 1);
   if (comnested)
     insert ("n", 1);
+  if (strclose)
+    insert ("s", 1);
 
   insert_string ("\twhich means: ");
 
@@ -1439,6 +1454,9 @@ DEFUN ("internal-describe-syntax-value", 
Finternal_describe_syntax_value,
       insert1 (Fsubstitute_command_keys (prefixdoc));
     }
 
+  if (strclose)
+    insert_string (",\n\t   will close any string started by a char with \" 
syntax");
+
   return syntax;
 }
 
@@ -2637,6 +2655,144 @@ syntax_multibyte (int c, bool multibyte_symbol_p)
   return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
 }
 
+static bool
+back_maybe_string (ptrdiff_t *from, ptrdiff_t *from_byte,
+                   ptrdiff_t stop, bool multibyte_symbol_p)
+{
+  unsigned short int quit_count = 0;
+  enum syntaxcode code = Smax;
+  int syntax = Smax, prev_syntax;
+  ptrdiff_t at = *from, at_byte = *from_byte;
+  ptrdiff_t targ, targ_byte;
+  int c, stringterm;
+  ptrdiff_t defun_start;
+  ptrdiff_t defun_start_byte;
+
+#define DEC_AT                                                  \
+  do {                                                          \
+    rarely_quit (++quit_count);                                 \
+    prev_syntax = syntax;                                       \
+    DEC_BOTH (at, at_byte);                                     \
+    if (at >= stop)                                             \
+      UPDATE_SYNTAX_TABLE_BACKWARD (at);                        \
+    if (char_quoted (at, at_byte))                              \
+      {                                                         \
+        DEC_BOTH (at, at_byte);                                 \
+        syntax = code = Sword;                                  \
+      }                                                         \
+    else                                                        \
+      {                                                         \
+        c = FETCH_CHAR_AS_MULTIBYTE (at_byte);                  \
+        syntax = SYNTAX_WITH_FLAGS (c);                         \
+        code = syntax_multibyte (c, multibyte_symbol_p);        \
+      }                                                         \
+    if (SYNTAX_FLAGS_COMSTART_FIRST (syntax)                    \
+        && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax))          \
+      code = Scomment;                                          \
+  } while (0)
+
+  /* Find the alleged string opener. */
+  while ((at > stop)
+         && (code != Sstring)
+         && (!SYNTAX_FLAGS_CLOSE_STRING (syntax)))
+    {
+      DEC_AT;
+    }
+  if (code != Sstring)
+    goto lose;
+  stringterm = c;
+  targ = at;
+  targ_byte = at_byte;
+
+  /* Now go back over paired delimiters which are STRINGTERM.   */
+  while (true)                  /* One quoted string per iteration. */
+    {
+      DEC_AT;
+      /* Search back for a terminating string delimiter: */
+      while ((at > stop)
+             && (code != Sstring)
+             && (code != Sstring_fence)
+             && (!SYNTAX_FLAGS_CLOSE_STRING (syntax)))
+        {
+          DEC_AT;
+          /* Check for comment and "other" strings. */
+        }
+      if ((at <= stop)
+          || SYNTAX_FLAGS_CLOSE_STRING (syntax))
+        goto done;
+      if (code == Sstring_fence)
+        stringterm = ST_STRING_STYLE;
+      else if (code == Sstring)
+        stringterm = c;
+      /* Now search back for the matching opening string delimiter: */
+      DEC_AT;
+      while ((at > stop)
+             && !((stringterm == ST_STRING_STYLE)
+                  && (syntax == Sstring_fence))
+             && !((c == stringterm)
+                  && (syntax == Sstring))
+             && (!SYNTAX_FLAGS_CLOSE_STRING (syntax)))
+        {
+          if ((syntax == Sstring_fence)
+              || (syntax == Sstring)
+              || (syntax == Scomment))
+            goto lossage;
+          DEC_AT;
+        }
+      if ((at <= stop)
+          || SYNTAX_FLAGS_CLOSE_STRING (syntax))
+        goto lose;         /* Even number of string delims in line. */
+    }
+
+ done:
+  UPDATE_SYNTAX_TABLE_FORWARD (targ);
+  *from = targ;
+  *from_byte = targ_byte;
+  return true;
+ lose:
+  UPDATE_SYNTAX_TABLE_FORWARD (*from);
+  return false;
+
+ lossage:
+  /* We've encountered possible comments or strings with mixed
+     delimiters.  Bail out and scan forward from a safe position. */
+  {
+    struct lisp_parse_state state;
+    bool adjusted = true;
+
+    defun_start = find_defun_start (*from, *from_byte);
+    defun_start_byte = find_start_value_byte;
+    adjusted = (defun_start > BEGV);
+    internalize_parse_state (Qnil, &state);
+    scan_sexps_forward (&state,
+                        defun_start, defun_start_byte,
+                        *from, TYPE_MINIMUM (EMACS_INT),
+                        0, 0);
+    if (!adjusted)
+      {
+        adjusted = true;
+        find_start_value
+          = CONSP (state.levelstarts) ? XINT (XCAR (state.levelstarts))
+          : state.thislevelstart >= 0 ? state.thislevelstart
+          : find_start_value;
+        find_start_value_byte = CHAR_TO_BYTE (find_start_value);
+      }
+
+    if ((state.instring != -1)
+        && (state.instring != ST_STRING_STYLE)
+        && (state.comstr_start >= stop))
+      {
+        UPDATE_SYNTAX_TABLE_BACKWARD (state.comstr_start);
+        *from = state.comstr_start;
+        *from_byte = CHAR_TO_BYTE (*from);
+        return true;
+      }
+    /* Syntax table is already valid at *FROM, after the
+       `scan_sexps_forward' */
+    return false;
+  }
+}
+
 static Lisp_Object
 scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
 {
@@ -2803,13 +2959,16 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT 
depth, bool sexpflag)
              while (1)
                {
                  enum syntaxcode c_code;
+                  int c_code_flags;
                  if (from >= stop)
                    goto lose;
                  UPDATE_SYNTAX_TABLE_FORWARD (from);
                  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
                  c_code = syntax_multibyte (c, multibyte_symbol_p);
+                  c_code_flags = SYNTAX_WITH_FLAGS (c);
                  if (code == Sstring
-                     ? c == stringterm && c_code == Sstring
+                     ? (c == stringterm && c_code == Sstring)
+                      || SYNTAX_FLAGS_CLOSE_STRING (c_code_flags)
                      : c_code == Sstring_fence)
                    break;
 
@@ -2965,6 +3124,10 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT 
depth, bool sexpflag)
                 for very little gain, so we don't bother either.  -sm */
              if (found)
                from = out_charpos, from_byte = out_bytepos;
+              else if (SYNTAX_FLAGS_CLOSE_STRING (syntax)
+                       && back_maybe_string (&from, &from_byte, stop,
+                                             multibyte_symbol_p))
+                goto done2;
              break;
 
            case Scomment_fence:
@@ -3006,7 +3169,14 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT 
depth, bool sexpflag)
                }
              if (!depth && sexpflag) goto done2;
              break;
-           default:
+           case Swhitespace:
+            case Spunct:
+              if (SYNTAX_FLAGS_CLOSE_STRING (syntax)
+                  && back_maybe_string (&from, &from_byte, stop,
+                                        multibyte_symbol_p))
+                goto done2;
+              break;
+            default:
              /* Ignore whitespace, punctuation, quote, endcomment.  */
              break;
            }
@@ -3046,7 +3216,7 @@ function scans over parentheses until the depth goes to 
zero COUNT
 times.  Hence, positive DEPTH moves out that number of levels of
 parentheses, while negative DEPTH moves to a deeper level.
 
-Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
+Comments are skipped over if `parse-sexp-ignore-comments' is non-nil.
 
 If we reach the beginning or end of the accessible part of the buffer
 before we have scanned over COUNT lists, return nil if the depth at
@@ -3065,7 +3235,7 @@ DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
 If COUNT is negative, scan backwards.
 Returns the character number of the position thus found.
 
-Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
+Comments are skipped over if `parse-sexp-ignore-comments' is non-nil.
 
 If the beginning or end of (the accessible part of) the buffer is reached
 in the middle of a parenthetical grouping, an error is signaled.
@@ -3396,10 +3566,12 @@ do { prev_from = from;                          \
              {
                int c;
                enum syntaxcode c_code;
+                int c_code_flags;
 
                if (from >= end) goto done;
                c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
                c_code = SYNTAX (c);
+                c_code_flags = SYNTAX_WITH_FLAGS (c);
 
                /* Check C_CODE here so that if the char has
                   a syntax-table property which says it is NOT
@@ -3421,9 +3593,12 @@ do { prev_from = from;                           \
                    break;
 
                  default:
-                   break;
+                    if (nofence
+                        && SYNTAX_FLAGS_CLOSE_STRING (c_code_flags))
+                      goto string_end;
+                    break;
                  }
-               INC_FROM;
+                INC_FROM;
                rarely_quit (++quit_count);
              }
          }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]