findutils-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Findutils-patches] [PATCH] Enhancements to the code smell detector.


From: James Youngman
Subject: [Findutils-patches] [PATCH] Enhancements to the code smell detector.
Date: Sat, 30 Jun 2007 13:02:28 +0100

2007-06-30  James Youngman  <address@hidden>

        Enhancements to the code smell detector.
        * Makefile.am (findutils-check-smells): Automate the calling of
        build-aux/src-sniff.py.
        * build-aux/src-sniff.py: Rework to use a list of regex-based
        sniffers, to allow checking types of file other than C.  Allow
        file-based regex sniffers to give an indication of the line number
        where they think the problem (or part of the problem) exists.
        Added code smell detectors for a sample Bourne shell problem and
        for out-of-date FSF addresses.

Signed-off-by: James Youngman <address@hidden>
---
 ChangeLog              |   10 +++
 Makefile.am            |   10 +++
 build-aux/src-sniff.py |  165 ++++++++++++++++++++++++++++++++++-------------
 3 files changed, 139 insertions(+), 46 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index d983111..f5dd3b0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2007-06-30  James Youngman  <address@hidden>
+
+       Enhancements to the code smell detector.
+       * Makefile.am (findutils-check-smells): Automate the calling of
+       build-aux/src-sniff.py. 
+       * build-aux/src-sniff.py: Rework to use a list of regex-based
+       sniffers, to allow checking types of file other than C.  Allow
+       file-based regex sniffers to give an indication of the line number
+       where they think the problem (or part of the problem) exists.
+
 2007-06-29  James Youngman  <address@hidden>
 
        * find/find.1 (EXAMPLES): Added an example of using find and cpio -p
diff --git a/Makefile.am b/Makefile.am
index 0f5bf88..e47dbe1 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -54,3 +54,13 @@ findutils-check-testfiles:
        $(AUXDIR)/check-testfiles.sh "$(distdir)" "$(srcdir)" 
$(TESTFILE_SUFFIXES)
 
 
+findutils-check-smells:
+       find $(srcdir) \( -path $(srcdir)/autom4te.cache -o  \
+                           -path $(srcdir)/gnulib-cvs     -o  \
+                           -path $(srcdir)/gnulib         -o  \
+                           -name .git             -o  \
+                           \( -type d -name CVS \)  \
+                       \) -prune -o                  \
+       -type f \! \( -name '*~' -o -name '*.xo' -o -name '*.xi' \) \
+       -print0 | \
+       xargs -0 python $(AUXDIR)/src-sniff.py
diff --git a/build-aux/src-sniff.py b/build-aux/src-sniff.py
index 85e6506..b571021 100644
--- a/build-aux/src-sniff.py
+++ b/build-aux/src-sniff.py
@@ -4,10 +4,69 @@
 import re
 import sys
 
+C_ISH_FILENAME = "\.(c|cc|h|cpp|cxx|hxx)$"
+C_ISH_FILENAME_RE = re.compile(C_ISH_FILENAME)
+C_MODULE_FILENAME_RE = re.compile("\.(c|cc|cpp|cxx)$")
 FIRST_INCLUDE = 'config.h'
 problems = 0
 
-LINE_SMELLS_SRC = [
+
+def Problem(**kwargs):
+    global problems
+    problems += 1
+    msg = kwargs['message']
+    if kwargs['line']:
+        location = "%(filename)s:%(line)d" % kwargs
+    else:
+        location = "%(filename)s" % kwargs
+    detail = msg % kwargs
+    print >>sys.stderr, "error: %s: %s" % (location, detail)
+
+
+class RegexSniffer(object):
+    def __init__(self, source, message):
+        super(RegexSniffer, self).__init__()
+        self._regex = re.compile(source)
+        self._msg = message
+    def Sniff(self, text, filename, line):
+        m = self._regex.search(text)
+        if m:
+            if line is None:
+                line = 1 + m.string.count('\n', 1, m.start(0))
+            args = {
+                'filename' : filename,
+                'line'     : line,
+                'fulltext' : text,
+                'matchtext': m.group(0),
+                'message'  : self._msg
+                }
+            Problem(**args)
+            
+
+class RegexChecker(object):
+    def __init__(self, regex, line_smells, file_smells):
+        super(RegexChecker, self).__init__(self)
+        self._regex = re.compile(regex)
+        self._line_sniffers = [RegexSniffer(s[0],s[1]) for s in line_smells]
+        self._file_sniffers = [RegexSniffer(s[0],s[1]) for s in file_smells]
+    def Check(self, filename, lines, fulltext):
+        if self._regex.search(filename):
+            # We recognise this type of file.
+            for line_number, line_text in lines:
+                for sniffer in self._line_sniffers:
+                    sniffer.Sniff(line_text, filename, line_number)
+            for sniffer in self._file_sniffers:
+                sniffer.Sniff(fulltext, filename, None)
+        else:
+            # We don't know how to check this file.  Skip it.
+            pass
+
+
+checkers = [
+    # Check C-like languages for C code smells.
+    RegexChecker(C_ISH_FILENAME_RE,
+    # line smells
+    [
     [r'(?<!\w)free \(\(', "don't cast the argument to free()"],
     [r'\*\) *x(m|c|re)alloc(?!\w)',"don't cast the result of x*alloc"],
     [r'\*\) *alloca(?!\w)',"don't cast the result of alloca"],
@@ -17,45 +76,65 @@ LINE_SMELLS_SRC = [
     [r'error \(EXIT_SUCCESS',"passing EXIT_SUCCESS to error is confusing"],
     [r'file[s]ystem', "prefer writing 'file system' to 'filesystem'"],
     [r'HAVE''_CONFIG_H', "Avoid checking HAVE_CONFIG_H"],
-#   [r'HAVE_FCNTL_H', "Avoid checking HAVE_FCNTL_H"],
+    #   [r'HAVE_FCNTL_H', "Avoid checking HAVE_FCNTL_H"],
     [r'O_NDELAY', "Avoid using O_NDELAY"],
     [r'the *the', "'the the' is probably not deliberate"],
     [r'(?<!\w)error \([^_"]*[^_]"[^"]*[a-z]{3}', "untranslated error message"],
     [r'^# *if\s+defined *\(', "useless parentheses in '#if defined'"],
-     
-    ]
-
-FILE_SMELLS_SRC = [
+    
+    ],
+    [
     [r'# *include <assert.h>(?!.*assert \()',
      "If you include <assert.h>, use assert()."],
     [r'# *include "quotearg.h"(?!.*(?<!\w)quotearg(_[^ ]+)? \()',
      "If you include \"quotearg.h\", use one of its functions."],
     [r'# *include "quote.h"(?!.*(?<!\w)quote(_[^ ]+)? \()',
      "If you include \"quote.h\", use one of its functions."],
-    [r'\s$', "trailing whitespace"],
+    ]),
+    # Check Makefiles for Makefile code smells.
+    RegexChecker('(^|/)[Mm]akefile(.am|.in)?', 
+                 [ [r'^ ', "Spaces at start of line"], ],
+                 []),
+    # Check everything for whitespace problems.
+    # RegexChecker('', [], [[r'\s$', "trailing whitespace"],]),
+    # Check everything for out of date addresses.
+    RegexChecker('', [], [
+    [r'675\s*Mass\s*Ave,\s*02139[^a-zA-Z]*USA',
+     "out of date FSF address"],
+    [r'59 Temple Place.*02111-?1307\s*USA',
+     "out of date FSF address"],
+    ]),
     ]
 
+
 # missing check: ChangeLog prefixes
 # missing: sc_always_defined_macros from coreutils
 # missing: sc_tight_scope
 
-COMPILED_LINE_SMELLS = [(re.compile(pong[0]), pong[1])
-                        for pong in LINE_SMELLS_SRC]
-COMPILED_FILE_SMELLS = [(re.compile(pong[0], re.DOTALL), pong[1])
-                        for pong in FILE_SMELLS_SRC]
-
-def Problem(filename, desc):
-    global problems
-    print >> sys.stderr, "error: %s: %s" % (filename, desc)
-    problems += 1
 
 def Warning(filename, desc):
     print >> sys.stderr, "warning: %s: %s" % (filename, desc)
 
 
 def BuildIncludeList(text):
+    """Build a list of included files, with line numbers.
+    Args:
+      text: the full text of the source file
+    Returns:
+      [  ('config.h',32), ('assert.h',33), ... ]
+    """
     include_re = re.compile(r'# *include +[<"](.*)[>"]')
-    return [m.group(1) for m in include_re.finditer(text)]
+    includes = []
+    last_include_pos = 1
+    line = 1
+    for m in include_re.finditer(text):
+        header = m.group(1)
+        # Count only the number of lines between the last include and
+        # this one.  Counting them from the beginning would be quadratic.
+        line += m.string.count('\n', last_include_pos, m.start(0))
+        last_include_pos = m.end()
+        includes.append( (header,line) )
+    return includes
 
 
 def CheckStatHeader(filename, lines, fulltext):
@@ -65,8 +144,8 @@ def CheckStatHeader(filename, lines, fulltext):
     for line in lines:
         m = stat_use_re.search(line[1])
         if m:
-            msgfmt = "%d: If you use struct stat, you must #include 
<sys/stat.h> first"
-            Problem(filename, msgfmt % line[0])
+            msg = "If you use struct stat, you must #include <sys/stat.h> 
first"
+            Problem(filename = filename, line = line[0], message = msg)
             # Diagnose only once
             break
         m = stat_hdr_re.search(line[1])
@@ -75,37 +154,31 @@ def CheckStatHeader(filename, lines, fulltext):
 
 def CheckFirstInclude(filename, lines, fulltext):
     includes = BuildIncludeList(fulltext)
-    if includes and includes[0] != FIRST_INCLUDE:
-        if FIRST_INCLUDE in includes:
-            fmt = "%s is first include file, but %s should be included first"
-            Problem(filename, fmt % (includes[0], FIRST_INCLUDE))
-    if FIRST_INCLUDE not in includes:
+    #print "Include map:"
+    #for name, line in includes:
+    #    print "%s:%d: %s" % (filename, line, name)
+    if includes:
+        actual_first_include = includes[0][0]
+    else:
+        actual_first_include = None
+    if actual_first_include and actual_first_include  != FIRST_INCLUDE:
+        if FIRST_INCLUDE in [inc[0] for inc in includes]:
+            msg = ("%(actual_first_include)s is the first included file, "
+                   "but %(required_first_include)s should be included first")
+            Problem(filename=filename, line=includes[0][1], message=msg,
+                    actual_first_include=actual_first_include,
+                    required_first_include = FIRST_INCLUDE)
+    if FIRST_INCLUDE not in [inc[0] for inc in includes]:
         Warning(filename,
                 "%s should be included by most files" % FIRST_INCLUDE)
 
-def CheckLineSmells(filename, lines, fulltext):
-    for line in lines:
-        for smell in COMPILED_LINE_SMELLS:
-            match = smell[0].search(line[1])
-            if match:
-                Problem(filename, '%d: "%s": %s'
-                        % (line[0], match.group(0), smell[1]))
-            
-
-def CheckFileSmells(filename, lines, fulltext):
-    for smell in COMPILED_FILE_SMELLS:
-        match = smell[0].search(fulltext)
-        if match:
-            Problem(filename, ' %s'
-                    % (smell[1],))
-            
-
-
 
 def SniffSourceFile(filename, lines, fulltext):
-    for sniffer in [CheckFirstInclude, CheckStatHeader,
-                    CheckLineSmells, CheckFileSmells]:
-        sniffer(filename, lines, fulltext)
+    if C_MODULE_FILENAME_RE.search(filename):
+        CheckFirstInclude(filename, lines, fulltext)
+        CheckStatHeader  (filename, lines, fulltext)
+    for checker in checkers:
+        checker.Check(filename, lines, fulltext)
 
 
 def main(args):
@@ -117,7 +190,7 @@ def main(args):
         for line in f.readlines():
             lines.append( (line_number, line) )
             line_number += 1
-        fulltext = '\n'.join([line[1] for line in lines])
+        fulltext = ''.join([line[1] for line in lines])
         SniffSourceFile(srcfile, lines, fulltext)
         f.close()
     if problems:
-- 
1.5.2.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]