qemu-trivial
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v3] scripts/checkpatch: Support codespell checking


From: Zhao Liu
Subject: [PATCH v3] scripts/checkpatch: Support codespell checking
Date: Fri, 5 Jan 2024 16:38:48 +0800

From: Zhao Liu <zhao1.liu@intel.com>

Add two spelling check options (--codespell and --codespellfile) to
enable dictionary-based spelling checks. The implementation is copied
from the Linux kernel's checkpatch.pl.

This check uses the dictionary at "/usr/share/codespell/dictionary.txt"
by default. If no dictionary exists at this path, it looks for the
dictionary shipped with python3's codespell (this requires the user to
have python3 in the $PATH environment variable and to install codespell
via "pip install codespell").

Tested-by: Yongwei Ma <yongwei.ma@intel.com>
Tested-by: Samuel Tardieu <sam@rfc1149.net>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
---
Changes since v2:
* Fix the code style. (Samuel)

v2: https://lore.kernel.org/qemu-devel/20231215103448.3822284-1-zhao1.liu@linux.intel.com/

Changes since v1:
* Drop the default dictionary "spelling.txt" and just support optional
  spelling check via --codespell and --codespellfile. (Thomas)

v1: https://lore.kernel.org/qemu-devel/20231204082917.2430223-1-zhao1.liu@linux.intel.com/
---
 scripts/checkpatch.pl | 125 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 105 insertions(+), 20 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 6e4100d2a41c..702689507412 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -35,6 +35,9 @@ my $summary_file = 0;
 my $root;
 my %debug;
 my $help = 0;
+my $codespell = 0;
+my $codespellfile = "/usr/share/codespell/dictionary.txt";
+my $user_codespellfile = "";
 
 sub help {
        my ($exitcode) = @_;
@@ -66,6 +69,9 @@ Options:
                              is all off)
   --test-only=WORD           report only warnings/errors containing WORD
                              literally
+  --codespell                Use the codespell dictionary for spelling/typos
+                             (default: $codespellfile)
+  --codespellfile            Use this codespell dictionary
   --color[=WHEN]             Use colors 'always', 'never', or only when output
                              is a terminal ('auto'). Default is 'auto'.
   -h, --help, --version      display this help and exit
@@ -85,28 +91,50 @@ foreach (@ARGV) {
 }
 
 GetOptions(
-       'q|quiet+'      => \$quiet,
-       'tree!'         => \$tree,
-       'signoff!'      => \$chk_signoff,
-       'patch!'        => \$chk_patch,
-       'branch!'       => \$chk_branch,
-       'emacs!'        => \$emacs,
-       'terse!'        => \$terse,
-       'f|file!'       => \$file,
-       'strict!'       => \$no_warnings,
-       'root=s'        => \$root,
-       'summary!'      => \$summary,
-       'mailback!'     => \$mailback,
-       'summary-file!' => \$summary_file,
-
-       'debug=s'       => \%debug,
-       'test-only=s'   => \$tst_only,
-       'color=s'       => \$color,
-       'no-color'      => sub { $color = 'never'; },
-       'h|help'        => \$help,
-       'version'       => \$help
+       'q|quiet+'              => \$quiet,
+       'tree!'                 => \$tree,
+       'signoff!'              => \$chk_signoff,
+       'patch!'                => \$chk_patch,
+       'branch!'               => \$chk_branch,
+       'emacs!'                => \$emacs,
+       'terse!'                => \$terse,
+       'f|file!'               => \$file,
+       'strict!'               => \$no_warnings,
+       'root=s'                => \$root,
+       'summary!'              => \$summary,
+       'mailback!'             => \$mailback,
+       'summary-file!'         => \$summary_file,
+       'debug=s'               => \%debug,
+       'test-only=s'           => \$tst_only,
+       'codespell!'            => \$codespell,
+       'codespellfile=s'       => \$user_codespellfile,
+       'color=s'               => \$color,
+       'no-color'              => sub { $color = 'never'; },
+       'h|help'                => \$help,
+       'version'               => \$help
 ) or help(1);
 
+if ($user_codespellfile) {
+       # Use the user provided codespell file unconditionally
+       $codespellfile = $user_codespellfile;
+} elsif (!(-f $codespellfile)) {
+       # If /usr/share/codespell/dictionary.txt is not present, try to find it
+       # under codespell's install directory: <codespell_root>/data/dictionary.txt
+       if (($codespell || $help) && which("python3") ne "") {
+               my $python_codespell_dict = << "EOF";
+
+import os.path as op
+import codespell_lib
+codespell_dir = op.dirname(codespell_lib.__file__)
+codespell_file = op.join(codespell_dir, 'data', 'dictionary.txt')
+print(codespell_file, end='')
+EOF
+
+               my $codespell_dict = `python3 -c "$python_codespell_dict" 2> /dev/null`;
+               $codespellfile = $codespell_dict if (-f $codespell_dict);
+       }
+}
+
 help(0) if ($help);
 
 my $exit = 0;
@@ -337,6 +365,36 @@ our @typeList = (
        qr{guintptr},
 );
 
+# Load common spelling mistakes and build regular expression list.
+my $misspellings;
+my %spelling_fix;
+
+if ($codespell) {
+       if (open(my $spelling, '<', $codespellfile)) {
+               while (<$spelling>) {
+                       my $line = $_;
+
+                       $line =~ s/\s*\n?$//g;
+                       $line =~ s/^\s*//g;
+
+                       next if ($line =~ m/^\s*#/);
+                       next if ($line =~ m/^\s*$/);
+                       next if ($line =~ m/, disabled/i);
+
+                       $line =~ s/,.*$//;
+
+                       my ($suspect, $fix) = split(/->/, $line);
+
+                       $spelling_fix{$suspect} = $fix;
+               }
+               close($spelling);
+       } else {
+               warn "No codespell typos will be found - file '$codespellfile': $!\n";
+       }
+}
+
+$misspellings = join("|", sort keys %spelling_fix) if keys %spelling_fix;
+
 # This can be modified by sub possible.  Since it can be empty, be careful
 # about regexes that always match, because they can cause infinite loops.
 our @modifierList = (
@@ -477,6 +535,18 @@ sub top_of_kernel_tree {
        return 1;
 }
 
+sub which {
+       my ($bin) = @_;
+
+       foreach my $path (split(/:/, $ENV{PATH})) {
+               if (-e "$path/$bin") {
+                       return "$path/$bin";
+               }
+       }
+
+       return "";
+}
+
 sub expand_tabs {
        my ($str) = @_;
 
@@ -1585,6 +1655,21 @@ sub process {
                        WARN("8-bit UTF-8 used in possible commit log\n" . $herecurr);
                }
 
+# Check for various typo / spelling mistakes
+               if (defined($misspellings) &&
+                   ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) {
+                       while ($rawline =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) {
+                               my $typo = $1;
+                               my $blank = copy_spacing($rawline);
+                               my $ptr = substr($blank, 0, $-[1]) . "^" x length($typo);
+                               my $hereptr = "$hereline$ptr\n";
+                               my $typo_fix = $spelling_fix{lc($typo)};
+                               $typo_fix = ucfirst($typo_fix) if ($typo =~ /^[A-Z]/);
+                               $typo_fix = uc($typo_fix) if ($typo =~ /^[A-Z]+$/);
+                               WARN("'$typo' may be misspelled - perhaps '$typo_fix'?\n" . $hereptr);
+                       }
+               }
+
 # ignore non-hunk lines and lines being removed
                next if (!$hunk_line || $line =~ /^-/);
 
-- 
2.34.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]