emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master 63fd71c: Improve scoring algorithm for flex-style completions


From: João Távora
Subject: master 63fd71c: Improve scoring algorithm for flex-style completions
Date: Sat, 26 Oct 2019 20:48:13 -0400 (EDT)

branch: master
commit 63fd71cd092de8daded15e32c268215b62c488b9
Author: João Távora <address@hidden>
Commit: João Távora <address@hidden>

    Improve scoring algorithm for flex-style completions
    
    The previous algorithm had two problems: it considered non-matches in
    the beginning and end of the string as matching "holes" and failed to
    penalize larger holes, making flex-score-match-tightness only
    effective in some corner cases.
    
    The new formula, which is described in code and in pseudo-code in the
    comments, fixes these problems.
    
    As a result, by default, C-h f flex now correctly bubbles up
    "company-search-flex-regexp" to the top, in front of "file-exists-p".
    With a flex-score-match-tightness smaller than 1.0, the situation is
    reversed.
    
    * lisp/minibuffer.el (flex-score-match-tightness): Adjust default
    value.  Improve docstring example.
    (completion-pcm--hilit-commonality): Improve example.  Remove unused
    variable.  Improve algorithm.
---
 lisp/minibuffer.el | 60 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 22 deletions(-)

diff --git a/lisp/minibuffer.el b/lisp/minibuffer.el
index 542e672..c92a91e 100644
--- a/lisp/minibuffer.el
+++ b/lisp/minibuffer.el
@@ -3060,16 +3060,18 @@ PATTERN is as returned by 
`completion-pcm--string->pattern'."
            (when (string-match-p regex c) (push c poss)))
          (nreverse poss))))))
 
-(defvar flex-score-match-tightness 100
+(defvar flex-score-match-tightness 3
   "Controls how the `flex' completion style scores its matches.
 
-Value is a positive number.  Values smaller than one make the
-scoring formula value matches scattered along the string, while
-values greater than one make the formula value tighter matches.
-I.e \"foo\" matches both strings \"barbazfoo\" and \"fabrobazo\",
-which are of equal length, but only a value greater than one will
-score the former (which has one \"hole\") higher than the
-latter (which has two).")
+Value is a positive number.  A number smaller than 1 makes the
+scoring formula reward matches scattered along the string, while
+a number greater than one make the formula reward matches that
+are clumped together.  I.e \"foo\" matches both strings
+\"fbarbazoo\" and \"fabrobazo\", which are of equal length, but
+only a value greater than one will score the former (which has
+one large \"hole\" and a clumped-together \"oo\" match) higher
+than the latter (which has two \"holes\" and three
+one-letter-long matches).")
 
 (defun completion-pcm--hilit-commonality (pattern completions)
   (when completions
@@ -3086,27 +3088,39 @@ latter (which has two).")
                 (end (pop md))
                 (len (length str))
                 ;; To understand how this works, consider these bad
-                ;; ascii(tm) diagrams showing how the pattern \"foo\"
-                ;; flex-matches \"fabrobazo" and
-                ;; \"barfoobaz\":
+                ;; ascii(tm) diagrams showing how the pattern "foo"
+                ;; flex-matches "fabrobazo", "fbarbazoo" and
+                ;; "barfoobaz":
 
                 ;;      f abr o baz o
                 ;;      + --- + --- +
 
+                ;;      f barbaz oo
+                ;;      + ------ ++
+
                 ;;      bar foo baz
-                ;;      --- +++ ---
+                ;;          +++
 
-                ;; Where + indicates parts where the pattern matched,
-                ;; - where it didn't match.  The score is a number
+                ;; "+" indicates parts where the pattern matched.  A
+                ;; "hole" in the middle of the string is indicated by
+                ;; "-".  Note that there are no "holes" near the edges
+                ;; of the string.  The completion score is a number
                 ;; bound by ]0..1]: the higher the better and only a
                 ;; perfect match (pattern equals string) will have
                 ;; score 1.  The formula takes the form of a quotient.
                 ;; For the numerator, we use the number of +, i.e. the
                 ;; length of the pattern.  For the denominator, it
-                ;; sums (1+ (/ (grouplen - 1)
-                ;; flex-score-match-tightness)) across all groups of
-                ;; -, sums one to that total, and then multiples by
-                ;; the length of the string.
+                ;; first computes
+                ;;
+                ;;     hole_i_contrib = 1 + (Li-1)^(1/tightness)
+                ;;
+                ;; , for each hole "i" of length "Li", where tightness
+                ;; is given by `flex-score-match-tightness'.  The
+                ;; final value for the denominator is then given by:
+                ;;
+                ;;    (SUM_across_i(hole_i_contrib) + 1) * len
+                ;;
+                ;; , where "len" is the string's length.
                 (score-numerator 0)
                 (score-denominator 0)
                 (last-b 0)
@@ -3115,13 +3129,15 @@ latter (which has two).")
                    "Update score variables given match range (A B)."
                    (setq
                     score-numerator   (+ score-numerator (- b a)))
-                   (unless (= a last-b)
+                   (unless (or (= a last-b)
+                               (zerop last-b)
+                               (= a (length str)))
                      (setq
                       score-denominator (+ score-denominator
                                            1
-                                           (/ (- a last-b 1)
-                                              flex-score-match-tightness
-                                              1.0))))
+                                           (expt (- a last-b 1)
+                                                 (/ 1.0
+                                                    
flex-score-match-tightness)))))
                    (setq
                     last-b              b))))
            (funcall update-score start start)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]