bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#64735: 29.0.92; find invocations are ~15x slower because of ignores


From: Ihor Radchenko
Subject: bug#64735: 29.0.92; find invocations are ~15x slower because of ignores
Date: Wed, 26 Jul 2023 09:09:28 +0000

Dmitry Gutov <dmitry@gutov.dev> writes:

>> (my-bench 10 "/usr/src/linux/" "")
>> 
>> (("built-in" . "Elapsed time: 7.034326s (3.598539s in 14 GCs)")
>>   ("built-in no filename handler alist" . "Elapsed time: 5.907194s 
>> (3.698456s in 15 GCs)")
>>   ("with-find" . "Elapsed time: 6.078056s (4.052791s in 16 GCs)")
>>   ("with-find-p" . "Elapsed time: 4.496762s (2.739565s in 11 GCs)")
>>   ("with-find-sync" . "Elapsed time: 3.702760s (1.715160s in 7 GCs)"))
>
> Thanks, for the extra data point in particular. Easy to see how it 
> compares to the most efficient use of 'find', right (on GNU/Linux, at 
> least)?
>
> It's also something to note that, GC-wise, numbers 1 and 2 are not the 
> worst: the time must be spent somewhere else.

Indeed. I did a more detailed analysis in
https://yhetil.org/emacs-devel/87cz0p2xlc.fsf@localhost/

The main contributors in the Lisp versions are (in order from most to
least significant): (1) file name handlers; (2) regexp
matching of the file names; (3) nconc calls in the current
`directory-files-recursively' implementation.

I have modified `directory-files-recursively' to avoid the O(N^2) `nconc'
calls and to bypass regexp matching when REGEXP is nil.

Here are the results (using the attached modified version of your
benchmark file):

(my-bench 10 "/usr/src/linux/" "")
(("built-in" . "Elapsed time: 7.285597s (3.853368s in 6 GCs)")
 ("built-in no filename handler alist" . "Elapsed time: 5.855019s (3.760662s in 
6 GCs)")
 ("built-in non-recursive no filename handler alist" . "Elapsed time: 5.817639s 
(4.326945s in 7 GCs)")
 ("built-in non-recursive no filename handler alist + skip re-match" . "Elapsed 
time: 2.708306s (1.871665s in 3 GCs)")
 ("with-find" . "Elapsed time: 6.082200s (4.262830s in 7 GCs)")
 ("with-find-p" . "Elapsed time: 4.325503s (3.058647s in 5 GCs)")
 ("with-find-sync" . "Elapsed time: 3.267648s (1.903655s in 3 GCs)"))

 (let ((gc-cons-threshold most-positive-fixnum))
   (my-bench 10 "/usr/src/linux/" ""))
(("built-in" . "Elapsed time: 2.754473s")
 ("built-in no filename handler alist" . "Elapsed time: 1.322443s")
 ("built-in non-recursive no filename handler alist" . "Elapsed time: 
1.235044s")
 ("built-in non-recursive no filename handler alist + skip re-match" . "Elapsed 
time: 0.750275s")
 ("with-find" . "Elapsed time: 1.438510s")
 ("with-find-p" . "Elapsed time: 1.200876s")
 ("with-find-sync" . "Elapsed time: 1.349755s"))

If we forget about GC, the Elisp version can get fairly close to GNU find.
And if we do not perform regexp matching (which makes sense when
REGEXP is ""), the Elisp version is faster.

;; -*- lexical-binding: t; -*-

(defun find-directory-files-recursively (dir regexp &optional
                                             include-directories _p follow-symlinks)
  "List the files under DIR by running an asynchronous `find' process.
The process writes NUL-separated names into a temporary buffer, which
is parsed once the process has finished.

Unless REGEXP is the empty string, it is wrapped in \".*\" on both
sides and handed to find's \"-regex\" test.  Directories are left out
of the result unless INCLUDE-DIRECTORIES is non-nil.  _P must be nil:
predicates cannot be translated into `find' arguments.  When
FOLLOW-SYMLINKS is non-nil, `find' is run with \"-L\"; otherwise
symbolic links that point to directories are excluded.

If DIR is remote, the process is started with `start-file-process'
and the remote prefix of DIR is prepended to every returned name.
The names are returned in the reverse of the order `find' printed them."
  (cl-assert (null _p) t "find-directory-files-recursively can't accept arbitrary predicates")
  (with-temp-buffer
    (setq case-fold-search nil)
    ;; Make DIR the buffer's `default-directory', so that a remote DIR
    ;; makes `start-file-process' run `find' on the remote host.
    (cd dir)
    (let* ((command
            (append
             (list "find")
             ;; "-L" is an option, not a test: `find' only accepts it
             ;; before the starting point.
             (when follow-symlinks
               '("-L"))
             (list (file-local-name dir))
             (unless follow-symlinks
               '("!" "(" "-type" "l" "-xtype" "d" ")"))
             (unless (string-empty-p regexp)
               (list "-regex" (concat ".*" regexp ".*")))
             (unless include-directories
               '("!" "-type" "d"))
             '("-print0")))
           (remote (file-remote-p dir))
           (proc
            (if remote
                (let ((proc (apply #'start-file-process
                                   "find" (current-buffer) command)))
                  ;; A no-op sentinel replaces the default one, so no
                  ;; status message is inserted into the output buffer.
                  (set-process-sentinel proc (lambda (_proc _state)))
                  (set-process-query-on-exit-flag proc nil)
                  proc)
              (make-process :name "find" :buffer (current-buffer)
                            :connection-type 'pipe
                            :noquery t
                            :sentinel (lambda (_proc _state))
                            :command command))))
      ;; Keep reading until `accept-process-output' reports that there
      ;; is nothing more to receive from the process.
      (while (accept-process-output proc))
      (let ((start (goto-char (point-min))) ret)
        (while (search-forward "\0" nil t)
          ;; Point is just after the NUL; the name ends one char earlier.
          (push (concat remote (buffer-substring-no-properties start (1- (point))))
                ret)
          (setq start (point)))
        ret))))

(defun find-directory-files-recursively-2 (dir regexp &optional
                                               include-directories _p follow-symlinks)
  "List the files under DIR by parsing `find' output in a process filter.
No process buffer is used: every chunk of output is split on NUL
characters as it arrives and the names are accumulated directly.

Unless REGEXP is the empty string, it is wrapped in \".*\" on both
sides and handed to find's \"-regex\" test.  Directories are left out
of the result unless INCLUDE-DIRECTORIES is non-nil.  _P must be nil:
predicates cannot be translated into `find' arguments.  When
FOLLOW-SYMLINKS is non-nil, `find' is run with \"-L\"; otherwise
symbolic links that point to directories are excluded.

DIR must not be remote.  The names are returned in the reverse of the
order `find' printed them."
  (cl-assert (null _p) t "find-directory-files-recursively can't accept arbitrary predicates")
  (cl-assert (not (file-remote-p dir)))
  (let* ((buffered "")
         result
         (proc
          (make-process
           :name "find" :buffer nil
           :connection-type 'pipe
           :noquery t
           :sentinel (lambda (_proc _state))
           :filter (lambda (_proc data)
                     (let ((start 0)
                           (end (string-search "\0" data)))
                       (when end
                         ;; Only the first name of a chunk can continue
                         ;; the partial name left by the previous chunk.
                         (push (concat buffered (substring data 0 end))
                               result)
                         (setq buffered ""
                               start (1+ end))
                         (while (setq end (string-search "\0" data start))
                           (push (substring data start end) result)
                           (setq start (1+ end))))
                       ;; Whatever follows the last NUL is an incomplete
                       ;; name; keep it for the next chunk.
                       (setq buffered (concat buffered (substring data start)))))
           :command (append
                     (list "find")
                     ;; "-L" is an option, not a test: `find' only
                     ;; accepts it before the starting point.
                     (when follow-symlinks
                       '("-L"))
                     (list (file-local-name dir))
                     (unless follow-symlinks
                       '("!" "(" "-type" "l" "-xtype" "d" ")"))
                     (unless (string-empty-p regexp)
                       (list "-regex" (concat ".*" regexp ".*")))
                     (unless include-directories
                       '("!" "-type" "d"))
                     '("-print0")))))
    ;; Keep reading until `accept-process-output' reports that there is
    ;; nothing more to receive from the process.
    (while (accept-process-output proc))
    result))

(defun find-directory-files-recursively-3 (dir regexp &optional
                                               include-directories _p follow-symlinks)
  "List the files under DIR by running `find' synchronously.
The program is run with `process-file'; its NUL-separated output is
collected in a temporary buffer and parsed after it exits.

Unless REGEXP is the empty string, it is wrapped in \".*\" on both
sides and handed to find's \"-regex\" test.  Directories are left out
of the result unless INCLUDE-DIRECTORIES is non-nil.  _P must be nil:
predicates cannot be translated into `find' arguments.  When
FOLLOW-SYMLINKS is non-nil, `find' is run with \"-L\"; otherwise
symbolic links that point to directories are excluded.

DIR must not be remote.  Signal an error if `find' exits with a
non-zero status.  The names are returned in the reverse of the order
`find' printed them."
  (cl-assert (null _p) t "find-directory-files-recursively can't accept arbitrary predicates")
  (cl-assert (not (file-remote-p dir)))
  (let ((args `(;; "-L" is an option, not a test: `find' only accepts
                ;; it before the starting point.
                ,@(when follow-symlinks
                    '("-L"))
                ,(file-local-name dir)
                ,@(unless follow-symlinks
                    '("!" "(" "-type" "l" "-xtype" "d" ")"))
                ,@(unless (string-empty-p regexp)
                    (list "-regex" (concat ".*" regexp ".*")))
                ,@(unless include-directories
                    '("!" "-type" "d"))
                "-print0")))
    (with-temp-buffer
      (let ((status (apply #'process-file
                           "find"
                           nil
                           t
                           nil
                           args))
            (pt (point-min))
            res)
        (unless (zerop status)
          (error "Listing failed"))
        (goto-char (point-min))
        (while (search-forward "\0" nil t)
          ;; Point is just after the NUL; the name ends one char earlier.
          (push (buffer-substring-no-properties pt (1- (point)))
                res)
          (setq pt (point)))
        res))))

(defun directory-files-recursively-strip-nconc
    (dir regexp
         &optional include-directories predicate
         follow-symlinks)
  "Return a sorted list of all files under directory DIR matching REGEXP.
REGEXP is matched against the last component of each file name.  If
REGEXP is nil, every file is accepted and no matching is done at all.

The tree is walked iteratively, using an explicit list of directories
still to visit.  The complete result is sorted with `string<' before
it is returned.  Each returned name is built by appending the path
below DIR to DIR, with any trailing slash of DIR removed first.

By default, the returned list excludes directories, but if
optional argument INCLUDE-DIRECTORIES is non-nil, they are
included.

PREDICATE can be either nil (which means that all subdirectories
of DIR are descended into), t (which means that subdirectories that
can't be read are ignored), or a function (which is called with
the name of each subdirectory, and should return non-nil if the
subdirectory is to be descended into).

If FOLLOW-SYMLINKS is non-nil, symbolic links that point to
directories are followed.  Note that this can lead to infinite
recursion."
  (let* ((result nil)
         ;; Normalize DIR before seeding the work list, so a trailing
         ;; slash does not produce names like \"dir//file\".
         (dir (directory-file-name dir))
         (root dir)
         (dirs (list dir))
         ;; When DIR is "/", remote file names like "/method:" could
         ;; also be offered.  We shall suppress them.
         (tramp-mode (and tramp-mode (file-remote-p (expand-file-name dir)))))
    (while (setq dir (pop dirs))
      (dolist (file (if (and (eq predicate t) (not (eq dir root)))
                        ;; PREDICATE t: skip subdirectories that
                        ;; cannot be read instead of signaling.
                        (condition-case nil
                            (file-name-all-completions "" dir)
                          (file-error nil))
                      (file-name-all-completions "" dir)))
        (unless (member file '("./" "../"))
          (if (directory-name-p file)
              (let* ((leaf (substring file 0 (1- (length file))))
                     (full-file (concat dir "/" leaf)))
                ;; Don't follow symlinks to other directories.
                (when (and (or (not (file-symlink-p full-file))
                               follow-symlinks)
                           ;; Allow filtering subdirectories.
                           (or (eq predicate nil)
                               (eq predicate t)
                               (funcall predicate full-file)))
                  (push full-file dirs))
                ;; The final `sort' fixes the order, so a constant-time
                ;; `push' can replace appending at the tail.
                (when (and include-directories
                           (or (null regexp)
                               (string-match regexp leaf)))
                  (push full-file result)))
            ;; A nil REGEXP accepts everything without matching.
            (when (or (null regexp)
                      (string-match regexp file))
              (push (concat dir "/" file) result))))))
    (sort result #'string<)))

(defun my-bench (count path regexp)
  "Time every directory-listing implementation on PATH, COUNT runs each.
PATH is first expanded with `expand-file-name'.  REGEXP is passed to
each implementation, except for the \"skip re-match\" variant, which
receives nil instead.

Return a list of (LABEL . RESULT) conses, in a fixed order, where
RESULT is whatever `benchmark' returned for that implementation."
  (let* ((root (expand-file-name path))
         ;; Call FN on ROOT with pattern RE under `benchmark'.
         (timed (lambda (fn re)
                  (benchmark count `(,fn ,root ,re))))
         ;; Likewise, with `file-name-handler-alist' bound to nil for
         ;; the duration of the measurement.
         (timed-bare (lambda (fn re)
                       (let ((file-name-handler-alist nil))
                         (funcall timed fn re)))))
    ;; Optional sanity check that two implementations agree:
    ;; (let ((old (directory-files-recursively root regexp))
    ;;       (new (find-directory-files-recursively-3 root regexp)))
    ;;   (dolist (file old)
    ;;     (unless (member file new) (error "! %s not in" file)))
    ;;   (dolist (file new)
    ;;     (unless (member file old) (error "!! %s not in" file))))
    (list
     (cons "built-in"
           (funcall timed 'directory-files-recursively regexp))
     (cons "built-in no filename handler alist"
           (funcall timed-bare 'directory-files-recursively regexp))
     (cons "built-in non-recursive no filename handler alist"
           (funcall timed-bare 'directory-files-recursively-strip-nconc regexp))
     (cons "built-in non-recursive no filename handler alist + skip re-match"
           (funcall timed-bare 'directory-files-recursively-strip-nconc nil))
     (cons "with-find"
           (funcall timed 'find-directory-files-recursively regexp))
     (cons "with-find-p"
           (funcall timed 'find-directory-files-recursively-2 regexp))
     (cons "with-find-sync"
           (funcall timed 'find-directory-files-recursively-3 regexp)))))

;; Announce the `find-bench' feature, so this file can be loaded with `require'.
(provide 'find-bench)
-- 
Ihor Radchenko // yantar92,
Org mode contributor,
Learn more about Org mode at <https://orgmode.org/>.
Support Org development at <https://liberapay.com/org-mode>,
or support my work at <https://liberapay.com/yantar92>

reply via email to

[Prev in Thread] Current Thread [Next in Thread]