emacs-orgmode
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [O] [PATCH] Fix and optimize publishing cache check


From: Matt Lundin
Subject: Re: [O] [PATCH] Fix and optimize publishing cache check
Date: Tue, 12 Aug 2014 23:32:17 -0500
User-agent: Gnus/5.130012 (Ma Gnus v0.12) Emacs/24.3 (gnu/linux)

Here is an improved version of the previous patch (please apply it
rather than the previous one). This version further optimizes cache checking
by only calling find-buffer-visiting if necessary. (It also fixes some
long lines.) On a test project containing 5000 files, running
org-publish on the project with the cache enabled now takes seconds
rather than minutes.

Best,
Matt

From ea7203b4d988967f0a70bd45ad7502a961a28aee Mon Sep 17 00:00:00 2001
From: Matt Lundin <address@hidden>
Date: Tue, 12 Aug 2014 23:25:23 -0500
Subject: [PATCH] Fix and optimize publish cache check

* lisp/ox-publish.el: (org-publish-cache-file-needs-publishing) Fix
  org-publish-cache-file-needs-publishing to change timestamp of files
  containing includes. Speed up check for includes by storing
  information about included files in cache itself.

This patch ensures that org-publish-cache-file-needs-publishing does
not keep publishing a file containing includes by updating the
modification time of that file. It also speeds up publishing by
caching information about included files, thus keeping
org-publish-cache-file-needs-publishing from having to visit every
file just to check includes (which can take a long time on a project
containing hundreds or thousands of files).
---
 lisp/ox-publish.el | 68 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/lisp/ox-publish.el b/lisp/ox-publish.el
index df40572..e848a0c 100644
--- a/lisp/ox-publish.el
+++ b/lisp/ox-publish.el
@@ -1167,33 +1167,47 @@ the file including them will be republished as well."
         (key (org-publish-timestamp-filename filename pub-dir pub-func))
         (pstamp (org-publish-cache-get key))
         (org-inhibit-startup t)
-        (visiting (find-buffer-visiting filename))
-        included-files-ctime buf)
-    (when (equal (file-name-extension filename) "org")
-      (setq buf (find-file (expand-file-name filename)))
-      (with-current-buffer buf
-       (goto-char (point-min))
-       (while (re-search-forward "^[ \t]*#\\+INCLUDE:" nil t)
-         (let* ((element (org-element-at-point))
-                (included-file
-                 (and (eq (org-element-type element) 'keyword)
-                      (let ((value (org-element-property :value element)))
-                        (and value
-                             (string-match "^\\(\".+?\"\\|\\S-+\\)" value)
-                             (org-remove-double-quotes
-                              (match-string 1 value)))))))
-           (when included-file
-             (add-to-list 'included-files-ctime
-                          (org-publish-cache-ctime-of-src
-                           (expand-file-name included-file))
-                          t)))))
-      (unless visiting (kill-buffer buf)))
-    (if (null pstamp) t
-      (let ((ctime (org-publish-cache-ctime-of-src filename)))
-       (or (< pstamp ctime)
-           (when included-files-ctime
-             (not (null (delq nil (mapcar (lambda (ct) (< ctime ct))
-                                          included-files-ctime))))))))))
+        (ctime (org-publish-cache-ctime-of-src filename))
+        (needsp (or (null pstamp) (< pstamp ctime)))
+        includes)
+    ;; if the file needs publishing, refresh the included-files cache property
+    (when (and needsp
+              (equal (file-name-extension filename) "org"))
+      (let ((visiting (find-buffer-visiting filename))
+           (buf (find-file-noselect (expand-file-name filename))))
+       (with-current-buffer buf
+         (save-excursion
+           (goto-char (point-min))
+           (while (re-search-forward "^[ \t]*#\\+INCLUDE:" nil t)
+             (let* ((element (org-element-at-point))
+                    (included-file
+                     (and (eq (org-element-type element) 'keyword)
+                          (let ((value (org-element-property :value element)))
+                            (and value
+                                 (string-match "^\\(\".+?\"\\|\\S-+\\)" value)
+                                 (org-remove-double-quotes
+                                  (match-string 1 value)))))))
+               (when included-file
+                 (add-to-list 'includes (expand-file-name included-file)))))))
+       (unless visiting (kill-buffer buf))
+       (when includes
+         (org-publish-cache-set-file-property filename :includes includes))))
+    ;; return t if needsp or if included files have changed
+    (or needsp
+       (when (delq nil
+                   (mapcar (lambda (file)
+                             (let ((ct (org-publish-cache-ctime-of-src file)))
+                               (and (file-exists-p file)
+                                    (< ctime ct))))
+                           (org-publish-cache-get-file-property filename :includes)))
+         ;; update the timestamp of the published file if buffer is not modified
+         (let ((visiting (find-buffer-visiting filename))
+               (buf (find-file-noselect (expand-file-name filename))))
+           (with-current-buffer buf
+             (when (not (buffer-modified-p))
+               (set-buffer-modified-p t) (save-buffer)))
+           (unless visiting (kill-buffer buf)))
+         t))))
 
 (defun org-publish-cache-set-file-property
   (filename property value &optional project-name)
-- 
2.0.4


Matt Lundin <address@hidden> writes:

> This patch does two things:
>
> 1. It prevents org-publish from perpetually republishing files
> containing includes. Currently, if an included file changes, it is
> published, as is the file that includes it. However, until changes are
> made in the file that includes it (which may be never) its modification
> time remains older than that of the included file, so it will be
> republished every time org-publish is called, even if neither file has
> changed. 
>
>    - Note: This patch fixes this behavior by updating the modification
>      time of the file that contains the includes. If this is deemed too
>      destructive/meddlesome, we could consider alternate behaviors, such
>      as setting the cached time with a simple float-time. Let me know
>      what would be best.
>
> 2. It optimizes checking for included files. Currently,
> org-publish-cache-file-needs-publishing visits every file in a project
> to check for includes. On my underpowered box, this takes a long time
> (over a minute) on a project with 1000+ files, thus defeating the
> purpose of the cache. This patch causes org-publish to store information
> about included files in the cache itself:
>
>   a. If a file has changed it updates information about the files it
>   includes and stores it in the cache.
>
>   b. If a file has not changed, it checks the cache for included files
>   and sees if any of those files have been updated (thus preventing the
>   need to check every file in the project for includes)
>
> Best,
> Matt
>
> From 94a8061bb30e1992213fb8e71ee949d336d37435 Mon Sep 17 00:00:00 2001
> From: Matt Lundin <address@hidden>
> Date: Tue, 12 Aug 2014 07:51:44 -0500
> Subject: [PATCH] Fix and optimize publish cache check
>
> * lisp/ox-publish.el: (org-publish-cache-file-needs-publishing) Fix
>   org-publish-cache-file-needs-publishing to change timestamp of files
>   containing includes. Speed up check for includes by storing
>   information about included files in cache itself.
>
> This patch ensures that org-publish-cache-file-needs-publishing does
> not keep publishing a file containing includes by updating the
> modification time of that file. It also speeds up publishing by
> caching information about included files, thus keeping
> org-publish-cache-file-needs-publishing from having to visit every
> file just to check includes (which can take a long time on a project
> containing hundreds or thousands of files).
> ---
>  lisp/ox-publish.el | 60 
> +++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 35 insertions(+), 25 deletions(-)
>
> diff --git a/lisp/ox-publish.el b/lisp/ox-publish.el
> index df40572..228411f 100644
> --- a/lisp/ox-publish.el
> +++ b/lisp/ox-publish.el
> @@ -1168,32 +1168,42 @@ the file including them will be republished as well."
>        (pstamp (org-publish-cache-get key))
>        (org-inhibit-startup t)
>        (visiting (find-buffer-visiting filename))
> -      included-files-ctime buf)
> -    (when (equal (file-name-extension filename) "org")
> -      (setq buf (find-file (expand-file-name filename)))
> +      (ctime (org-publish-cache-ctime-of-src filename))
> +      (needsp (or (null pstamp) (< pstamp ctime)))
> +      includes buf)
> +    ;; if the file needs publishing, refresh the included-files cache 
> property
> +    (when (and needsp
> +            (equal (file-name-extension filename) "org"))
> +      (setq buf (find-file-noselect (expand-file-name filename)))
>        (with-current-buffer buf
> -     (goto-char (point-min))
> -     (while (re-search-forward "^[ \t]*#\\+INCLUDE:" nil t)
> -       (let* ((element (org-element-at-point))
> -              (included-file
> -               (and (eq (org-element-type element) 'keyword)
> -                    (let ((value (org-element-property :value element)))
> -                      (and value
> -                           (string-match "^\\(\".+?\"\\|\\S-+\\)" value)
> -                           (org-remove-double-quotes
> -                            (match-string 1 value)))))))
> -         (when included-file
> -           (add-to-list 'included-files-ctime
> -                        (org-publish-cache-ctime-of-src
> -                         (expand-file-name included-file))
> -                        t)))))
> -      (unless visiting (kill-buffer buf)))
> -    (if (null pstamp) t
> -      (let ((ctime (org-publish-cache-ctime-of-src filename)))
> -     (or (< pstamp ctime)
> -         (when included-files-ctime
> -           (not (null (delq nil (mapcar (lambda (ct) (< ctime ct))
> -                                        included-files-ctime))))))))))
> +     (save-excursion
> +       (goto-char (point-min))
> +       (while (re-search-forward "^[ \t]*#\\+INCLUDE:" nil t)
> +         (let* ((element (org-element-at-point))
> +                (included-file
> +                 (and (eq (org-element-type element) 'keyword)
> +                      (let ((value (org-element-property :value element)))
> +                        (and value
> +                             (string-match "^\\(\".+?\"\\|\\S-+\\)" value)
> +                             (org-remove-double-quotes
> +                              (match-string 1 value)))))))
> +           (when included-file (add-to-list 'includes (expand-file-name 
> included-file)))))))
> +      (unless visiting (kill-buffer buf))
> +      (when includes (org-publish-cache-set-file-property filename :includes 
> includes)))
> +    ;; return t if needsp or if included files have changed
> +    (or needsp
> +     (when (delq nil
> +                 (mapcar (lambda (file)
> +                           (let ((ct (org-publish-cache-ctime-of-src file)))
> +                             (and (file-exists-p file)
> +                                  (< ctime ct))))
> +                         (org-publish-cache-get-file-property filename 
> :includes)))
> +       ;; update the timestamp of the published file, but only if buffer is 
> not modified
> +       (setq buf (find-file-noselect (expand-file-name filename)))
> +       (with-current-buffer buf
> +         (when (not (buffer-modified-p)) (set-buffer-modified-p t) 
> (save-buffer)))
> +       (unless visiting (kill-buffer buf))
> +       t))))
>  
>  (defun org-publish-cache-set-file-property
>    (filename property value &optional project-name)

reply via email to

[Prev in Thread] Current Thread [Next in Thread]