emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/doc-toc 7e50cc20d0 53/84: Implement extract-only


From: ELPA Syncer
Subject: [elpa] externals/doc-toc 7e50cc20d0 53/84: Implement extract-only
Date: Mon, 26 Sep 2022 13:58:38 -0400 (EDT)

branch: externals/doc-toc
commit 7e50cc20d0a395ef29991c7a6023ade5336c00af
Author: Daniel Nicolai <dalanicolai@gmail.com>
Commit: Daniel Nicolai <dalanicolai@gmail.com>

    Implement extract-only
---
 toc-mode.el | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/toc-mode.el b/toc-mode.el
index d2dbfa279a..ccc61bd6fb 100644
--- a/toc-mode.el
+++ b/toc-mode.el
@@ -393,6 +393,40 @@ unprocessed text."
     (setq-local doc-buffer source-buffer)
     (insert text)))
 
+;;;###autoload
+(defun toc-extract-only ()
+  "Just extract text via OCR without further processing.
+Prompt for startpage and endpage and print OCR output to new buffer.
+Do nothing unless the buffer derives from a PDF or DjVu viewing mode."
+  (interactive)
+  (when (derived-mode-p 'pdf-view-mode 'djvu-read-mode)
+    (let* ((page (string-to-number
+                  (read-string "Enter start-pagenumber for extraction: ")))
+           (endpage (string-to-number
+                     (read-string "Enter end-pagenumber for extraction: ")))
+           (ext (url-file-extension (buffer-file-name (current-buffer))))
+           (buffer (file-name-sans-extension (buffer-name)))
+           (args (append (list "stdout" "--psm" "6")
+                         (and toc-ocr-languages (list "-l" toc-ocr-languages)))))
+      (while (<= page endpage)
+        (let ((file (cond ((string= ".pdf" ext)
+                           (make-temp-file "pageimage"
+                                           nil
+                                           (number-to-string page)
+                                           (pdf-cache-get-image page 600)))
+                          ((string= ".djvu" ext)
+                           (djvu-goto-page page)
+                           (make-temp-file "pageimage"
+                                           nil
+                                           (number-to-string page)
+                                           (image-property djvu-doc-image :data))))))
+          ;; Remove the temporary page image even if tesseract fails to run.
+          (unwind-protect
+              (apply #'call-process "tesseract" nil (list buffer nil) nil file args)
+            (when file (delete-file file)))
+          (setq page (1+ page))))
+      (switch-to-buffer buffer))))
+
 (defun toc--create-tablist-buffer ()
   "Create tablist buffer, from cleaned up Table of Contents buffer, for easy page number adjustment."
   (interactive)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]