emacs-orgmode
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] org-table-import: Make it more smarter for interactive use


From: Utkarsh Singh
Subject: Re: [PATCH] org-table-import: Make it more smarter for interactive use
Date: Fri, 23 Apr 2021 10:28:24 +0530
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/28.0.50 (gnu/linux)

Hi,

On 2021-04-20, 15:40 +0200, Nicolas Goaziou <mail@nicolasgoaziou.fr> wrote:

> Again all this needs to extensively tested, as there are a lot of
> dangers lurking around.

I am attaching my patch, which also includes my previous suggestion of
adding a yes-or-no prompt to org-table-import to allow files which don't
have csv, tsv or txt as their extension.  Here are some concerns
which require your attention:

+ When using org-table-import interactively, if we fail to guess the
separator then we will be left with a user-error message and an
'unconverted table'.  We could make use of a 'temp-buffer' and only
insert the file's contents after a successful conversion.

+ The conversion part of org-table-convert-region makes a distinction
between '(4) (comma separator) and the rest of the separators.  We should
either add the string version of comma as an AND condition or rewrite it
to simplify it.

I am willing to make these changes, but I am currently waiting for your
review of org-table-guess-separator, as there may be more serious bugs
lurking in my code, which I am using as the base for these changes.

All the best,
Utkarsh

diff --git a/lisp/org/org-table.el b/lisp/org/org-table.el
index 0e93fb271f..84bc981fec 100644
--- a/lisp/org/org-table.el
+++ b/lisp/org/org-table.el
@@ -846,6 +846,42 @@ org-table-create
       (goto-char pos))
     (org-table-align)))
 
+
+(defun org-table-guess-separator (beg0 end0)
+  "Guess the field separator for the region between BEG0 and END0.
+
+BEG0 and END0 may be given in either order.  Candidates are tried
+in order of preference: comma, TAB, semicolon, colon, SPACE.  A
+candidate is rejected as soon as one line matches its rejection
+regexp.  Return the first surviving separator as a string, or nil
+when the trimmed region is empty or every candidate is rejected."
+  (let* ((beg (save-excursion
+                (goto-char (min beg0 end0))
+                (skip-chars-forward " \t\n")
+                (if (eobp) (point) (line-beginning-position))))
+         (end (save-excursion
+                ;; Use BEG0 here, not the trimmed BEG, so that a
+                ;; region passed in reverse order is not collapsed.
+                (goto-char (max beg0 end0))
+                (skip-chars-backward " \t\n" beg)
+                (if (= beg (point)) (point) (line-end-position))))
+         (sep-regexp '((","  (rx bol (1+ (not (or ?\n ?,))) eol))
+                       ("\t" (rx bol (1+ (not (or ?\n ?\t))) eol))
+                       (";"  (rx bol (1+ (not (or ?\n ?\;))) eol))
+                       (":"  (rx bol (1+ (not (or ?\n ?:))) eol))
+                       (" "  (rx bol (1+ (not (or ?' ?\" ))
+                                         (not (or ?\s ?\;))
+                                         (not (or ?' ?\"))) eol)))))
+    (unless (= beg end)
+      (save-excursion
+        (goto-char beg)
+        (catch :found
+          (pcase-dolist (`(,sep ,regexp) sep-regexp)
+            (save-excursion
+              (unless (re-search-forward (eval regexp) end t)
+                (throw :found sep))))
+          nil)))))
+
 ;;;###autoload
 (defun org-table-convert-region (beg0 end0 &optional separator)
   "Convert region to a table.
@@ -859,20 +895,19 @@ org-table-convert-region
 (4)     Use the comma as a field separator
 (16)    Use a TAB as field separator
 (64)    Prompt for a regular expression as field separator
-integer  When a number, use that many spaces, or a TAB, as field separator
-regexp   When a regular expression, use it to match the separator
-nil      When nil, the command tries to be smart and figure out the
-         separator in the following way:
-         - when each line contains a TAB, assume TAB-separated material
-         - when each line contains a comma, assume CSV material
-         - else, assume one or more SPACE characters as separator."
+integer When a number, use that many spaces, or a TAB, as field separator
+regexp  When a regular expression, use it to match the separator
+nil     When nil, the command tries to be smart and figure out the
+        separator using `org-table-guess-seperator'."
   (interactive "r\nP")
   (let* ((beg (min beg0 end0))
         (end (max beg0 end0))
         re)
+
     (if (> (count-lines beg end) org-table-convert-region-max-lines)
        (user-error "Region is longer than `org-table-convert-region-max-lines' 
(%s) lines; not converting"
                    org-table-convert-region-max-lines)
+
       (when (equal separator '(64))
        (setq separator (read-regexp "Regexp for field separator")))
       (goto-char beg)
@@ -881,17 +916,13 @@ org-table-convert-region
       (goto-char end)
       (if (bolp) (backward-char 1) (end-of-line 1))
       (setq end (point-marker))
-      ;; Get the right field separator
-      (unless separator
-       (goto-char beg)
-       (setq separator
-             (cond
-              ((not (re-search-forward "^[^\n\t]+$" end t)) '(16))
-              ((not (re-search-forward "^[^\n,]+$" end t)) '(4))
-              (t 1))))
+      (when (and (not separator)
+                 (not (setq separator
+                            (org-table-guess-separator (beg end)))))
+        (user-error "Failed to guess separator"))
       (goto-char beg)
       (if (equal separator '(4))
-         (while (< (point) end)
+          (while (< (point) end)
            ;; parse the csv stuff
            (cond
             ((looking-at "^") (insert "| "))
@@ -905,7 +936,7 @@ org-table-convert-region
        (setq re (cond
                  ((equal separator '(4)) "^\\|\"?[ \t]*,[ \t]*\"?")
                  ((equal separator '(16)) "^\\|\t")
-                 ((integerp separator)
+                 ((integerp separator)
                   (if (< separator 1)
                       (user-error "Number of spaces in separator must be >= 1")
                     (format "^ *\\| *\t *\\| \\{%d,\\}" separator)))
@@ -921,12 +952,8 @@ org-table-convert-region
 (defun org-table-import (file separator)
   "Import FILE as a table.
 
-The command tries to be smart and figure out the separator in the
-following way:
-
-- when each line contains a TAB, assume TAB-separated material;
-- when each line contains a comma, assume CSV material;
-- else, assume one or more SPACE characters as separator.
+The command tries to be smart and figure out the separator using
+`org-table-guess-seperator'.
 
 When non-nil, SEPARATOR specifies the field separator in the
 lines.  It can have the following values:
@@ -938,7 +965,8 @@ org-table-import
 - regexp  When a regular expression, use it to match the separator."
   (interactive "f\nP")
   (when (and (called-interactively-p 'any)
-            (not (string-match-p (rx "." (or "txt" "tsv" "csv") eos) file)))
+            (not (string-match-p (rx "." (or "txt" "tsv" "csv") eos) file))
+             (not (yes-or-no-p "File does not havs .txt .txt .csv as 
extension.  Do you still want to continue? ")))
     (user-error "Cannot import such file"))
   (unless (bolp) (insert "\n"))
   (let ((beg (point))
-- 
Utkarsh Singh
http://utkarshsingh.xyz

reply via email to

[Prev in Thread] Current Thread [Next in Thread]