guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Optimize 'get-bytevector-some'; it may now read less than possib


From: Mark H Weaver
Subject: [PATCH] Optimize 'get-bytevector-some'; it may now read less than possible
Date: Sun, 31 Mar 2013 21:32:53 -0400

Hello all,

This patch optimizes 'get-bytevector-some'.

Currently, 'get-bytevector-some' reads a single byte at a time, doing
both 'scm_get_byte_or_eof' and 'scm_char_ready_p' on each byte,
dynamically resizing the bytevector as it goes and then shrinking it at
the end.  Needless to say, it was horrendously inefficient.

This patch changes it to simply fill the read buffer (if empty) and then
copy the contents of the read/putback buffers into a bytevector that is
never resized.

The tradeoff is that whereas the old version did everything it could to
read as many bytes as possible (even to the point of calling 'poll' at
the OS level), this new version makes no guarantees beyond what is
required by the R6RS: at least one byte will be read (or EOF).

I'd like to push this to stable-2.0.  Any objections?

      Mark


>From cebdd9253b73c7caf18eb5a18b316680f7c69cc8 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Sun, 31 Mar 2013 21:21:14 -0400
Subject: [PATCH] Optimize 'get-bytevector-some'; it may now read less than
 possible.

* libguile/r6rs-ports.c (scm_get_bytevector_some): Rewrite to
  efficiently take the contents of the read/putback buffers.

* test-suite/tests/r6rs-ports.test ("get-bytevector-some [only-some]"):
  Remove bogus test, which requires more than the R6RS requires.
---
 libguile/r6rs-ports.c            |   69 +++++++++++---------------------------
 test-suite/tests/r6rs-ports.test |   24 -------------
 2 files changed, 19 insertions(+), 74 deletions(-)

diff --git a/libguile/r6rs-ports.c b/libguile/r6rs-ports.c
index 7ee7a69..f4b4f3f 100644
--- a/libguile/r6rs-ports.c
+++ b/libguile/r6rs-ports.c
@@ -556,65 +556,34 @@ SCM_DEFINE (scm_get_bytevector_some, 
"get-bytevector-some", 1, 0, 0,
            "end-of-file object.")
 #define FUNC_NAME s_scm_get_bytevector_some
 {
-  /* Read at least one byte, unless the end-of-file is already reached, and
-     read while characters are available (buffered).  */
-
-  SCM result;
-  char *c_bv;
-  unsigned c_len;
-  size_t c_total;
+  scm_t_port *pt;
+  size_t size;
+  SCM bv;
 
   SCM_VALIDATE_BINARY_INPUT_PORT (1, port);
+  pt = SCM_PTAB_ENTRY (port);
 
-  c_len = 4096;
-  c_bv = (char *) scm_gc_malloc_pointerless (c_len, SCM_GC_BYTEVECTOR);
-  c_total = 0;
+  if (pt->rw_active == SCM_PORT_WRITE)
+    scm_ptobs[SCM_PTOBNUM (port)].flush (port);
 
-  do
-    {
-      int c_chr;
-
-      if (c_total + 1 > c_len)
-       {
-         /* Grow the bytevector.  */
-         c_bv = (char *) scm_gc_realloc (c_bv, c_len, c_len * 2,
-                                         SCM_GC_BYTEVECTOR);
-         c_len *= 2;
-       }
-
-      /* We can't use `scm_c_read ()' since it blocks.  */
-      c_chr = scm_get_byte_or_eof (port);
-      if (c_chr != EOF)
-       {
-         c_bv[c_total] = (char) c_chr;
-         c_total++;
-       }
-      else
-        break;
-    }
-  /* XXX: We want to check for the availability of a byte, but that's
-     what `scm_char_ready_p' actually does.  */
-  while (scm_is_true (scm_char_ready_p (port)));
+  if (pt->rw_random)
+    pt->rw_active = SCM_PORT_READ;
 
-  if (c_total == 0)
+  if (pt->read_pos >= pt->read_end)
     {
-      result = SCM_EOF_VAL;
-      scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
+      if (scm_fill_input (port) == EOF)
+       return SCM_EOF_VAL;
     }
-  else
-    {
-      if (c_len > c_total)
-       {
-         /* Shrink the bytevector.  */
-         c_bv = (char *) scm_gc_realloc (c_bv, c_len, c_total,
-                                         SCM_GC_BYTEVECTOR);
-         c_len = (unsigned) c_total;
-       }
 
-      result = scm_c_take_gc_bytevector ((signed char *) c_bv, c_len);
-    }
+  size = pt->read_end - pt->read_pos;
+  if (pt->read_buf == pt->putback_buf)
+    size += pt->saved_read_end - pt->saved_read_pos;
 
-  return result;
+  bv = scm_c_make_bytevector (size);
+  scm_take_from_input_buffers
+    (port, (char *) SCM_BYTEVECTOR_CONTENTS (bv), size);
+
+  return bv;
 }
 #undef FUNC_NAME
 
diff --git a/test-suite/tests/r6rs-ports.test b/test-suite/tests/r6rs-ports.test
index ed49598..2db2c56 100644
--- a/test-suite/tests/r6rs-ports.test
+++ b/test-suite/tests/r6rs-ports.test
@@ -163,30 +163,6 @@
            (equal? (bytevector->u8-list bv)
                    (map char->integer (string->list str))))))
 
-  (pass-if "get-bytevector-some [only-some]"
-    (let* ((str   "GNU Guile")
-           (index 0)
-           (port  (make-soft-port
-                   (vector #f #f #f
-                           (lambda ()
-                             (if (>= index (string-length str))
-                                 (eof-object)
-                                 (let ((c (string-ref str index)))
-                                   (set! index (+ index 1))
-                                   c)))
-                           (lambda () #t)
-                           (lambda ()
-                             ;; Number of readily available octets: falls to
-                             ;; zero after 4 octets have been read.
-                             (- 4 (modulo index 5))))
-                   "r"))
-           (bv    (get-bytevector-some port)))
-      (and (bytevector? bv)
-           (= index 4)
-           (= (bytevector-length bv) index)
-           (equal? (bytevector->u8-list bv)
-                   (map char->integer (string->list "GNU "))))))
-
   (pass-if "get-bytevector-all"
     (let* ((str   "GNU Guile")
            (index 0)
-- 
1.7.10.4


reply via email to

[Prev in Thread] Current Thread [Next in Thread]