bug-guile
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#30066: 'get-bytevector-some' returns only 1 byte from unbuffered por


From: Ludovic Courtès
Subject: bug#30066: 'get-bytevector-some' returns only 1 byte from unbuffered ports
Date: Thu, 11 Jan 2018 15:34:17 +0100
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/25.3 (gnu/linux)

Hello,

Andy Wingo <address@hidden> skribis:

> There are tabs in your code; would you mind doing only spaces?
>
> A port being unbuffered doesn't mean that it has no bytes in its
> buffer.  In particular, scm_unget_bytes may put bytes back into the
> buffer.  Or, peek-u8 might fill this buffer with one byte.
>
> Also, they port may have buffered write bytes (could be the port has
> write buffering but no read buffering).  In that case (pt->rw_random)
> you need to scm_flush().
>
> I suggest taking the buffered bytes from the read buffer, if any.  Then
> if the port is unbuffered, make a bytevector and call scm_i_read_bytes;
> otherwise do the scm_fill_input path that's there already.
>
> One more thing, if the port goes EOF, you need to
> scm_port_buffer_set_has_eof_p.

I think the attached patch addresses these issues.  WDYT?

Thanks for the review!

Ludo’.

>From d3a60bac6c6aae62ced6eec21b3865caaab83bb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= <address@hidden>
Date: Thu, 11 Jan 2018 15:29:55 +0100
Subject: [PATCH] 'get-bytevector-some' reads as much as possible without
 blocking.

Fixes <https://bugs.gnu.org/30066>.

* libguile/ports.c (scm_i_read_bytes): Remove 'static' keyword.
* libguile/ports.h (SCM_UNBUFFEREDP): New macro.
(scm_i_read_bytes): New declaration.
* libguile/r6rs-ports.c (scm_get_bytevector_some): When PORT is
unbuffered, invoke 'scm_i_read_bytes' to read as much as we can.
* test-suite/tests/r6rs-ports.test ("8.2.8 Binary Input")
["get-bytevector-some [unbuffered port]"]
["get-bytevector-some [unbuffered port, lookahead-u8]"]
["get-bytevector-some [unbuffered port, unget-bytevector]"]: New tests.
---
 libguile/ports.c                 |  6 ++++--
 libguile/ports.h                 |  7 ++++++-
 libguile/r6rs-ports.c            | 34 ++++++++++++++++++++++++++++------
 test-suite/tests/r6rs-ports.test | 36 ++++++++++++++++++++++++++++++++++--
 4 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/libguile/ports.c b/libguile/ports.c
index 72bb73a01..002dd1433 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995-2001, 2003-2004, 2006-2017
+/* Copyright (C) 1995-2001, 2003-2004, 2006-2018
  * Free Software Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
@@ -1543,7 +1543,9 @@ scm_peek_byte_or_eof (SCM port)
   return peek_byte_or_eof (port, &buf, &cur);
 }
 
-static size_t
+/* Like read(2), read *up to* COUNT bytes from PORT into DST, starting
+   at OFFSET.  Return 0 upon EOF.  */
+size_t
 scm_i_read_bytes (SCM port, SCM dst, size_t start, size_t count)
 {
   size_t filled;
diff --git a/libguile/ports.h b/libguile/ports.h
index d131db5be..3fe64c27d 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -4,7 +4,7 @@
 #define SCM_PORTS_H
 
 /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
- *   2006, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, 
Inc.
+ *   2006, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2018 Free Software 
Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -71,7 +71,10 @@ SCM_INTERNAL SCM scm_i_port_weak_set;
 #define SCM_CLOSEDP(x) (!SCM_OPENP (x))
 #define SCM_CLR_PORT_OPEN_FLAG(p) \
   SCM_SET_CELL_WORD_0 ((p), SCM_CELL_WORD_0 (p) & ~SCM_OPN)
+
 #ifdef BUILDING_LIBGUILE
+#define SCM_UNBUFFEREDP(x)                             \
+  (SCM_PORTP (x) && (SCM_CELL_WORD_0 (x) & SCM_BUF0))
 #define SCM_PORT_FINALIZING_P(x) \
   (SCM_CELL_WORD_0 (x) & SCM_F_PORT_FINALIZING)
 #define SCM_SET_PORT_FINALIZING(p) \
@@ -185,6 +188,8 @@ SCM_API int scm_get_byte_or_eof (SCM port);
 SCM_API int scm_peek_byte_or_eof (SCM port);
 SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);
 SCM_API size_t scm_c_read_bytes (SCM port, SCM dst, size_t start, size_t 
count);
+SCM_INTERNAL size_t scm_i_read_bytes (SCM port, SCM dst, size_t start,
+                                     size_t count);
 SCM_API scm_t_wchar scm_getc (SCM port);
 SCM_API SCM scm_read_char (SCM port);
 
diff --git a/libguile/r6rs-ports.c b/libguile/r6rs-ports.c
index e944c7aab..a3d638ca0 100644
--- a/libguile/r6rs-ports.c
+++ b/libguile/r6rs-ports.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009, 2010, 2011, 2013-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2009, 2010, 2011, 2013-2015, 2018 Free Software Foundation, 
Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -481,9 +481,9 @@ SCM_DEFINE (scm_get_bytevector_some, "get-bytevector-some", 
1, 0, 0,
             "position to point just past these bytes.")
 #define FUNC_NAME s_scm_get_bytevector_some
 {
-  SCM buf;
+  SCM buf, bv;
   size_t cur, avail;
-  SCM bv;
+  const size_t max_buffer_size = 4096;
 
   SCM_VALIDATE_BINARY_INPUT_PORT (1, port);
 
@@ -494,9 +494,31 @@ SCM_DEFINE (scm_get_bytevector_some, 
"get-bytevector-some", 1, 0, 0,
       return SCM_EOF_VAL;
     }
 
-  bv = scm_c_make_bytevector (avail);
-  scm_port_buffer_take (buf, (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv),
-                        avail, cur, avail);
+  if (SCM_UNBUFFEREDP (port) && (avail < max_buffer_size))
+    {
+      /* PORT is unbuffered.  Read as much as possible from PORT.  */
+      size_t read;
+
+      bv = scm_c_make_bytevector (max_buffer_size);
+      scm_port_buffer_take (buf, (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv),
+                            avail, cur, avail);
+
+      read = scm_i_read_bytes (port, bv, avail,
+                               SCM_BYTEVECTOR_LENGTH (bv) - avail);
+
+      if (read == 0)
+        scm_port_buffer_set_has_eof_p (buf, SCM_BOOL_F);
+
+      if (read + avail < SCM_BYTEVECTOR_LENGTH (bv))
+        bv = scm_c_shrink_bytevector (bv, read + avail);
+    }
+  else
+    {
+      /* Return what's already buffered.  */
+      bv = scm_c_make_bytevector (avail);
+      scm_port_buffer_take (buf, (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv),
+                            avail, cur, avail);
+    }
 
   return bv;
 }
diff --git a/test-suite/tests/r6rs-ports.test b/test-suite/tests/r6rs-ports.test
index ba3131f2e..d5476f20e 100644
--- a/test-suite/tests/r6rs-ports.test
+++ b/test-suite/tests/r6rs-ports.test
@@ -1,6 +1,6 @@
 ;;;; r6rs-ports.test --- R6RS I/O port tests.   -*- coding: utf-8; -*-
 ;;;;
-;;;; Copyright (C) 2009-2012, 2013-2015 Free Software Foundation, Inc.
+;;;; Copyright (C) 2009-2012, 2013-2015, 2018 Free Software Foundation, Inc.
 ;;;; Ludovic Courtès
 ;;;;
 ;;;; This library is free software; you can redistribute it and/or
@@ -26,7 +26,8 @@
   #:use-module (rnrs io ports)
   #:use-module (rnrs io simple)
   #:use-module (rnrs exceptions)
-  #:use-module (rnrs bytevectors))
+  #:use-module (rnrs bytevectors)
+  #:use-module ((ice-9 binary-ports) #:select (unget-bytevector)))
 
 (define-syntax pass-if-condition
   (syntax-rules ()
@@ -183,6 +184,37 @@
            (equal? (bytevector->u8-list bv)
                    (map char->integer (string->list str))))))
 
+  (pass-if-equal "get-bytevector-some [unbuffered port]"
+      (string->utf8 "Hello, world!")
+    ;; 'get-bytevector-some' used to return a single byte, see
+    ;; <https://bugs.gnu.org/30066>.
+    (call-with-input-string "Hello, world!"
+      (lambda (port)
+        (setvbuf port _IONBF)
+        (get-bytevector-some port))))
+
+  (pass-if-equal "get-bytevector-some [unbuffered port, lookahead-u8]"
+      (string->utf8 "Hello, world!")
+    (call-with-input-string "Hello, world!"
+      (lambda (port)
+        (setvbuf port _IONBF)
+
+        ;; 'lookahead-u8' fills in PORT's 1-byte buffer.  Yet,
+        ;; 'get-bytevector-some' should return the whole thing.
+        (and (eqv? (lookahead-u8 port) (char->integer #\H))
+             (get-bytevector-some port)))))
+
+  (pass-if-equal "get-bytevector-some [unbuffered port, unget-bytevector]"
+      (string->utf8 "Hello")
+    (call-with-input-string "Hello, world!"
+      (lambda (port)
+        (setvbuf port _IONBF)
+        ;; 'unget-bytevector' fills the putback buffer, and
+        ;; 'get-bytevector-some' should get data from there.
+        (unget-bytevector port (get-bytevector-all port)
+                          0 5)
+        (get-bytevector-some port))))
+
   (pass-if "get-bytevector-all"
     (let* ((str   "GNU Guile")
            (index 0)
-- 
2.15.1


reply via email to

[Prev in Thread] Current Thread [Next in Thread]