guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] Add private port structure, and move iconv descriptors there


From: Mark H Weaver
Subject: Re: [PATCH] Add private port structure, and move iconv descriptors there
Date: Sun, 31 Mar 2013 15:44:18 -0400
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.3 (gnu/linux)

Andy Wingo <address@hidden> writes:
> Can you take a look at porting
> 6c98257f2ead0855f218369ea7f9a823cdb9727e?

Okay, here's an updated patch set that's essentially a backport of that
commit from master, but with 'encoding_mode' and 'iconv_descriptors' put
in the internal-only port structure.

What do you think?

      Mark


>From aeef3dc501dd598e5b3dea3fc5d1552a032d3917 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Sat, 30 Mar 2013 23:09:01 -0400
Subject: [PATCH 1/3] Add 'scm_gc_typed_calloc' macro.

* libguile/gc.h (scm_gc_typed_calloc): New macro.
---
 libguile/gc.h |    1 +
 1 file changed, 1 insertion(+)

diff --git a/libguile/gc.h b/libguile/gc.h
index 9f00e01..c6f7a4b 100644
--- a/libguile/gc.h
+++ b/libguile/gc.h
@@ -207,6 +207,7 @@ SCM_API char *scm_gc_strdup (const char *str, const char 
*what)
 SCM_API char *scm_gc_strndup (const char *str, size_t n, const char *what)
   SCM_MALLOC;
 
+#define scm_gc_typed_calloc(t) ((t *) scm_gc_calloc (sizeof (t), #t))
 
 #ifdef BUILDING_LIBGUILE
 #include "libguile/bdw-gc.h"
-- 
1.7.10.4

>From 9eb48ea34c3f22f178cb1d889b93911283395a91 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Sat, 30 Mar 2013 22:34:56 -0400
Subject: [PATCH 2/3] Add internal-only port structure and move iconv
 descriptors there.

* libguile/ports-internal.h: New file.

* libguile/Makefile.am (noinst_HEADERS): Add ports-internal.h.

* libguile/ports.h (scm_t_port): Add a comment mentioning that the
  'input_cd' and 'output_cd' fields of the public structure are no
  longer what they seem to be.

* libguile/ports.c: Include ports-internal.h.

  (finalize_port, scm_i_remove_port, get_iconv_codepoint, get_codepoint,
  scm_i_set_port_encoding_x): Access 'input_cd' and 'output_cd' via the
  new internal port structure.

  (scm_new_port_table_entry): Allocate and initialize the internal port
  structure.

* libguile/print.c: Include ports-internal.h.

  (display_string_using_iconv, display_string): Access 'input_cd' and
  'output_cd' via 'internal' pointer.
---
 libguile/Makefile.am      |    2 +-
 libguile/ports-internal.h |   40 ++++++++++++++++++++++++++++
 libguile/ports.c          |   65 +++++++++++++++++++++++++++------------------
 libguile/ports.h          |   19 ++++++++++---
 libguile/print.c          |   15 ++++++-----
 5 files changed, 105 insertions(+), 36 deletions(-)
 create mode 100644 libguile/ports-internal.h

diff --git a/libguile/Makefile.am b/libguile/Makefile.am
index 4b1f96b..0c4b6ce 100644
--- a/libguile/Makefile.am
+++ b/libguile/Makefile.am
@@ -455,7 +455,7 @@ noinst_HEADERS = conv-integer.i.c conv-uinteger.i.c         
\
                  srfi-14.i.c                                   \
                  quicksort.i.c                                  \
                  win32-uname.h                                 \
-                private-gc.h private-options.h
+                private-gc.h private-options.h ports-internal.h
 
 # vm instructions
 noinst_HEADERS += vm-engine.c vm-i-system.c vm-i-scheme.c vm-i-loader.c
diff --git a/libguile/ports-internal.h b/libguile/ports-internal.h
new file mode 100644
index 0000000..eba91c8
--- /dev/null
+++ b/libguile/ports-internal.h
@@ -0,0 +1,40 @@
+/*
+ * ports-internal.h - internal-only declarations for ports.
+ *
+ * Copyright (C) 2013 Free Software Foundation, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef SCM_PORTS_INTERNAL
+#define SCM_PORTS_INTERNAL
+
+#include "libguile/_scm.h"
+#include "libguile/ports.h"
+
+struct scm_port_internal
+{
+  /* input/output iconv conversion descriptors */
+  void *input_cd;
+  void *output_cd;
+};
+
+typedef struct scm_port_internal scm_t_port_internal;
+
+#define SCM_INTERNAL_PTAB_ENTRY(x)                              \
+  ((scm_t_port_internal *) (SCM_PTAB_ENTRY(x)->input_cd))
+
+#endif
diff --git a/libguile/ports.c b/libguile/ports.c
index 8737a76..86db4a0 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -55,6 +55,7 @@
 #include "libguile/mallocs.h"
 #include "libguile/validate.h"
 #include "libguile/ports.h"
+#include "libguile/ports-internal.h"
 #include "libguile/vectors.h"
 #include "libguile/weaks.h"
 #include "libguile/fluids.h"
@@ -576,7 +577,7 @@ finalize_port (void *ptr, void *data)
        register_finalizer_for_port (port);
       else
        {
-         scm_t_port *entry;
+          scm_t_port_internal *pti;
 
          port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port));
          if (port_type >= scm_numptob)
@@ -587,12 +588,12 @@ finalize_port (void *ptr, void *data)
               is for explicit `close-port' by user.  */
            scm_ptobs[port_type].free (port);
 
-         entry = SCM_PTAB_ENTRY (port);
+         pti = SCM_INTERNAL_PTAB_ENTRY (port);
 
-         if (entry->input_cd != (iconv_t) -1)
-           iconv_close (entry->input_cd);
-         if (entry->output_cd != (iconv_t) -1)
-           iconv_close (entry->output_cd);
+         if (pti->input_cd != (iconv_t) -1)
+           iconv_close (pti->input_cd);
+         if (pti->output_cd != (iconv_t) -1)
+           iconv_close (pti->output_cd);
 
          SCM_SETSTREAM (port, 0);
          SCM_CLR_PORT_OPEN_FLAG (port);
@@ -617,7 +618,8 @@ scm_new_port_table_entry (scm_t_bits tag)
    */
   
   SCM z = scm_cons (SCM_EOL, SCM_EOL);
-  scm_t_port *entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), 
"port");
+  scm_t_port *entry = scm_gc_typed_calloc (scm_t_port);
+  scm_t_port_internal *pti = scm_gc_typed_calloc (scm_t_port_internal);
   const char *enc;
 
   entry->file_name = SCM_BOOL_F;
@@ -630,11 +632,17 @@ scm_new_port_table_entry (scm_t_bits tag)
   entry->encoding = enc ? scm_gc_strdup (enc, "port") : NULL;
 
   /* The conversion descriptors will be opened lazily.  */
-  entry->input_cd = (iconv_t) -1;
-  entry->output_cd = (iconv_t) -1;
+  pti->input_cd = (iconv_t) -1;
+  pti->output_cd = (iconv_t) -1;
 
   entry->ilseq_handler = scm_i_default_port_conversion_handler ();
 
+  /* XXX These fields are not what they seem.  They have been
+     repurposed, but cannot safely be renamed in 2.0 without breaking
+     ABI compatibility.  This will be cleaned up in 2.2.  */
+  entry->input_cd = pti;   /* XXX pointer to the internal port structure */
+  entry->output_cd = NULL; /* XXX unused */
+
   SCM_SET_CELL_TYPE (z, tag);
   SCM_SETPTAB_ENTRY (z, entry);
 
@@ -678,24 +686,26 @@ scm_i_remove_port (SCM port)
 #define FUNC_NAME "scm_remove_port"
 {
   scm_t_port *p;
+  scm_t_port_internal *pti;
 
   scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
 
   p = SCM_PTAB_ENTRY (port);
+  pti = SCM_INTERNAL_PTAB_ENTRY (port);
   scm_port_non_buffer (p);
   p->putback_buf = NULL;
   p->putback_buf_size = 0;
 
-  if (p->input_cd != (iconv_t) -1)
+  if (pti->input_cd != (iconv_t) -1)
     {
-      iconv_close (p->input_cd);
-      p->input_cd = (iconv_t) -1;
+      iconv_close (pti->input_cd);
+      pti->input_cd = (iconv_t) -1;
     }
   
-  if (p->output_cd != (iconv_t) -1)
+  if (pti->output_cd != (iconv_t) -1)
     {
-      iconv_close (p->output_cd);
-      p->output_cd = (iconv_t) -1;
+      iconv_close (pti->output_cd);
+      pti->output_cd = (iconv_t) -1;
     }
 
   SCM_SETPTAB_ENTRY (port, 0);
@@ -1296,13 +1306,13 @@ static int
 get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
                     char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
 {
-  scm_t_port *pt;
+  scm_t_port_internal *pti;
   int err, byte_read;
   size_t bytes_consumed, output_size;
   char *output;
   scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
 
-  pt = SCM_PTAB_ENTRY (port);
+  pti = SCM_INTERNAL_PTAB_ENTRY (port);
 
   for (output_size = 0, output = (char *) utf8_buf,
         bytes_consumed = 0, err = 0;
@@ -1332,7 +1342,7 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
       input_left = bytes_consumed + 1;
       output_left = sizeof (utf8_buf);
 
-      done = iconv (pt->input_cd, &input, &input_left,
+      done = iconv (pti->input_cd, &input, &input_left,
                    &output, &output_left);
       if (done == (size_t) -1)
        {
@@ -1368,13 +1378,14 @@ get_codepoint (SCM port, scm_t_wchar *codepoint,
 {
   int err;
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
+  scm_t_port_internal *pti = SCM_INTERNAL_PTAB_ENTRY (port);
 
-  if (pt->input_cd == (iconv_t) -1)
+  if (pti->input_cd == (iconv_t) -1)
     /* Initialize the conversion descriptors, if needed.  */
     scm_i_set_port_encoding_x (port, pt->encoding);
 
   /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8.  */
-  if (pt->input_cd == (iconv_t) -1)
+  if (pti->input_cd == (iconv_t) -1)
     err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
   else
     err = get_iconv_codepoint (port, codepoint, buf, len);
@@ -2208,6 +2219,7 @@ void
 scm_i_set_port_encoding_x (SCM port, const char *encoding)
 {
   scm_t_port *pt;
+  scm_t_port_internal *pti;
   iconv_t new_input_cd, new_output_cd;
 
   new_input_cd = (iconv_t) -1;
@@ -2215,6 +2227,7 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding)
 
   /* Set the character encoding for this port.  */
   pt = SCM_PTAB_ENTRY (port);
+  pti = SCM_INTERNAL_PTAB_ENTRY (port);
 
   if (encoding == NULL)
     encoding = "ISO-8859-1";
@@ -2251,13 +2264,13 @@ scm_i_set_port_encoding_x (SCM port, const char 
*encoding)
        }
     }
 
-  if (pt->input_cd != (iconv_t) -1)
-    iconv_close (pt->input_cd);
-  if (pt->output_cd != (iconv_t) -1)
-    iconv_close (pt->output_cd);
+  if (pti->input_cd != (iconv_t) -1)
+    iconv_close (pti->input_cd);
+  if (pti->output_cd != (iconv_t) -1)
+    iconv_close (pti->output_cd);
 
-  pt->input_cd = new_input_cd;
-  pt->output_cd = new_output_cd;
+  pti->input_cd = new_input_cd;
+  pti->output_cd = new_output_cd;
 
   return;
 
diff --git a/libguile/ports.h b/libguile/ports.h
index d4d59b7..6791a5c 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -112,9 +112,22 @@ typedef struct
   unsigned char *putback_buf;
   size_t putback_buf_size;        /* allocated size of putback_buf.  */
 
-  /* input/output iconv conversion descriptors */
-  void *input_cd;
-  void *output_cd;
+  /* IMPORTANT: 'input_cd' and 'output_cd' used to be pointers to the
+     input and output iconv descriptors, but those have been moved to
+     the internal-only port structure defined in ports-internal.h.
+
+     Given that we must preserve ABI compatibility in 2.0, we cannot
+     safely change this public structure without running afoul of C
+     strict aliasing rules.  We cannot even change the member names.
+
+     To work around this, in this public structure, 'input_cd' has been
+     repurposed to be a pointer to the internal port structure (see
+     ports-internal.h), and 'output_cd' is now unused.
+
+     This will be cleaned up in 2.2.  */
+
+  void *input_cd;   /* XXX actually a pointer to scm_t_port_internal */
+  void *output_cd;  /* XXX actually unused */
 } scm_t_port;
 
 
diff --git a/libguile/print.c b/libguile/print.c
index 647eed8..62d7267 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -45,6 +45,7 @@
 #include "libguile/alist.h"
 #include "libguile/struct.h"
 #include "libguile/ports.h"
+#include "libguile/ports-internal.h"
 #include "libguile/root.h"
 #include "libguile/strings.h"
 #include "libguile/strports.h"
@@ -879,9 +880,9 @@ display_string_using_iconv (const void *str, int narrow_p, 
size_t len,
                            scm_t_string_failed_conversion_handler strategy)
 {
   size_t printed;
-  scm_t_port *pt;
+  scm_t_port_internal *pti;
 
-  pt = SCM_PTAB_ENTRY (port);
+  pti = SCM_INTERNAL_PTAB_ENTRY (port);
 
   printed = 0;
 
@@ -910,7 +911,7 @@ display_string_using_iconv (const void *str, int narrow_p, 
size_t len,
       output = encoded_output;
       output_left = sizeof (encoded_output);
 
-      done = iconv (pt->output_cd, &input, &input_left,
+      done = iconv (pti->output_cd, &input, &input_left,
                    &output, &output_left);
 
       output_len = sizeof (encoded_output) - output_left;
@@ -920,7 +921,7 @@ display_string_using_iconv (const void *str, int narrow_p, 
size_t len,
           int errno_save = errno;
 
          /* Reset the `iconv' state.  */
-         iconv (pt->output_cd, NULL, NULL, NULL, NULL);
+         iconv (pti->output_cd, NULL, NULL, NULL, NULL);
 
          /* Print the OUTPUT_LEN bytes successfully converted.  */
          scm_lfwrite (encoded_output, output_len, port);
@@ -981,15 +982,17 @@ display_string (const void *str, int narrow_p,
 
 {
   scm_t_port *pt;
+  scm_t_port_internal *pti;
 
   pt = SCM_PTAB_ENTRY (port);
+  pti = SCM_INTERNAL_PTAB_ENTRY (port);
 
-  if (pt->output_cd == (iconv_t) -1)
+  if (pti->output_cd == (iconv_t) -1)
     /* Initialize the conversion descriptors, if needed.  */
     scm_i_set_port_encoding_x (port, pt->encoding);
 
   /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8.  */
-  if (pt->output_cd == (iconv_t) -1)
+  if (pti->output_cd == (iconv_t) -1)
     return display_string_as_utf8 (str, narrow_p, len, port);
   else
     return display_string_using_iconv (str, narrow_p, len,
-- 
1.7.10.4

>From b5304dfe27075784532a73ae2a587612ddc717fd Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Sun, 31 Mar 2013 15:23:32 -0400
Subject: [PATCH 3/3] Refactor port encoding modes: utf-8 and iconv

Based on 6c98257f2ead0855f218369ea7f9a823cdb9727e by Andy Wingo.

* libguile/ports-internal.h (struct scm_port_internal): Add a flag
  for the port encoding mode: UTF8 or iconv.  The iconv descriptors
  are now in a separate structure so that we can avoid attaching
  finalizers to the ports themselves in the future.
  (enum scm_port_encoding_mode): New enum.
  (struct scm_iconv_descriptors): New struct.
  (scm_i_port_iconv_descriptors): Add prototype.

* libguile/ports.c (finalize_port): Don't close iconv descriptors here.
  (scm_new_port_table_entry): Adapt to the iconv descriptors being
  moved.  Initialize 'encoding_mode'.
  (scm_i_remove_port): Adapt to call 'close_iconv_descriptors'.
  (close_iconv_descriptors): New static function.
  (get_iconv_codepoint): Use 'scm_i_port_iconv_descriptors'.
  (get_codepoint): Check the port 'encoding_mode'.
  (finalize_iconv_descriptors, open_iconv_descriptors,
  close_iconv_descriptors, scm_i_port_iconv_descriptors): New static
  functions.
  (scm_i_set_port_encoding_x): Adapt to iconv descriptors being moved
  to separate structure, to set the 'encoding_mode' flag, and to use
  'open_iconv_descriptors' and 'close_iconv_descriptors'.

* libguile/print.c (display_string_using_iconv): Use
  'scm_i_port_iconv_descriptors'.
  (display_string): Use 'encoding_mode' flag.
---
 libguile/ports-internal.h |   22 ++++-
 libguile/ports.c          |  229 ++++++++++++++++++++++++++++-----------------
 libguile/print.c          |   17 +---
 3 files changed, 169 insertions(+), 99 deletions(-)

diff --git a/libguile/ports-internal.h b/libguile/ports-internal.h
index eba91c8..d52eab2 100644
--- a/libguile/ports-internal.h
+++ b/libguile/ports-internal.h
@@ -25,16 +25,36 @@
 #include "libguile/_scm.h"
 #include "libguile/ports.h"
 
-struct scm_port_internal
+enum scm_port_encoding_mode {
+  SCM_PORT_ENCODING_MODE_UTF8,
+  SCM_PORT_ENCODING_MODE_ICONV
+};
+
+typedef enum scm_port_encoding_mode scm_t_port_encoding_mode;
+
+/* This is a separate object so that only those ports that use iconv
+   cause finalizers to be registered (FIXME: although currently in 2.0
+   finalizers are always registered for ports anyway).  */
+struct scm_iconv_descriptors
 {
   /* input/output iconv conversion descriptors */
   void *input_cd;
   void *output_cd;
 };
 
+typedef struct scm_iconv_descriptors scm_t_iconv_descriptors;
+
+struct scm_port_internal
+{
+  scm_t_port_encoding_mode encoding_mode;
+  scm_t_iconv_descriptors *iconv_descriptors;
+};
+
 typedef struct scm_port_internal scm_t_port_internal;
 
 #define SCM_INTERNAL_PTAB_ENTRY(x)                              \
   ((scm_t_port_internal *) (SCM_PTAB_ENTRY(x)->input_cd))
 
+SCM_INTERNAL scm_t_iconv_descriptors *scm_i_port_iconv_descriptors (SCM port);
+
 #endif
diff --git a/libguile/ports.c b/libguile/ports.c
index 86db4a0..3c67e67 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -555,8 +555,8 @@ static void finalize_port (void *, void *);
 static SCM_C_INLINE_KEYWORD void
 register_finalizer_for_port (SCM port)
 {
-  /* Register a finalizer for PORT so that its iconv CDs get freed and
-     optionally its type's `free' function gets called.  */
+  /* Register a finalizer for PORT so that its
+     type's `free' function gets called.  */
   scm_i_set_finalizer (SCM2PTR (port), finalize_port, NULL);
 }
 
@@ -577,8 +577,6 @@ finalize_port (void *ptr, void *data)
        register_finalizer_for_port (port);
       else
        {
-          scm_t_port_internal *pti;
-
          port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port));
          if (port_type >= scm_numptob)
            abort ();
@@ -588,13 +586,6 @@ finalize_port (void *ptr, void *data)
               is for explicit `close-port' by user.  */
            scm_ptobs[port_type].free (port);
 
-         pti = SCM_INTERNAL_PTAB_ENTRY (port);
-
-         if (pti->input_cd != (iconv_t) -1)
-           iconv_close (pti->input_cd);
-         if (pti->output_cd != (iconv_t) -1)
-           iconv_close (pti->output_cd);
-
          SCM_SETSTREAM (port, 0);
          SCM_CLR_PORT_OPEN_FLAG (port);
 
@@ -620,7 +611,7 @@ scm_new_port_table_entry (scm_t_bits tag)
   SCM z = scm_cons (SCM_EOL, SCM_EOL);
   scm_t_port *entry = scm_gc_typed_calloc (scm_t_port);
   scm_t_port_internal *pti = scm_gc_typed_calloc (scm_t_port_internal);
-  const char *enc;
+  const char *encoding;
 
   entry->file_name = SCM_BOOL_F;
   entry->rw_active = SCM_PORT_NEITHER;
@@ -628,14 +619,14 @@ scm_new_port_table_entry (scm_t_bits tag)
 
   /* Initialize this port with the thread's current default
      encoding.  */
-  enc = scm_i_default_port_encoding ();
-  entry->encoding = enc ? scm_gc_strdup (enc, "port") : NULL;
-
-  /* The conversion descriptors will be opened lazily.  */
-  pti->input_cd = (iconv_t) -1;
-  pti->output_cd = (iconv_t) -1;
-
+  encoding = scm_i_default_port_encoding ();
   entry->ilseq_handler = scm_i_default_port_conversion_handler ();
+  entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
+  if (encoding && strcmp (encoding, "UTF-8") == 0)
+    pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
+  else
+    pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+  pti->iconv_descriptors = NULL;
 
   /* XXX These fields are not what they seem.  They have been
      repurposed, but cannot safely be renamed in 2.0 without breaking
@@ -681,6 +672,8 @@ scm_add_to_port_table (SCM port)
 
 /* Remove a port from the table and destroy it.  */
 
+static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
+
 static void
 scm_i_remove_port (SCM port)
 #define FUNC_NAME "scm_remove_port"
@@ -696,16 +689,10 @@ scm_i_remove_port (SCM port)
   p->putback_buf = NULL;
   p->putback_buf_size = 0;
 
-  if (pti->input_cd != (iconv_t) -1)
+  if (pti->iconv_descriptors)
     {
-      iconv_close (pti->input_cd);
-      pti->input_cd = (iconv_t) -1;
-    }
-  
-  if (pti->output_cd != (iconv_t) -1)
-    {
-      iconv_close (pti->output_cd);
-      pti->output_cd = (iconv_t) -1;
+      close_iconv_descriptors (pti->iconv_descriptors);
+      pti->iconv_descriptors = NULL;
     }
 
   SCM_SETPTAB_ENTRY (port, 0);
@@ -1306,13 +1293,13 @@ static int
 get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
                     char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
 {
-  scm_t_port_internal *pti;
+  scm_t_iconv_descriptors *id;
   int err, byte_read;
   size_t bytes_consumed, output_size;
   char *output;
   scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
 
-  pti = SCM_INTERNAL_PTAB_ENTRY (port);
+  id = scm_i_port_iconv_descriptors (port);
 
   for (output_size = 0, output = (char *) utf8_buf,
         bytes_consumed = 0, err = 0;
@@ -1342,8 +1329,7 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
       input_left = bytes_consumed + 1;
       output_left = sizeof (utf8_buf);
 
-      done = iconv (pti->input_cd, &input, &input_left,
-                   &output, &output_left);
+      done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
       if (done == (size_t) -1)
        {
          err = errno;
@@ -1380,12 +1366,7 @@ get_codepoint (SCM port, scm_t_wchar *codepoint,
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
   scm_t_port_internal *pti = SCM_INTERNAL_PTAB_ENTRY (port);
 
-  if (pti->input_cd == (iconv_t) -1)
-    /* Initialize the conversion descriptors, if needed.  */
-    scm_i_set_port_encoding_x (port, pt->encoding);
-
-  /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8.  */
-  if (pti->input_cd == (iconv_t) -1)
+  if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
     err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
   else
     err = get_iconv_codepoint (port, codepoint, buf, len);
@@ -2215,73 +2196,149 @@ scm_i_default_port_encoding (void)
     }
 }
 
+static void
+finalize_iconv_descriptors (GC_PTR ptr, GC_PTR data)
+{
+  close_iconv_descriptors (ptr);
+}
+
+static scm_t_iconv_descriptors *
+open_iconv_descriptors (const char *encoding, int reading, int writing)
+{
+  scm_t_iconv_descriptors *id;
+  iconv_t input_cd, output_cd;
+
+  input_cd = (iconv_t) -1;
+  output_cd = (iconv_t) -1;
+  if (reading)
+    {
+      /* Open an input iconv conversion descriptor, from ENCODING
+         to UTF-8.  We choose UTF-8, not UTF-32, because iconv
+         implementations can typically convert from anything to
+         UTF-8, but not to UTF-32 (see
+         
<http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>).  */
+
+      /* Assume opening an iconv descriptor causes about 16 KB of
+         allocation.  */
+      scm_gc_register_allocation (16 * 1024);
+
+      input_cd = iconv_open ("UTF-8", encoding);
+      if (input_cd == (iconv_t) -1)
+        goto invalid_encoding;
+    }
+
+  if (writing)
+    {
+      /* Assume opening an iconv descriptor causes about 16 KB of
+         allocation.  */
+      scm_gc_register_allocation (16 * 1024);
+
+      output_cd = iconv_open (encoding, "UTF-8");
+      if (output_cd == (iconv_t) -1)
+        {
+          if (input_cd != (iconv_t) -1)
+            iconv_close (input_cd);
+          goto invalid_encoding;
+        }
+    }
+
+  id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
+  id->input_cd = input_cd;
+  id->output_cd = output_cd;
+
+  {
+    GC_finalization_proc prev_finalizer;
+    GC_PTR prev_finalization_data;
+
+    /* Register a finalizer to close the descriptors.  */
+    GC_REGISTER_FINALIZER_NO_ORDER (id, finalize_iconv_descriptors, 0,
+                                    &prev_finalizer, &prev_finalization_data);
+  }
+
+  return id;
+ 
+ invalid_encoding:
+  {
+    SCM err;
+    err = scm_from_locale_string (encoding);
+    scm_misc_error ("open_iconv_descriptors",
+                    "invalid or unknown character encoding ~s",
+                    scm_list_1 (err));
+  }
+}
+
+static void
+close_iconv_descriptors (scm_t_iconv_descriptors *id)
+{
+  if (id->input_cd != (iconv_t) -1)
+    iconv_close (id->input_cd);
+  if (id->output_cd != (iconv_t) -1)
+    iconv_close (id->output_cd);
+  id->input_cd = (void *) -1;
+  id->output_cd = (void *) -1;
+}
+
+scm_t_iconv_descriptors *
+scm_i_port_iconv_descriptors (SCM port)
+{
+  scm_t_port *pt;
+  scm_t_port_internal *pti;
+
+  pt = SCM_PTAB_ENTRY (port);
+  pti = SCM_INTERNAL_PTAB_ENTRY (port);
+
+  assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
+
+  if (!pti->iconv_descriptors)
+    {
+      if (!pt->encoding)
+        pt->encoding = "ISO-8859-1";
+      pti->iconv_descriptors =
+        open_iconv_descriptors (pt->encoding,
+                                SCM_INPUT_PORT_P (port),
+                                SCM_OUTPUT_PORT_P (port));
+    }
+
+  return pti->iconv_descriptors;
+}
+
 void
 scm_i_set_port_encoding_x (SCM port, const char *encoding)
 {
   scm_t_port *pt;
   scm_t_port_internal *pti;
-  iconv_t new_input_cd, new_output_cd;
-
-  new_input_cd = (iconv_t) -1;
-  new_output_cd = (iconv_t) -1;
+  scm_t_iconv_descriptors *prev;
 
   /* Set the character encoding for this port.  */
   pt = SCM_PTAB_ENTRY (port);
   pti = SCM_INTERNAL_PTAB_ENTRY (port);
+  prev = pti->iconv_descriptors;
 
   if (encoding == NULL)
     encoding = "ISO-8859-1";
 
-  if (pt->encoding != encoding)
-    pt->encoding = scm_gc_strdup (encoding, "port");
-
   /* If ENCODING is UTF-8, then no conversion descriptor is opened
      because we do I/O ourselves.  This saves 100+ KiB for each
      descriptor.  */
-  if (strcmp (encoding, "UTF-8"))
+  if (strcmp (encoding, "UTF-8") == 0)
     {
-      if (SCM_CELL_WORD_0 (port) & SCM_RDNG)
-       {
-         /* Open an input iconv conversion descriptor, from ENCODING
-            to UTF-8.  We choose UTF-8, not UTF-32, because iconv
-            implementations can typically convert from anything to
-            UTF-8, but not to UTF-32 (see
-            
<http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>).  */
-         new_input_cd = iconv_open ("UTF-8", encoding);
-         if (new_input_cd == (iconv_t) -1)
-           goto invalid_encoding;
-       }
-
-      if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
-       {
-         new_output_cd = iconv_open (encoding, "UTF-8");
-         if (new_output_cd == (iconv_t) -1)
-           {
-             if (new_input_cd != (iconv_t) -1)
-               iconv_close (new_input_cd);
-             goto invalid_encoding;
-           }
-       }
+      pt->encoding = "UTF-8";
+      pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
+      pti->iconv_descriptors = NULL;
+    }
+  else
+    {
+      /* Open descriptors before mutating the port. */
+      pti->iconv_descriptors =
+        open_iconv_descriptors (encoding,
+                                SCM_INPUT_PORT_P (port),
+                                SCM_OUTPUT_PORT_P (port));
+      pt->encoding = scm_gc_strdup (encoding, "port");
+      pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
     }
 
-  if (pti->input_cd != (iconv_t) -1)
-    iconv_close (pti->input_cd);
-  if (pti->output_cd != (iconv_t) -1)
-    iconv_close (pti->output_cd);
-
-  pti->input_cd = new_input_cd;
-  pti->output_cd = new_output_cd;
-
-  return;
-
- invalid_encoding:
-  {
-    SCM err;
-    err = scm_from_locale_string (encoding);
-    scm_misc_error ("scm_i_set_port_encoding_x",
-                   "invalid or unknown character encoding ~s",
-                   scm_list_1 (err));
-  }
+  if (prev)
+    close_iconv_descriptors (prev);
 }
 
 SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
diff --git a/libguile/print.c b/libguile/print.c
index 62d7267..3469d71 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -880,9 +880,9 @@ display_string_using_iconv (const void *str, int narrow_p, 
size_t len,
                            scm_t_string_failed_conversion_handler strategy)
 {
   size_t printed;
-  scm_t_port_internal *pti;
+  scm_t_iconv_descriptors *id;
 
-  pti = SCM_INTERNAL_PTAB_ENTRY (port);
+  id = scm_i_port_iconv_descriptors (port);
 
   printed = 0;
 
@@ -911,7 +911,7 @@ display_string_using_iconv (const void *str, int narrow_p, 
size_t len,
       output = encoded_output;
       output_left = sizeof (encoded_output);
 
-      done = iconv (pti->output_cd, &input, &input_left,
+      done = iconv (id->output_cd, &input, &input_left,
                    &output, &output_left);
 
       output_len = sizeof (encoded_output) - output_left;
@@ -921,7 +921,7 @@ display_string_using_iconv (const void *str, int narrow_p, 
size_t len,
           int errno_save = errno;
 
          /* Reset the `iconv' state.  */
-         iconv (pti->output_cd, NULL, NULL, NULL, NULL);
+         iconv (id->output_cd, NULL, NULL, NULL, NULL);
 
          /* Print the OUTPUT_LEN bytes successfully converted.  */
          scm_lfwrite (encoded_output, output_len, port);
@@ -981,18 +981,11 @@ display_string (const void *str, int narrow_p,
                scm_t_string_failed_conversion_handler strategy)
 
 {
-  scm_t_port *pt;
   scm_t_port_internal *pti;
 
-  pt = SCM_PTAB_ENTRY (port);
   pti = SCM_INTERNAL_PTAB_ENTRY (port);
 
-  if (pti->output_cd == (iconv_t) -1)
-    /* Initialize the conversion descriptors, if needed.  */
-    scm_i_set_port_encoding_x (port, pt->encoding);
-
-  /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8.  */
-  if (pti->output_cd == (iconv_t) -1)
+  if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
     return display_string_as_utf8 (str, narrow_p, len, port);
   else
     return display_string_using_iconv (str, narrow_p, len,
-- 
1.7.10.4


reply via email to

[Prev in Thread] Current Thread [Next in Thread]