[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH] Add private port structure, and move iconv descriptors there
From: |
Mark H Weaver |
Subject: |
Re: [PATCH] Add private port structure, and move iconv descriptors there |
Date: |
Sun, 31 Mar 2013 15:44:18 -0400 |
User-agent: |
Gnus/5.13 (Gnus v5.13) Emacs/24.3 (gnu/linux) |
Andy Wingo <address@hidden> writes:
> Can you take a look at porting
> 6c98257f2ead0855f218369ea7f9a823cdb9727e?
Okay, here's an updated patch set that's essentially a backport of that
commit from master, but with 'encoding_mode' and 'iconv_descriptors' put
in the internal-only port structure.
What do you think?
Mark
>From aeef3dc501dd598e5b3dea3fc5d1552a032d3917 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Sat, 30 Mar 2013 23:09:01 -0400
Subject: [PATCH 1/3] Add 'scm_gc_typed_calloc' macro.
* libguile/gc.h (scm_gc_typed_calloc): New macro.
---
libguile/gc.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/libguile/gc.h b/libguile/gc.h
index 9f00e01..c6f7a4b 100644
--- a/libguile/gc.h
+++ b/libguile/gc.h
@@ -207,6 +207,7 @@ SCM_API char *scm_gc_strdup (const char *str, const char
*what)
SCM_API char *scm_gc_strndup (const char *str, size_t n, const char *what)
SCM_MALLOC;
+#define scm_gc_typed_calloc(t) ((t *) scm_gc_calloc (sizeof (t), #t))
#ifdef BUILDING_LIBGUILE
#include "libguile/bdw-gc.h"
--
1.7.10.4
>From 9eb48ea34c3f22f178cb1d889b93911283395a91 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Sat, 30 Mar 2013 22:34:56 -0400
Subject: [PATCH 2/3] Add internal-only port structure and move iconv
descriptors there.
* libguile/ports-internal.h: New file.
* libguile/Makefile.am (noinst_HEADERS): Add ports-internal.h.
* libguile/ports.h (scm_t_port): Add a comment mentioning that the
'input_cd' and 'output_cd' fields of the public structure are no
longer what they seem to be.
* libguile/ports.c: Include ports-internal.h.
(finalize_port, scm_i_remove_port, get_iconv_codepoint, get_codepoint,
scm_i_set_port_encoding_x): Access 'input_cd' and 'output_cd' via the
new internal port structure.
(scm_new_port_table_entry): Allocate and initialize the internal port
structure.
* libguile/print.c: Include ports-internal.h.
(display_string_using_iconv, display_string): Access 'input_cd' and
'output_cd' via 'internal' pointer.
---
libguile/Makefile.am | 2 +-
libguile/ports-internal.h | 40 ++++++++++++++++++++++++++++
libguile/ports.c | 65 +++++++++++++++++++++++++++------------------
libguile/ports.h | 19 ++++++++++---
libguile/print.c | 15 ++++++-----
5 files changed, 105 insertions(+), 36 deletions(-)
create mode 100644 libguile/ports-internal.h
diff --git a/libguile/Makefile.am b/libguile/Makefile.am
index 4b1f96b..0c4b6ce 100644
--- a/libguile/Makefile.am
+++ b/libguile/Makefile.am
@@ -455,7 +455,7 @@ noinst_HEADERS = conv-integer.i.c conv-uinteger.i.c
\
srfi-14.i.c \
quicksort.i.c \
win32-uname.h \
- private-gc.h private-options.h
+ private-gc.h private-options.h ports-internal.h
# vm instructions
noinst_HEADERS += vm-engine.c vm-i-system.c vm-i-scheme.c vm-i-loader.c
diff --git a/libguile/ports-internal.h b/libguile/ports-internal.h
new file mode 100644
index 0000000..eba91c8
--- /dev/null
+++ b/libguile/ports-internal.h
@@ -0,0 +1,40 @@
+/*
+ * ports-internal.h - internal-only declarations for ports.
+ *
+ * Copyright (C) 2013 Free Software Foundation, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef SCM_PORTS_INTERNAL
+#define SCM_PORTS_INTERNAL
+
+#include "libguile/_scm.h"
+#include "libguile/ports.h"
+
+struct scm_port_internal
+{
+ /* input/output iconv conversion descriptors */
+ void *input_cd;
+ void *output_cd;
+};
+
+typedef struct scm_port_internal scm_t_port_internal;
+
+#define SCM_INTERNAL_PTAB_ENTRY(x) \
+ ((scm_t_port_internal *) (SCM_PTAB_ENTRY(x)->input_cd))
+
+#endif
diff --git a/libguile/ports.c b/libguile/ports.c
index 8737a76..86db4a0 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -55,6 +55,7 @@
#include "libguile/mallocs.h"
#include "libguile/validate.h"
#include "libguile/ports.h"
+#include "libguile/ports-internal.h"
#include "libguile/vectors.h"
#include "libguile/weaks.h"
#include "libguile/fluids.h"
@@ -576,7 +577,7 @@ finalize_port (void *ptr, void *data)
register_finalizer_for_port (port);
else
{
- scm_t_port *entry;
+ scm_t_port_internal *pti;
port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port));
if (port_type >= scm_numptob)
@@ -587,12 +588,12 @@ finalize_port (void *ptr, void *data)
is for explicit `close-port' by user. */
scm_ptobs[port_type].free (port);
- entry = SCM_PTAB_ENTRY (port);
+ pti = SCM_INTERNAL_PTAB_ENTRY (port);
- if (entry->input_cd != (iconv_t) -1)
- iconv_close (entry->input_cd);
- if (entry->output_cd != (iconv_t) -1)
- iconv_close (entry->output_cd);
+ if (pti->input_cd != (iconv_t) -1)
+ iconv_close (pti->input_cd);
+ if (pti->output_cd != (iconv_t) -1)
+ iconv_close (pti->output_cd);
SCM_SETSTREAM (port, 0);
SCM_CLR_PORT_OPEN_FLAG (port);
@@ -617,7 +618,8 @@ scm_new_port_table_entry (scm_t_bits tag)
*/
SCM z = scm_cons (SCM_EOL, SCM_EOL);
- scm_t_port *entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port),
"port");
+ scm_t_port *entry = scm_gc_typed_calloc (scm_t_port);
+ scm_t_port_internal *pti = scm_gc_typed_calloc (scm_t_port_internal);
const char *enc;
entry->file_name = SCM_BOOL_F;
@@ -630,11 +632,17 @@ scm_new_port_table_entry (scm_t_bits tag)
entry->encoding = enc ? scm_gc_strdup (enc, "port") : NULL;
/* The conversion descriptors will be opened lazily. */
- entry->input_cd = (iconv_t) -1;
- entry->output_cd = (iconv_t) -1;
+ pti->input_cd = (iconv_t) -1;
+ pti->output_cd = (iconv_t) -1;
entry->ilseq_handler = scm_i_default_port_conversion_handler ();
+ /* XXX These fields are not what they seem. They have been
+ repurposed, but cannot safely be renamed in 2.0 without breaking
+ ABI compatibility. This will be cleaned up in 2.2. */
+ entry->input_cd = pti; /* XXX pointer to the internal port structure */
+ entry->output_cd = NULL; /* XXX unused */
+
SCM_SET_CELL_TYPE (z, tag);
SCM_SETPTAB_ENTRY (z, entry);
@@ -678,24 +686,26 @@ scm_i_remove_port (SCM port)
#define FUNC_NAME "scm_remove_port"
{
scm_t_port *p;
+ scm_t_port_internal *pti;
scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
p = SCM_PTAB_ENTRY (port);
+ pti = SCM_INTERNAL_PTAB_ENTRY (port);
scm_port_non_buffer (p);
p->putback_buf = NULL;
p->putback_buf_size = 0;
- if (p->input_cd != (iconv_t) -1)
+ if (pti->input_cd != (iconv_t) -1)
{
- iconv_close (p->input_cd);
- p->input_cd = (iconv_t) -1;
+ iconv_close (pti->input_cd);
+ pti->input_cd = (iconv_t) -1;
}
- if (p->output_cd != (iconv_t) -1)
+ if (pti->output_cd != (iconv_t) -1)
{
- iconv_close (p->output_cd);
- p->output_cd = (iconv_t) -1;
+ iconv_close (pti->output_cd);
+ pti->output_cd = (iconv_t) -1;
}
SCM_SETPTAB_ENTRY (port, 0);
@@ -1296,13 +1306,13 @@ static int
get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
- scm_t_port *pt;
+ scm_t_port_internal *pti;
int err, byte_read;
size_t bytes_consumed, output_size;
char *output;
scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
- pt = SCM_PTAB_ENTRY (port);
+ pti = SCM_INTERNAL_PTAB_ENTRY (port);
for (output_size = 0, output = (char *) utf8_buf,
bytes_consumed = 0, err = 0;
@@ -1332,7 +1342,7 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
input_left = bytes_consumed + 1;
output_left = sizeof (utf8_buf);
- done = iconv (pt->input_cd, &input, &input_left,
+ done = iconv (pti->input_cd, &input, &input_left,
&output, &output_left);
if (done == (size_t) -1)
{
@@ -1368,13 +1378,14 @@ get_codepoint (SCM port, scm_t_wchar *codepoint,
{
int err;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ scm_t_port_internal *pti = SCM_INTERNAL_PTAB_ENTRY (port);
- if (pt->input_cd == (iconv_t) -1)
+ if (pti->input_cd == (iconv_t) -1)
/* Initialize the conversion descriptors, if needed. */
scm_i_set_port_encoding_x (port, pt->encoding);
/* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */
- if (pt->input_cd == (iconv_t) -1)
+ if (pti->input_cd == (iconv_t) -1)
err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
else
err = get_iconv_codepoint (port, codepoint, buf, len);
@@ -2208,6 +2219,7 @@ void
scm_i_set_port_encoding_x (SCM port, const char *encoding)
{
scm_t_port *pt;
+ scm_t_port_internal *pti;
iconv_t new_input_cd, new_output_cd;
new_input_cd = (iconv_t) -1;
@@ -2215,6 +2227,7 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding)
/* Set the character encoding for this port. */
pt = SCM_PTAB_ENTRY (port);
+ pti = SCM_INTERNAL_PTAB_ENTRY (port);
if (encoding == NULL)
encoding = "ISO-8859-1";
@@ -2251,13 +2264,13 @@ scm_i_set_port_encoding_x (SCM port, const char
*encoding)
}
}
- if (pt->input_cd != (iconv_t) -1)
- iconv_close (pt->input_cd);
- if (pt->output_cd != (iconv_t) -1)
- iconv_close (pt->output_cd);
+ if (pti->input_cd != (iconv_t) -1)
+ iconv_close (pti->input_cd);
+ if (pti->output_cd != (iconv_t) -1)
+ iconv_close (pti->output_cd);
- pt->input_cd = new_input_cd;
- pt->output_cd = new_output_cd;
+ pti->input_cd = new_input_cd;
+ pti->output_cd = new_output_cd;
return;
diff --git a/libguile/ports.h b/libguile/ports.h
index d4d59b7..6791a5c 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -112,9 +112,22 @@ typedef struct
unsigned char *putback_buf;
size_t putback_buf_size; /* allocated size of putback_buf. */
- /* input/output iconv conversion descriptors */
- void *input_cd;
- void *output_cd;
+ /* IMPORTANT: 'input_cd' and 'output_cd' used to be pointers to the
+ input and output iconv descriptors, but those have been moved to
+ the internal-only port structure defined in ports-internal.h.
+
+ Given that we must preserve ABI compatibility in 2.0, we cannot
+ safely change this public structure without running afoul of C
+ strict aliasing rules. We cannot even change the member names.
+
+ To work around this, in this public structure, 'input_cd' has been
+ repurposed to be a pointer to the internal port structure (see
+ ports-internal.h), and 'output_cd' is now unused.
+
+ This will be cleaned up in 2.2. */
+
+ void *input_cd; /* XXX actually a pointer to scm_t_port_internal */
+ void *output_cd; /* XXX actually unused */
} scm_t_port;
diff --git a/libguile/print.c b/libguile/print.c
index 647eed8..62d7267 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -45,6 +45,7 @@
#include "libguile/alist.h"
#include "libguile/struct.h"
#include "libguile/ports.h"
+#include "libguile/ports-internal.h"
#include "libguile/root.h"
#include "libguile/strings.h"
#include "libguile/strports.h"
@@ -879,9 +880,9 @@ display_string_using_iconv (const void *str, int narrow_p,
size_t len,
scm_t_string_failed_conversion_handler strategy)
{
size_t printed;
- scm_t_port *pt;
+ scm_t_port_internal *pti;
- pt = SCM_PTAB_ENTRY (port);
+ pti = SCM_INTERNAL_PTAB_ENTRY (port);
printed = 0;
@@ -910,7 +911,7 @@ display_string_using_iconv (const void *str, int narrow_p,
size_t len,
output = encoded_output;
output_left = sizeof (encoded_output);
- done = iconv (pt->output_cd, &input, &input_left,
+ done = iconv (pti->output_cd, &input, &input_left,
&output, &output_left);
output_len = sizeof (encoded_output) - output_left;
@@ -920,7 +921,7 @@ display_string_using_iconv (const void *str, int narrow_p,
size_t len,
int errno_save = errno;
/* Reset the `iconv' state. */
- iconv (pt->output_cd, NULL, NULL, NULL, NULL);
+ iconv (pti->output_cd, NULL, NULL, NULL, NULL);
/* Print the OUTPUT_LEN bytes successfully converted. */
scm_lfwrite (encoded_output, output_len, port);
@@ -981,15 +982,17 @@ display_string (const void *str, int narrow_p,
{
scm_t_port *pt;
+ scm_t_port_internal *pti;
pt = SCM_PTAB_ENTRY (port);
+ pti = SCM_INTERNAL_PTAB_ENTRY (port);
- if (pt->output_cd == (iconv_t) -1)
+ if (pti->output_cd == (iconv_t) -1)
/* Initialize the conversion descriptors, if needed. */
scm_i_set_port_encoding_x (port, pt->encoding);
/* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */
- if (pt->output_cd == (iconv_t) -1)
+ if (pti->output_cd == (iconv_t) -1)
return display_string_as_utf8 (str, narrow_p, len, port);
else
return display_string_using_iconv (str, narrow_p, len,
--
1.7.10.4
>From b5304dfe27075784532a73ae2a587612ddc717fd Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Sun, 31 Mar 2013 15:23:32 -0400
Subject: [PATCH 3/3] Refactor port encoding modes: utf-8 and iconv
Based on 6c98257f2ead0855f218369ea7f9a823cdb9727e by Andy Wingo.
* libguile/ports-internal.h (struct scm_port_internal): Add a flag
for the port encoding mode: UTF8 or iconv. The iconv descriptors
are now in a separate structure so that we can avoid attaching
finalizers to the ports themselves in the future.
(enum scm_port_encoding_mode): New enum.
(struct scm_iconv_descriptors): New struct.
(scm_i_port_iconv_descriptors): Add prototype.
* libguile/ports.c (finalize_port): Don't close iconv descriptors here.
(scm_new_port_table_entry): Adapt to the iconv descriptors being
moved. Initialize 'encoding_mode'.
(scm_i_remove_port): Adapt to call 'close_iconv_descriptors'.
(close_iconv_descriptors): New static function.
(get_iconv_codepoint): Use 'scm_i_port_iconv_descriptors'.
(get_codepoint): Check the port 'encoding_mode'.
(finalize_iconv_descriptors, open_iconv_descriptors,
close_iconv_descriptors, scm_i_port_iconv_descriptors): New static
functions.
(scm_i_set_port_encoding_x): Adapt to iconv descriptors being moved
to separate structure, to set the 'encoding_mode' flag, and to use
'open_iconv_descriptors' and 'close_iconv_descriptors'.
* libguile/print.c (display_string_using_iconv): Use
'scm_i_port_iconv_descriptors'.
(display_string): Use 'encoding_mode' flag.
---
libguile/ports-internal.h | 22 ++++-
libguile/ports.c | 229 ++++++++++++++++++++++++++++-----------------
libguile/print.c | 17 +---
3 files changed, 169 insertions(+), 99 deletions(-)
diff --git a/libguile/ports-internal.h b/libguile/ports-internal.h
index eba91c8..d52eab2 100644
--- a/libguile/ports-internal.h
+++ b/libguile/ports-internal.h
@@ -25,16 +25,36 @@
#include "libguile/_scm.h"
#include "libguile/ports.h"
-struct scm_port_internal
+enum scm_port_encoding_mode {
+ SCM_PORT_ENCODING_MODE_UTF8,
+ SCM_PORT_ENCODING_MODE_ICONV
+};
+
+typedef enum scm_port_encoding_mode scm_t_port_encoding_mode;
+
+/* This is a separate object so that only those ports that use iconv
+ cause finalizers to be registered (FIXME: although currently in 2.0
+ finalizers are always registered for ports anyway). */
+struct scm_iconv_descriptors
{
/* input/output iconv conversion descriptors */
void *input_cd;
void *output_cd;
};
+typedef struct scm_iconv_descriptors scm_t_iconv_descriptors;
+
+struct scm_port_internal
+{
+ scm_t_port_encoding_mode encoding_mode;
+ scm_t_iconv_descriptors *iconv_descriptors;
+};
+
typedef struct scm_port_internal scm_t_port_internal;
#define SCM_INTERNAL_PTAB_ENTRY(x) \
((scm_t_port_internal *) (SCM_PTAB_ENTRY(x)->input_cd))
+SCM_INTERNAL scm_t_iconv_descriptors *scm_i_port_iconv_descriptors (SCM port);
+
#endif
diff --git a/libguile/ports.c b/libguile/ports.c
index 86db4a0..3c67e67 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -555,8 +555,8 @@ static void finalize_port (void *, void *);
static SCM_C_INLINE_KEYWORD void
register_finalizer_for_port (SCM port)
{
- /* Register a finalizer for PORT so that its iconv CDs get freed and
- optionally its type's `free' function gets called. */
+ /* Register a finalizer for PORT so that its
+ type's `free' function gets called. */
scm_i_set_finalizer (SCM2PTR (port), finalize_port, NULL);
}
@@ -577,8 +577,6 @@ finalize_port (void *ptr, void *data)
register_finalizer_for_port (port);
else
{
- scm_t_port_internal *pti;
-
port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port));
if (port_type >= scm_numptob)
abort ();
@@ -588,13 +586,6 @@ finalize_port (void *ptr, void *data)
is for explicit `close-port' by user. */
scm_ptobs[port_type].free (port);
- pti = SCM_INTERNAL_PTAB_ENTRY (port);
-
- if (pti->input_cd != (iconv_t) -1)
- iconv_close (pti->input_cd);
- if (pti->output_cd != (iconv_t) -1)
- iconv_close (pti->output_cd);
-
SCM_SETSTREAM (port, 0);
SCM_CLR_PORT_OPEN_FLAG (port);
@@ -620,7 +611,7 @@ scm_new_port_table_entry (scm_t_bits tag)
SCM z = scm_cons (SCM_EOL, SCM_EOL);
scm_t_port *entry = scm_gc_typed_calloc (scm_t_port);
scm_t_port_internal *pti = scm_gc_typed_calloc (scm_t_port_internal);
- const char *enc;
+ const char *encoding;
entry->file_name = SCM_BOOL_F;
entry->rw_active = SCM_PORT_NEITHER;
@@ -628,14 +619,14 @@ scm_new_port_table_entry (scm_t_bits tag)
/* Initialize this port with the thread's current default
encoding. */
- enc = scm_i_default_port_encoding ();
- entry->encoding = enc ? scm_gc_strdup (enc, "port") : NULL;
-
- /* The conversion descriptors will be opened lazily. */
- pti->input_cd = (iconv_t) -1;
- pti->output_cd = (iconv_t) -1;
-
+ encoding = scm_i_default_port_encoding ();
entry->ilseq_handler = scm_i_default_port_conversion_handler ();
+ entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
+ if (encoding && strcmp (encoding, "UTF-8") == 0)
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
+ else
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+ pti->iconv_descriptors = NULL;
/* XXX These fields are not what they seem. They have been
repurposed, but cannot safely be renamed in 2.0 without breaking
@@ -681,6 +672,8 @@ scm_add_to_port_table (SCM port)
/* Remove a port from the table and destroy it. */
+static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
+
static void
scm_i_remove_port (SCM port)
#define FUNC_NAME "scm_remove_port"
@@ -696,16 +689,10 @@ scm_i_remove_port (SCM port)
p->putback_buf = NULL;
p->putback_buf_size = 0;
- if (pti->input_cd != (iconv_t) -1)
+ if (pti->iconv_descriptors)
{
- iconv_close (pti->input_cd);
- pti->input_cd = (iconv_t) -1;
- }
-
- if (pti->output_cd != (iconv_t) -1)
- {
- iconv_close (pti->output_cd);
- pti->output_cd = (iconv_t) -1;
+ close_iconv_descriptors (pti->iconv_descriptors);
+ pti->iconv_descriptors = NULL;
}
SCM_SETPTAB_ENTRY (port, 0);
@@ -1306,13 +1293,13 @@ static int
get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
- scm_t_port_internal *pti;
+ scm_t_iconv_descriptors *id;
int err, byte_read;
size_t bytes_consumed, output_size;
char *output;
scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
- pti = SCM_INTERNAL_PTAB_ENTRY (port);
+ id = scm_i_port_iconv_descriptors (port);
for (output_size = 0, output = (char *) utf8_buf,
bytes_consumed = 0, err = 0;
@@ -1342,8 +1329,7 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
input_left = bytes_consumed + 1;
output_left = sizeof (utf8_buf);
- done = iconv (pti->input_cd, &input, &input_left,
- &output, &output_left);
+ done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
if (done == (size_t) -1)
{
err = errno;
@@ -1380,12 +1366,7 @@ get_codepoint (SCM port, scm_t_wchar *codepoint,
scm_t_port *pt = SCM_PTAB_ENTRY (port);
scm_t_port_internal *pti = SCM_INTERNAL_PTAB_ENTRY (port);
- if (pti->input_cd == (iconv_t) -1)
- /* Initialize the conversion descriptors, if needed. */
- scm_i_set_port_encoding_x (port, pt->encoding);
-
- /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */
- if (pti->input_cd == (iconv_t) -1)
+ if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
else
err = get_iconv_codepoint (port, codepoint, buf, len);
@@ -2215,73 +2196,149 @@ scm_i_default_port_encoding (void)
}
}
+static void
+finalize_iconv_descriptors (GC_PTR ptr, GC_PTR data)
+{
+ close_iconv_descriptors (ptr);
+}
+
+static scm_t_iconv_descriptors *
+open_iconv_descriptors (const char *encoding, int reading, int writing)
+{
+ scm_t_iconv_descriptors *id;
+ iconv_t input_cd, output_cd;
+
+ input_cd = (iconv_t) -1;
+ output_cd = (iconv_t) -1;
+ if (reading)
+ {
+ /* Open an input iconv conversion descriptor, from ENCODING
+ to UTF-8. We choose UTF-8, not UTF-32, because iconv
+ implementations can typically convert from anything to
+ UTF-8, but not to UTF-32 (see
+
<http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
+
+ /* Assume opening an iconv descriptor causes about 16 KB of
+ allocation. */
+ scm_gc_register_allocation (16 * 1024);
+
+ input_cd = iconv_open ("UTF-8", encoding);
+ if (input_cd == (iconv_t) -1)
+ goto invalid_encoding;
+ }
+
+ if (writing)
+ {
+ /* Assume opening an iconv descriptor causes about 16 KB of
+ allocation. */
+ scm_gc_register_allocation (16 * 1024);
+
+ output_cd = iconv_open (encoding, "UTF-8");
+ if (output_cd == (iconv_t) -1)
+ {
+ if (input_cd != (iconv_t) -1)
+ iconv_close (input_cd);
+ goto invalid_encoding;
+ }
+ }
+
+ id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
+ id->input_cd = input_cd;
+ id->output_cd = output_cd;
+
+ {
+ GC_finalization_proc prev_finalizer;
+ GC_PTR prev_finalization_data;
+
+ /* Register a finalizer to close the descriptors. */
+ GC_REGISTER_FINALIZER_NO_ORDER (id, finalize_iconv_descriptors, 0,
+ &prev_finalizer, &prev_finalization_data);
+ }
+
+ return id;
+
+ invalid_encoding:
+ {
+ SCM err;
+ err = scm_from_locale_string (encoding);
+ scm_misc_error ("open_iconv_descriptors",
+ "invalid or unknown character encoding ~s",
+ scm_list_1 (err));
+ }
+}
+
+static void
+close_iconv_descriptors (scm_t_iconv_descriptors *id)
+{
+ if (id->input_cd != (iconv_t) -1)
+ iconv_close (id->input_cd);
+ if (id->output_cd != (iconv_t) -1)
+ iconv_close (id->output_cd);
+ id->input_cd = (void *) -1;
+ id->output_cd = (void *) -1;
+}
+
+scm_t_iconv_descriptors *
+scm_i_port_iconv_descriptors (SCM port)
+{
+ scm_t_port *pt;
+ scm_t_port_internal *pti;
+
+ pt = SCM_PTAB_ENTRY (port);
+ pti = SCM_INTERNAL_PTAB_ENTRY (port);
+
+ assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
+
+ if (!pti->iconv_descriptors)
+ {
+ if (!pt->encoding)
+ pt->encoding = "ISO-8859-1";
+ pti->iconv_descriptors =
+ open_iconv_descriptors (pt->encoding,
+ SCM_INPUT_PORT_P (port),
+ SCM_OUTPUT_PORT_P (port));
+ }
+
+ return pti->iconv_descriptors;
+}
+
void
scm_i_set_port_encoding_x (SCM port, const char *encoding)
{
scm_t_port *pt;
scm_t_port_internal *pti;
- iconv_t new_input_cd, new_output_cd;
-
- new_input_cd = (iconv_t) -1;
- new_output_cd = (iconv_t) -1;
+ scm_t_iconv_descriptors *prev;
/* Set the character encoding for this port. */
pt = SCM_PTAB_ENTRY (port);
pti = SCM_INTERNAL_PTAB_ENTRY (port);
+ prev = pti->iconv_descriptors;
if (encoding == NULL)
encoding = "ISO-8859-1";
- if (pt->encoding != encoding)
- pt->encoding = scm_gc_strdup (encoding, "port");
-
/* If ENCODING is UTF-8, then no conversion descriptor is opened
because we do I/O ourselves. This saves 100+ KiB for each
descriptor. */
- if (strcmp (encoding, "UTF-8"))
+ if (strcmp (encoding, "UTF-8") == 0)
{
- if (SCM_CELL_WORD_0 (port) & SCM_RDNG)
- {
- /* Open an input iconv conversion descriptor, from ENCODING
- to UTF-8. We choose UTF-8, not UTF-32, because iconv
- implementations can typically convert from anything to
- UTF-8, but not to UTF-32 (see
-
<http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
- new_input_cd = iconv_open ("UTF-8", encoding);
- if (new_input_cd == (iconv_t) -1)
- goto invalid_encoding;
- }
-
- if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
- {
- new_output_cd = iconv_open (encoding, "UTF-8");
- if (new_output_cd == (iconv_t) -1)
- {
- if (new_input_cd != (iconv_t) -1)
- iconv_close (new_input_cd);
- goto invalid_encoding;
- }
- }
+ pt->encoding = "UTF-8";
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
+ pti->iconv_descriptors = NULL;
+ }
+ else
+ {
+ /* Open descriptors before mutating the port. */
+ pti->iconv_descriptors =
+ open_iconv_descriptors (encoding,
+ SCM_INPUT_PORT_P (port),
+ SCM_OUTPUT_PORT_P (port));
+ pt->encoding = scm_gc_strdup (encoding, "port");
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
}
- if (pti->input_cd != (iconv_t) -1)
- iconv_close (pti->input_cd);
- if (pti->output_cd != (iconv_t) -1)
- iconv_close (pti->output_cd);
-
- pti->input_cd = new_input_cd;
- pti->output_cd = new_output_cd;
-
- return;
-
- invalid_encoding:
- {
- SCM err;
- err = scm_from_locale_string (encoding);
- scm_misc_error ("scm_i_set_port_encoding_x",
- "invalid or unknown character encoding ~s",
- scm_list_1 (err));
- }
+ if (prev)
+ close_iconv_descriptors (prev);
}
SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
diff --git a/libguile/print.c b/libguile/print.c
index 62d7267..3469d71 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -880,9 +880,9 @@ display_string_using_iconv (const void *str, int narrow_p,
size_t len,
scm_t_string_failed_conversion_handler strategy)
{
size_t printed;
- scm_t_port_internal *pti;
+ scm_t_iconv_descriptors *id;
- pti = SCM_INTERNAL_PTAB_ENTRY (port);
+ id = scm_i_port_iconv_descriptors (port);
printed = 0;
@@ -911,7 +911,7 @@ display_string_using_iconv (const void *str, int narrow_p,
size_t len,
output = encoded_output;
output_left = sizeof (encoded_output);
- done = iconv (pti->output_cd, &input, &input_left,
+ done = iconv (id->output_cd, &input, &input_left,
&output, &output_left);
output_len = sizeof (encoded_output) - output_left;
@@ -921,7 +921,7 @@ display_string_using_iconv (const void *str, int narrow_p,
size_t len,
int errno_save = errno;
/* Reset the `iconv' state. */
- iconv (pti->output_cd, NULL, NULL, NULL, NULL);
+ iconv (id->output_cd, NULL, NULL, NULL, NULL);
/* Print the OUTPUT_LEN bytes successfully converted. */
scm_lfwrite (encoded_output, output_len, port);
@@ -981,18 +981,11 @@ display_string (const void *str, int narrow_p,
scm_t_string_failed_conversion_handler strategy)
{
- scm_t_port *pt;
scm_t_port_internal *pti;
- pt = SCM_PTAB_ENTRY (port);
pti = SCM_INTERNAL_PTAB_ENTRY (port);
- if (pti->output_cd == (iconv_t) -1)
- /* Initialize the conversion descriptors, if needed. */
- scm_i_set_port_encoding_x (port, pt->encoding);
-
- /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */
- if (pti->output_cd == (iconv_t) -1)
+ if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
return display_string_as_utf8 (str, narrow_p, len, port);
else
return display_string_using_iconv (str, narrow_p, len,
--
1.7.10.4
- Re: Adding new information to scm_t_port (was Re: always O_BINARY?), Ludovic Courtès, 2013/03/01
- Re: Adding new information to scm_t_port (was Re: always O_BINARY?), Mark H Weaver, 2013/03/05
- [PATCH] Add private port structure, and move iconv descriptors there, Mark H Weaver, 2013/03/27
- Re: [PATCH] Add private port structure, and move iconv descriptors there, Ludovic Courtès, 2013/03/27
- Re: [PATCH] Add private port structure, and move iconv descriptors there, Andy Wingo, 2013/03/27
- Re: [PATCH] Add private port structure, and move iconv descriptors there, Mark H Weaver, 2013/03/27
- [PATCH] Add internal-only port structure; move iconv descriptors there, Mark H Weaver, 2013/03/31
- Re: [PATCH] Add internal-only port structure; move iconv descriptors there, Ludovic Courtès, 2013/03/31
- Re: [PATCH] Add internal-only port structure; move iconv descriptors there, Mark H Weaver, 2013/03/31
- Re: [PATCH] Add internal-only port structure; move iconv descriptors there, Ludovic Courtès, 2013/03/31
- Re: [PATCH] Add private port structure, and move iconv descriptors there,
Mark H Weaver <=
- Re: [PATCH] Add private port structure, and move iconv descriptors there, Ludovic Courtès, 2013/03/31