guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] fix locale string reading


From: Ludovic Courtès
Subject: Re: [PATCH] fix locale string reading
Date: Wed, 16 Nov 2011 00:51:26 +0100
User-agent: Gnus/5.110018 (No Gnus v0.18) Emacs/24.0.90 (gnu/linux)

Hi Mark!

Mark H Weaver <address@hidden> skribis:

> address@hidden (Ludovic Courtès) writes:
>>> I think we should consider decoding the command-line arguments using the
>>> locale specified by the environment variables, at least in cases like
>>> this where there's no way for the user to call setlocale before the
>>> conversion happens.
>>
>> Below is a patch that does roughly that (we should get ‘locale_encoding’
>> reviewed and perhaps added to Gnulib.)
>>
>> It solves the problem:
>>
>> # With the patch.
>> $ ./meta/guile -c '(setlocale LC_ALL "en_US.UTF8")(display (command-line))' -- λ
>> (/home/ludo/src/guile/libguile/.libs/guile -- λ)
>>
>> # Previously.
>> $ guile -c '(setlocale LC_ALL "en_US.UTF8")(display (command-line))' -- λ
>> (guile -- ??)
>
> Looks great, thanks! :)
>
> I have one question though.  You fixed scm_compile_shell_switches, but I
> see another place where command-line arguments are converted to Scheme
> strings before the user is able to call setlocale: guile.c and init.c.
>
> main (guile.c) calls scm_boot_guile (init.c), which uses
> invoke_main_func (init.c), which calls scm_set_program_arguments
> (feature.c).  Does this code need to be fixed also?

Yes, good catch!

An updated patch is attached.  It seems to fulfill its mission:

--8<---------------cut here---------------start------------->8---
# Now:
$ ./meta/guile -c '(setlocale LC_ALL "en_US.UTF8")(display (list (command-line) (program-arguments)))' -- λ
((/home/ludo/src/guile/libguile/.libs/guile -- λ) (/home/ludo/src/guile/libguile/.libs/guile -- λ))

# Before:
$ guile -c '(setlocale LC_ALL "en_US.UTF8")(display (list (command-line) (program-arguments)))' -- λ
((guile -- ??) (guile -- ??))
--8<---------------cut here---------------end--------------->8---

Note that the code uses SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE, but I
wonder if we couldn’t do better.  For instance, upon conversion failure,
we could pass the argument as a bytevector instead of a string and let
the application cope with it.  OTOH, that would be an API change.

Thoughts?

Thanks,
Ludo’.

diff --git a/libguile/feature.c b/libguile/feature.c
index 7007403..f3bddc7 100644
--- a/libguile/feature.c
+++ b/libguile/feature.c
@@ -1,5 +1,6 @@
-/* Copyright (C) 1995,1996,1998,1999,2000,2001,2002, 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
- * 
+/* Copyright (C) 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ *   2006, 2007, 2009, 2011 Free Software Foundation, Inc.
+ *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
  * as published by the Free Software Foundation; either version 3 of
@@ -36,7 +37,8 @@
 
 
 
-static SCM progargs_fluid;
+SCM scm_program_arguments_fluid;
+
 static SCM features_var;
 
 void
@@ -58,7 +60,7 @@ SCM_DEFINE (scm_program_arguments, "program-arguments", 0, 0, 0,
            "options like @code{-e} and @code{-l}.")
 #define FUNC_NAME s_scm_program_arguments
 {
-  return scm_fluid_ref (progargs_fluid);
+  return scm_fluid_ref (scm_program_arguments_fluid);
 }
 #undef FUNC_NAME
 
@@ -74,7 +76,7 @@ scm_set_program_arguments (int argc, char **argv, char *first)
   SCM args = scm_makfromstrs (argc, argv);
   if (first)
     args = scm_cons (scm_from_locale_string (first), args);
-  scm_fluid_set_x (progargs_fluid, args);
+  scm_fluid_set_x (scm_program_arguments_fluid, args);
 }
 
 SCM_DEFINE (scm_set_program_arguments_scm, "set-program-arguments", 1, 0, 0, 
@@ -89,7 +91,7 @@ SCM_DEFINE (scm_set_program_arguments_scm, "set-program-arguments", 1, 0, 0,
            "strings within it are copied, so should not be modified later.")
 #define FUNC_NAME s_scm_set_program_arguments_scm
 {
-  return scm_fluid_set_x (progargs_fluid, lst);
+  return scm_fluid_set_x (scm_program_arguments_fluid, lst);
 }
 #undef FUNC_NAME
 
@@ -99,7 +101,7 @@ SCM_DEFINE (scm_set_program_arguments_scm, "set-program-arguments", 1, 0, 0,
 void
 scm_init_feature()
 {
-  progargs_fluid = scm_make_fluid ();
+  scm_program_arguments_fluid = scm_make_fluid ();
 
   features_var = scm_c_define ("*features*", SCM_EOL);
 #ifndef _Windows
diff --git a/libguile/feature.h b/libguile/feature.h
index d373bc7..467f9ed 100644
--- a/libguile/feature.h
+++ b/libguile/feature.h
@@ -3,7 +3,8 @@
 #ifndef SCM_FEATURE_H
 #define SCM_FEATURE_H
 
-/* Copyright (C) 1995,1996,1999,2000,2001, 2006, 2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1999, 2000, 2001, 2006, 2007, 2008,
+ *   2011 Free Software Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -29,6 +30,8 @@ SCM_API void scm_add_feature (const char* str);
 SCM_API SCM scm_program_arguments (void);
 SCM_API void scm_set_program_arguments (int argc, char **argv, char *first);
 SCM_API SCM scm_set_program_arguments_scm (SCM lst);
+
+SCM_INTERNAL SCM scm_program_arguments_fluid;
 SCM_INTERNAL void scm_init_feature (void);
 
 #endif  /* SCM_FEATURE_H */
diff --git a/libguile/init.c b/libguile/init.c
index 8e3888d..633f8c6 100644
--- a/libguile/init.c
+++ b/libguile/init.c
@@ -332,7 +332,7 @@ invoke_main_func (void *body_data)
 {
   struct main_func_closure *closure = (struct main_func_closure *) body_data;
 
-  scm_set_program_arguments (closure->argc, closure->argv, 0);
+  scm_i_set_boot_program_arguments (closure->argc, closure->argv);
   (*closure->main_func) (closure->closure, closure->argc, closure->argv);
 
   scm_restore_signals ();
diff --git a/libguile/script.c b/libguile/script.c
index 5e0685a..b1d3327 100644
--- a/libguile/script.c
+++ b/libguile/script.c
@@ -26,6 +26,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <ctype.h>
+#include <uniconv.h>
 
 #include "libguile/_scm.h"
 #include "libguile/eval.h"
@@ -368,6 +369,87 @@ scm_shell_usage (int fatal, char *message)
                : SCM_BOOL_F));
 }
 
+/* Return the name of the locale encoding suggested by environment
+   variables, even if it's not current, or NULL if no encoding is
+   defined.  Based on Gnulib's `localcharset.c'.  */
+static const char *
+locale_encoding (void)
+{
+  static char buf[2 + 10 + 1];
+  const char *locale, *codeset = NULL;
+
+  /* Allow user to override the codeset, as set in the operating system,
+     with standard language environment variables.  */
+  locale = getenv ("LC_ALL");
+  if (locale == NULL || locale[0] == '\0')
+    {
+      locale = getenv ("LC_CTYPE");
+      if (locale == NULL || locale[0] == '\0')
+        locale = getenv ("LANG");
+    }
+  if (locale != NULL && locale[0] != '\0')
+    {
+      /* If the locale name contains an encoding after the dot, return it.  */
+      const char *dot = strchr (locale, '.');
+
+      if (dot != NULL)
+        {
+          const char *modifier;
+
+          dot++;
+          /* Look for the possible @... trailer and remove it, if any.  */
+          modifier = strchr (dot, '@');
+          if (modifier == NULL)
+            return dot;
+          if (modifier - dot < sizeof (buf))
+            {
+              memcpy (buf, dot, modifier - dot);
+              buf [modifier - dot] = '\0';
+              return buf;
+            }
+        }
+      else if (strcmp (locale, "C") == 0)
+       {
+         strcpy (buf, "ASCII");
+         return buf;
+       }
+
+      /* Resolve through the charset.alias file.  */
+      codeset = locale;
+    }
+
+  return codeset;
+}
+
+/* Return a list of strings from ARGV, which contains ARGC strings
+   assumed to be encoded in the current locale.  Use `locale_encoding'
+   instead of relying on `scm_from_locale_string' because the user
+   hasn't had a chance to call (setlocale LC_ALL "") yet.  */
+static SCM
+locale_arguments_to_string_list (int argc, char **const argv)
+{
+  int i;
+  SCM lst;
+  const char *encoding;
+
+  encoding = locale_encoding ();
+  for (i = argc - 1, lst = SCM_EOL;
+       i >= 0;
+       i--)
+    lst = scm_cons (scm_from_stringn (argv[i], (size_t) -1, encoding,
+                                     SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE),
+                   lst);
+
+  return lst;
+}
+
+/* Set the value returned by `program-arguments', given ARGC and ARGV.  */
+void
+scm_i_set_boot_program_arguments (int argc, char *argv[])
+{
+  scm_fluid_set_x (scm_program_arguments_fluid,
+                  locale_arguments_to_string_list (argc, argv));
+}
 
 /* Given an array of command-line switches, return a Scheme expression
    to carry out the actions specified by the switches.
@@ -378,7 +460,7 @@ scm_compile_shell_switches (int argc, char **argv)
 {
   return scm_call_2 (scm_c_public_ref ("ice-9 command-line",
                                        "compile-shell-switches"),
-                     scm_makfromstrs (argc, argv),
+                    locale_arguments_to_string_list (argc, argv),
                      (scm_usage_name
                       ? scm_from_locale_string (scm_usage_name)
                       : scm_from_latin1_string ("guile")));
diff --git a/libguile/script.h b/libguile/script.h
index 7e3828a..cf0162a 100644
--- a/libguile/script.h
+++ b/libguile/script.h
@@ -3,7 +3,7 @@
 #ifndef SCM_SCRIPT_H
 #define SCM_SCRIPT_H
 
-/* Copyright (C) 1997,1998,2000, 2006, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 1997,1998,2000, 2006, 2008, 2011 Free Software Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -37,6 +37,7 @@ SCM_API void scm_shell_usage (int fatal, char *message);
 SCM_API SCM scm_compile_shell_switches (int argc, char **argv);
 SCM_API void scm_shell (int argc, char **argv);
 SCM_API char *scm_usage_name;
+SCM_INTERNAL void scm_i_set_boot_program_arguments (int argc, char *argv[]);
 SCM_INTERNAL void scm_init_script (void);
 
 #endif  /* SCM_SCRIPT_H */

reply via email to

[Prev in Thread] Current Thread [Next in Thread]