guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: open-process and related functions for MinGW Guile


From: Mark H Weaver
Subject: Re: open-process and related functions for MinGW Guile
Date: Tue, 12 Aug 2014 14:08:48 -0400
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.3 (gnu/linux)

Hi Eli,

Thanks very much for working on this, and for your patience.

Eli Zaretskii <address@hidden> writes:
> Here's the updated patch:
>
> --- libguile/posix.c.~1~      2014-02-28 22:01:27.000000000 +0200
> +++ libguile/posix.c  2014-08-08 17:27:50.339267200 +0300
> @@ -84,6 +84,42 @@
>  #if HAVE_SYS_WAIT_H
>  # include <sys/wait.h>
>  #endif
> +#ifdef __MINGW32__
> +
> +#include <c-strcase.h>

Please add a space after the '#' here.

> +
> +# define WEXITSTATUS(stat_val) ((stat_val) & 255)
> +/* MS-Windows programs that crash due to a fatal exception exit with
> +   an exit code whose 2 MSB bits are set.  */
> +# define WIFEXITED(stat_val)   (((stat_val) & 0xC0000000) == 0)
> +# define WIFSIGNALED(stat_val) (((stat_val) & 0xC0000000) == 0xC0000000)
> +# define WTERMSIG(stat_val)    w32_status_to_termsig (stat_val)
> +/* The funny conditional avoids a compiler warning in status:stop_sig.  */
> +# define WIFSTOPPED(stat_val)  ((stat_val) == (stat_val) ? 0 : 0)

What is the warning?  Would ((stat_val), 0) also fix it?  If so, I think
that would be preferable.

> +# define WSTOPSIG(stat_var)    (0)
> +# include <process.h>
> +# define HAVE_WAITPID 1
> +  static int w32_status_to_termsig (DWORD);
> +  static int w32_signal_to_status (int);
> +# define getuid()              (500) /* Local Administrator */
> +# define getgid()              (513) /* None */
> +# define setuid(u)             (0)
> +# define setgid(g)             (0)

As I've said before, I'm not okay with making 'setuid', 'seteuid',
'setgid', or 'setegid' into no-ops.  They should at least raise an error
when called on Windows, and maybe they should be undefined.

> +# define WIN32_LEAN_AND_MEAN
> +# include <windows.h>
> +# define WNOHANG               1
> +  int waitpid (intptr_t, int *, int);
> +
> +  typedef DWORD_PTR cpu_set_t;
> +
> +#define CPU_ZERO(s)     memset(s,0,sizeof(*s))
> +#define CPU_ISSET(b,s)  ((*s) & (1U << (b))) != 0
> +#define CPU_SET(b,s)    (*s) |= (1U << (b))

There should be parentheses around all occurrences of 's' above.

[...]

> --- /dev/null 1970-01-01 02:00:00 +0200
> +++ libguile/w32-proc.c       2014-06-29 11:26:08 +0300
> @@ -0,0 +1,563 @@
> +/* Run a child process with redirected standard handles, without
> +   redirecting standard handles of the parent.  This is required in
> +   multithreaded programs, where redirecting a standard handle affects
> +   all threads.  */

Please add a copyright notice and header on this file, like the other
*.c files in libguile.

> +
> +#include <stdlib.h>
> +#include <string.h>
> +
> +/* Prepare a possibly redirected file handle to be passed to a child
> +   process.  The handle is for the file/device open on file descriptor
> +   FD; if FD is invalid, use the null device instead.
> +
> +   USE_STD non-zero means we have been passed the descriptor used by
> +   the parent.
> +
> +   ACCESS is the Windows access mode for opening the null device.
> +
> +   Returns the Win32 handle to be passed to CreateProcess.  */
> +static HANDLE
> +prepare_child_handle (int fd, int use_std, DWORD access)
> +{
> +  HANDLE htem, hret;
> +  DWORD err = 0;
> +
> +  /* Start with the descriptor, if specified by the caller and valid,
> +     otherwise open the null device.  */
> +  if (fd < 0)
> +    htem = INVALID_HANDLE_VALUE;
> +  else
> +    htem = (HANDLE)_get_osfhandle (fd);
> +
> +  /* Duplicate the handle and make it inheritable.  */
> +  if (DuplicateHandle (GetCurrentProcess (),
> +                    htem,
> +                    GetCurrentProcess (),
> +                    &hret,
> +                    0,
> +                    TRUE,
> +                    DUPLICATE_SAME_ACCESS) == FALSE)
> +    {
> +      /* If the original standard handle was invalid (happens, e.g.,
> +      in GUI programs), open the null device instead.  */
> +      if ((err = GetLastError ()) == ERROR_INVALID_HANDLE
> +       && use_std)
> +     {
> +       htem = CreateFile ("NUL", access,
> +                          FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
> +                          OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
> +       if (htem != INVALID_HANDLE_VALUE
> +           && DuplicateHandle (GetCurrentProcess (),
> +                               htem,
> +                               GetCurrentProcess (),
> +                               &hret,
> +                               0,
> +                               TRUE,
> +                               DUPLICATE_SAME_ACCESS) == FALSE)
> +         {
> +           err = GetLastError ();
> +           CloseHandle (htem);
> +           hret = INVALID_HANDLE_VALUE;
> +         }
> +     }
> +    }
> +
> +  if (hret == INVALID_HANDLE_VALUE)
> +    {
> +      switch (err)
> +     {
> +       case ERROR_NO_MORE_FILES:
> +         errno = EMFILE;
> +         break;
> +       case ERROR_INVALID_HANDLE:
> +       default:
> +         errno = EBADF;
> +         break;
> +     }
> +    }
> +
> +  return hret;
> +}
> +
> +/* A comparison function for sorting the environment.  */
> +static int
> +compenv (const void *a1, const void *a2)
> +{
> +  return stricmp (*((char**)a1), *((char**)a2));
> +}
> +
> +/* Convert the program's 'environ' array to a block of environment
> +   variables suitable to be passed to CreateProcess.  This is needed
> +   to ensure the child process inherits the up-to-date environment of
> +   the parent, including any variables inserted by the parent.  */
> +static void
> +prepare_envblk (char **envp, char **envblk)
> +{
> +  char **tmp;
> +  int size_needed;
> +  int envcnt;
> +  char *ptr;
> +
> +  for (envcnt = 0; envp[envcnt]; envcnt++)
> +    ;
> +
> +  tmp = scm_calloc ((envcnt + 1) * sizeof (*tmp));
> +
> +  for (envcnt = size_needed = 0; envp[envcnt]; envcnt++)
> +    {
> +      tmp[envcnt] = envp[envcnt];
> +      size_needed += strlen (envp[envcnt]) + 1;
> +    }
> +  size_needed++;
> +
> +  /* Windows likes its environment variables sorted.  */
> +  qsort ((void *) tmp, (size_t) envcnt, sizeof (char *), compenv);
> +
> +  /* CreateProcess needs the environment block as a linear array,
> +     where each variable is terminated by a null character, and the
> +     last one is terminated by 2 null characters.  */
> +  ptr = *envblk = scm_calloc (size_needed);
> +
> +  for (envcnt = 0; tmp[envcnt]; envcnt++)
> +    {
> +      strcpy (ptr, tmp[envcnt]);
> +      ptr += strlen (tmp[envcnt]) + 1;
> +    }
> +
> +  free (tmp);
> +}
> +
> +/* Find an executable PROGRAM on PATH, return result in malloc'ed
> +   storage.  If PROGRAM is /bin/sh, and no sh.exe was found on PATH,
> +   fall back on the Windows shell and set BIN_SH_REPLACED to non-zero.  */
> +static char *
> +lookup_cmd (const char *program, int *bin_sh_replaced)
> +{
> +  static const char *extensions[] = {
> +    ".exe", ".cmd", ".bat", "", ".com", NULL
> +  };
> +  int bin_sh_requested = 0;
> +  const char *path;
> +  char abs_name[MAX_PATH];
> +  DWORD abs_namelen;
> +  int i;
> +
> +  /* If they ask for the Unix system shell, try to find it on PATH.  */
> +  if (c_strcasecmp (program, "/bin/sh") == 0)
> +    {
> +      bin_sh_requested = 1;
> +      program = "sh.exe";
> +    }

Hmm.  I'm not so comfortable with such cleverness at this low level.
I'd prefer to have it higher in the stack, perhaps in the Scheme code in
(ice-9 popen) or maybe even in the application code.

> +
> +  /* If PROGRAM includes leading directories, the caller already did
> +     our job.  */
> +  if (strchr (program, '/') != NULL
> +      || strchr (program, '\\') != NULL)
> +    return scm_strdup (program);
> +
> +  /* Note: It is OK for getenv below to return NULL -- in that case,
> +     SearchPath will search in the directories whose list is specified
> +     by the system Registry.  */
> +  path = getenv ("PATH");
> +  for (i = 0; extensions[i]; i++)
> +    {
> +      abs_namelen = SearchPath (path, program, extensions[i],
> +                             MAX_PATH, abs_name, NULL);
> +      if (0 < abs_namelen && abs_namelen <= MAX_PATH)        /* found! */
> +     break;
> +    }

This search is not done correctly.  If I ask to run program "foo", and
"foo.cmd" comes early in the path and "foo.exe" comes late in the path,
it should be "foo.cmd" that gets run, not "foo.exe".

> +
> +  /* If they asked for /bin/sh and we didn't find it, fall back on the
> +     default Windows shell.  */
> +  if (abs_namelen <= 0 && bin_sh_requested)
> +    {
> +      const char *shell = getenv ("ComSpec");
> +
> +      if (!shell)
> +     shell = "C:\\Windows\\system32\\cmd.exe";
> +
> +      *bin_sh_replaced = 1;
> +      strcpy (abs_name, shell);
> +      abs_namelen = strlen (abs_name);
> +    }

Again, I'd prefer to put this at a higher level in the stack.  If the
user specifically asks to run "/bin/sh", we should not silently
substitute a different incompatible shell.

> +
> +  /* If not found, return the original PROGRAM name.  */
> +  if (abs_namelen <= 0 || abs_namelen > MAX_PATH)
> +    return scm_strdup (program);
> +
> +  return scm_strndup (abs_name, abs_namelen);
> +}
> +
> +/* Concatenate command-line arguments in argv[] into a single
> +   command-line string, while quoting arguments as needed.  The result
> +   is malloc'ed.  */
> +static char *
> +prepare_cmdline (const char *cmd, const char * const *argv, int 
> bin_sh_replaced)
> +{
> +  /* These characters should include anything that is special to _any_
> +     program, including both Windows and Unixy shells, and the
> +     widlcard expansion in startup code of a typical Windows app.  */
> +  const char need_quotes[] = " \t#;\"\'*?[]&|<>(){}$`^";

Hmm.  This seems unlikely to be correct in the general case.  If the
program being run is not a command shell, then I wouldn't expect it to
process backslash escapes at all.

Is there any way to launch a standard C program on Windows where the
individual 'argv' arguments are passed as separate strings, without
having to combine them together with quoting and escapes?

If a standard C program is compiled using Mingw, precisely how is this
combined string converted into that program's 'argv' argument?  How can
we ensure that those strings are passed reliably and losslessly, without
relying on heuristics that only work some of the time?

I do not expect that arbitrary byte sequences can be passed losslessly,
but we should do our best to ensure that arbitrary character strings are
passed losslessly.

> +  size_t cmdlen = 1; /* for terminating null */
> +  char *cmdline = scm_malloc (cmdlen);
> +  char *dst = cmdline;
> +  int cmd_exe_quoting = 0;
> +  int i;
> +  const char *p;
> +
> +  /* Are we constructing a command line for cmd.exe?  */
> +  if (bin_sh_replaced)
> +    cmd_exe_quoting = 1;
> +  else
> +    {
> +      for (p = cmd + strlen (cmd);
> +        p > cmd && p[-1] != '/' && p[-1] != '\\' && p[-1] != ':';
> +        p--)
> +     ;
> +      if (c_strcasecmp (p, "cmd.exe") == 0
> +       || c_strcasecmp (p, "cmd") == 0)
> +     cmd_exe_quoting = 1;
> +    }

If there's really no way to avoid quoting, then probably these
heuristics should be moved into 'lookup_cmd', and 'lookup_cmd' should
return the 'cmd_exe_quoting' flag instead of the 'bin_sh_replaced' flag,
so that it will do the right thing if the higher levels ask for
'cmd.exe' directly.

> +
> +  /* Initialize the command line to empty.  */
> +  *dst = '\0';
> +
> +  /* Append arguments, if any, from argv[]. */
> +  for (i = 0; argv[i]; i++)
> +    {
> +      const char *src = argv[i];
> +      size_t len;
> +      int quote_this = 0, n_backslashes = 0;
> +      int j;
> +
> +      /* Append the blank separator.  We don't do that for argv[0]
> +      because that is the command name (will end up in child's
> +      argv[0]), and is only recognized as such if there're no
> +      blanks before it.  */
> +      if (i > 0)
> +     *dst++ = ' ';
> +      len = dst - cmdline;
> +
> +      /* How much space is required for this argument?  */
> +      cmdlen += strlen (argv[i]) + 1; /* 1 for a blank separator */
> +      /* cmd.exe needs a different style of quoting: all the arguments
> +      beyond the /c switch are enclosed in an extra pair of quotes,
> +      and not otherwise quoted/escaped. */

Again, I think this should be handled at a higher level in the stack.
It would be good to have primitives at the C level that don't include
such unreliable heuristics.

> +      if (cmd_exe_quoting)
> +     {
> +       if (i == 2)
> +         cmdlen += 2;
> +     }
> +      else if (strpbrk (argv[i], need_quotes))
> +     {
> +       quote_this = 1;
> +       cmdlen += 2;
> +       for ( ; *src; src++)
> +         {
> +           /* An embedded quote needs to be escaped by a backslash.
> +              Any backslashes immediately preceding that quote need
> +              each one to be escaped by another backslash.  */
> +           if (*src == '\"')
> +             cmdlen += n_backslashes + 1;
> +           if (*src == '\\')
> +             n_backslashes++;
> +           else
> +             n_backslashes = 0;
> +         }
> +       /* If the closing quote we will add is preceded by
> +          backslashes, those backslashes need to be escaped.  */
> +       cmdlen += n_backslashes;
> +     }
> +
> +      /* Enlarge the command-line string as needed.  */
> +      cmdline = scm_realloc (cmdline, cmdlen);
> +      dst = cmdline + len;
> +
> +      if (i == 0
> +       && c_strcasecmp (argv[0], "/bin/sh") == 0
> +       && bin_sh_replaced)
> +     {
> +       strcpy (dst, "cmd.exe");
> +       dst += sizeof ("cmd.exe") - 1;
> +       continue;
> +     }
> +      if (i == 1 && bin_sh_replaced && strcmp (argv[1], "-c") == 0)
> +     {
> +       *dst++ = '/';
> +       *dst++ = 'c';
> +       *dst = '\0';
> +       continue;
> +     }
> +
> +      /* Add this argument, possibly quoted, to the command line.  */
> +      if (quote_this || (i == 2 && cmd_exe_quoting))
> +     *dst++ = '\"';
> +      for (src = argv[i]; *src; src++)
> +     {
> +       if (quote_this)
> +         {
> +           if (*src == '\"')
> +             for (j = n_backslashes + 1; j > 0; j--)
> +               *dst++ = '\\';
> +           if (*src == '\\')
> +             n_backslashes++;
> +           else
> +             n_backslashes = 0;
> +         }
> +       *dst++ = *src;
> +     }
> +      if (quote_this)
> +     {
> +       for (j = n_backslashes; j > 0; j--)
> +         *dst++ = '\\';
> +       *dst++ = '\"';
> +     }
> +      *dst = '\0';
> +    }
> +
> +  if (cmd_exe_quoting && i > 2)
> +    {
> +      /* One extra slot was already reserved when we enlarged cmdlen
> +      by 2 in the "if (cmd_exe_quoting)" clause above.  So we can
> +      safely append a closing quote.  */
> +      *dst++ = '\"';
> +      *dst = '\0';
> +    }
> +
> +  return cmdline;
> +}
> +
> +/* Start a child process running the program in EXEC_FILE with its
> +   standard input and output optionally redirected to a pipe.  ARGV is
> +   the array of command-line arguments to pass to the child.  P2C and
> +   C2P are 2 pipes for communicating with the child, and ERRFD is the
> +   standard error file descriptor to be inherited by the child.
> +   READING and WRITING, if non-zero, mean that the corresponding pipe
> +   will be used.
> +
> +   Return the PID of the child process, or -1 if couldn't start a
> +   process.  */
> +static intptr_t
> +start_child (const char *exec_file, char **argv,
> +          int reading, int c2p[2], int writing, int p2c[2], int errfd)
> +{
> +  HANDLE hin = INVALID_HANDLE_VALUE, hout = INVALID_HANDLE_VALUE;
> +  HANDLE herr = INVALID_HANDLE_VALUE;
> +  STARTUPINFO si;
> +  char *env_block = NULL;
> +  char *cmdline = NULL;
> +  PROCESS_INFORMATION pi;
> +  char *progfile, *p;
> +  int errno_save;
> +  intptr_t pid;
> +  int bin_sh_replaced = 0;
> +
> +  /* Prepare standard handles to be passed to the child process.  */
> +  hin = prepare_child_handle (p2c[0], !writing, GENERIC_READ);
> +  if (hin == INVALID_HANDLE_VALUE)
> +    return -1;
> +  hout = prepare_child_handle (c2p[1], !reading, GENERIC_WRITE);
> +  if (hout == INVALID_HANDLE_VALUE)
> +    return -1;
> +  herr = prepare_child_handle (errfd, 1, GENERIC_WRITE);
> +  if (herr == INVALID_HANDLE_VALUE)
> +    return -1;
> +
> +  /* Make sure the parent side of both pipes is not inherited.  This
> +     is required because gnulib's 'pipe' creates pipes whose both ends
> +     are inheritable, which is traditional on Posix (where pipe
> +     descriptors are implicitly duplicated by 'fork'), but wrong on
> +     Windows (where pipe handles need to be explicitly
> +     duplicated).  */
> +  if (writing)
> +    SetHandleInformation ((HANDLE)_get_osfhandle (p2c[1]),
> +                       HANDLE_FLAG_INHERIT, 0);
> +  if (reading)
> +    {
> +      SetHandleInformation ((HANDLE)_get_osfhandle (c2p[0]),
> +                         HANDLE_FLAG_INHERIT, 0);
> +      /* Gnulib's 'pipe' opens the pipe in binary mode, but we don't
> +      want to read text-mode input of subprocesses in binary more,
> +      because then we will get the ^M (a.k.a. "CR") characters we
> +      don't expect.  */
> +      _setmode (c2p[0], _O_TEXT);
> +    }

We should do the newline conversion elsewhere in Guile's I/O stack, so
that it is possible to do binary I/O with subprocesses.

> +
> +  /* Set up the startup info for the child, using the parent's as the
> +     starting point, and specify in it the redirected handles.  */
> +  GetStartupInfo (&si);
> +  si.dwFlags = STARTF_USESTDHANDLES;
> +  si.lpReserved = 0;
> +  si.cbReserved2 = 0;
> +  si.lpReserved2 = 0;
> +  si.hStdInput = hin;
> +  si.hStdOutput = hout;
> +  si.hStdError = herr;
> +
> +  /* Create the environment block for the child.  This is needed
> +     because the environment we have in 'environ' is not in the format
> +     expected by CreateProcess.  */
> +  prepare_envblk (environ, &env_block);
> +
> +  /* CreateProcess doesn't search PATH, so we must do that for it.  */
> +  progfile = lookup_cmd (exec_file, &bin_sh_replaced);
> +
> +  /* CreateProcess doesn't like forward slashes in the application
> +     file name.  */
> +  for (p = progfile; *p; p++)
> +    if (*p == '/')
> +      *p = '\\';
> +
> +  /* Construct the command line.  */
> +  cmdline = prepare_cmdline (exec_file, (const char * const *)argv,
> +                          bin_sh_replaced);
> +
> +  /* All set and ready to fly.  Launch the child process.  */
> +  if (!CreateProcess (progfile, cmdline, NULL, NULL, TRUE, 0, env_block, 
> NULL,
> +                   &si, &pi))
> +    {
> +      pid = -1;
> +
> +      /* Since we use Win32 APIs directly, we need to translate their
> +      errors to errno values by hand.  */
> +      switch (GetLastError ())
> +     {
> +       case ERROR_FILE_NOT_FOUND:
> +       case ERROR_PATH_NOT_FOUND:
> +       case ERROR_INVALID_DRIVE:
> +       case ERROR_BAD_PATHNAME:
> +         errno = ENOENT;
> +         break;
> +       case ERROR_ACCESS_DENIED:
> +         errno = EACCES;
> +         break;
> +       case ERROR_BAD_ENVIRONMENT:
> +         errno = E2BIG;
> +         break;
> +       case ERROR_BROKEN_PIPE:
> +         errno = EPIPE;
> +         break;
> +       case ERROR_INVALID_HANDLE:
> +         errno = EBADF;
> +         break;
> +       case ERROR_MAX_THRDS_REACHED:
> +         errno = EAGAIN;
> +         break;
> +       case ERROR_BAD_EXE_FORMAT:
> +       case ERROR_BAD_FORMAT:
> +       default:
> +         errno = ENOEXEC;
> +         break;
> +     }
> +    }
> +  else
> +    pid = (intptr_t)pi.hProcess;
> +
> +  errno_save = errno;
> +
> +  /* Free resources.  */
> +  free (progfile);
> +  free (cmdline);
> +  free (env_block);
> +  CloseHandle (hin);
> +  CloseHandle (hout);
> +  CloseHandle (herr);
> +  CloseHandle (pi.hThread);
> +
> +  /* Posix requires to call the shell if execvp fails to invoke EXEC_FILE.  
> */
> +  if (errno_save == ENOEXEC || errno_save == ENOENT)
> +    {
> +      const char *shell = getenv ("ComSpec");
> +
> +      if (!shell)
> +     shell = "cmd.exe";
> +
> +      if (c_strcasecmp (exec_file, shell) != 0)
> +     {
> +       argv[0] = (char *)exec_file;
> +       return start_child (shell, argv, reading, c2p, writing, p2c, errfd);
> +     }
> +    }

I think this fallback shouldn't be done.  We don't do it in the POSIX
version of scm_open_process, and we shouldn't do it here either.

What exactly does POSIX require?  Can you provide a reference?

Anyway, thanks again for working on this.

     Mark



reply via email to

[Prev in Thread] Current Thread [Next in Thread]