source: trunk/third/glib2/glib/gshell.c @ 20721

Revision 20721, 18.9 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r20720, which included commits to RCS files with non-trunk default branches.
Line 
1/* gshell.c - Shell-related utilities
2 *
3 *  Copyright 2000 Red Hat, Inc.
4 *  g_execvpe implementation based on GNU libc execvp:
5 *   Copyright 1991, 92, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
6 *
7 * GLib is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * GLib is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with GLib; see the file COPYING.LIB.  If not, write
19 * to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
21 */
22
23#include "config.h"
24
25#include <string.h>
26
27#include "glib.h"
28
29#ifdef _
30#warning "FIXME remove gettext hack"
31#endif
32
33#include "glibintl.h"
34
35GQuark
36g_shell_error_quark (void)
37{
38  static GQuark quark = 0;
39  if (quark == 0)
40    quark = g_quark_from_static_string ("g-shell-error-quark");
41  return quark;
42}
43
44/* Single quotes preserve the literal string exactly. escape
45 * sequences are not allowed; not even \' - if you want a '
46 * in the quoted text, you have to do something like 'foo'\''bar'
47 *
48 * Double quotes allow $ ` " \ and newline to be escaped with backslash.
49 * Otherwise double quotes preserve things literally.
50 */
51
52static gboolean
53unquote_string_inplace (gchar* str, gchar** end, GError** err)
54{
55  gchar* dest;
56  gchar* s;
57  gchar quote_char;
58 
59  g_return_val_if_fail(end != NULL, FALSE);
60  g_return_val_if_fail(err == NULL || *err == NULL, FALSE);
61  g_return_val_if_fail(str != NULL, FALSE);
62 
63  dest = s = str;
64
65  quote_char = *s;
66 
67  if (!(*s == '"' || *s == '\''))
68    {
69      if (err)
70        *err = g_error_new(G_SHELL_ERROR,
71                           G_SHELL_ERROR_BAD_QUOTING,
72                           _("Quoted text doesn't begin with a quotation mark"));
73      *end = str;
74      return FALSE;
75    }
76
77  /* Skip the initial quote mark */
78  ++s;
79
80  if (quote_char == '"')
81    {
82      while (*s)
83        {
84          g_assert(s > dest); /* loop invariant */
85     
86          switch (*s)
87            {
88            case '"':
89              /* End of the string, return now */
90              *dest = '\0';
91              ++s;
92              *end = s;
93              return TRUE;
94              break;
95
96            case '\\':
97              /* Possible escaped quote or \ */
98              ++s;
99              switch (*s)
100                {
101                case '"':
102                case '\\':
103                case '`':
104                case '$':
105                case '\n':
106                  *dest = *s;
107                  ++s;
108                  ++dest;
109                  break;
110
111                default:
112                  /* not an escaped char */
113                  *dest = '\\';
114                  ++dest;
115                  /* ++s already done. */
116                  break;
117                }
118              break;
119
120            default:
121              *dest = *s;
122              ++dest;
123              ++s;
124              break;
125            }
126
127          g_assert(s > dest); /* loop invariant */
128        }
129    }
130  else
131    {
132      while (*s)
133        {
134          g_assert(s > dest); /* loop invariant */
135         
136          if (*s == '\'')
137            {
138              /* End of the string, return now */
139              *dest = '\0';
140              ++s;
141              *end = s;
142              return TRUE;
143            }
144          else
145            {
146              *dest = *s;
147              ++dest;
148              ++s;
149            }
150
151          g_assert(s > dest); /* loop invariant */
152        }
153    }
154 
155  /* If we reach here this means the close quote was never encountered */
156
157  *dest = '\0';
158 
159  if (err)
160    *err = g_error_new(G_SHELL_ERROR,
161                       G_SHELL_ERROR_BAD_QUOTING,
162                       _("Unmatched quotation mark in command line or other shell-quoted text"));
163  *end = s;
164  return FALSE;
165}
166
167/**
168 * g_shell_quote:
169 * @unquoted_string: a literal string
170 *
171 * Quotes a string so that the shell (/bin/sh) will interpret the
172 * quoted string to mean @unquoted_string. If you pass a filename to
173 * the shell, for example, you should first quote it with this
174 * function.  The return value must be freed with g_free(). The
175 * quoting style used is undefined (single or double quotes may be
176 * used).
177 *
178 * Return value: quoted string
179 **/
180gchar*
181g_shell_quote (const gchar *unquoted_string)
182{
183  /* We always use single quotes, because the algorithm is cheesier.
184   * We could use double if we felt like it, that might be more
185   * human-readable.
186   */
187
188  const gchar *p;
189  GString *dest;
190
191  g_return_val_if_fail (unquoted_string != NULL, NULL);
192 
193  dest = g_string_new ("'");
194
195  p = unquoted_string;
196
197  /* could speed this up a lot by appending chunks of text at a
198   * time.
199   */
200  while (*p)
201    {
202      /* Replace literal ' with a close ', a \', and a open ' */
203      if (*p == '\'')
204        g_string_append (dest, "'\\''");
205      else
206        g_string_append_c (dest, *p);
207
208      ++p;
209    }
210
211  /* close the quote */
212  g_string_append_c (dest, '\'');
213 
214  return g_string_free (dest, FALSE);
215}
216
217/**
218 * g_shell_unquote:
219 * @quoted_string: shell-quoted string
220 * @error: error return location or NULL
221 *
222 * Unquotes a string as the shell (/bin/sh) would. Only handles
223 * quotes; if a string contains file globs, arithmetic operators,
224 * variables, backticks, redirections, or other special-to-the-shell
225 * features, the result will be different from the result a real shell
226 * would produce (the variables, backticks, etc. will be passed
227 * through literally instead of being expanded). This function is
228 * guaranteed to succeed if applied to the result of
229 * g_shell_quote(). If it fails, it returns %NULL and sets the
230 * error. The @quoted_string need not actually contain quoted or
231 * escaped text; g_shell_unquote() simply goes through the string and
232 * unquotes/unescapes anything that the shell would. Both single and
233 * double quotes are handled, as are escapes including escaped
234 * newlines. The return value must be freed with g_free(). Possible
235 * errors are in the #G_SHELL_ERROR domain.
236 *
237 * Shell quoting rules are a bit strange. Single quotes preserve the
238 * literal string exactly. escape sequences are not allowed; not even
239 * \' - if you want a ' in the quoted text, you have to do something
240 * like 'foo'\''bar'.  Double quotes allow $, `, ", \, and newline to
241 * be escaped with backslash. Otherwise double quotes preserve things
242 * literally.
243 *
244 * Return value: an unquoted string
245 **/
246gchar*
247g_shell_unquote (const gchar *quoted_string,
248                 GError     **error)
249{
250  gchar *unquoted;
251  gchar *end;
252  gchar *start;
253  GString *retval;
254 
255  g_return_val_if_fail (quoted_string != NULL, NULL);
256 
257  unquoted = g_strdup (quoted_string);
258
259  start = unquoted;
260  end = unquoted;
261  retval = g_string_new (NULL);
262
263  /* The loop allows cases such as
264   * "foo"blah blah'bar'woo foo"baz"la la la\'\''foo'
265   */
266  while (*start)
267    {
268      /* Append all non-quoted chars, honoring backslash escape
269       */
270     
271      while (*start && !(*start == '"' || *start == '\''))
272        {
273          if (*start == '\\')
274            {
275              /* all characters can get escaped by backslash,
276               * except newline, which is removed if it follows
277               * a backslash outside of quotes
278               */
279             
280              ++start;
281              if (*start)
282                {
283                  if (*start != '\n')
284                    g_string_append_c (retval, *start);
285                  ++start;
286                }
287            }
288          else
289            {
290              g_string_append_c (retval, *start);
291              ++start;
292            }
293        }
294
295      if (*start)
296        {
297          if (!unquote_string_inplace (start, &end, error))
298            {
299              goto error;
300            }
301          else
302            {
303              g_string_append (retval, start);
304              start = end;
305            }
306        }
307    }
308
309  g_free (unquoted);
310  return g_string_free (retval, FALSE);
311 
312 error:
313  g_assert (error == NULL || *error != NULL);
314 
315  g_free (unquoted);
316  g_string_free (retval, TRUE);
317  return NULL;
318}
319
320/* g_shell_parse_argv() does a semi-arbitrary weird subset of the way
321 * the shell parses a command line. We don't do variable expansion,
322 * don't understand that operators are tokens, don't do tilde expansion,
323 * don't do command substitution, no arithmetic expansion, IFS gets ignored,
324 * don't do filename globs, don't remove redirection stuff, etc.
325 *
326 * READ THE UNIX98 SPEC on "Shell Command Language" before changing
327 * the behavior of this code.
328 *
329 * Steps to parsing the argv string:
330 *
331 *  - tokenize the string (but since we ignore operators,
332 *    our tokenization may diverge from what the shell would do)
333 *    note that tokenization ignores the internals of a quoted
334 *    word and it always splits on spaces, not on IFS even
335 *    if we used IFS. We also ignore "end of input indicator"
336 *    (I guess this is control-D?)
337 *
338 *    Tokenization steps, from UNIX98 with operator stuff removed,
339 *    are:
340 *
341 *    1) "If the current character is backslash, single-quote or
342 *        double-quote (\, ' or ") and it is not quoted, it will affect
343 *        quoting for subsequent characters up to the end of the quoted
344 *        text. The rules for quoting are as described in Quoting
345 *        . During token recognition no substitutions will be actually
346 *        performed, and the result token will contain exactly the
347 *        characters that appear in the input (except for newline
348 *        character joining), unmodified, including any embedded or
349 *        enclosing quotes or substitution operators, between the quote
350 *        mark and the end of the quoted text. The token will not be
351 *        delimited by the end of the quoted field."
352 *
353 *    2) "If the current character is an unquoted newline character,
354 *        the current token will be delimited."
355 *
356 *    3) "If the current character is an unquoted blank character, any
357 *        token containing the previous character is delimited and the
358 *        current character will be discarded."
359 *
360 *    4) "If the previous character was part of a word, the current
361 *        character will be appended to that word."
362 *
363 *    5) "If the current character is a "#", it and all subsequent
364 *        characters up to, but excluding, the next newline character
365 *        will be discarded as a comment. The newline character that
366 *        ends the line is not considered part of the comment. The
367 *        "#" starts a comment only when it is at the beginning of a
368 *        token. Since the search for the end-of-comment does not
369 *        consider an escaped newline character specially, a comment
370 *        cannot be continued to the next line."
371 *
372 *    6) "The current character will be used as the start of a new word."
373 *
374 *
375 *  - for each token (word), perform portions of word expansion, namely
376 *    field splitting (using default whitespace IFS) and quote
377 *    removal.  Field splitting may increase the number of words.
378 *    Quote removal does not increase the number of words.
379 *
380 *   "If the complete expansion appropriate for a word results in an
381 *   empty field, that empty field will be deleted from the list of
382 *   fields that form the completely expanded command, unless the
383 *   original word contained single-quote or double-quote characters."
384 *    - UNIX98 spec
385 *
386 *
387 */
388
389static inline void
390ensure_token (GString **token)
391{
392  if (*token == NULL)
393    *token = g_string_new (NULL);
394}
395
396static void
397delimit_token (GString **token,
398               GSList **retval)
399{
400  if (*token == NULL)
401    return;
402
403  *retval = g_slist_prepend (*retval, g_string_free (*token, FALSE));
404
405  *token = NULL;
406}
407
408static GSList*
409tokenize_command_line (const gchar *command_line,
410                       GError **error)
411{
412  gchar current_quote;
413  const gchar *p;
414  GString *current_token = NULL;
415  GSList *retval = NULL;
416  gboolean quoted;;
417
418  current_quote = '\0';
419  quoted = FALSE;
420  p = command_line;
421 
422  while (*p)
423    {
424      if (current_quote == '\\')
425        {
426          if (*p == '\n')
427            {
428              /* we append nothing; backslash-newline become nothing */
429            }
430          else
431            {
432              /* we append the backslash and the current char,
433               * to be interpreted later after tokenization
434               */
435              ensure_token (&current_token);
436              g_string_append_c (current_token, '\\');
437              g_string_append_c (current_token, *p);
438            }
439
440          current_quote = '\0';
441        }
442      else if (current_quote == '#')
443        {
444          /* Discard up to and including next newline */
445          while (*p && *p != '\n')
446            ++p;
447
448          current_quote = '\0';
449         
450          if (*p == '\0')
451            break;
452        }
453      else if (current_quote)
454        {
455          if (*p == current_quote &&
456              /* check that it isn't an escaped double quote */
457              !(current_quote == '"' && quoted))
458            {
459              /* close the quote */
460              current_quote = '\0';
461            }
462
463          /* Everything inside quotes, and the close quote,
464           * gets appended literally.
465           */
466
467          ensure_token (&current_token);
468          g_string_append_c (current_token, *p);
469        }
470      else
471        {
472          switch (*p)
473            {
474            case '\n':
475              delimit_token (&current_token, &retval);
476              break;
477
478            case ' ':
479            case '\t':
480              /* If the current token contains the previous char, delimit
481               * the current token. A nonzero length
482               * token should always contain the previous char.
483               */
484              if (current_token &&
485                  current_token->len > 0)
486                {
487                  delimit_token (&current_token, &retval);
488                }
489             
490              /* discard all unquoted blanks (don't add them to a token) */
491              break;
492
493
494              /* single/double quotes are appended to the token,
495               * escapes are maybe appended next time through the loop,
496               * comment chars are never appended.
497               */
498             
499            case '\'':
500            case '"':
501              ensure_token (&current_token);
502              g_string_append_c (current_token, *p);
503
504              /* FALL THRU */
505             
506            case '#':
507            case '\\':
508              current_quote = *p;
509              break;
510
511            default:
512              /* Combines rules 4) and 6) - if we have a token, append to it,
513               * otherwise create a new token.
514               */
515              ensure_token (&current_token);
516              g_string_append_c (current_token, *p);
517              break;
518            }
519        }
520
521      /* We need to count consecutive backslashes mod 2,
522       * to detect escaped doublequotes.
523       */
524      if (*p != '\\')
525        quoted = FALSE;
526      else
527        quoted = !quoted;
528
529      ++p;
530    }
531
532  delimit_token (&current_token, &retval);
533
534  if (current_quote)
535    {
536      if (current_quote == '\\')
537        g_set_error (error,
538                     G_SHELL_ERROR,
539                     G_SHELL_ERROR_BAD_QUOTING,
540                     _("Text ended just after a '\\' character."
541                       " (The text was '%s')"),
542                     command_line);
543      else
544        g_set_error (error,
545                     G_SHELL_ERROR,
546                     G_SHELL_ERROR_BAD_QUOTING,
547                     _("Text ended before matching quote was found for %c."
548                       " (The text was '%s')"),
549                     current_quote, command_line);
550     
551      goto error;
552    }
553
554  if (retval == NULL)
555    {
556      g_set_error (error,
557                   G_SHELL_ERROR,
558                   G_SHELL_ERROR_EMPTY_STRING,
559                   _("Text was empty (or contained only whitespace)"));
560
561      goto error;
562    }
563 
564  /* we appended backward */
565  retval = g_slist_reverse (retval);
566
567  return retval;
568
569 error:
570  g_assert (error == NULL || *error != NULL);
571 
572  if (retval)
573    {
574      g_slist_foreach (retval, (GFunc)g_free, NULL);
575      g_slist_free (retval);
576    }
577
578  return NULL;
579}
580
581/**
582 * g_shell_parse_argv:
583 * @command_line: command line to parse
584 * @argcp: return location for number of args
585 * @argvp: return location for array of args
586 * @error: return location for error
587 *
588 * Parses a command line into an argument vector, in much the same way
589 * the shell would, but without many of the expansions the shell would
590 * perform (variable expansion, globs, operators, filename expansion,
591 * etc. are not supported). The results are defined to be the same as
592 * those you would get from a UNIX98 /bin/sh, as long as the input
593 * contains none of the unsupported shell expansions. If the input
594 * does contain such expansions, they are passed through
595 * literally. Possible errors are those from the #G_SHELL_ERROR
596 * domain. Free the returned vector with g_strfreev().
597 *
598 * Return value: %TRUE on success, %FALSE if error set
599 **/
600gboolean
601g_shell_parse_argv (const gchar *command_line,
602                    gint        *argcp,
603                    gchar     ***argvp,
604                    GError     **error)
605{
606  /* Code based on poptParseArgvString() from libpopt */
607  gint argc = 0;
608  gchar **argv = NULL;
609  GSList *tokens = NULL;
610  gint i;
611  GSList *tmp_list;
612 
613  g_return_val_if_fail (command_line != NULL, FALSE);
614
615  tokens = tokenize_command_line (command_line, error);
616  if (tokens == NULL)
617    return FALSE;
618
619  /* Because we can't have introduced any new blank space into the
620   * tokens (we didn't do any new expansions), we don't need to
621   * perform field splitting. If we were going to honor IFS or do any
622   * expansions, we would have to do field splitting on each word
623   * here. Also, if we were going to do any expansion we would need to
624   * remove any zero-length words that didn't contain quotes
625   * originally; but since there's no expansion we know all words have
626   * nonzero length, unless they contain quotes.
627   *
628   * So, we simply remove quotes, and don't do any field splitting or
629   * empty word removal, since we know there was no way to introduce
630   * such things.
631   */
632
633  argc = g_slist_length (tokens);
634  argv = g_new0 (gchar*, argc + 1);
635  i = 0;
636  tmp_list = tokens;
637  while (tmp_list)
638    {
639      argv[i] = g_shell_unquote (tmp_list->data, error);
640
641      /* Since we already checked that quotes matched up in the
642       * tokenizer, this shouldn't be possible to reach I guess.
643       */
644      if (argv[i] == NULL)
645        goto failed;
646
647      tmp_list = g_slist_next (tmp_list);
648      ++i;
649    }
650 
651  g_slist_foreach (tokens, (GFunc)g_free, NULL);
652  g_slist_free (tokens);
653 
654  if (argcp)
655    *argcp = argc;
656
657  if (argvp)
658    *argvp = argv;
659  else
660    g_strfreev (argv);
661
662  return TRUE;
663
664 failed:
665
666  g_assert (error == NULL || *error != NULL);
667  g_strfreev (argv);
668  g_slist_foreach (tokens, (GFunc) g_free, NULL);
669  g_slist_free (tokens);
670 
671  return FALSE;
672}
Note: See TracBrowser for help on using the repository browser.