source: trunk/third/glib2/glib/gconvert.c @ 20721

Revision 20721, 42.8 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r20720, which included commits to RCS files with non-trunk default branches.
Line 
1/* GLIB - Library of useful routines for C programming
2 *
3 * gconvert.c: Convert between character sets using iconv
4 * Copyright Red Hat Inc., 2000
5 * Authors: Havoc Pennington <hp@redhat.com>, Owen Taylor <otaylor@redhat.com
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
21 */
22
23#include "config.h"
24
25#include <iconv.h>
26#include <errno.h>
27#include <stdio.h>
28#include <string.h>
29#include <stdlib.h>
30
31#include "glib.h"
32#include "gprintfint.h"
33#include "gthreadinit.h"
34
35#ifdef G_PLATFORM_WIN32
36#define STRICT
37#include <windows.h>
38#undef STRICT
39#endif
40
41#include "glibintl.h"
42
43#if defined(USE_LIBICONV_GNU) && !defined (_LIBICONV_H)
44#error GNU libiconv in use but included iconv.h not from libiconv
45#endif
46#if !defined(USE_LIBICONV_GNU) && defined (_LIBICONV_H)
47#error GNU libiconv not in use but included iconv.h is from libiconv
48#endif
49
50GQuark
51g_convert_error_quark (void)
52{
53  static GQuark quark;
54  if (!quark)
55    quark = g_quark_from_static_string ("g_convert_error");
56
57  return quark;
58}
59
60static gboolean
61try_conversion (const char *to_codeset,
62                const char *from_codeset,
63                iconv_t    *cd)
64{
65  *cd = iconv_open (to_codeset, from_codeset);
66
67  if (*cd == (iconv_t)-1 && errno == EINVAL)
68    return FALSE;
69  else
70    return TRUE;
71}
72
73static gboolean
74try_to_aliases (const char **to_aliases,
75                const char  *from_codeset,
76                iconv_t     *cd)
77{
78  if (to_aliases)
79    {
80      const char **p = to_aliases;
81      while (*p)
82        {
83          if (try_conversion (*p, from_codeset, cd))
84            return TRUE;
85
86          p++;
87        }
88    }
89
90  return FALSE;
91}
92
93extern const char **_g_charset_get_aliases (const char *canonical_name);
94
95/**
96 * g_iconv_open:
97 * @to_codeset: destination codeset
98 * @from_codeset: source codeset
99 *
100 * Same as the standard UNIX routine iconv_open(), but
101 * may be implemented via libiconv on UNIX flavors that lack
102 * a native implementation.
103 *
104 * GLib provides g_convert() and g_locale_to_utf8() which are likely
105 * more convenient than the raw iconv wrappers.
106 *
107 * Return value: a "conversion descriptor", or (GIConv)-1 if
108 *  opening the converter failed.
109 **/
110GIConv
111g_iconv_open (const gchar  *to_codeset,
112              const gchar  *from_codeset)
113{
114  iconv_t cd;
115 
116  if (!try_conversion (to_codeset, from_codeset, &cd))
117    {
118      const char **to_aliases = _g_charset_get_aliases (to_codeset);
119      const char **from_aliases = _g_charset_get_aliases (from_codeset);
120
121      if (from_aliases)
122        {
123          const char **p = from_aliases;
124          while (*p)
125            {
126              if (try_conversion (to_codeset, *p, &cd))
127                goto out;
128
129              if (try_to_aliases (to_aliases, *p, &cd))
130                goto out;
131
132              p++;
133            }
134        }
135
136      if (try_to_aliases (to_aliases, from_codeset, &cd))
137        goto out;
138    }
139
140 out:
141  return (cd == (iconv_t)-1) ? (GIConv)-1 : (GIConv)cd;
142}
143
144/**
145 * g_iconv:
146 * @converter: conversion descriptor from g_iconv_open()
147 * @inbuf: bytes to convert
148 * @inbytes_left: inout parameter, bytes remaining to convert in @inbuf
149 * @outbuf: converted output bytes
150 * @outbytes_left: inout parameter, bytes available to fill in @outbuf
151 *
152 * Same as the standard UNIX routine iconv(), but
153 * may be implemented via libiconv on UNIX flavors that lack
154 * a native implementation.
155 *
156 * GLib provides g_convert() and g_locale_to_utf8() which are likely
157 * more convenient than the raw iconv wrappers.
158 *
159 * Return value: count of non-reversible conversions, or -1 on error
160 **/
161size_t
162g_iconv (GIConv   converter,
163         gchar  **inbuf,
164         gsize   *inbytes_left,
165         gchar  **outbuf,
166         gsize   *outbytes_left)
167{
168  iconv_t cd = (iconv_t)converter;
169
170  return iconv (cd, inbuf, inbytes_left, outbuf, outbytes_left);
171}
172
173/**
174 * g_iconv_close:
175 * @converter: a conversion descriptor from g_iconv_open()
176 *
177 * Same as the standard UNIX routine iconv_close(), but
178 * may be implemented via libiconv on UNIX flavors that lack
179 * a native implementation. Should be called to clean up
180 * the conversion descriptor from g_iconv_open() when
181 * you are done converting things.
182 *
183 * GLib provides g_convert() and g_locale_to_utf8() which are likely
184 * more convenient than the raw iconv wrappers.
185 *
186 * Return value: -1 on error, 0 on success
187 **/
188gint
189g_iconv_close (GIConv converter)
190{
191  iconv_t cd = (iconv_t)converter;
192
193  return iconv_close (cd);
194}
195
196
/* Bucket-count threshold that the expiry code compares
 * iconv_cache_size against.
 */
#define ICONV_CACHE_SIZE   (16)

/* One cache entry per "from:to" codeset pair. */
struct _iconv_cache_bucket {
  gchar *key;        /* heap copy of the "from:to" key; also the key in iconv_cache */
  guint32 refcount;  /* descriptors currently handed out for this pair */
  gboolean used;     /* TRUE while the cached descriptor 'cd' is handed out */
  GIConv cd;         /* cached descriptor; closed when the bucket expires */
};

static GList *iconv_cache_list;       /* every bucket; new ones are prepended */
static GHashTable *iconv_cache;       /* key string -> bucket */
static GHashTable *iconv_open_hash;   /* handed-out descriptor -> bucket key */
static guint iconv_cache_size = 0;    /* number of buckets currently cached */
G_LOCK_DEFINE_STATIC (iconv_cache_lock);
211
212/* caller *must* hold the iconv_cache_lock */
213static void
214iconv_cache_init (void)
215{
216  static gboolean initialized = FALSE;
217 
218  if (initialized)
219    return;
220 
221  iconv_cache_list = NULL;
222  iconv_cache = g_hash_table_new (g_str_hash, g_str_equal);
223  iconv_open_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
224 
225  initialized = TRUE;
226}
227
228
229/**
230 * iconv_cache_bucket_new:
231 * @key: cache key
232 * @cd: iconv descriptor
233 *
234 * Creates a new cache bucket, inserts it into the cache and
235 * increments the cache size.
236 *
237 * Returns a pointer to the newly allocated cache bucket.
238 **/
239static struct _iconv_cache_bucket *
240iconv_cache_bucket_new (const gchar *key, GIConv cd)
241{
242  struct _iconv_cache_bucket *bucket;
243 
244  bucket = g_new (struct _iconv_cache_bucket, 1);
245  bucket->key = g_strdup (key);
246  bucket->refcount = 1;
247  bucket->used = TRUE;
248  bucket->cd = cd;
249 
250  g_hash_table_insert (iconv_cache, bucket->key, bucket);
251 
252  /* FIXME: if we sorted the list so items with few refcounts were
253     first, then we could expire them faster in iconv_cache_expire_unused () */
254  iconv_cache_list = g_list_prepend (iconv_cache_list, bucket);
255 
256  iconv_cache_size++;
257 
258  return bucket;
259}
260
261
262/**
263 * iconv_cache_bucket_expire:
264 * @node: cache bucket's node
265 * @bucket: cache bucket
266 *
267 * Expires a single cache bucket @bucket. This should only ever be
268 * called on a bucket that currently has no used iconv descriptors
269 * open.
270 *
271 * @node is not a required argument. If @node is not supplied, we
272 * search for it ourselves.
273 **/
274static void
275iconv_cache_bucket_expire (GList *node, struct _iconv_cache_bucket *bucket)
276{
277  g_hash_table_remove (iconv_cache, bucket->key);
278 
279  if (node == NULL)
280    node = g_list_find (iconv_cache_list, bucket);
281 
282  g_assert (node != NULL);
283 
284  if (node->prev)
285    {
286      node->prev->next = node->next;
287      if (node->next)
288        node->next->prev = node->prev;
289    }
290  else
291    {
292      iconv_cache_list = node->next;
293      if (node->next)
294        node->next->prev = NULL;
295    }
296 
297  g_list_free_1 (node);
298 
299  g_free (bucket->key);
300  g_iconv_close (bucket->cd);
301  g_free (bucket);
302 
303  iconv_cache_size--;
304}
305
306
307/**
308 * iconv_cache_expire_unused:
309 *
310 * Expires as many unused cache buckets as it needs to in order to get
311 * the total number of buckets < ICONV_CACHE_SIZE.
312 **/
313static void
314iconv_cache_expire_unused (void)
315{
316  struct _iconv_cache_bucket *bucket;
317  GList *node, *next;
318 
319  node = iconv_cache_list;
320  while (node && iconv_cache_size >= ICONV_CACHE_SIZE)
321    {
322      next = node->next;
323     
324      bucket = node->data;
325      if (bucket->refcount == 0)
326        iconv_cache_bucket_expire (node, bucket);
327     
328      node = next;
329    }
330}
331
332static GIConv
333open_converter (const gchar *to_codeset,
334                const gchar *from_codeset,
335                GError     **error)
336{
337  struct _iconv_cache_bucket *bucket;
338  gchar *key;
339  GIConv cd;
340 
341  /* create our key */
342  key = g_alloca (strlen (from_codeset) + strlen (to_codeset) + 2);
343  _g_sprintf (key, "%s:%s", from_codeset, to_codeset);
344 
345  G_LOCK (iconv_cache_lock);
346 
347  /* make sure the cache has been initialized */
348  iconv_cache_init ();
349 
350  bucket = g_hash_table_lookup (iconv_cache, key);
351  if (bucket)
352    {
353      if (bucket->used)
354        {
355          cd = g_iconv_open (to_codeset, from_codeset);
356          if (cd == (GIConv) -1)
357            goto error;
358        }
359      else
360        {
361          /* Apparently iconv on Solaris <= 7 segfaults if you pass in
362           * NULL for anything but inbuf; work around that. (NULL outbuf
363           * or NULL *outbuf is allowed by Unix98.)
364           */
365          gsize inbytes_left = 0;
366          gchar *outbuf = NULL;
367          gsize outbytes_left = 0;
368               
369          cd = bucket->cd;
370          bucket->used = TRUE;
371         
372          /* reset the descriptor */
373          g_iconv (cd, NULL, &inbytes_left, &outbuf, &outbytes_left);
374        }
375     
376      bucket->refcount++;
377    }
378  else
379    {
380      cd = g_iconv_open (to_codeset, from_codeset);
381      if (cd == (GIConv) -1)
382        goto error;
383     
384      iconv_cache_expire_unused ();
385     
386      bucket = iconv_cache_bucket_new (key, cd);
387    }
388 
389  g_hash_table_insert (iconv_open_hash, cd, bucket->key);
390 
391  G_UNLOCK (iconv_cache_lock);
392 
393  return cd;
394 
395 error:
396 
397  G_UNLOCK (iconv_cache_lock);
398 
399  /* Something went wrong.  */
400  if (errno == EINVAL)
401    g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
402                 _("Conversion from character set '%s' to '%s' is not supported"),
403                 from_codeset, to_codeset);
404  else
405    g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
406                 _("Could not open converter from '%s' to '%s'"),
407                 from_codeset, to_codeset);
408 
409  return cd;
410}
411
412static int
413close_converter (GIConv converter)
414{
415  struct _iconv_cache_bucket *bucket;
416  const gchar *key;
417  GIConv cd;
418 
419  cd = converter;
420 
421  if (cd == (GIConv) -1)
422    return 0;
423 
424  G_LOCK (iconv_cache_lock);
425 
426  key = g_hash_table_lookup (iconv_open_hash, cd);
427  if (key)
428    {
429      g_hash_table_remove (iconv_open_hash, cd);
430     
431      bucket = g_hash_table_lookup (iconv_cache, key);
432      g_assert (bucket);
433     
434      bucket->refcount--;
435     
436      if (cd == bucket->cd)
437        bucket->used = FALSE;
438      else
439        g_iconv_close (cd);
440     
441      if (!bucket->refcount && iconv_cache_size > ICONV_CACHE_SIZE)
442        {
443          /* expire this cache bucket */
444          iconv_cache_bucket_expire (NULL, bucket);
445        }
446    }
447  else
448    {
449      G_UNLOCK (iconv_cache_lock);
450     
451      g_warning ("This iconv context wasn't opened using open_converter");
452     
453      return g_iconv_close (converter);
454    }
455 
456  G_UNLOCK (iconv_cache_lock);
457 
458  return 0;
459}
460
461
462/**
463 * g_convert:
464 * @str:           the string to convert
465 * @len:           the length of the string
466 * @to_codeset:    name of character set into which to convert @str
467 * @from_codeset:  character set of @str.
468 * @bytes_read:    location to store the number of bytes in the
469 *                 input string that were successfully converted, or %NULL.
470 *                 Even if the conversion was successful, this may be
471 *                 less than @len if there were partial characters
472 *                 at the end of the input. If the error
473 *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 *                 stored will be the byte offset after the last valid
475 *                 input sequence.
476 * @bytes_written: the number of bytes stored in the output buffer (not
477 *                 including the terminating nul).
 * @error:         location to store the error occurring, or %NULL to ignore
479 *                 errors. Any of the errors in #GConvertError may occur.
480 *
481 * Converts a string from one character set to another.
482 *
483 * Return value: If the conversion was successful, a newly allocated
484 *               nul-terminated string, which must be freed with
485 *               g_free(). Otherwise %NULL and @error will be set.
486 **/
487gchar*
488g_convert (const gchar *str,
489           gssize       len, 
490           const gchar *to_codeset,
491           const gchar *from_codeset,
492           gsize       *bytes_read,
493           gsize       *bytes_written,
494           GError     **error)
495{
496  gchar *res;
497  GIConv cd;
498 
499  g_return_val_if_fail (str != NULL, NULL);
500  g_return_val_if_fail (to_codeset != NULL, NULL);
501  g_return_val_if_fail (from_codeset != NULL, NULL);
502 
503  cd = open_converter (to_codeset, from_codeset, error);
504
505  if (cd == (GIConv) -1)
506    {
507      if (bytes_read)
508        *bytes_read = 0;
509     
510      if (bytes_written)
511        *bytes_written = 0;
512     
513      return NULL;
514    }
515
516  res = g_convert_with_iconv (str, len, cd,
517                              bytes_read, bytes_written,
518                              error);
519 
520  close_converter (cd);
521
522  return res;
523}
524
525/**
526 * g_convert_with_iconv:
527 * @str:           the string to convert
528 * @len:           the length of the string
529 * @converter:     conversion descriptor from g_iconv_open()
530 * @bytes_read:    location to store the number of bytes in the
531 *                 input string that were successfully converted, or %NULL.
532 *                 Even if the conversion was successful, this may be
533 *                 less than @len if there were partial characters
534 *                 at the end of the input. If the error
535 *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 *                 stored will be the byte offset after the last valid
537 *                 input sequence.
538 * @bytes_written: the number of bytes stored in the output buffer (not
539 *                 including the terminating nul).
 * @error:         location to store the error occurring, or %NULL to ignore
541 *                 errors. Any of the errors in #GConvertError may occur.
542 *
543 * Converts a string from one character set to another.
544 *
545 * Return value: If the conversion was successful, a newly allocated
546 *               nul-terminated string, which must be freed with
547 *               g_free(). Otherwise %NULL and @error will be set.
548 **/
549gchar*
550g_convert_with_iconv (const gchar *str,
551                      gssize       len,
552                      GIConv       converter,
553                      gsize       *bytes_read,
554                      gsize       *bytes_written,
555                      GError     **error)
556{
557  gchar *dest;
558  gchar *outp;
559  const gchar *p;
560  gsize inbytes_remaining;
561  gsize outbytes_remaining;
562  gsize err;
563  gsize outbuf_size;
564  gboolean have_error = FALSE;
565 
566  g_return_val_if_fail (str != NULL, NULL);
567  g_return_val_if_fail (converter != (GIConv) -1, NULL);
568     
569  if (len < 0)
570    len = strlen (str);
571
572  p = str;
573  inbytes_remaining = len;
574  outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
575 
576  outbytes_remaining = outbuf_size - 1; /* -1 for nul */
577  outp = dest = g_malloc (outbuf_size);
578
579 again:
580 
581  err = g_iconv (converter, (char **)&p, &inbytes_remaining, &outp, &outbytes_remaining);
582
583  if (err == (size_t) -1)
584    {
585      switch (errno)
586        {
587        case EINVAL:
588          /* Incomplete text, do not report an error */
589          break;
590        case E2BIG:
591          {
592            size_t used = outp - dest;
593
594            outbuf_size *= 2;
595            dest = g_realloc (dest, outbuf_size);
596               
597            outp = dest + used;
598            outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
599
600            goto again;
601          }
602        case EILSEQ:
603          g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
604                       _("Invalid byte sequence in conversion input"));
605          have_error = TRUE;
606          break;
607        default:
608          g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
609                       _("Error during conversion: %s"),
610                       g_strerror (errno));
611          have_error = TRUE;
612          break;
613        }
614    }
615
616  *outp = '\0';
617 
618  if (bytes_read)
619    *bytes_read = p - str;
620  else
621    {
622      if ((p - str) != len)
623        {
624          if (!have_error)
625            {
626              g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
627                           _("Partial character sequence at end of input"));
628              have_error = TRUE;
629            }
630        }
631    }
632
633  if (bytes_written)
634    *bytes_written = outp - dest;       /* Doesn't include '\0' */
635
636  if (have_error)
637    {
638      g_free (dest);
639      return NULL;
640    }
641  else
642    return dest;
643}
644
645/**
646 * g_convert_with_fallback:
647 * @str:          the string to convert
648 * @len:          the length of the string
649 * @to_codeset:   name of character set into which to convert @str
650 * @from_codeset: character set of @str.
 * @fallback:     UTF-8 string to use in place of characters not
 *                present in the target encoding. (The string must be
 *                representable in the target encoding.) If %NULL,
 *                characters not in the target encoding will be
 *                represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
656 * @bytes_read:   location to store the number of bytes in the
657 *                input string that were successfully converted, or %NULL.
658 *                Even if the conversion was successful, this may be
659 *                less than @len if there were partial characters
660 *                at the end of the input.
661 * @bytes_written: the number of bytes stored in the output buffer (not
662 *                including the terminating nul).
 * @error:        location to store the error occurring, or %NULL to ignore
664 *                errors. Any of the errors in #GConvertError may occur.
665 *
666 * Converts a string from one character set to another, possibly
667 * including fallback sequences for characters not representable
668 * in the output. Note that it is not guaranteed that the specification
669 * for the fallback sequences in @fallback will be honored. Some
 * systems may do an approximate conversion from @from_codeset
671 * to @to_codeset in their iconv() functions,
672 * in which case GLib will simply return that approximate conversion.
673 *
674 * Return value: If the conversion was successful, a newly allocated
675 *               nul-terminated string, which must be freed with
676 *               g_free(). Otherwise %NULL and @error will be set.
677 **/
678gchar*
679g_convert_with_fallback (const gchar *str,
680                         gssize       len,   
681                         const gchar *to_codeset,
682                         const gchar *from_codeset,
683                         gchar       *fallback,
684                         gsize       *bytes_read,
685                         gsize       *bytes_written,
686                         GError     **error)
687{
688  gchar *utf8;
689  gchar *dest;
690  gchar *outp;
691  const gchar *insert_str = NULL;
692  const gchar *p;
693  gsize inbytes_remaining;   
694  const gchar *save_p = NULL;
695  gsize save_inbytes = 0;
696  gsize outbytes_remaining;
697  gsize err;
698  GIConv cd;
699  gsize outbuf_size;
700  gboolean have_error = FALSE;
701  gboolean done = FALSE;
702
703  GError *local_error = NULL;
704 
705  g_return_val_if_fail (str != NULL, NULL);
706  g_return_val_if_fail (to_codeset != NULL, NULL);
707  g_return_val_if_fail (from_codeset != NULL, NULL);
708     
709  if (len < 0)
710    len = strlen (str);
711 
712  /* Try an exact conversion; we only proceed if this fails
713   * due to an illegal sequence in the input string.
714   */
715  dest = g_convert (str, len, to_codeset, from_codeset,
716                    bytes_read, bytes_written, &local_error);
717  if (!local_error)
718    return dest;
719
720  if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE))
721    {
722      g_propagate_error (error, local_error);
723      return NULL;
724    }
725  else
726    g_error_free (local_error);
727
728  local_error = NULL;
729 
730  /* No go; to proceed, we need a converter from "UTF-8" to
731   * to_codeset, and the string as UTF-8.
732   */
733  cd = open_converter (to_codeset, "UTF-8", error);
734  if (cd == (GIConv) -1)
735    {
736      if (bytes_read)
737        *bytes_read = 0;
738     
739      if (bytes_written)
740        *bytes_written = 0;
741     
742      return NULL;
743    }
744
745  utf8 = g_convert (str, len, "UTF-8", from_codeset,
746                    bytes_read, &inbytes_remaining, error);
747  if (!utf8)
748    {
749      close_converter (cd);
750      if (bytes_written)
751        *bytes_written = 0;
752      return NULL;
753    }
754
755  /* Now the heart of the code. We loop through the UTF-8 string, and
756   * whenever we hit an offending character, we form fallback, convert
757   * the fallback to the target codeset, and then go back to
758   * converting the original string after finishing with the fallback.
759   *
760   * The variables save_p and save_inbytes store the input state
761   * for the original string while we are converting the fallback
762   */
763  p = utf8;
764
765  outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
766  outbytes_remaining = outbuf_size - 1; /* -1 for nul */
767  outp = dest = g_malloc (outbuf_size);
768
769  while (!done && !have_error)
770    {
771      size_t inbytes_tmp = inbytes_remaining;
772      err = g_iconv (cd, (char **)&p, &inbytes_tmp, &outp, &outbytes_remaining);
773      inbytes_remaining = inbytes_tmp;
774
775      if (err == (size_t) -1)
776        {
777          switch (errno)
778            {
779            case EINVAL:
780              g_assert_not_reached();
781              break;
782            case E2BIG:
783              {
784                size_t used = outp - dest;
785
786                outbuf_size *= 2;
787                dest = g_realloc (dest, outbuf_size);
788               
789                outp = dest + used;
790                outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
791               
792                break;
793              }
794            case EILSEQ:
795              if (save_p)
796                {
797                  /* Error converting fallback string - fatal
798                   */
799                  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
800                               _("Cannot convert fallback '%s' to codeset '%s'"),
801                               insert_str, to_codeset);
802                  have_error = TRUE;
803                  break;
804                }
805              else
806                {
807                  if (!fallback)
808                    {
809                      gunichar ch = g_utf8_get_char (p);
810                      insert_str = g_strdup_printf (ch < 0x10000 ? "\\u%04x" : "\\U%08x",
811                                                    ch);
812                    }
813                  else
814                    insert_str = fallback;
815                 
816                  save_p = g_utf8_next_char (p);
817                  save_inbytes = inbytes_remaining - (save_p - p);
818                  p = insert_str;
819                  inbytes_remaining = strlen (p);
820                }
821              break;
822            default:
823              g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
824                           _("Error during conversion: %s"),
825                           g_strerror (errno));
826              have_error = TRUE;
827              break;
828            }
829        }
830      else
831        {
832          if (save_p)
833            {
834              if (!fallback)
835                g_free ((gchar *)insert_str);
836              p = save_p;
837              inbytes_remaining = save_inbytes;
838              save_p = NULL;
839            }
840          else
841            done = TRUE;
842        }
843    }
844
845  /* Cleanup
846   */
847  *outp = '\0';
848 
849  close_converter (cd);
850
851  if (bytes_written)
852    *bytes_written = outp - dest;       /* Doesn't include '\0' */
853
854  g_free (utf8);
855
856  if (have_error)
857    {
858      if (save_p && !fallback)
859        g_free ((gchar *)insert_str);
860      g_free (dest);
861      return NULL;
862    }
863  else
864    return dest;
865}
866
867/*
868 * g_locale_to_utf8
869 *
870 *
871 */
872
873static gchar *
874strdup_len (const gchar *string,
875            gssize       len,
876            gsize       *bytes_written,
877            gsize       *bytes_read,
878            GError      **error)
879         
880{
881  gsize real_len;
882
883  if (!g_utf8_validate (string, len, NULL))
884    {
885      if (bytes_read)
886        *bytes_read = 0;
887      if (bytes_written)
888        *bytes_written = 0;
889
890      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
891                   _("Invalid byte sequence in conversion input"));
892      return NULL;
893    }
894 
895  if (len < 0)
896    real_len = strlen (string);
897  else
898    {
899      real_len = 0;
900     
901      while (real_len < len && string[real_len])
902        real_len++;
903    }
904 
905  if (bytes_read)
906    *bytes_read = real_len;
907  if (bytes_written)
908    *bytes_written = real_len;
909
910  return g_strndup (string, real_len);
911}
912
913/**
914 * g_locale_to_utf8:
915 * @opsysstring:   a string in the encoding of the current locale
916 * @len:           the length of the string, or -1 if the string is
917 *                 nul-terminated.
918 * @bytes_read:    location to store the number of bytes in the
919 *                 input string that were successfully converted, or %NULL.
920 *                 Even if the conversion was successful, this may be
921 *                 less than @len if there were partial characters
922 *                 at the end of the input. If the error
923 *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 *                 stored will be the byte offset after the last valid
925 *                 input sequence.
926 * @bytes_written: the number of bytes stored in the output buffer (not
927 *                 including the terminating nul).
 * @error:         location to store the error occurring, or %NULL to ignore
929 *                 errors. Any of the errors in #GConvertError may occur.
930 *
931 * Converts a string which is in the encoding used for strings by
932 * the C runtime (usually the same as that used by the operating
933 * system) in the current locale into a UTF-8 string.
934 *
935 * Return value: The converted string, or %NULL on an error.
936 **/
937gchar *
938g_locale_to_utf8 (const gchar  *opsysstring,
939                  gssize        len,           
940                  gsize        *bytes_read,   
941                  gsize        *bytes_written,
942                  GError      **error)
943{
944  const char *charset;
945
946  if (g_get_charset (&charset))
947    return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
948  else
949    return g_convert (opsysstring, len,
950                      "UTF-8", charset, bytes_read, bytes_written, error);
951}
952
953/**
954 * g_locale_from_utf8:
955 * @utf8string:    a UTF-8 encoded string
956 * @len:           the length of the string, or -1 if the string is
957 *                 nul-terminated.
958 * @bytes_read:    location to store the number of bytes in the
959 *                 input string that were successfully converted, or %NULL.
960 *                 Even if the conversion was successful, this may be
961 *                 less than @len if there were partial characters
962 *                 at the end of the input. If the error
963 *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 *                 stored will be the byte offset after the last valid
965 *                 input sequence.
966 * @bytes_written: the number of bytes stored in the output buffer (not
967 *                 including the terminating nul).
 * @error:         location to store the error occurring, or %NULL to ignore
969 *                 errors. Any of the errors in #GConvertError may occur.
970 *
971 * Converts a string from UTF-8 to the encoding used for strings by
972 * the C runtime (usually the same as that used by the operating
973 * system) in the current locale.
974 *
975 * Return value: The converted string, or %NULL on an error.
976 **/
977gchar *
978g_locale_from_utf8 (const gchar *utf8string,
979                    gssize       len,           
980                    gsize       *bytes_read,   
981                    gsize       *bytes_written,
982                    GError     **error)
983{
984  const gchar *charset;
985
986  if (g_get_charset (&charset))
987    return strdup_len (utf8string, len, bytes_read, bytes_written, error);
988  else
989    return g_convert (utf8string, len,
990                      charset, "UTF-8", bytes_read, bytes_written, error);
991}
992
993#ifndef G_PLATFORM_WIN32
994
typedef struct _GFilenameCharsetCache GFilenameCharsetCache;

/* Cached result of the filename charset lookup. */
struct _GFilenameCharsetCache {
  gboolean is_utf8;          /* whether the filename charset is UTF-8 */
  gchar *charset;            /* copy of the locale charset the entry was computed for */
  gchar *filename_charset;   /* copy of the charset chosen for filenames */
};
1002
1003static void
1004filename_charset_cache_free (gpointer data)
1005{
1006  GFilenameCharsetCache *cache = data;
1007  g_free (cache->charset);
1008  g_free (cache->filename_charset);
1009  g_free (cache);
1010}
1011
1012/*
1013 * get_filename_charset:
1014 * @charset: return location for the name of the filename encoding
1015 *
1016 * Determines the character set used for filenames by consulting the
1017 * environment variables G_FILENAME_ENCODING and G_BROKEN_FILENAMES.
1018 *
1019 * G_FILENAME_ENCODING may be set to a comma-separated list of character
1020 * set names. The special token "@locale" is taken to mean the character set
1021 * for the current locale. The first character set from the list is taken
1022 * as the filename encoding.
1023 * If G_FILENAME_ENCODING is not set, but G_BROKEN_FILENAMES is, the
1024 * character set of the current locale is taken as the filename encoding.
1025 *
1026 * The returned @charset belongs to GLib and must not be freed.
1027 *
1028 * Return value: %TRUE if the charset used for filename is UTF-8.
1029 */
1030static gboolean
1031get_filename_charset (const gchar **filename_charset)
1032{
1033  static GStaticPrivate cache_private = G_STATIC_PRIVATE_INIT;
1034  GFilenameCharsetCache *cache = g_static_private_get (&cache_private);
1035  const gchar *charset;
1036 
1037  if (!cache)
1038    {
1039      cache = g_new0 (GFilenameCharsetCache, 1);
1040      g_static_private_set (&cache_private, cache, filename_charset_cache_free);
1041    }
1042
1043  g_get_charset (&charset);
1044
1045  if (!(cache->charset && strcmp (cache->charset, charset) == 0))
1046    {
1047      const gchar *new_charset;
1048      gchar *p, *q;
1049
1050      g_free (cache->charset);
1051      g_free (cache->filename_charset);
1052      cache->charset = g_strdup (charset);
1053     
1054      p = getenv ("G_FILENAME_ENCODING");
1055      if (p != NULL)
1056        {
1057          q = strchr (p, ',');
1058          if (!q)
1059            q = p + strlen (p);
1060
1061          if (strncmp ("@locale", p, q - p) == 0)
1062            {
1063              cache->is_utf8 = g_get_charset (&new_charset);
1064              cache->filename_charset = g_strdup (new_charset);
1065            }
1066          else
1067            {
1068              cache->filename_charset = g_strndup (p, q - p);
1069              cache->is_utf8 = (strcmp (cache->filename_charset, "UTF-8") == 0);
1070            }
1071        }
1072      else if (getenv ("G_BROKEN_FILENAMES") != NULL)
1073        {
1074          cache->is_utf8 = g_get_charset (&new_charset);
1075          cache->filename_charset = g_strdup (new_charset);
1076        }
1077      else
1078        {
1079          cache->filename_charset = g_strdup ("UTF-8");
1080          cache->is_utf8 = TRUE;
1081        }
1082    }
1083
1084  if (filename_charset)
1085    *filename_charset = cache->filename_charset;
1086
1087  return cache->is_utf8;
1088}
1089
1090#else /* G_PLATFORM_WIN32 */
1091static gboolean
1092get_filename_charset (const gchar **filename_charset)
1093{
1094  g_get_charset (filename_charset);
1095  return FALSE;
1096}
1097#endif /* G_PLATFORM_WIN32 */
1098
1099/* This is called from g_thread_init(). It's used to
1100 * initialize some static data in a threadsafe way.
1101 */
1102void
1103_g_convert_thread_init (void)
1104{
1105  const gchar *dummy;
1106  (void) get_filename_charset (&dummy);
1107}
1108
1109/**
1110 * g_filename_to_utf8:
1111 * @opsysstring:   a string in the encoding for filenames
1112 * @len:           the length of the string, or -1 if the string is
1113 *                 nul-terminated.
1114 * @bytes_read:    location to store the number of bytes in the
1115 *                 input string that were successfully converted, or %NULL.
1116 *                 Even if the conversion was successful, this may be
1117 *                 less than @len if there were partial characters
1118 *                 at the end of the input. If the error
1119 *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1120 *                 stored will the byte offset after the last valid
1121 *                 input sequence.
1122 * @bytes_written: the number of bytes stored in the output buffer (not
1123 *                 including the terminating nul).
1124 * @error:         location to store the error occuring, or %NULL to ignore
1125 *                 errors. Any of the errors in #GConvertError may occur.
1126 *
1127 * Converts a string which is in the encoding used for filenames
1128 * into a UTF-8 string.
1129 *
1130 * Return value: The converted string, or %NULL on an error.
1131 **/
1132gchar*
1133g_filename_to_utf8 (const gchar *opsysstring,
1134                    gssize       len,           
1135                    gsize       *bytes_read,   
1136                    gsize       *bytes_written,
1137                    GError     **error)
1138{
1139  const gchar *charset;
1140
1141  if (get_filename_charset (&charset))
1142    return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
1143  else
1144    return g_convert (opsysstring, len,
1145                      "UTF-8", charset, bytes_read, bytes_written, error);
1146}
1147
1148/**
1149 * g_filename_from_utf8:
1150 * @utf8string:    a UTF-8 encoded string.
1151 * @len:           the length of the string, or -1 if the string is
1152 *                 nul-terminated.
1153 * @bytes_read:    location to store the number of bytes in the
1154 *                 input string that were successfully converted, or %NULL.
1155 *                 Even if the conversion was successful, this may be
1156 *                 less than @len if there were partial characters
1157 *                 at the end of the input. If the error
1158 *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1159 *                 stored will the byte offset after the last valid
1160 *                 input sequence.
1161 * @bytes_written: the number of bytes stored in the output buffer (not
1162 *                 including the terminating nul).
1163 * @error:         location to store the error occuring, or %NULL to ignore
1164 *                 errors. Any of the errors in #GConvertError may occur.
1165 *
1166 * Converts a string from UTF-8 to the encoding used for filenames.
1167 *
1168 * Return value: The converted string, or %NULL on an error.
1169 **/
1170gchar*
1171g_filename_from_utf8 (const gchar *utf8string,
1172                      gssize       len,           
1173                      gsize       *bytes_read,   
1174                      gsize       *bytes_written,
1175                      GError     **error)
1176{
1177  const gchar *charset;
1178
1179  if (get_filename_charset (&charset))
1180    return strdup_len (utf8string, len, bytes_read, bytes_written, error);
1181  else
1182    return g_convert (utf8string, len,
1183                      charset, "UTF-8", bytes_read, bytes_written, error);
1184}
1185
1186/* Test of haystack has the needle prefix, comparing case
1187 * insensitive. haystack may be UTF-8, but needle must
1188 * contain only ascii. */
1189static gboolean
1190has_case_prefix (const gchar *haystack, const gchar *needle)
1191{
1192  const gchar *h, *n;
1193 
1194  /* Eat one character at a time. */
1195  h = haystack;
1196  n = needle;
1197
1198  while (*n && *h &&
1199         g_ascii_tolower (*n) == g_ascii_tolower (*h))
1200    {
1201      n++;
1202      h++;
1203    }
1204 
1205  return *n == '\0';
1206}
1207
/* Escaping policies understood by g_escape_uri_string(). Each value is a
 * single bit that is tested against the entries of the acceptable[] table;
 * bit 2 (0x4) has no name here.
 */
typedef enum {
  UNSAFE_ALL        = 1 << 0,  /* Escape all unsafe characters   */
  UNSAFE_ALLOW_PLUS = 1 << 1,  /* Allows '+'  */
  UNSAFE_PATH       = 1 << 3,  /* Allows '/', '&', '=', ':', '@', '+', '$' and ',' */
  UNSAFE_HOST       = 1 << 4,  /* Allows '/' and ':' and '@' */
  UNSAFE_SLASHES    = 1 << 5   /* Allows all characters except for '/' and '%' */
} UnsafeCharacterSet;
1215
/* Escaping policy table consulted by the ACCEPTABLE() macro in
 * g_escape_uri_string(). There is one entry per ASCII character from
 * space (32) to DEL (127), indexed by (character - 32). Each entry is a
 * bit mask: a character is left unescaped when its entry has the bit of
 * the UnsafeCharacterSet in use set. Characters outside this range are
 * always escaped by that macro.
 * NOTE(review): several entries also set bit 0x4, which matches no
 * UnsafeCharacterSet member visible here - presumably a leftover; confirm.
 */
static const guchar acceptable[96] = {
  /* A table of the ASCII chars from space (32) to DEL (127) */
  /*      !    "    #    $    %    &    '    (    )    *    +    ,    -    .    / */
  0x00,0x3F,0x20,0x20,0x28,0x00,0x2C,0x3F,0x3F,0x3F,0x3F,0x2A,0x28,0x3F,0x3F,0x1C,
  /* 0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ? */
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x38,0x20,0x20,0x2C,0x20,0x20,
  /* @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O */
  0x38,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
  /* P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _ */
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x20,0x3F,
  /* `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o */
  0x20,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
  /* p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~  DEL */
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x3F,0x20
};
1231
/* Upper-case hexadecimal digits indexed by nibble value, used when writing
 * %XX escapes. The array holds exactly 16 characters, so it carries no
 * terminating nul and must not be used as a C string. */
static const gchar hex[16] = "0123456789ABCDEF";
1233
1234/* Note: This escape function works on file: URIs, but if you want to
1235 * escape something else, please read RFC-2396 */
1236static gchar *
1237g_escape_uri_string (const gchar *string,
1238                     UnsafeCharacterSet mask)
1239{
1240#define ACCEPTABLE(a) ((a)>=32 && (a)<128 && (acceptable[(a)-32] & use_mask))
1241
1242  const gchar *p;
1243  gchar *q;
1244  gchar *result;
1245  int c;
1246  gint unacceptable;
1247  UnsafeCharacterSet use_mask;
1248 
1249  g_return_val_if_fail (mask == UNSAFE_ALL
1250                        || mask == UNSAFE_ALLOW_PLUS
1251                        || mask == UNSAFE_PATH
1252                        || mask == UNSAFE_HOST
1253                        || mask == UNSAFE_SLASHES, NULL);
1254 
1255  unacceptable = 0;
1256  use_mask = mask;
1257  for (p = string; *p != '\0'; p++)
1258    {
1259      c = (guchar) *p;
1260      if (!ACCEPTABLE (c))
1261        unacceptable++;
1262    }
1263 
1264  result = g_malloc (p - string + unacceptable * 2 + 1);
1265 
1266  use_mask = mask;
1267  for (q = result, p = string; *p != '\0'; p++)
1268    {
1269      c = (guchar) *p;
1270     
1271      if (!ACCEPTABLE (c))
1272        {
1273          *q++ = '%'; /* means hex coming */
1274          *q++ = hex[c >> 4];
1275          *q++ = hex[c & 15];
1276        }
1277      else
1278        *q++ = *p;
1279    }
1280 
1281  *q = '\0';
1282 
1283  return result;
1284}
1285
1286
1287static gchar *
1288g_escape_file_uri (const gchar *hostname,
1289                   const gchar *pathname)
1290{
1291  char *escaped_hostname = NULL;
1292  char *escaped_path;
1293  char *res;
1294
1295#ifdef G_OS_WIN32
1296  char *p, *backslash;
1297
1298  /* Turn backslashes into forward slashes. That's what Netscape
1299   * does, and they are actually more or less equivalent in Windows.
1300   */
1301 
1302  pathname = g_strdup (pathname);
1303  p = (char *) pathname;
1304 
1305  while ((backslash = strchr (p, '\\')) != NULL)
1306    {
1307      *backslash = '/';
1308      p = backslash + 1;
1309    }
1310#endif
1311
1312  if (hostname && *hostname != '\0')
1313    {
1314      escaped_hostname = g_escape_uri_string (hostname, UNSAFE_HOST);
1315    }
1316
1317  escaped_path = g_escape_uri_string (pathname, UNSAFE_PATH);
1318
1319  res = g_strconcat ("file://",
1320                     (escaped_hostname) ? escaped_hostname : "",
1321                     (*escaped_path != '/') ? "/" : "",
1322                     escaped_path,
1323                     NULL);
1324
1325#ifdef G_OS_WIN32
1326  g_free ((char *) pathname);
1327#endif
1328
1329  g_free (escaped_hostname);
1330  g_free (escaped_path);
1331 
1332  return res;
1333}
1334
/* Decodes the two hexadecimal digits at scanner[0] and scanner[1].
 * Returns the resulting byte value, or -1 if either character is not a
 * hex digit. scanner[1] is only examined when scanner[0] is valid.
 */
static int
unescape_character (const char *scanner)
{
  int high_nibble;
  int low_nibble;

  high_nibble = g_ascii_xdigit_value (scanner[0]);
  if (high_nibble < 0)
    return -1;

  low_nibble = g_ascii_xdigit_value (scanner[1]);
  if (low_nibble < 0)
    return -1;

  return (high_nibble << 4) | low_nibble;
}
1351
1352static gchar *
1353g_unescape_uri_string (const char *escaped,
1354                       int         len,
1355                       const char *illegal_escaped_characters,
1356                       gboolean    ascii_must_not_be_escaped)
1357{
1358  const gchar *in, *in_end;
1359  gchar *out, *result;
1360  int c;
1361 
1362  if (escaped == NULL)
1363    return NULL;
1364
1365  if (len < 0)
1366    len = strlen (escaped);
1367
1368  result = g_malloc (len + 1);
1369 
1370  out = result;
1371  for (in = escaped, in_end = escaped + len; in < in_end; in++)
1372    {
1373      c = *in;
1374
1375      if (c == '%')
1376        {
1377          /* catch partial escape sequences past the end of the substring */
1378          if (in + 3 > in_end)
1379            break;
1380
1381          c = unescape_character (in + 1);
1382
1383          /* catch bad escape sequences and NUL characters */
1384          if (c <= 0)
1385            break;
1386
1387          /* catch escaped ASCII */
1388          if (ascii_must_not_be_escaped && c <= 0x7F)
1389            break;
1390
1391          /* catch other illegal escaped characters */
1392          if (strchr (illegal_escaped_characters, c) != NULL)
1393            break;
1394
1395          in += 2;
1396        }
1397
1398      *out++ = c;
1399    }
1400 
1401  g_assert (out - result <= len);
1402  *out = '\0';
1403
1404  if (in != in_end)
1405    {
1406      g_free (result);
1407      return NULL;
1408    }
1409
1410  return result;
1411}
1412
1413static gboolean
1414is_asciialphanum (gunichar c)
1415{
1416  return c <= 0x7F && g_ascii_isalnum (c);
1417}
1418
1419static gboolean
1420is_asciialpha (gunichar c)
1421{
1422  return c <= 0x7F && g_ascii_isalpha (c);
1423}
1424
1425/* allows an empty string */
1426static gboolean
1427hostname_validate (const char *hostname)
1428{
1429  const char *p;
1430  gunichar c, first_char, last_char;
1431
1432  p = hostname;
1433  if (*p == '\0')
1434    return TRUE;
1435  do
1436    {
1437      /* read in a label */
1438      c = g_utf8_get_char (p);
1439      p = g_utf8_next_char (p);
1440      if (!is_asciialphanum (c))
1441        return FALSE;
1442      first_char = c;
1443      do
1444        {
1445          last_char = c;
1446          c = g_utf8_get_char (p);
1447          p = g_utf8_next_char (p);
1448        }
1449      while (is_asciialphanum (c) || c == '-');
1450      if (last_char == '-')
1451        return FALSE;
1452     
1453      /* if that was the last label, check that it was a toplabel */
1454      if (c == '\0' || (c == '.' && *p == '\0'))
1455        return is_asciialpha (first_char);
1456    }
1457  while (c == '.');
1458  return FALSE;
1459}
1460
1461/**
1462 * g_filename_from_uri:
1463 * @uri: a uri describing a filename (escaped, encoded in ASCII).
1464 * @hostname: Location to store hostname for the URI, or %NULL.
1465 *            If there is no hostname in the URI, %NULL will be
1466 *            stored in this location.
1467 * @error: location to store the error occuring, or %NULL to ignore
1468 *         errors. Any of the errors in #GConvertError may occur.
1469 *
1470 * Converts an escaped ASCII-encoded URI to a local filename in the
1471 * encoding used for filenames.
1472 *
1473 * Return value: a newly-allocated string holding the resulting
1474 *               filename, or %NULL on an error.
1475 **/
1476gchar *
1477g_filename_from_uri (const gchar *uri,
1478                     gchar      **hostname,
1479                     GError     **error)
1480{
1481  const char *path_part;
1482  const char *host_part;
1483  char *unescaped_hostname;
1484  char *result;
1485  char *filename;
1486  int offs;
1487#ifdef G_OS_WIN32
1488  char *p, *slash;
1489#endif
1490
1491  if (hostname)
1492    *hostname = NULL;
1493
1494  if (!has_case_prefix (uri, "file:/"))
1495    {
1496      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1497                   _("The URI '%s' is not an absolute URI using the file scheme"),
1498                   uri);
1499      return NULL;
1500    }
1501 
1502  path_part = uri + strlen ("file:");
1503 
1504  if (strchr (path_part, '#') != NULL)
1505    {
1506      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1507                   _("The local file URI '%s' may not include a '#'"),
1508                   uri);
1509      return NULL;
1510    }
1511       
1512  if (has_case_prefix (path_part, "///"))
1513    path_part += 2;
1514  else if (has_case_prefix (path_part, "//"))
1515    {
1516      path_part += 2;
1517      host_part = path_part;
1518
1519      path_part = strchr (path_part, '/');
1520
1521      if (path_part == NULL)
1522        {
1523          g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1524                       _("The URI '%s' is invalid"),
1525                       uri);
1526          return NULL;
1527        }
1528
1529      unescaped_hostname = g_unescape_uri_string (host_part, path_part - host_part, "", TRUE);
1530
1531      if (unescaped_hostname == NULL ||
1532          !hostname_validate (unescaped_hostname))
1533        {
1534          g_free (unescaped_hostname);
1535          g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1536                       _("The hostname of the URI '%s' is invalid"),
1537                       uri);
1538          return NULL;
1539        }
1540     
1541      if (hostname)
1542        *hostname = unescaped_hostname;
1543      else
1544        g_free (unescaped_hostname);
1545    }
1546
1547  filename = g_unescape_uri_string (path_part, -1, "/", FALSE);
1548
1549  if (filename == NULL)
1550    {
1551      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1552                   _("The URI '%s' contains invalidly escaped characters"),
1553                   uri);
1554      return NULL;
1555    }
1556
1557  offs = 0;
1558#ifdef G_OS_WIN32
1559  /* Drop localhost */
1560  if (hostname && *hostname != NULL &&
1561      g_ascii_strcasecmp (*hostname, "localhost") == 0)
1562    {
1563      g_free (*hostname);
1564      *hostname = NULL;
1565    }
1566
1567  /* Turn slashes into backslashes, because that's the canonical spelling */
1568  p = filename;
1569  while ((slash = strchr (p, '/')) != NULL)
1570    {
1571      *slash = '\\';
1572      p = slash + 1;
1573    }
1574
1575  /* Windows URIs with a drive letter can be like "file://host/c:/foo"
1576   * or "file://host/c|/foo" (some Netscape versions). In those cases, start
1577   * the filename from the drive letter.
1578   */
1579  if (g_ascii_isalpha (filename[1]))
1580    {
1581      if (filename[2] == ':')
1582        offs = 1;
1583      else if (filename[2] == '|')
1584        {
1585          filename[2] = ':';
1586          offs = 1;
1587        }
1588    }
1589#endif
1590
1591  result = g_strdup (filename + offs);
1592  g_free (filename);
1593
1594  return result;
1595}
1596
1597/**
1598 * g_filename_to_uri:
1599 * @filename: an absolute filename specified in the encoding
1600 *            used for filenames by the operating system.
1601 * @hostname: A UTF-8 encoded hostname, or %NULL for none.
1602 * @error: location to store the error occuring, or %NULL to ignore
1603 *         errors. Any of the errors in #GConvertError may occur.
1604 *
1605 * Converts an absolute filename to an escaped ASCII-encoded URI.
1606 *
1607 * Return value: a newly-allocated string holding the resulting
1608 *               URI, or %NULL on an error.
1609 **/
1610gchar *
1611g_filename_to_uri   (const gchar *filename,
1612                     const gchar *hostname,
1613                     GError     **error)
1614{
1615  char *escaped_uri;
1616
1617  g_return_val_if_fail (filename != NULL, NULL);
1618
1619  if (!g_path_is_absolute (filename))
1620    {
1621      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH,
1622                   _("The pathname '%s' is not an absolute path"),
1623                   filename);
1624      return NULL;
1625    }
1626
1627  if (hostname &&
1628      !(g_utf8_validate (hostname, -1, NULL)
1629        && hostname_validate (hostname)))
1630    {
1631      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1632                   _("Invalid hostname"));
1633      return NULL;
1634    }
1635 
1636#ifdef G_OS_WIN32
1637  /* Don't use localhost unnecessarily */
1638  if (hostname && g_ascii_strcasecmp (hostname, "localhost") == 0)
1639    hostname = NULL;
1640#endif
1641
1642  escaped_uri = g_escape_file_uri (hostname, filename);
1643
1644  return escaped_uri;
1645}
Note: See TracBrowser for help on using the repository browser.