source: trunk/third/texinfo/intl/localcharset.c @ 18945

Revision 18945, 9.3 KB checked in by amb, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18944, which included commits to RCS files with non-trunk default branches.
Line 
1/* Determine a canonical name for the current locale's character encoding.
2
3   Copyright (C) 2000-2002 Free Software Foundation, Inc.
4
5   This program is free software; you can redistribute it and/or modify it
6   under the terms of the GNU Library General Public License as published
7   by the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Library General Public License for more details.
14
15   You should have received a copy of the GNU Library General Public
16   License along with this program; if not, write to the Free Software
17   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
18   USA.  */
19
20/* Written by Bruno Haible <bruno@clisp.org>.  */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif
25
26#if HAVE_STDDEF_H
27# include <stddef.h>
28#endif
29
30#include <stdio.h>
31#if HAVE_STRING_H
32# include <string.h>
33#else
34# include <strings.h>
35#endif
36#if HAVE_STDLIB_H
37# include <stdlib.h>
38#endif
39
40#if defined _WIN32 || defined __WIN32__
41# undef WIN32   /* avoid warning on mingw32 */
42# define WIN32
43#endif
44
45#if defined __EMX__
46/* Assume EMX program runs on OS/2, even if compiled under DOS.  */
47# define OS2
48#endif
49
50#if !defined WIN32
51# if HAVE_LANGINFO_CODESET
52#  include <langinfo.h>
53# else
54#  if HAVE_SETLOCALE
55#   include <locale.h>
56#  endif
57# endif
58#elif defined WIN32
59# define WIN32_LEAN_AND_MEAN
60# include <windows.h>
61#endif
62#if defined OS2
63# define INCL_DOS
64# include <os2.h>
65#endif
66
67#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__
68  /* Win32, OS/2, DOS */
69# define ISSLASH(C) ((C) == '/' || (C) == '\\')
70#endif
71
72#ifndef DIRECTORY_SEPARATOR
73# define DIRECTORY_SEPARATOR '/'
74#endif
75
76#ifndef ISSLASH
77# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
78#endif
79
80#ifdef HAVE_GETC_UNLOCKED
81# undef getc
82# define getc getc_unlocked
83#endif
84
85#ifdef __cplusplus
86/* When compiling with "gcc -x c++", produce a function with C linkage.  */
87extern "C" const char * locale_charset (void);
88#endif
89
90/* The following static variable is declared 'volatile' to avoid a
91   possible multithread problem in the function get_charset_aliases. If we
92   are running in a threaded environment, and if two threads initialize
93   'charset_aliases' simultaneously, both will produce the same value,
94   and everything will be ok if the two assignments to 'charset_aliases'
95   are atomic. But I don't know what will happen if the two assignments mix.  */
96#if __STDC__ != 1
97# define volatile /* empty */
98#endif
99/* Pointer to the contents of the charset.alias file, if it has already been
100   read, else NULL.  Its format is:
101   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
102static const char * volatile charset_aliases;
103
104/* Return a pointer to the contents of the charset.alias file.  */
105static const char *
106get_charset_aliases ()
107{
108  const char *cp;
109
110  cp = charset_aliases;
111  if (cp == NULL)
112    {
113#if !defined WIN32
114      FILE *fp;
115      const char *dir = LIBDIR;
116      const char *base = "charset.alias";
117      char *file_name;
118
119      /* Concatenate dir and base into freshly allocated file_name.  */
120      {
121        size_t dir_len = strlen (dir);
122        size_t base_len = strlen (base);
123        int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
124        file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
125        if (file_name != NULL)
126          {
127            memcpy (file_name, dir, dir_len);
128            if (add_slash)
129              file_name[dir_len] = DIRECTORY_SEPARATOR;
130            memcpy (file_name + dir_len + add_slash, base, base_len + 1);
131          }
132      }
133
134      if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
135        /* Out of memory or file not found, treat it as empty.  */
136        cp = "";
137      else
138        {
139          /* Parse the file's contents.  */
140          int c;
141          char buf1[50+1];
142          char buf2[50+1];
143          char *res_ptr = NULL;
144          size_t res_size = 0;
145          size_t l1, l2;
146
147          for (;;)
148            {
149              c = getc (fp);
150              if (c == EOF)
151                break;
152              if (c == '\n' || c == ' ' || c == '\t')
153                continue;
154              if (c == '#')
155                {
156                  /* Skip comment, to end of line.  */
157                  do
158                    c = getc (fp);
159                  while (!(c == EOF || c == '\n'));
160                  if (c == EOF)
161                    break;
162                  continue;
163                }
164              ungetc (c, fp);
165              if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
166                break;
167              l1 = strlen (buf1);
168              l2 = strlen (buf2);
169              if (res_size == 0)
170                {
171                  res_size = l1 + 1 + l2 + 1;
172                  res_ptr = (char *) malloc (res_size + 1);
173                }
174              else
175                {
176                  res_size += l1 + 1 + l2 + 1;
177                  res_ptr = (char *) realloc (res_ptr, res_size + 1);
178                }
179              if (res_ptr == NULL)
180                {
181                  /* Out of memory. */
182                  res_size = 0;
183                  break;
184                }
185              strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
186              strcpy (res_ptr + res_size - (l2 + 1), buf2);
187            }
188          fclose (fp);
189          if (res_size == 0)
190            cp = "";
191          else
192            {
193              *(res_ptr + res_size) = '\0';
194              cp = res_ptr;
195            }
196        }
197
198      if (file_name != NULL)
199        free (file_name);
200
201#else
202
203      /* To avoid the troubles of installing a separate file in the same
204         directory as the DLL and of retrieving the DLL's directory at
205         runtime, simply inline the aliases here.  */
206
207# if defined WIN32
208      cp = "CP936" "\0" "GBK" "\0"
209           "CP1361" "\0" "JOHAB" "\0"
210           "CP20127" "\0" "ASCII" "\0"
211           "CP20866" "\0" "KOI8-R" "\0"
212           "CP21866" "\0" "KOI8-RU" "\0"
213           "CP28591" "\0" "ISO-8859-1" "\0"
214           "CP28592" "\0" "ISO-8859-2" "\0"
215           "CP28593" "\0" "ISO-8859-3" "\0"
216           "CP28594" "\0" "ISO-8859-4" "\0"
217           "CP28595" "\0" "ISO-8859-5" "\0"
218           "CP28596" "\0" "ISO-8859-6" "\0"
219           "CP28597" "\0" "ISO-8859-7" "\0"
220           "CP28598" "\0" "ISO-8859-8" "\0"
221           "CP28599" "\0" "ISO-8859-9" "\0"
222           "CP28605" "\0" "ISO-8859-15" "\0";
223# endif
224#endif
225
226      charset_aliases = cp;
227    }
228
229  return cp;
230}
231
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in config.charset.
   The result must not be freed; it is statically allocated.
   If the canonical name cannot be determined, the result is a non-canonical
   name.

   How the raw codeset name is obtained depends on the platform:
     - Unix-like systems with nl_langinfo: nl_langinfo (CODESET).
     - Other Unix-like systems: the value of the first non-empty one of
       the environment variables LC_ALL, LC_CTYPE, LANG.
     - Win32: "CP" followed by the number returned by GetACP ().
     - OS/2: the encoding part of LC_ALL / LC_CTYPE / LANG if one is set
       (returned directly, without alias resolution, when the value has
       a ".encoding" part), else "CP" followed by the first codepage
       reported by DosQueryCp ().
   The raw name is then looked up in the table returned by
   get_charset_aliases ().  The result is never the empty string; "ASCII"
   is returned instead.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset ()
{
  const char *codeset;
  const char *aliases;

#if !(defined WIN32 || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

# else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set.  */
#  if HAVE_SETLOCALE && 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  /* Standard precedence of the locale environment variables:
     LC_ALL, then LC_CTYPE, then LANG.  */
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }

  /* On some old systems, one used to set locale = "iso8859_1". On others,
     you set it to "language_COUNTRY.charset". In any case, we resolve it
     through the charset.alias file.  */
  codeset = locale;

# endif

#elif defined WIN32

  /* "CP" + up to 10 decimal digits + terminating NUL.  */
  static char buf[2 + 10 + 1];

  /* Woe32 has a function returning the locale's codepage as a number.  */
  sprintf (buf, "CP%u", GetACP ());
  codeset = buf;

#elif defined OS2

  const char *locale;
  /* "CP" + up to 10 decimal digits + terminating NUL.  */
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier points at or after dot, so the difference is
             non-negative; compare it as an unsigned size.  The encoding
             is only copied if it fits in buf together with its NUL.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* Resolve through the charset.alias file.  */
      codeset = locale;
    }
  else
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* ULONG is wider than what "%u" expects on some ABIs; convert
             explicitly so that the argument type matches the format and
             the output still fits in buf.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

#endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

  /* Resolve alias.  The table is a sequence of ALIAS '\0' CANONICAL '\0'
     pairs ended by an empty string, so each step skips two strings.
     An alias consisting of just "*" matches any codeset.  */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        /* The canonical name is the string right after the alias.  */
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  /* Don't return an empty string.  GNU libc and GNU libiconv interpret
     the empty string as denoting "the locale's character encoding",
     thus GNU libiconv would call this function a second time.  */
  if (codeset[0] == '\0')
    codeset = "ASCII";

  return codeset;
}
Note: See TracBrowser for help on using the repository browser.