source: trunk/third/libxml/xmlIO.c @ 16738

Revision 16738, 14.1 KB checked in by ghudson, 23 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r16737, which included commits to RCS files with non-trunk default branches.
Line 
1/*
2 * xmlIO.c : implementation of the I/O interfaces used by the parser
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#ifdef WIN32
10#include "win32config.h"
11#else
12#include "config.h"
13#endif
14
15#include <stdio.h>
16#include <string.h>
17
18#ifdef HAVE_SYS_TYPES_H
19#include <sys/types.h>
20#endif
21#ifdef HAVE_SYS_STAT_H
22#include <sys/stat.h>
23#endif
24#ifdef HAVE_FCNTL_H
25#include <fcntl.h>
26#endif
27#ifdef HAVE_UNISTD_H
28#include <unistd.h>
29#endif
30#ifdef HAVE_STDLIB_H
31#include <stdlib.h>
32#endif
33#ifdef HAVE_ZLIB_H
34#include <zlib.h>
35#endif
36
37#include "xmlmemory.h"
38#include "parser.h"
39#include "parserInternals.h"
40#include "xmlIO.h"
41#include "nanohttp.h"
42#include "nanoftp.h"
43
/*
 * Compile-time debugging switches, all disabled by default:
 *   DEBUG_INPUT              trace buffer pushes/reads on stderr and
 *                            shrink MINLEN
 *   VERBOSE_FAILURE          report failures to open an input on stderr
 *   DEBUG_EXTERNAL_ENTITIES  trace calls to the default entity loader
 */
/* #define DEBUG_INPUT */
/* #define VERBOSE_FAILURE */
/* #define DEBUG_EXTERNAL_ENTITIES */

/*
 * MINLEN: read requests no larger than this (other than the special
 * 4 byte request) are rounded up to it by xmlParserInputBufferGrow().
 */
#ifndef DEBUG_INPUT
#define MINLEN 4000
#else
#define MINLEN 40
#endif
53
54/**
55 * xmlAllocParserInputBuffer:
56 * @enc:  the charset encoding if known
57 *
58 * Create a buffered parser input for progressive parsing
59 *
60 * Returns the new parser input or NULL
61 */
62xmlParserInputBufferPtr
63xmlAllocParserInputBuffer(xmlCharEncoding enc) {
64    xmlParserInputBufferPtr ret;
65
66    ret = (xmlParserInputBufferPtr) xmlMalloc(sizeof(xmlParserInputBuffer));
67    if (ret == NULL) {
68        fprintf(stderr, "xmlAllocParserInputBuffer : out of memory!\n");
69        return(NULL);
70    }
71    memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer));
72    ret->buffer = xmlBufferCreate();
73    if (ret->buffer == NULL) {
74        xmlFree(ret);
75        return(NULL);
76    }
77    ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
78    ret->encoder = xmlGetCharEncodingHandler(enc);
79    ret->fd = -1;
80    ret->httpIO = NULL;
81    ret->ftpIO = NULL;
82    /* 2.3.5 */
83    ret->raw = NULL;
84
85    return(ret);
86}
87
88/**
89 * xmlFreeParserInputBuffer:
90 * @in:  a buffered parser input
91 *
92 * Free up the memory used by a buffered parser input
93 */
94void
95xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
96    if (in->buffer != NULL) {
97        xmlBufferFree(in->buffer);
98        in->buffer = NULL;
99    }
100#ifdef HAVE_ZLIB_H
101    if (in->gzfile != NULL)
102        gzclose(in->gzfile);
103#endif
104    if (in->httpIO != NULL)
105        xmlNanoHTTPClose(in->httpIO);
106    if (in->ftpIO != NULL)
107        xmlNanoFTPClose(in->ftpIO);
108    if (in->fd >= 0)
109        close(in->fd);
110    /* 2.3.5 */
111    if (in->raw) {
112        xmlBufferFree(in->raw);
113        in->raw = NULL;
114    }
115    memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
116    xmlFree(in);
117}
118
119/**
120 * xmlParserInputBufferCreateFilename:
121 * @filename:  a C string containing the filename
122 * @enc:  the charset encoding if known
123 *
124 * Create a buffered parser input for the progressive parsing of a file
125 * If filename is "-' then we use stdin as the input.
126 * Automatic support for ZLIB/Compress compressed document is provided
127 * by default if found at compile-time.
128 *
129 * Returns the new parser input or NULL
130 */
131xmlParserInputBufferPtr
132xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
133    xmlParserInputBufferPtr ret;
134#ifdef HAVE_ZLIB_H
135    gzFile input = 0;
136#else
137    int input = -1;
138#endif
139    void *httpIO = NULL;
140    void *ftpIO = NULL;
141
142    if (filename == NULL) return(NULL);
143
144    if (!strncmp(filename, "http://", 7)) {
145        httpIO = xmlNanoHTTPOpen(filename, NULL);
146        if (httpIO == NULL) {
147#ifdef VERBOSE_FAILURE
148            fprintf (stderr, "Cannot read URL %s\n", filename);
149            perror ("xmlNanoHTTPOpen failed");
150#endif
151            return(NULL);
152        }
153    } else if (!strncmp(filename, "ftp://", 6)) {
154        ftpIO = xmlNanoFTPOpen(filename);
155        if (ftpIO == NULL) {
156#ifdef VERBOSE_FAILURE
157            fprintf (stderr, "Cannot read URL %s\n", filename);
158            perror ("xmlNanoFTPOpen failed");
159#endif
160            return(NULL);
161        }
162    } else if (!strcmp(filename, "-")) {
163#ifdef HAVE_ZLIB_H
164        input = gzdopen (fileno(stdin), "r");
165        if (input == NULL) {
166#ifdef VERBOSE_FAILURE
167            fprintf (stderr, "Cannot read from stdin\n");
168            perror ("gzdopen failed");
169#endif
170            return(NULL);
171        }
172#else
173#ifdef WIN32
174        input = -1;
175#else
176        input = fileno(stdin);
177#endif
178        if (input < 0) {
179#ifdef VERBOSE_FAILURE
180            fprintf (stderr, "Cannot read from stdin\n");
181            perror ("open failed");
182#endif
183            return(NULL);
184        }
185#endif
186    } else {
187#ifdef HAVE_ZLIB_H
188        input = gzopen (filename, "r");
189        if (input == NULL) {
190#ifdef VERBOSE_FAILURE
191            fprintf (stderr, "Cannot read file %s :\n", filename);
192            perror ("gzopen failed");
193#endif
194            return(NULL);
195        }
196#else
197#ifdef WIN32
198        input = _open (filename, O_RDONLY | _O_BINARY);
199#else
200        input = open (filename, O_RDONLY);
201#endif
202        if (input < 0) {
203#ifdef VERBOSE_FAILURE
204            fprintf (stderr, "Cannot read file %s :\n", filename);
205            perror ("open failed");
206#endif
207            return(NULL);
208        }
209#endif
210    }
211    /*
212     * TODO : get the 4 first bytes and decode the charset
213     * if enc == XML_CHAR_ENCODING_NONE
214     * plug some encoding conversion routines here. !!!
215     * enc = xmlDetectCharEncoding(buffer);
216     */
217
218    ret = xmlAllocParserInputBuffer(enc);
219    if (ret != NULL) {
220#ifdef HAVE_ZLIB_H
221        ret->gzfile = input;
222#else
223        ret->fd = input;
224#endif
225        ret->httpIO = httpIO;
226        ret->ftpIO = ftpIO;
227    }
228    xmlParserInputBufferRead(ret, 4);
229
230    return(ret);
231}
232
233/**
234 * xmlParserInputBufferCreateFile:
235 * @file:  a FILE*
236 * @enc:  the charset encoding if known
237 *
238 * Create a buffered parser input for the progressive parsing of a FILE *
239 * buffered C I/O
240 *
241 * Returns the new parser input or NULL
242 */
243xmlParserInputBufferPtr
244xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) {
245    xmlParserInputBufferPtr ret;
246
247    if (file == NULL) return(NULL);
248
249    ret = xmlAllocParserInputBuffer(enc);
250    if (ret != NULL)
251        ret->file = file;
252
253    return(ret);
254}
255
256/**
257 * xmlParserInputBufferCreateFd:
258 * @fd:  a file descriptor number
259 * @enc:  the charset encoding if known
260 *
261 * Create a buffered parser input for the progressive parsing for the input
262 * from a file descriptor
263 *
264 * Returns the new parser input or NULL
265 */
266xmlParserInputBufferPtr
267xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
268    xmlParserInputBufferPtr ret;
269
270    if (fd < 0) return(NULL);
271
272    ret = xmlAllocParserInputBuffer(enc);
273    if (ret != NULL)
274        ret->fd = fd;
275
276    return(ret);
277}
278
279/**
280 * xmlParserInputBufferPush:
281 * @in:  a buffered parser input
282 * @buf:  an char array
283 * @len:  the size in bytes of the array.
284 *
285 * Push the content of the arry in the input buffer
286 * This routine handle the I18N transcoding to internal UTF-8
287 * This is used when operating the parser in progressive (push) mode.
288 *
289 * Returns the number of chars read and stored in the buffer, or -1
290 *         in case of error.
291 */
292int
293xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
294    int nbchars = 0;
295
296    if (len < 0) return(0);
297    if (in->encoder != NULL) {
298        xmlChar *buffer;
299
300        buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
301        if (buffer == NULL) {
302            fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
303            xmlFree(buffer);
304            return(-1);
305        }
306        nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar),
307                                     (xmlChar *) buf, len);
308        /*
309         * TODO : we really need to have something atomic or the
310         *        encoder must report the number of bytes read
311         */
312        buffer[nbchars] = 0;
313        xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
314        xmlFree(buffer);
315    } else {
316        nbchars = len;
317        xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
318    }
319#ifdef DEBUG_INPUT
320    fprintf(stderr, "I/O: pushed %d chars, buffer %d/%d\n",
321            nbchars, in->buffer->use, in->buffer->size);
322#endif
323    return(nbchars);
324}
325
326/**
327 * xmlParserInputBufferGrow:
328 * @in:  a buffered parser input
329 * @len:  indicative value of the amount of chars to read
330 *
331 * Grow up the content of the input buffer, the old data are preserved
332 * This routine handle the I18N transcoding to internal UTF-8
333 * This routine is used when operating the parser in normal (pull) mode
334 * TODO: one should be able to remove one extra copy
335 *
336 * Returns the number of chars read and stored in the buffer, or -1
337 *         in case of error.
338 */
339int
340xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
341    char *buffer = NULL;
342#ifdef HAVE_ZLIB_H
343    gzFile input = (gzFile) in->gzfile;
344#endif
345    int res = 0;
346    int nbchars = 0;
347    int buffree;
348
349    if ((len <= MINLEN) && (len != 4))
350        len = MINLEN;
351    buffree = in->buffer->size - in->buffer->use;
352    if (buffree <= 0) {
353        fprintf(stderr, "xmlParserInputBufferGrow : buffer full !\n");
354        return(0);
355    }
356    if (len > buffree)
357        len = buffree;
358
359    buffer = xmlMalloc((len + 1) * sizeof(char));
360    if (buffer == NULL) {
361        fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
362        return(-1);
363    }
364    if (in->httpIO != NULL) {
365        res = xmlNanoHTTPRead(in->httpIO, &buffer[0], len);
366    } else if (in->ftpIO != NULL) {
367        res = xmlNanoFTPRead(in->ftpIO, &buffer[0], len);
368    } else if (in->file != NULL) {
369        res = fread(&buffer[0], 1, len, in->file);
370#ifdef HAVE_ZLIB_H
371    } else if (in->gzfile != NULL) {
372        res = gzread(input, &buffer[0], len);
373#endif
374    } else if (in->fd >= 0) {
375        res = read(in->fd, &buffer[0], len);
376    } else {
377        fprintf(stderr, "xmlParserInputBufferGrow : no input !\n");
378        xmlFree(buffer);
379        return(-1);
380    }
381    if (res == 0) {
382        xmlFree(buffer);
383        return(0);
384    }
385    if (res < 0) {
386        perror ("read error");
387        xmlFree(buffer);
388        return(-1);
389    }
390    if (in->encoder != NULL) {
391        xmlChar *buf;
392
393        buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
394        if (buf == NULL) {
395            fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
396            xmlFree(buffer);
397            return(-1);
398        }
399        nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
400                                     BAD_CAST buffer, res);
401        buf[nbchars] = 0;
402        xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
403        xmlFree(buf);
404    } else {
405        nbchars = res;
406        buffer[nbchars] = 0;
407        xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
408    }
409#ifdef DEBUG_INPUT
410    fprintf(stderr, "I/O: read %d chars, buffer %d/%d\n",
411            nbchars, in->buffer->use, in->buffer->size);
412#endif
413    xmlFree(buffer);
414    return(nbchars);
415}
416
417/**
418 * xmlParserInputBufferRead:
419 * @in:  a buffered parser input
420 * @len:  indicative value of the amount of chars to read
421 *
422 * Refresh the content of the input buffer, the old data are considered
423 * consumed
424 * This routine handle the I18N transcoding to internal UTF-8
425 *
426 * Returns the number of chars read and stored in the buffer, or -1
427 *         in case of error.
428 */
429int
430xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) {
431    /* xmlBufferEmpty(in->buffer); */
432    if ((in->httpIO != NULL) || (in->ftpIO != NULL) || (in->file != NULL) ||
433#ifdef HAVE_ZLIB_H
434        (in->gzfile != NULL) ||
435#endif
436        (in->fd >= 0))
437        return(xmlParserInputBufferGrow(in, len));
438    else
439        return(0);
440}
441
/*
 * xmlParserGetDirectory:
 * @filename:  the path to a file
 *
 * Compute the directory part of @filename: everything before its last
 * path separator ('\\' on WIN32, '/' elsewhere), or the separator alone
 * when it is the first character. Only the first 1023 bytes of
 * @filename are considered. Without any separator the current working
 * directory is used instead.
 *
 * Returns a new allocated string containing the directory, or NULL.
 */
char *
xmlParserGetDirectory(const char *filename) {
    char dir[1024];
    char *last;
#ifdef WIN32
    const char sep = '\\';
#else
    const char sep = '/';
#endif

    if (filename == NULL)
        return(NULL);

    /* Work on a bounded, always terminated private copy. */
    strncpy(dir, filename, 1023);
    dir[1023] = 0;

    last = strrchr(dir, sep);
    if (last != NULL) {
        if (last == dir) {
            /* The only separator is the leading one: keep it. */
            dir[1] = 0;
        } else {
            *last = 0;
        }
        return(xmlMemStrdup(dir));
    }

    /* No directory component at all: fall back to the current one. */
    if (getcwd(dir, 1024) == NULL)
        return(NULL);
    dir[1023] = 0;
    return(xmlMemStrdup(dir));
}
481
482/****************************************************************
483 *                                                              *
484 *              External entities loading                       *
485 *                                                              *
486 ****************************************************************/
487
488/*
489 * xmlDefaultExternalEntityLoader:
490 * @URL:  the URL for the entity to load
491 * @ID:  the System ID for the entity to load
492 * @ctxt:  the context in which the entity is called or NULL
493 *
494 * By default we don't load external entitites, yet.
495 *
496 * Returns a new allocated xmlParserInputPtr, or NULL.
497 */
498static
499xmlParserInputPtr
500xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
501                               xmlParserCtxtPtr ctxt) {
502    xmlParserInputPtr ret = NULL;
503#ifdef DEBUG_EXTERNAL_ENTITIES
504    fprintf(stderr, "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
505#endif
506    if (URL == NULL) {
507        if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
508            ctxt->sax->warning(ctxt, "failed to load external entity \"%s\"\n",
509                               ID);
510        return(NULL);
511    }
512    ret = xmlNewInputFromFile(ctxt, URL);
513    if (ret == NULL) {
514        if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
515            ctxt->sax->warning(ctxt, "failed to load external entity \"%s\"\n",
516                               URL);
517    }
518    return(ret);
519}
520
521static xmlExternalEntityLoader xmlCurrentExternalEntityLoader =
522       xmlDefaultExternalEntityLoader;
523
524/*
525 * xmlSetExternalEntityLoader:
526 * @f:  the new entity resolver function
527 *
528 * Changes the defaultexternal entity resolver function for the application
529 */
530void
531xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
532    xmlCurrentExternalEntityLoader = f;
533}
534
535/*
536 * xmlGetExternalEntityLoader:
537 *
538 * Get the default external entity resolver function for the application
539 *
540 * Returns the xmlExternalEntityLoader function pointer
541 */
542xmlExternalEntityLoader
543xmlGetExternalEntityLoader(void) {
544    return(xmlCurrentExternalEntityLoader);
545}
546
547/*
548 * xmlLoadExternalEntity:
549 * @URL:  the URL for the entity to load
550 * @ID:  the System ID for the entity to load
551 * @ctxt:  the context in which the entity is called or NULL
552 *
553 * Load an external entity, note that the use of this function for
554 * unparsed entities may generate problems
555 * TODO: a more generic External entitiy API must be designed
556 *
557 * Returns the xmlParserInputPtr or NULL
558 */
559xmlParserInputPtr
560xmlLoadExternalEntity(const char *URL, const char *ID,
561                      xmlParserCtxtPtr ctxt) {
562    return(xmlCurrentExternalEntityLoader(URL, ID, ctxt));
563}
564
Note: See TracBrowser for help on using the repository browser.