1 | /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ |
---|
2 | /* This file is part of the GtkHTML library. |
---|
3 | |
---|
4 | Copyright (C) 1997 Martin Jones (mjones@kde.org) |
---|
5 | Copyright (C) 1997 Torben Weis (weis@kde.org) |
---|
6 | Copyright (C) 2000 Helix Code, Inc. |
---|
7 | |
---|
8 | This library is free software; you can redistribute it and/or |
---|
9 | modify it under the terms of the GNU Library General Public |
---|
10 | License as published by the Free Software Foundation; either |
---|
11 | version 2 of the License, or (at your option) any later version. |
---|
12 | |
---|
13 | This library is distributed in the hope that it will be useful, |
---|
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
16 | Library General Public License for more details. |
---|
17 | |
---|
18 | You should have received a copy of the GNU Library General Public License |
---|
19 | along with this library; see the file COPYING.LIB. If not, write to |
---|
20 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
---|
21 | Boston, MA 02111-1307, USA. |
---|
22 | */ |
---|
23 | |
---|
24 | |
---|
25 | #include <config.h> |
---|
26 | #include <string.h> |
---|
27 | #include <glib.h> |
---|
28 | |
---|
29 | #include "htmltokenizer.h" |
---|
30 | #include "htmlstringtokenizer.h" |
---|
31 | |
---|
32 | |
---|
33 | HTMLStringTokenizer * |
---|
34 | html_string_tokenizer_new (void) |
---|
35 | { |
---|
36 | HTMLStringTokenizer *s; |
---|
37 | |
---|
38 | s = g_new (HTMLStringTokenizer, 1); |
---|
39 | |
---|
40 | s->pos = NULL; |
---|
41 | s->end = NULL; |
---|
42 | s->buffer = NULL; |
---|
43 | |
---|
44 | s->buffer_length = 0; |
---|
45 | |
---|
46 | return s; |
---|
47 | } |
---|
48 | |
---|
49 | void |
---|
50 | html_string_tokenizer_destroy (HTMLStringTokenizer *st) |
---|
51 | { |
---|
52 | g_return_if_fail (st != NULL); |
---|
53 | |
---|
54 | if (st->buffer) |
---|
55 | g_free (st->buffer); |
---|
56 | g_free (st); |
---|
57 | } |
---|
58 | |
---|
59 | |
---|
/*
 * Quoting state while scanning a string: which kind of quote, if any,
 * is currently open.  QUOTE_TYPE_NONE comes first so that it has the
 * value 0 and "not inside a quote" tests as false.
 */
typedef enum _QuoteType {
	QUOTE_TYPE_NONE,	/* not inside any quote */
	QUOTE_TYPE_SINGLE,	/* inside '...' */
	QUOTE_TYPE_DOUBLE	/* inside "..." */
} QuoteType;
---|
66 | |
---|
67 | void |
---|
68 | html_string_tokenizer_tokenize (HTMLStringTokenizer *t, |
---|
69 | const gchar *str, |
---|
70 | gchar *separators) |
---|
71 | { |
---|
72 | const gchar *src, *x; |
---|
73 | QuoteType quoted; |
---|
74 | gint str_length; |
---|
75 | |
---|
76 | if (*str == '\0') { |
---|
77 | t->pos = 0; |
---|
78 | return; |
---|
79 | } |
---|
80 | |
---|
81 | str_length = strlen (str) + 1; |
---|
82 | |
---|
83 | if (t->buffer_length < str_length) { |
---|
84 | g_free (t->buffer); |
---|
85 | t->buffer = g_malloc (str_length); |
---|
86 | t->buffer_length = str_length; |
---|
87 | } |
---|
88 | |
---|
89 | src = str; |
---|
90 | t->end = t->buffer; |
---|
91 | |
---|
92 | quoted = QUOTE_TYPE_NONE; |
---|
93 | |
---|
94 | for (; *src != '\0'; src++) { |
---|
95 | x = strchr (separators, *src); |
---|
96 | if (*src == '\"' && !quoted) |
---|
97 | quoted = QUOTE_TYPE_DOUBLE; |
---|
98 | else if (*src == '\'' && !quoted) |
---|
99 | quoted = QUOTE_TYPE_SINGLE; |
---|
100 | else if ((*src == '\"' && quoted == QUOTE_TYPE_DOUBLE) |
---|
101 | || (*src == '\'' && quoted == QUOTE_TYPE_SINGLE)) |
---|
102 | quoted = QUOTE_TYPE_NONE; |
---|
103 | else if (x && !quoted) |
---|
104 | *(t->end)++ = 0; |
---|
105 | else |
---|
106 | *(t->end)++ = *src; |
---|
107 | } |
---|
108 | |
---|
109 | *(t->end) = 0; |
---|
110 | |
---|
111 | if (t->end - t->buffer <= 1) |
---|
112 | t->pos = NULL; /* No tokens */ |
---|
113 | else |
---|
114 | t->pos = t->buffer; |
---|
115 | } |
---|
116 | |
---|
117 | gboolean |
---|
118 | html_string_tokenizer_has_more_tokens (HTMLStringTokenizer *t) |
---|
119 | { |
---|
120 | return (t->pos != NULL); |
---|
121 | } |
---|
122 | |
---|
123 | gchar * |
---|
124 | html_string_tokenizer_next_token (HTMLStringTokenizer *t) |
---|
125 | { |
---|
126 | gchar *ret; |
---|
127 | |
---|
128 | if (t->pos == NULL) |
---|
129 | return NULL; |
---|
130 | |
---|
131 | ret = t->pos; |
---|
132 | t->pos += strlen (ret) + 1; |
---|
133 | if (t->pos >= t->end) |
---|
134 | t->pos = NULL; |
---|
135 | |
---|
136 | return ret; |
---|
137 | } |
---|