1 | /* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 8; tab-width: 8 -*- */ |
---|
2 | |
---|
3 | /* |
---|
4 | * gnome-vfs-mime-magic.c |
---|
5 | * |
---|
6 | * Written by: |
---|
7 | * James Youngman (jay@gnu.org) |
---|
8 | * |
---|
9 | * Adapted to the GNOME needs by: |
---|
10 | * Elliot Lee (sopwith@cuc.edu) |
---|
11 | * |
---|
12 | * Rewritten by: |
---|
13 | * Pavel Cisler <pavel@eazel.com> |
---|
14 | */ |
---|
15 | |
---|
16 | #include "gnome-vfs-mime-magic.h" |
---|
17 | |
---|
18 | /* needed for S_ISSOCK with 'gcc -ansi -pedantic' on GNU/Linux */ |
---|
19 | #ifndef _BSD_SOURCE |
---|
20 | # define _BSD_SOURCE 1 |
---|
21 | #endif |
---|
22 | #include <sys/types.h> |
---|
23 | |
---|
24 | #include "gnome-vfs-mime-sniff-buffer-private.h" |
---|
25 | #include "gnome-vfs-mime.h" |
---|
26 | #include "gnome-vfs-private-utils.h" |
---|
27 | |
---|
28 | #include <ctype.h> |
---|
29 | #include <unistd.h> |
---|
30 | #include <fcntl.h> |
---|
31 | #include <stdio.h> |
---|
32 | #include <string.h> |
---|
33 | #include <sys/types.h> |
---|
34 | #include <sys/stat.h> |
---|
35 | |
---|
36 | |
---|
37 | static gboolean |
---|
38 | is_octal_digit (char ch) |
---|
39 | { |
---|
40 | return ch >= '0' && ch <= '7'; |
---|
41 | } |
---|
42 | |
---|
43 | static gboolean |
---|
44 | is_hex_digit (char ch) |
---|
45 | { |
---|
46 | if (ch >= '0' && ch <= '9') { |
---|
47 | return TRUE; |
---|
48 | } |
---|
49 | if (ch >= 'a' && ch <= 'f') { |
---|
50 | return TRUE; |
---|
51 | } |
---|
52 | |
---|
53 | return (ch >= 'A' && ch <= 'F'); |
---|
54 | } |
---|
55 | |
---|
56 | /* FIXME bugzilla.eazel.com 2760: |
---|
57 | * should return error here |
---|
58 | */ |
---|
59 | static guchar |
---|
60 | read_octal_byte (const char **pos) |
---|
61 | { |
---|
62 | guchar retval = 0; |
---|
63 | int count; |
---|
64 | |
---|
65 | for (count = 0; count < 3; count++) { |
---|
66 | if (!is_octal_digit (**pos)) { |
---|
67 | g_error ("bad octal digit %c", **pos); |
---|
68 | return retval; |
---|
69 | } |
---|
70 | |
---|
71 | retval *= 8; |
---|
72 | retval += **pos - '0'; |
---|
73 | (*pos)++; |
---|
74 | } |
---|
75 | |
---|
76 | return retval; |
---|
77 | } |
---|
78 | |
---|
79 | /* FIXME bugzilla.eazel.com 2760: |
---|
80 | * should return error here |
---|
81 | */ |
---|
82 | static guchar |
---|
83 | read_hex_byte (const char **pos) |
---|
84 | { |
---|
85 | guchar retval = 0; |
---|
86 | int count; |
---|
87 | |
---|
88 | for (count = 0; ; count++) { |
---|
89 | if (!is_hex_digit (**pos)) { |
---|
90 | g_error ("bad hex digit %c", **pos); |
---|
91 | return retval; |
---|
92 | } |
---|
93 | if (**pos >= '0' && **pos <= '9') { |
---|
94 | retval += **pos - '0'; |
---|
95 | } else { |
---|
96 | retval += tolower (**pos) - 'a' + 10; |
---|
97 | } |
---|
98 | |
---|
99 | (*pos)++; |
---|
100 | if (count >= 1) { |
---|
101 | break; |
---|
102 | } |
---|
103 | retval *= 16; |
---|
104 | } |
---|
105 | |
---|
106 | return retval; |
---|
107 | } |
---|
108 | |
---|
109 | /* FIXME bugzilla.eazel.com 2760: |
---|
110 | * should return error here |
---|
111 | */ |
---|
112 | static const char * |
---|
113 | read_string_val (const char *scanner, char *intobuf, int max_len, guint16 *into_len) |
---|
114 | { |
---|
115 | char *intobufend; |
---|
116 | char ch; |
---|
117 | |
---|
118 | intobufend = intobuf + max_len - 1; |
---|
119 | *into_len = 0; |
---|
120 | |
---|
121 | while (*scanner && !isspace ((unsigned char)*scanner) && *scanner != '#') { |
---|
122 | ch = *scanner++; |
---|
123 | |
---|
124 | switch (ch) { |
---|
125 | case '\\': |
---|
126 | switch (*scanner) { |
---|
127 | case 'x': |
---|
128 | /* read hex value */ |
---|
129 | scanner++; |
---|
130 | ch = read_hex_byte (&scanner); |
---|
131 | break; |
---|
132 | case '0': |
---|
133 | case '1': |
---|
134 | case '2': |
---|
135 | case '3': |
---|
136 | /* read octal value */ |
---|
137 | ch = read_octal_byte (&scanner); |
---|
138 | break; |
---|
139 | case 'n': |
---|
140 | ch = '\n'; |
---|
141 | scanner++; |
---|
142 | break; |
---|
143 | default: |
---|
144 | /* everything else is a literal */ |
---|
145 | ch = *scanner; |
---|
146 | scanner++; |
---|
147 | break; |
---|
148 | } |
---|
149 | break; |
---|
150 | default: |
---|
151 | break; |
---|
152 | /* already setup c/moved scanner */ |
---|
153 | } |
---|
154 | if (intobuf < intobufend) { |
---|
155 | *intobuf++=ch; |
---|
156 | (*into_len)++; |
---|
157 | } |
---|
158 | } |
---|
159 | |
---|
160 | *intobuf = '\0'; |
---|
161 | return scanner; |
---|
162 | } |
---|
163 | |
---|
164 | static const char * |
---|
165 | read_hex_pattern (const char *scanner, char *result, int length) |
---|
166 | { |
---|
167 | if (*scanner == '0') { |
---|
168 | scanner++; |
---|
169 | } |
---|
170 | if (*scanner++ != 'x') { |
---|
171 | return NULL; |
---|
172 | } |
---|
173 | for (;length > 0; length--) { |
---|
174 | if (!is_hex_digit (scanner[0]) || !is_hex_digit (scanner[1])) { |
---|
175 | return NULL; |
---|
176 | } |
---|
177 | *result++ = read_hex_byte (&scanner); |
---|
178 | } |
---|
179 | |
---|
180 | return scanner; |
---|
181 | } |
---|
182 | |
---|
183 | static gboolean |
---|
184 | read_num_val(const char **offset, int bsize, int *result) |
---|
185 | { |
---|
186 | char fmttype, fmtstr[4]; |
---|
187 | const char *scanner = *offset; |
---|
188 | |
---|
189 | if (*scanner == '0') { |
---|
190 | if (tolower (scanner[1]) == 'x') { |
---|
191 | fmttype = 'x'; |
---|
192 | } else { |
---|
193 | fmttype = 'o'; |
---|
194 | } |
---|
195 | } else { |
---|
196 | fmttype = 'u'; |
---|
197 | } |
---|
198 | |
---|
199 | switch (bsize) { |
---|
200 | case 1: |
---|
201 | fmtstr[0] = '%'; |
---|
202 | fmtstr[1] = fmttype; |
---|
203 | fmtstr[2] = '\0'; |
---|
204 | if (sscanf (scanner, fmtstr, result) < 1) { |
---|
205 | return FALSE; |
---|
206 | } |
---|
207 | break; |
---|
208 | case 2: |
---|
209 | fmtstr[0] = '%'; |
---|
210 | fmtstr[1] = 'h'; |
---|
211 | fmtstr[2] = fmttype; |
---|
212 | fmtstr[3] = '\0'; |
---|
213 | if (sscanf (scanner, fmtstr, result) < 1) { |
---|
214 | return FALSE; |
---|
215 | } |
---|
216 | break; |
---|
217 | case 4: |
---|
218 | fmtstr[0] = '%'; |
---|
219 | fmtstr[1] = fmttype; |
---|
220 | fmtstr[2] = '\0'; |
---|
221 | if (sscanf (scanner, fmtstr, result) < 1) { |
---|
222 | return FALSE; |
---|
223 | } |
---|
224 | break; |
---|
225 | } |
---|
226 | |
---|
227 | while (**offset && !isspace ((guchar)**offset)) { |
---|
228 | (*offset)++; |
---|
229 | } |
---|
230 | |
---|
231 | return TRUE; |
---|
232 | } |
---|
233 | |
---|
/* Return the first position at or after @scanner that does not hold a
 * white space character; stops at the terminating NUL.
 */
static const char *
eat_white_space (const char *scanner)
{
	for (; *scanner != '\0'; scanner++) {
		if (!isspace ((unsigned char)*scanner)) {
			break;
		}
	}

	return scanner;
}
---|
242 | |
---|
243 | static gboolean |
---|
244 | match_pattern (const char *scanner, const char **resulting_scanner, const char *pattern) |
---|
245 | { |
---|
246 | if (strncmp(scanner, pattern, strlen (pattern)) == 0) { |
---|
247 | *resulting_scanner = scanner + strlen (pattern); |
---|
248 | return TRUE; |
---|
249 | } |
---|
250 | *resulting_scanner = scanner; |
---|
251 | return FALSE; |
---|
252 | } |
---|
253 | |
---|
/**
 * gnome_vfs_mime_magic_parse:
 * @filename: path of the magic description file to read
 * @nents: if non-NULL, receives the number of entries in the returned
 * array, including the terminating T_END entry
 *
 * Reads @filename line by line (lines are read in chunks of at most
 * 255 characters) and turns every well-formed line into a GnomeMagicEntry.
 * A line has the form
 *
 *     offset[:range_end]  type  pattern  [&mask]  mimetype
 *
 * Lines that are empty, start with '#', do not start with a digit, name
 * an unknown type, or end before the mimetype field are skipped.
 *
 * Returns: the entries as a plain array terminated by an entry whose
 * type is T_END, or NULL if @filename is NULL or cannot be opened. The
 * array storage is detached from its GArray wrapper before returning.
 */
GnomeMagicEntry *
gnome_vfs_mime_magic_parse (const gchar *filename, gint *nents)
{
	GArray *array;
	GnomeMagicEntry newent, *retval;
	FILE *infile;
	const char *infile_name;
	int bsize = 0;
	char parsed_line [256];
	const char *scanner;
	int index;

	infile_name = filename;

	if (!infile_name) {
		return NULL;
	}

	infile = fopen (infile_name, "r");
	if (!infile) {
		return NULL;
	}

	array = g_array_new (FALSE, FALSE, sizeof (GnomeMagicEntry));

	/* newent is reused for every line; fields not set for a given line
	 * keep whatever the previous line left in them.
	 */
	while (fgets (parsed_line, sizeof (parsed_line), infile)) {
		scanner = parsed_line;

		/* eat the head */
		scanner = eat_white_space (scanner);

		if (!*scanner || *scanner == '#') {
			continue;
		}

		/* an entry must start with its numeric offset */
		if (!isdigit ((guchar)*scanner)) {
			continue;
		}

		if (sscanf (scanner, "%hu", &newent.range_start) < 1) {
			continue;
		}
		/* without an explicit range the pattern is tried at one offset only */
		newent.range_end = newent.range_start;

		while (*scanner && isdigit ((guchar)*scanner)) {
			scanner++; /* eat the offset */
		}

		if (*scanner == ':') {
			/* handle an offset range */
			scanner++;
			if (sscanf (scanner, "%hu", &newent.range_end) < 1) {
				continue;
			}
		}

		while (*scanner && !isspace ((guchar)*scanner)) {
			scanner++; /* eat the offset */
		}

		scanner = eat_white_space (scanner);

		if (!*scanner || *scanner == '#') {
			continue;
		}

		/* type keyword; match_pattern advances scanner on success */
		if (match_pattern (scanner, &scanner, "byte")) {
			newent.type = T_BYTE;
		} else if (match_pattern (scanner, &scanner, "short")) {
			newent.type = T_SHORT;
		} else if (match_pattern (scanner, &scanner, "long")) {
			newent.type = T_LONG;
		} else if (match_pattern (scanner, &scanner, "string")) {
			newent.type = T_STR;
		} else if (match_pattern (scanner, &scanner, "date")) {
			newent.type = T_DATE;
		} else if (match_pattern (scanner, &scanner, "beshort")) {
			newent.type = T_BESHORT;
		} else if (match_pattern (scanner, &scanner, "belong")) {
			newent.type = T_BELONG;
		} else if (match_pattern (scanner, &scanner, "bedate")) {
			newent.type = T_BEDATE;
		} else if (match_pattern (scanner, &scanner, "leshort")) {
			newent.type = T_LESHORT;
		} else if (match_pattern (scanner, &scanner, "lelong")) {
			newent.type = T_LELONG;
		} else if (match_pattern (scanner, &scanner, "ledate")) {
			newent.type = T_LEDATE;
		} else
			continue; /* weird type */

		scanner = eat_white_space (scanner);
		if (!*scanner || *scanner == '#') {
			continue;
		}

		/* width in bytes of the numeric types; left untouched for T_STR */
		switch (newent.type) {
		case T_BYTE:
			bsize = 1;
			break;

		case T_SHORT:
		case T_BESHORT:
		case T_LESHORT:
			bsize = 2;
			break;

		case T_LONG:
		case T_BELONG:
		case T_LELONG:
			bsize = 4;
			break;

		case T_DATE:
		case T_BEDATE:
		case T_LEDATE:
			bsize = 4;
			break;

		default:
			/* do nothing */
			break;
		}

		if (newent.type == T_STR) {
			/* string pattern: length is the number of decoded bytes */
			scanner = read_string_val (scanner, newent.pattern,
						   sizeof (newent.pattern), &newent.pattern_length);
		} else {
			/* numeric pattern: stored in the leading bytes of newent.pattern */
			newent.pattern_length = bsize;
			if (!read_num_val (&scanner, bsize, (int *)&newent.pattern)) {
				continue;
			}
		}

		scanner = eat_white_space (scanner);
		if (!*scanner || *scanner == '#') {
			continue;
		}

		if (*scanner == '&') {
			/* optional mask: as many hex bytes as the pattern is long */
			scanner++;
			scanner = read_hex_pattern (scanner, &newent.mask [0], newent.pattern_length);
			if (!scanner) {
				/* NOTE(review): g_error is presumably fatal in GLib, which
				 * would make the continue below unreachable - confirm.
				 */
				g_error ("bad mask");
				continue;
			}
			newent.use_mask = TRUE;

			for (index = 0; index < newent.pattern_length; index++) {
				/* Apply the mask to the pattern itself so we don't have to
				 * do it each time we compare it with the tested bytes.
				 */
				newent.pattern[index] &= newent.mask[index];
			}
		} else {
			newent.use_mask = FALSE;
		}

		scanner = eat_white_space (scanner);
		if (!*scanner || *scanner == '#') {
			continue;
		}

		/* the rest of the line is the mime type; strip trailing white
		 * space (bsize is reused here as an index into the string)
		 */
		g_snprintf (newent.mimetype, sizeof (newent.mimetype), "%s", scanner);
		bsize = strlen (newent.mimetype) - 1;
		while (newent.mimetype [bsize] && isspace ((guchar)(newent.mimetype [bsize]))) {
			newent.mimetype [bsize--] = '\0';
		}

		g_array_append_val (array, newent);
	}
	fclose(infile);

	/* terminating sentinel; only its type field is meaningful */
	newent.type = T_END;
	g_array_append_val (array, newent);

	retval = (GnomeMagicEntry *)array->data;
	if (nents) {
		*nents = array->len;
	}

	/* FALSE: release only the GArray wrapper, keep the element data */
	g_array_free (array, FALSE);

	return retval;
}
---|
439 | |
---|
440 | static void |
---|
441 | endian_swap (guchar *result, const guchar *data, gsize length) |
---|
442 | { |
---|
443 | const guchar *source_ptr = data; |
---|
444 | guchar *dest_ptr = result + length - 1; |
---|
445 | while (dest_ptr >= result) { |
---|
446 | *dest_ptr-- = *source_ptr++; |
---|
447 | } |
---|
448 | } |
---|
449 | |
---|
/* Range of magic entry types whose sniffed data has to be byte swapped
 * before it can be compared on this host: the big-endian types on a
 * little-endian host and the little-endian types otherwise. The range
 * test in try_one_pattern_on_buffer relies on each family being
 * contiguous in the type enumeration, short first and date last
 * (assumed from the way the macros are used - the enum is declared elsewhere).
 */
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
#define FIRST_ENDIAN_DEPENDENT_TYPE T_BESHORT
#define LAST_ENDIAN_DEPENDENT_TYPE T_BEDATE
#else
#define FIRST_ENDIAN_DEPENDENT_TYPE T_LESHORT
#define LAST_ENDIAN_DEPENDENT_TYPE T_LEDATE
#endif
---|
457 | |
---|
458 | static gboolean |
---|
459 | try_one_pattern_on_buffer (const char *sniffed_stream, GnomeMagicEntry *magic_entry) |
---|
460 | { |
---|
461 | gboolean using_cloned_pattern; |
---|
462 | char pattern_clone [48]; |
---|
463 | int index, count; |
---|
464 | const char *pattern; |
---|
465 | |
---|
466 | using_cloned_pattern = FALSE; |
---|
467 | if (magic_entry->type >= FIRST_ENDIAN_DEPENDENT_TYPE && magic_entry->type <= LAST_ENDIAN_DEPENDENT_TYPE) { |
---|
468 | /* Endian-convert the data we are trying to recognize to |
---|
469 | * our host endianness. |
---|
470 | */ |
---|
471 | char swap_buffer [sizeof(magic_entry->pattern)]; |
---|
472 | |
---|
473 | g_assert(magic_entry->pattern_length <= 4); |
---|
474 | |
---|
475 | memcpy (swap_buffer, sniffed_stream, magic_entry->pattern_length); |
---|
476 | |
---|
477 | endian_swap (pattern_clone, swap_buffer, magic_entry->pattern_length); |
---|
478 | sniffed_stream = &pattern_clone[0]; |
---|
479 | using_cloned_pattern = TRUE; |
---|
480 | } |
---|
481 | |
---|
482 | if (magic_entry->use_mask) { |
---|
483 | /* Apply mask to the examined data. At this point the data in |
---|
484 | * sniffed_stream is in the same endianness as the mask. |
---|
485 | */ |
---|
486 | |
---|
487 | if (!using_cloned_pattern) { |
---|
488 | memcpy (pattern_clone, sniffed_stream, magic_entry->pattern_length); |
---|
489 | using_cloned_pattern = TRUE; |
---|
490 | sniffed_stream = &pattern_clone[0]; |
---|
491 | } |
---|
492 | |
---|
493 | for (index = 0; index < magic_entry->pattern_length; index++) { |
---|
494 | pattern_clone[index] &= magic_entry->mask[index]; |
---|
495 | } |
---|
496 | } |
---|
497 | |
---|
498 | if (*magic_entry->pattern != *sniffed_stream) { |
---|
499 | return FALSE; |
---|
500 | } |
---|
501 | |
---|
502 | for (count = magic_entry->pattern_length, pattern = magic_entry->pattern; |
---|
503 | count > 0; count--) { |
---|
504 | if (*pattern++ != *sniffed_stream++) { |
---|
505 | return FALSE; |
---|
506 | } |
---|
507 | } |
---|
508 | return TRUE; |
---|
509 | } |
---|
510 | |
---|
/* NOTE(review): not referenced in the visible part of this file;
 * presumably a chunk size used when growing the sniff buffer - confirm
 * against the sniff buffer implementation.
 */
enum {
	SNIFF_BUFFER_CHUNK = 32
};
---|
514 | |
---|
515 | |
---|
516 | static gboolean |
---|
517 | gnome_vfs_mime_try_one_magic_pattern (GnomeVFSMimeSniffBuffer *sniff_buffer, |
---|
518 | GnomeMagicEntry *magic_entry) |
---|
519 | { |
---|
520 | int offset; |
---|
521 | |
---|
522 | if (sniff_buffer->read_whole_file && |
---|
523 | sniff_buffer->buffer_length < magic_entry->range_start + magic_entry->pattern_length) { |
---|
524 | /* There's no place this pattern could actually match */ |
---|
525 | return FALSE; |
---|
526 | } |
---|
527 | for (offset = magic_entry->range_start; offset <= magic_entry->range_end; offset++) { |
---|
528 | /* this check is done only as an optimization |
---|
529 | * gnome_vfs_mime_sniff_buffer_get already implements the laziness. |
---|
530 | * This gets called a million times though and every bit performance |
---|
531 | * is valuable. This way we avoid making the call. |
---|
532 | */ |
---|
533 | if (sniff_buffer->buffer_length < offset + magic_entry->pattern_length && |
---|
534 | !sniff_buffer->read_whole_file) { |
---|
535 | if (gnome_vfs_mime_sniff_buffer_get (sniff_buffer, |
---|
536 | offset + magic_entry->pattern_length) != GNOME_VFS_OK) { |
---|
537 | return FALSE; |
---|
538 | } |
---|
539 | } |
---|
540 | |
---|
541 | if (try_one_pattern_on_buffer (sniff_buffer->buffer + offset, magic_entry)) { |
---|
542 | return TRUE; |
---|
543 | } |
---|
544 | } |
---|
545 | return FALSE; |
---|
546 | } |
---|
547 | |
---|
/* We lock this mutex whenever we modify global state in this module. */
G_LOCK_DEFINE_STATIC (mime_magic_table_mutex);

/* Cached parsed magic table. NULL until it is loaded on first use and
 * again after gnome_vfs_mime_clear_magic_table has released it.
 */
static GnomeMagicEntry *mime_magic_table = NULL;
---|
552 | |
---|
553 | static GnomeMagicEntry * |
---|
554 | gnome_vfs_mime_get_magic_table (void) |
---|
555 | { |
---|
556 | char *filename; |
---|
557 | |
---|
558 | G_LOCK (mime_magic_table_mutex); |
---|
559 | |
---|
560 | if (mime_magic_table == NULL) { |
---|
561 | filename = g_strconcat (GNOME_VFS_CONFDIR, "/gnome-vfs-mime-magic", NULL); |
---|
562 | mime_magic_table = gnome_vfs_mime_magic_parse (filename, NULL); |
---|
563 | g_free (filename); |
---|
564 | } |
---|
565 | |
---|
566 | G_UNLOCK (mime_magic_table_mutex); |
---|
567 | |
---|
568 | return mime_magic_table; |
---|
569 | } |
---|
570 | |
---|
571 | const char * |
---|
572 | gnome_vfs_mime_get_type_from_magic_table (GnomeVFSMimeSniffBuffer *buffer) |
---|
573 | { |
---|
574 | GnomeMagicEntry *magic_table; |
---|
575 | |
---|
576 | magic_table = gnome_vfs_mime_get_magic_table (); |
---|
577 | if (magic_table == NULL) { |
---|
578 | return NULL; |
---|
579 | } |
---|
580 | |
---|
581 | for (; magic_table->type != T_END; magic_table++) { |
---|
582 | if (gnome_vfs_mime_try_one_magic_pattern (buffer, magic_table)) { |
---|
583 | return magic_table->mimetype; |
---|
584 | } |
---|
585 | } |
---|
586 | return NULL; |
---|
587 | } |
---|
588 | |
---|
589 | |
---|
590 | GnomeMagicEntry * |
---|
591 | gnome_vfs_mime_test_get_magic_table (const char *table_path) |
---|
592 | { |
---|
593 | G_LOCK (mime_magic_table_mutex); |
---|
594 | if (mime_magic_table == NULL) { |
---|
595 | mime_magic_table = gnome_vfs_mime_magic_parse (table_path, NULL); |
---|
596 | } |
---|
597 | G_UNLOCK (mime_magic_table_mutex); |
---|
598 | |
---|
599 | return mime_magic_table; |
---|
600 | } |
---|
601 | |
---|
/* Lookup string mapping a nibble value 0..15 to its lower-case hex digit. */
#define HEX_DIGITS "0123456789abcdef"
---|
603 | |
---|
604 | static void |
---|
605 | print_escaped_string (const guchar *string, int length) |
---|
606 | { |
---|
607 | for (; length > 0; length--, string++) { |
---|
608 | if (*string == '\\' || *string == '#') { |
---|
609 | /* escape \, #, etc. properly */ |
---|
610 | printf ("\\%c", *string); |
---|
611 | } else if (isprint (*string) && *string > ' ') { |
---|
612 | /* everything printable except for white space can go directly */ |
---|
613 | printf ("%c", *string); |
---|
614 | } else { |
---|
615 | /* everything else goes in hex */ |
---|
616 | printf ("\\x%c%c", HEX_DIGITS[(*string) / 16], HEX_DIGITS[(*string) % 16]); |
---|
617 | } |
---|
618 | } |
---|
619 | } |
---|
620 | |
---|
621 | static void |
---|
622 | print_hex_pattern (const guchar *string, int length) |
---|
623 | { |
---|
624 | printf ("\\x"); |
---|
625 | for (; length > 0; length--, string++) { |
---|
626 | printf ("%c%c", HEX_DIGITS[(*string) / 16], HEX_DIGITS[(*string) % 16]); |
---|
627 | } |
---|
628 | } |
---|
629 | void |
---|
630 | gnome_vfs_mime_dump_magic_table (void) |
---|
631 | { |
---|
632 | GnomeMagicEntry *magic_table; |
---|
633 | |
---|
634 | magic_table = gnome_vfs_mime_get_magic_table (); |
---|
635 | if (magic_table == NULL) { |
---|
636 | return; |
---|
637 | } |
---|
638 | |
---|
639 | for (; magic_table->type != T_END; magic_table++) { |
---|
640 | printf ("%d", magic_table->range_start); |
---|
641 | if (magic_table->range_start != magic_table->range_end) { |
---|
642 | printf (":%d", magic_table->range_end); |
---|
643 | } |
---|
644 | printf ("\t"); |
---|
645 | switch (magic_table->type) { |
---|
646 | case T_BYTE: |
---|
647 | printf("byte"); |
---|
648 | break; |
---|
649 | case T_SHORT: |
---|
650 | printf("short"); |
---|
651 | break; |
---|
652 | case T_LONG: |
---|
653 | printf("long"); |
---|
654 | break; |
---|
655 | case T_STR: |
---|
656 | printf("string"); |
---|
657 | break; |
---|
658 | case T_DATE: |
---|
659 | printf("date"); |
---|
660 | break; |
---|
661 | case T_BESHORT: |
---|
662 | printf("beshort"); |
---|
663 | break; |
---|
664 | case T_BELONG: |
---|
665 | printf("belong"); |
---|
666 | break; |
---|
667 | case T_BEDATE: |
---|
668 | printf("bedate"); |
---|
669 | break; |
---|
670 | case T_LESHORT: |
---|
671 | printf("leshort"); |
---|
672 | break; |
---|
673 | case T_LELONG: |
---|
674 | printf("lelong"); |
---|
675 | break; |
---|
676 | case T_LEDATE: |
---|
677 | printf("ledate"); |
---|
678 | break; |
---|
679 | default: |
---|
680 | break; |
---|
681 | } |
---|
682 | printf ("\t"); |
---|
683 | print_escaped_string (magic_table->pattern, magic_table->pattern_length); |
---|
684 | if (magic_table->use_mask) { |
---|
685 | printf (" &"); |
---|
686 | print_hex_pattern (magic_table->mask, magic_table->pattern_length); |
---|
687 | } |
---|
688 | printf ("\t%s\n", magic_table->mimetype); |
---|
689 | } |
---|
690 | } |
---|
691 | |
---|
692 | void |
---|
693 | gnome_vfs_mime_clear_magic_table (void) |
---|
694 | { |
---|
695 | G_LOCK (mime_magic_table_mutex); |
---|
696 | g_free (mime_magic_table); |
---|
697 | mime_magic_table = NULL; |
---|
698 | G_UNLOCK (mime_magic_table_mutex); |
---|
699 | } |
---|
700 | |
---|
701 | /** |
---|
702 | * gnome_vfs_get_mime_type_for_buffer: |
---|
703 | * @buffer: a sniff buffer referencing either a file or data in memory |
---|
704 | * |
---|
705 | * This routine uses a magic database to guess the mime type of the |
---|
706 | * data represented by @buffer. |
---|
707 | * |
---|
708 | * Returns a pointer to an internal copy of the mime-type for @buffer. |
---|
709 | */ |
---|
710 | const char * |
---|
711 | gnome_vfs_get_mime_type_for_buffer (GnomeVFSMimeSniffBuffer *buffer) |
---|
712 | { |
---|
713 | return gnome_vfs_get_mime_type_internal (buffer, NULL); |
---|
714 | } |
---|
715 | |
---|
/* Number of leading bytes requested from the sniff buffer by
 * gnome_vfs_sniff_buffer_looks_like_text.
 */
enum {
	GNOME_VFS_TEXT_SNIFF_LENGTH = 256
};
---|
719 | |
---|
720 | gboolean |
---|
721 | gnome_vfs_sniff_buffer_looks_like_text (GnomeVFSMimeSniffBuffer *sniff_buffer) |
---|
722 | { |
---|
723 | int index; |
---|
724 | guchar ch; |
---|
725 | |
---|
726 | gnome_vfs_mime_sniff_buffer_get (sniff_buffer, GNOME_VFS_TEXT_SNIFF_LENGTH); |
---|
727 | |
---|
728 | if (sniff_buffer->buffer_length == 0) { |
---|
729 | return FALSE; |
---|
730 | } |
---|
731 | |
---|
732 | for (index = 0; index < sniff_buffer->buffer_length - 3; index++) { |
---|
733 | ch = sniff_buffer->buffer[index]; |
---|
734 | if (!isprint (ch) && !isspace(ch)) { |
---|
735 | /* check if we are dealing with UTF-8 text |
---|
736 | * |
---|
737 | * bytes | bits | representation |
---|
738 | * 1 | 7 | 0vvvvvvv |
---|
739 | * 2 | 11 | 110vvvvv 10vvvvvv |
---|
740 | * 3 | 16 | 1110vvvv 10vvvvvv 10vvvvvv |
---|
741 | * 4 | 21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv |
---|
742 | */ |
---|
743 | if ((ch & 0xc0) != 0xc0) { |
---|
744 | /* not a UTF-8 text */ |
---|
745 | return FALSE; |
---|
746 | } |
---|
747 | |
---|
748 | if ((ch & 0x20) == 0) { |
---|
749 | /* check if this is a 2-byte UTF-8 letter */ |
---|
750 | ++index; |
---|
751 | if ((sniff_buffer->buffer[index] & 0xc0) != 0x80) { |
---|
752 | return FALSE; |
---|
753 | } |
---|
754 | } else if ((ch & 0x30) == 0x20) { |
---|
755 | /* check if this is a 3-byte UTF-8 letter */ |
---|
756 | if ((sniff_buffer->buffer[++index] & 0xc0) != 0x80 |
---|
757 | || (sniff_buffer->buffer[++index] & 0xc0) != 0x80) { |
---|
758 | return FALSE; |
---|
759 | } |
---|
760 | } else if ((ch & 0x38) == 0x30) { |
---|
761 | /* check if this is a 4-byte UTF-8 letter */ |
---|
762 | if ((sniff_buffer->buffer[++index] & 0xc0) != 0x80 |
---|
763 | || (sniff_buffer->buffer[++index] & 0xc0) != 0x80 |
---|
764 | || (sniff_buffer->buffer[++index] & 0xc0) != 0x80) { |
---|
765 | return FALSE; |
---|
766 | } |
---|
767 | } |
---|
768 | } |
---|
769 | } |
---|
770 | |
---|
771 | return TRUE; |
---|
772 | } |
---|
773 | |
---|
/* Bit rate table indexed as [ver - 1][bitrate index] by
 * get_mp3_frame_length, where ver is 1 or 2 and the bitrate index is
 * the 4-bit field of the frame header. Index 0 is never looked up
 * there. Values are presumably in kbit/s - confirm against the MPEG
 * audio header specification.
 */
static int bitrates[2][15] = {
	{ 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320},
	{ 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160 }
};

/* Sampling frequency table indexed as [ver - 1][sampling rate index]
 * by get_mp3_frame_length; the values are presumably in Hz - confirm.
 */
static int frequencies[2][3] = {
	{ 44100, 48000, 32000 },
	{ 22050, 24000, 16000 }
};
---|
783 | |
---|
784 | /* |
---|
785 | * Return length of an MP3 frame using potential 32-bit header value. See |
---|
786 | * "http://www.dv.co.yu/mpgscript/mpeghdr.htm" for details on the header |
---|
787 | * format. |
---|
788 | * |
---|
789 | * NOTE: As an optimization and because they are rare, this returns 0 for |
---|
790 | * version 2.5 or free format MP3s. |
---|
791 | */ |
---|
792 | static gsize |
---|
793 | get_mp3_frame_length (unsigned long mp3_header) |
---|
794 | { |
---|
795 | int ver = 4 - ((mp3_header >> 19) & 3u); |
---|
796 | int br = (mp3_header >> 12) & 0xfu; |
---|
797 | int srf = (mp3_header >> 10) & 3u; |
---|
798 | |
---|
799 | /* are frame sync and layer 3 bits set? */ |
---|
800 | if (((mp3_header & 0xffe20000ul) == 0xffe20000ul) |
---|
801 | /* good version? */ |
---|
802 | && ((ver == 1) || (ver == 2)) |
---|
803 | /* good bitrate index (not free or invalid)? */ |
---|
804 | && (br > 0) && (br < 15) |
---|
805 | /* good sampling rate frequency index? */ |
---|
806 | && (srf != 3) |
---|
807 | /* not using reserved emphasis value? */ |
---|
808 | && ((mp3_header & 3u) != 2)) { |
---|
809 | /* then this is most likely the beginning of a valid frame */ |
---|
810 | |
---|
811 | gsize length = (gsize) bitrates[ver - 1][br] * 144000; |
---|
812 | length /= frequencies[ver - 1][srf]; |
---|
813 | return length += ((mp3_header >> 9) & 1u) - 4; |
---|
814 | } |
---|
815 | return 0; |
---|
816 | } |
---|
817 | |
---|
/* Assemble the four bytes at @bytes into one value, first byte most
 * significant.
 */
static unsigned long
get_4_byte_value (const unsigned char *bytes)
{
	return ((unsigned long) bytes[0] << 24)
		| ((unsigned long) bytes[1] << 16)
		| ((unsigned long) bytes[2] << 8)
		| (unsigned long) bytes[3];
}
---|
830 | |
---|
/* Number of leading bytes gnome_vfs_sniff_buffer_looks_like_mp3
 * requests from the sniff buffer and scans for a frame header.
 */
enum {
	GNOME_VFS_MP3_SNIFF_LENGTH = 256
};
---|
834 | |
---|
835 | gboolean |
---|
836 | gnome_vfs_sniff_buffer_looks_like_mp3 (GnomeVFSMimeSniffBuffer *sniff_buffer) |
---|
837 | { |
---|
838 | unsigned long mp3_header; |
---|
839 | int offset; |
---|
840 | |
---|
841 | if (gnome_vfs_mime_sniff_buffer_get (sniff_buffer, GNOME_VFS_MP3_SNIFF_LENGTH) != GNOME_VFS_OK) { |
---|
842 | return FALSE; |
---|
843 | } |
---|
844 | |
---|
845 | /* |
---|
846 | * Use algorithm described in "ID3 tag version 2.3.0 Informal Standard" |
---|
847 | * at "http://www.id3.org/id3v2.3.0.html" to detect a valid header, "An |
---|
848 | * ID3v2 tag can be detected with the following pattern: |
---|
849 | * $49 44 33 yy yy xx zz zz zz zz |
---|
850 | * Where yy is less than $FF, xx is the 'flags' byte and zz is less than |
---|
851 | * $80." |
---|
852 | * |
---|
853 | * The informal standard also says, "The ID3v2 tag size is encoded with |
---|
854 | * four bytes where the most significant bit (bit 7) is set to zero in |
---|
855 | * every byte, making a total of 28 bits. The zeroed bits are ignored, |
---|
856 | * so a 257 bytes long tag is represented as $00 00 02 01." |
---|
857 | */ |
---|
858 | if (strncmp ((char *) sniff_buffer->buffer, "ID3", 3) == 0 |
---|
859 | && (sniff_buffer->buffer[3] != 0xffu) |
---|
860 | && (sniff_buffer->buffer[4] != 0xffu) |
---|
861 | && (sniff_buffer->buffer[6] < 0x80u) |
---|
862 | && (sniff_buffer->buffer[7] < 0x80u) |
---|
863 | && (sniff_buffer->buffer[8] < 0x80u) |
---|
864 | && (sniff_buffer->buffer[9] < 0x80u)) { |
---|
865 | return TRUE; |
---|
866 | } |
---|
867 | |
---|
868 | /* |
---|
869 | * Scan through the first "GNOME_VFS_MP3_SNIFF_LENGTH" bytes of the |
---|
870 | * buffer to find a potential 32-bit MP3 frame header. |
---|
871 | */ |
---|
872 | mp3_header = 0; |
---|
873 | for (offset = 0; offset < GNOME_VFS_MP3_SNIFF_LENGTH; offset++) { |
---|
874 | gsize length; |
---|
875 | |
---|
876 | mp3_header <<= 8; |
---|
877 | mp3_header |= sniff_buffer->buffer[offset]; |
---|
878 | mp3_header &= 0xfffffffful; |
---|
879 | |
---|
880 | length = get_mp3_frame_length (mp3_header); |
---|
881 | |
---|
882 | if (length != 0) { |
---|
883 | /* |
---|
884 | * Since one frame is available, is there another frame |
---|
885 | * just to be sure this is more likely to be a real MP3 |
---|
886 | * buffer? |
---|
887 | */ |
---|
888 | offset += 1 + length; |
---|
889 | |
---|
890 | if (gnome_vfs_mime_sniff_buffer_get (sniff_buffer, offset + 4) != GNOME_VFS_OK) { |
---|
891 | return FALSE; |
---|
892 | } |
---|
893 | mp3_header = get_4_byte_value (&sniff_buffer->buffer[offset]); |
---|
894 | length = get_mp3_frame_length (mp3_header); |
---|
895 | |
---|
896 | if (length != 0) { |
---|
897 | return TRUE; |
---|
898 | } |
---|
899 | break; |
---|
900 | } |
---|
901 | } |
---|
902 | |
---|
903 | return FALSE; |
---|
904 | } |
---|
905 | |
---|
906 | gboolean |
---|
907 | gnome_vfs_sniff_buffer_looks_like_gzip (GnomeVFSMimeSniffBuffer *sniff_buffer, |
---|
908 | const char *file_name) |
---|
909 | { |
---|
910 | if (sniff_buffer == NULL) { |
---|
911 | return FALSE; |
---|
912 | } |
---|
913 | |
---|
914 | if (gnome_vfs_mime_sniff_buffer_get (sniff_buffer, 2) != GNOME_VFS_OK) { |
---|
915 | return FALSE; |
---|
916 | } |
---|
917 | |
---|
918 | if (sniff_buffer->buffer[0] != 0x1F || sniff_buffer->buffer[1] != 0x8B) { |
---|
919 | /* not a gzipped file */ |
---|
920 | return FALSE; |
---|
921 | } |
---|
922 | |
---|
923 | if (file_name == NULL) { |
---|
924 | return TRUE; |
---|
925 | } |
---|
926 | |
---|
927 | if (gnome_vfs_istr_has_suffix (file_name, ".gnumeric") |
---|
928 | || gnome_vfs_istr_has_suffix (file_name, ".abw") |
---|
929 | || gnome_vfs_istr_has_suffix (file_name, ".chrt") |
---|
930 | || gnome_vfs_istr_has_suffix (file_name, ".dia") |
---|
931 | || gnome_vfs_istr_has_suffix (file_name, ".kfo") |
---|
932 | || gnome_vfs_istr_has_suffix (file_name, ".kil") |
---|
933 | || gnome_vfs_istr_has_suffix (file_name, ".kivio") |
---|
934 | || gnome_vfs_istr_has_suffix (file_name, ".kpr") |
---|
935 | || gnome_vfs_istr_has_suffix (file_name, ".ksp") |
---|
936 | || gnome_vfs_istr_has_suffix (file_name, ".kwd") |
---|
937 | || gnome_vfs_istr_has_suffix (file_name, ".pdf")) { |
---|
938 | /* Have the suffix matching deal with figuring out the actual |
---|
939 | * MIME type. |
---|
940 | * FIXME bugzilla.eazel.com 6867: |
---|
941 | * Get rid of the hardcoded list and have a way to adjust it in the |
---|
942 | * mime magic, etc. files. |
---|
943 | */ |
---|
944 | return FALSE; |
---|
945 | } |
---|
946 | |
---|
947 | return TRUE; |
---|
948 | } |
---|
949 | |
---|