source: trunk/third/ispell/ijoin.c @ 17922

Revision 17922, 13.8 KB checked in by ghudson, 22 years ago (diff)
Remove crufty stuff Linux doesn't like.
Line 
/*
 * Revision-control identification string, embedded in the object file
 * so that the source revision can be recovered from a binary.  It is
 * omitted when the file is being processed by lint.
 */
#ifndef lint
static char Rcs_Id[] = "$Id: ijoin.c,v 1.2 2002-09-13 00:40:29 ghudson Exp $";
#endif /* lint */
5
6/*
7 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All modifications to the source code must be clearly marked as
20 *    such.  Binary redistributions based on modified source code
21 *    must be clearly marked as modified versions in the documentation
22 *    and/or other materials provided with the distribution.
23 * 4. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgment:
25 *      This product includes software developed by Geoff Kuenning and
26 *      other unpaid contributors.
27 * 5. The name of Geoff Kuenning may not be used to endorse or promote
28 *    products derived from this software without specific prior
29 *    written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 */
43
44/*
45 * "Join" command for ispell.
46 *
47 * This command is a complete reimplementation of the UNIX "join"
48 * command, except that fields cannot be separated by a newline, it
49 * can handle lines of unlimited length, and the preceding sort can
50 * treat characters as either signed or unsigned.
51 *
52 * Usage:
53 *
54 *      ijoin [options] file1 file2
55 *
56 * See the UNIX "join" manual page for option descriptions.  Only
57 * nonstandard options are described here.
58 *
59 * Either file1 or file2 may be "-", in which case the standard input
60 * is used for that file.
61 *
62 * Normally, ijoin uses "strcmp" to compare fields.  This is the
63 * correct thing to do on most systems if you are using the
64 * system-provided "sort" command to sort the input files before
65 * feeding them to ijoin.  In some cases, however, the sort command
66 * you use will disagree with strcmp about how to handle characters
67 * with the high bit set.  If this is the case, you can specify the
68 * "-s" (signed comparisons) or "-u" (unsigned comparisons) option to
69 * force ijoin to conform to the method used by the sort program.
70 * This is only necessary if one of the input files contains 8-bit
71 * characters in the field that is being joined on.
72 *
73 * On some older machines with non-ANSI compilers, the "-s" option
74 * will be ineffective because characters default to unsigned.
75 * However, this option should be unnecessary on those machines, so no
76 * harm will be done.
77 */
78
79/*
80 * $Log: not supported by cvs2svn $
81 * Revision 1.1.1.1  1997/09/03 21:08:09  ghudson
82 * Import of ispell 3.1.20
83 *
84 * Revision 1.6  1994/10/18  04:03:21  geoff
85 * Fix a couple of bugs, one where the last field on a line could be
86 * output incorrectly, and one where fields from the wrong file could be
87 * output.
88 *
89 * Revision 1.5  1994/01/25  07:11:36  geoff
90 * Get rid of all old RCS log lines in preparation for the 3.1 release.
91 *
92 */
93
94#include <stdio.h>
95#include "config.h"
96#include "ispell.h"
97#include "proto.h"
98#include "fields.h"
99
/*
 * SIGNED expands to the "signed" keyword when the compiler announces
 * itself as standard C, and to nothing on older compilers that do not
 * define __STDC__.
 */
#ifndef __STDC__
#define SIGNED
#else /* __STDC__ */
#define SIGNED  signed
#endif /* __STDC__ */
105
106int             main P ((int argc, char * argv[])); /* Join files */
107static void     usage P ((void));       /* Issue a usage message */
108static void     dojoin P ((void));      /* Actually perform the join */
109static void     full_output P ((field_t * line1, field_t * line2));
110                                        /* Output everything from both lines */
111static void     selected_output P ((field_t * line1, field_t * line2));
112                                        /* Output selected fields */
113static int      strscmp P ((SIGNED char * a, SIGNED char * b));
114                                        /* Signed version of strcmp */
115static int      strucmp P ((unsigned char * a, unsigned char * b));
116                                        /* Unsigned version of strcmp */
117
/*
 * One entry of the output description list built from the "-o"
 * switch: which input file to take a field from, and which field.
 */
typedef struct
    {
    int         file;                   /* Number of file to output from */
    int         field;                  /* Number of field to output */
    } outlist_t;
124
125static int              (*compare) () = strcmp; /* Comparison function */
126static char *           emptyfield = ""; /* Use this to replace empty fields */
127static FILE *           file1;          /* First file to join */
128static FILE *           file2;          /* Second file to join */
129static int              join1field = 0; /* Field to join file 1 on */
130static int              join2field = 0; /* Field to join file 2 on */
131static int              maxf[2] = {0, 0}; /* Max field to parse in each file */
132static outlist_t *      outlist = NULL; /* List of fields to write */
133static int              outlistsize;    /* Number of elements in outlist */
134static int              runs = FLD_RUNS; /* Set if runs of tabchar same as 1 */
135static char *           tabchar = " \t"; /* Field separator character(s) */
136static int              unpairable1 = 0; /* NZ if -a1 */
137static int              unpairable2 = 0; /* NZ if -a2 */
138
139int main (argc, argv)                   /* Join files */
140    int                 argc;           /* Argument count */
141    char *              argv[];         /* Argument vector */
142    {
143   
144    while (argc > 3  &&  argv[1][0] == '-')
145        {
146        argc--;
147        argv++;
148        switch (argv[0][1])
149            {
150            case 'a':                   /* produce output for unpairables */
151                if (argv[0][2] == '1')
152                    unpairable1 = 1;
153                else if (argv[0][2] == '2')
154                    unpairable2 = 1;
155                else if (argv[0][2] == '\0')
156                    unpairable1 = unpairable2 = 1;
157                else
158                    usage ();
159                break;
160            case 'e':                   /* Replace empty fields with this */
161                argc--;
162                argv++;
163                emptyfield = *argv;
164                break;
165            case 'j':                   /* Specify field to join on */
166                if (argv[0][2] == '1')
167                    join1field = atoi (argv[1]) - 1;
168                else if (argv[0][2] == '2')
169                    join2field = atoi (argv[1]) - 1;
170                else if (argv[0][2] == '\0')
171                    join1field = join2field = atoi (argv[1]) - 1;
172                else
173                    usage ();
174                argc--;
175                argv++;
176                break;
177            case 'o':                   /* Specify output list */
178                /*
179                 * We will assume that all remaining switch arguments
180                 * are used to describe the output list.  This will
181                 * occasionally result in malloc'ing a few too many
182                 * elements, but no real harm will be done.
183                 */
184                outlist =
185                  (outlist_t *) malloc ((argc - 3) * sizeof (outlist_t));
186                if (outlist == NULL)
187                    {
188                    (void) fprintf (stderr, "ijoin:  out of memory!\n");
189                    return 1;
190                    }
191                for (outlistsize = 0, argc--, argv++;
192                  argc > 2  &&  (argv[0][0] == '1'  ||  argv[0][0] == '2')
193                    &&  argv[0][1] == '.';
194                  argc--, argv++, outlistsize++)
195                    {
196                    outlist[outlistsize].file = argv[0][0] - '0';
197                    outlist[outlistsize].field = atoi (&argv[0][2]) - 1;
198                    if (maxf[outlist[outlistsize].file - 1]
199                      <= outlist[outlistsize].field)
200                        maxf[outlist[outlistsize].file - 1] =
201                          outlist[outlistsize].field + 1;
202                    }
203                argc++;                 /* Un-do arg that stopped us */
204                argv--;
205                break;
206            case 't':
207                tabchar = &argv[0][2];
208                runs &= ~FLD_RUNS;
209                break;
210            case 's':
211                compare = strscmp;
212                break;
213            case 'u':
214                compare = strucmp;
215                break;
216            default:
217                usage ();
218                break;
219            }
220        }
221    if (argc != 3)
222        usage ();
223    if (strcmp (argv[1], "-") == 0)
224        file1 = stdin;
225    else
226        {
227        file1 = fopen (argv[1], "r");
228        if (file1 == NULL)
229            perror (argv[1]);
230        }
231    file2 = fopen (argv[2], "r");
232    if (file2 == NULL)
233        perror (argv[2]);
234    if (file1 == NULL  ||  file2 == NULL)
235        return 1;
236    dojoin ();
237    return 0;
238    }
239
/*
 * Write the one-line command synopsis to the standard error stream
 * and terminate the program with exit status 1.  Does not return.
 */
static void usage ()                    /* Issue a usage message */
    {
    static char         synopsis[] =
      "Usage:  ijoin [-an] [-e s] [-jn m] [-o n.m ...] [-tc] file1 file2\n";

    (void) fputs (synopsis, stderr);
    exit (1);
    }
247
248static void dojoin ()                   /* Actually perform the join */
249    {
250    int                 comparison;     /* Result of comparing the lines */
251    long                file2pos;       /* Position file 2 started at */
252    register field_t *  line1;          /* Line from file 1 */
253    register field_t *  line2;          /* Line from file 2 */
254    int                 pairable;       /* NZ if lines can be paired */
255    int                 skip2;          /* No. of "unpairable" 2's to skip */
256
257    runs |= FLD_NOSHRINK;               /* Don't do excessive reallocations */
258    field_line_inc = BUFSIZ;            /* Allocate line bfr in huge chunks */
259    line1 = fieldread (file1, tabchar, runs, maxf[0]);
260    file2pos = ftell (file2);
261    skip2 = 0;
262    if (file2pos == -1)
263        {
264        (void) fprintf (stderr, "ijoin:  Can't seek file ");
265        perror ("2");
266        exit (1);
267        }
268    line2 = fieldread (file2, tabchar, runs, maxf[1]);
269    while (line1 != NULL  ||  line2 != NULL)
270        {
271        /*
272         * Do a little work to reduce the number of calls to realloc, at
273         * the expense of slightly-increased memory usage.
274         */
275        if (line1 != NULL  &&  line1->nfields >= field_field_inc)
276            field_field_inc = line1->nfields + 1;
277        if (line2 != NULL  &&  line2->nfields >= field_field_inc)
278            field_field_inc = line2->nfields + 1;
279        /*
280         * Determine if the lines can be paired.
281         */
282        pairable = 1;
283        if (line1 == NULL)
284            {
285            pairable = 0;
286            comparison = 1;             /* This causes file 2 to advance */
287            }
288        else if (join1field >= line1->nfields)
289            {
290            pairable = 0;
291            comparison = -1;            /* This causes file 1 to advance */
292            }
293        if (line2 == NULL)
294            {
295            pairable = 0;
296            comparison = -1;            /* This causes file 1 to advance */
297            }
298        else if (join2field >= line2->nfields)
299            {
300            pairable = 0;
301            comparison = 1;             /* This causes file 2 to advance */
302            }
303        if (pairable)
304            {
305            comparison = (*compare) (line1->fields[join1field],
306              line2->fields[join2field]);
307            pairable = (comparison == 0);
308            }
309        if (pairable)
310            {
311            /*
312             * The two lines can be paired.  Produce output.
313             */
314            if (outlist == NULL)
315                full_output (line1, line2);
316            else
317                selected_output (line1, line2);
318            }
319        /*
320         * Advance through the files
321         */
322        if (comparison < 0)
323            {
324            if (unpairable1)
325                {
326                if (outlist == NULL)
327                    (void) fieldwrite (stdout, line1, tabchar[0]);
328                else
329                    selected_output (line1, (field_t *) NULL);
330                }
331            fieldfree (line1);
332            line1 = fieldread (file1, tabchar, runs, maxf[0]);
333            }
334        else if (comparison > 0)
335            {
336            if (skip2 > 0)
337                skip2--;
338            else if (unpairable2)
339                {
340                if (outlist == NULL)
341                    (void) fieldwrite (stdout, line2, tabchar[0]);
342                else
343                    selected_output ((field_t *) NULL, line2);
344                }
345            fieldfree (line2);
346            file2pos = ftell (file2);
347            line2 = fieldread (file2, tabchar, runs, maxf[1]);
348            }
349        else
350            {
351            /*
352             * Here's the tricky part.  We have to advance file 2
353             * until comparisons fail, and then back it up and advance
354             * file 1.
355             */
356            skip2++;
357            fieldfree (line2);
358            line2 = fieldread (file2, tabchar, runs, maxf[1]);
359            if (line2 == NULL
360              ||  join2field >= line2->nfields
361              ||  (*compare) (line1->fields[join1field],
362                  line2->fields[join2field])
363                != 0)
364                {
365                (void) fseek (file2, file2pos, 0);
366                fieldfree (line2);
367                line2 = fieldread (file2, tabchar, runs, maxf[1]);
368                fieldfree (line1);
369                line1 = fieldread (file1, tabchar, runs, maxf[0]);
370                if (line1 != NULL  &&  line2 != NULL
371                  &&  join1field < line1->nfields
372                  &&  join2field < line2->nfields
373                  &&  (*compare) (line1->fields[join1field],
374                        line2->fields[join2field])
375                    == 0)
376                    skip2 = 0;
377                }
378            }
379        }
380    }
381
382static void full_output (line1, line2)  /* Output everything from both lines */
383    register field_t *  line1;          /* Line from file 1 */
384    register field_t *  line2;          /* Line from file 2 */
385    {
386    register int        fieldno;        /* Number of field being handled */
387
388    (void) fputs (line1->fields[join1field], stdout);
389    for (fieldno = 0;  fieldno < line1->nfields;  fieldno++)
390        {
391        if (fieldno == join1field)
392            continue;
393        (void) putchar (tabchar[0]);
394        if (line1->fields[fieldno][0] == '\0')
395            (void) fputs (emptyfield, stdout);
396        else
397            (void) fputs (line1->fields[fieldno], stdout);
398        }
399    for (fieldno = 0;  fieldno < line2->nfields;  fieldno++)
400        {
401        if (fieldno == join2field)
402            continue;
403        (void) putchar (tabchar[0]);
404        if (line2->fields[fieldno][0] == '\0')
405            (void) fputs (emptyfield, stdout);
406        else
407            (void) fputs (line2->fields[fieldno], stdout);
408        }
409    (void) putchar ('\n');
410    }
411
412static void     selected_output (line1, line2) /* Output selected fields */
413    field_t *           line1;          /* Line from file 1 */
414    field_t *           line2;          /* Line from file 2 */
415    {
416    register field_t *  cline;          /* Current line being handled */
417    register int        listno;         /* Number of output list entry */
418   
419    for (listno = 0;  listno < outlistsize;  listno++)
420        {
421        if (listno != 0)
422            (void) putchar (tabchar[0]);
423        if (outlist[listno].file == 1)
424            cline = line1;
425        else
426            cline = line2;
427        if (cline == NULL
428          ||  outlist[listno].field >= cline->nfields
429          ||  cline->fields[outlist[listno].field][0] == '\0')
430            (void) fputs (emptyfield, stdout);
431        else
432            (void) fputs (cline->fields[outlist[listno].field], stdout);
433        }
434    (void) putchar ('\n');
435    }
436
437static int strscmp (a, b)               /* Compare signed strings */
438    register SIGNED char * a;           /* First string to compare */
439    register SIGNED char * b;           /* Second string to compare */
440    {
441
442    while (*a != '\0')
443        {
444        if (*a++ != *b++)
445            return *--a - *--b;
446        }
447    return *a - *b;
448    }
449
/*
 * Compare two NUL-terminated strings, treating each character as an
 * unsigned quantity.  Returns the difference between the first pair of
 * characters that differ, or zero if the strings are identical.
 */
static int strucmp (a, b)               /* Compare unsigned strings */
    register unsigned char * a;         /* First string to compare */
    register unsigned char * b;         /* Second string to compare */
    {

    /* Walk both strings together while they agree and a has not ended */
    while (*a != '\0'  &&  *a == *b)
        {
        a++;
        b++;
        }
    return *a - *b;
    }
Note: See TracBrowser for help on using the repository browser.