#ifndef lint
/* RCS revision stamp; left out of the build when "lint" is defined. */
static char Rcs_Id[] =
    "$Id: ijoin.c,v 1.2 2002-09-13 00:40:29 ghudson Exp $";
#endif /* lint */
---|
5 | |
---|
6 | /* |
---|
7 | * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA |
---|
8 | * All rights reserved. |
---|
9 | * |
---|
10 | * Redistribution and use in source and binary forms, with or without |
---|
11 | * modification, are permitted provided that the following conditions |
---|
12 | * are met: |
---|
13 | * |
---|
14 | * 1. Redistributions of source code must retain the above copyright |
---|
15 | * notice, this list of conditions and the following disclaimer. |
---|
16 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
17 | * notice, this list of conditions and the following disclaimer in the |
---|
18 | * documentation and/or other materials provided with the distribution. |
---|
19 | * 3. All modifications to the source code must be clearly marked as |
---|
20 | * such. Binary redistributions based on modified source code |
---|
21 | * must be clearly marked as modified versions in the documentation |
---|
22 | * and/or other materials provided with the distribution. |
---|
23 | * 4. All advertising materials mentioning features or use of this software |
---|
24 | * must display the following acknowledgment: |
---|
25 | * This product includes software developed by Geoff Kuenning and |
---|
26 | * other unpaid contributors. |
---|
27 | * 5. The name of Geoff Kuenning may not be used to endorse or promote |
---|
28 | * products derived from this software without specific prior |
---|
29 | * written permission. |
---|
30 | * |
---|
31 | * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND |
---|
32 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
33 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
34 | * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE |
---|
35 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
36 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
37 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
38 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
39 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
40 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
41 | * SUCH DAMAGE. |
---|
42 | */ |
---|
43 | |
---|
44 | /* |
---|
45 | * "Join" command for ispell. |
---|
46 | * |
---|
47 | * This command is a complete reimplementation of the UNIX "join" |
---|
48 | * command, except that fields cannot be separated by a newline, it |
---|
49 | * can handle lines of unlimited length, and the preceding sort can |
---|
50 | * treat characters as either signed or unsigned. |
---|
51 | * |
---|
52 | * Usage: |
---|
53 | * |
---|
54 | * ijoin [options] file1 file2 |
---|
55 | * |
---|
56 | * See the UNIX "join" manual page for option descriptions. Only |
---|
57 | * nonstandard options are described here. |
---|
58 | * |
---|
59 | * Either file1 or file2 may be "-", in which case the standard input |
---|
60 | * is used for that file. |
---|
61 | * |
---|
62 | * Normally, ijoin uses "strcmp" to compare fields. This is the |
---|
63 | * correct thing to do on most systems if you are using the |
---|
64 | * system-provided "sort" command to sort the input files before |
---|
65 | * feeding them to ijoin. In some cases, however, the sort command |
---|
66 | * you use will disagree with strcmp about how to handle characters |
---|
67 | * with the high bit set. If this is the case, you can specify the |
---|
68 | * "-s" (signed comparisons) or "-u" (unsigned comparisons) option to |
---|
69 | * force ijoin to conform to the method used by the sort program. |
---|
70 | * This is only necessary if one of the input files contains 8-bit |
---|
71 | * characters in the field that is being joined on. |
---|
72 | * |
---|
73 | * On some older machines with non-ANSI compilers, the "-s" option |
---|
74 | * will be ineffective because characters default to unsigned. |
---|
75 | * However, this option should be unnecessary on those machines, so no |
---|
76 | * harm will be done. |
---|
77 | */ |
---|
78 | |
---|
79 | /* |
---|
80 | * $Log: not supported by cvs2svn $ |
---|
81 | * Revision 1.1.1.1 1997/09/03 21:08:09 ghudson |
---|
82 | * Import of ispell 3.1.20 |
---|
83 | * |
---|
84 | * Revision 1.6 1994/10/18 04:03:21 geoff |
---|
85 | * Fix a couple of bugs, one where the last field on a line could be |
---|
86 | * output incorrectly, and one where fields from the wrong file could be |
---|
87 | * output. |
---|
88 | * |
---|
89 | * Revision 1.5 1994/01/25 07:11:36 geoff |
---|
90 | * Get rid of all old RCS log lines in preparation for the 3.1 release. |
---|
91 | * |
---|
92 | */ |
---|
93 | |
---|
94 | #include <stdio.h> |
---|
95 | #include "config.h" |
---|
96 | #include "ispell.h" |
---|
97 | #include "proto.h" |
---|
98 | #include "fields.h" |
---|
99 | |
---|
/*
 * SIGNED expands to the "signed" keyword when the compiler defines
 * __STDC__, and to nothing otherwise.
 */
#ifndef __STDC__
#define SIGNED
#else /* __STDC__ */
#define SIGNED signed
#endif /* __STDC__ */
---|
105 | |
---|
106 | int main P ((int argc, char * argv[])); /* Join files */ |
---|
107 | static void usage P ((void)); /* Issue a usage message */ |
---|
108 | static void dojoin P ((void)); /* Actually perform the join */ |
---|
109 | static void full_output P ((field_t * line1, field_t * line2)); |
---|
110 | /* Output everything from both lines */ |
---|
111 | static void selected_output P ((field_t * line1, field_t * line2)); |
---|
112 | /* Output selected fields */ |
---|
113 | static int strscmp P ((SIGNED char * a, SIGNED char * b)); |
---|
114 | /* Signed version of strcmp */ |
---|
115 | static int strucmp P ((unsigned char * a, unsigned char * b)); |
---|
116 | /* Unsigned version of strcmp */ |
---|
117 | |
---|
/*
 * One entry of the output description list built from the "-o" switch.
 */
typedef struct
{
    int file;   /* Number of the file (1 or 2) to take the field from */
    int field;  /* Zero-based index of the field to print */
} outlist_t;
---|
124 | |
---|
125 | static int (*compare) () = strcmp; /* Comparison function */ |
---|
126 | static char * emptyfield = ""; /* Use this to replace empty fields */ |
---|
127 | static FILE * file1; /* First file to join */ |
---|
128 | static FILE * file2; /* Second file to join */ |
---|
129 | static int join1field = 0; /* Field to join file 1 on */ |
---|
130 | static int join2field = 0; /* Field to join file 2 on */ |
---|
131 | static int maxf[2] = {0, 0}; /* Max field to parse in each file */ |
---|
132 | static outlist_t * outlist = NULL; /* List of fields to write */ |
---|
133 | static int outlistsize; /* Number of elements in outlist */ |
---|
134 | static int runs = FLD_RUNS; /* Set if runs of tabchar same as 1 */ |
---|
135 | static char * tabchar = " \t"; /* Field separator character(s) */ |
---|
136 | static int unpairable1 = 0; /* NZ if -a1 */ |
---|
137 | static int unpairable2 = 0; /* NZ if -a2 */ |
---|
138 | |
---|
139 | int main (argc, argv) /* Join files */ |
---|
140 | int argc; /* Argument count */ |
---|
141 | char * argv[]; /* Argument vector */ |
---|
142 | { |
---|
143 | |
---|
144 | while (argc > 3 && argv[1][0] == '-') |
---|
145 | { |
---|
146 | argc--; |
---|
147 | argv++; |
---|
148 | switch (argv[0][1]) |
---|
149 | { |
---|
150 | case 'a': /* produce output for unpairables */ |
---|
151 | if (argv[0][2] == '1') |
---|
152 | unpairable1 = 1; |
---|
153 | else if (argv[0][2] == '2') |
---|
154 | unpairable2 = 1; |
---|
155 | else if (argv[0][2] == '\0') |
---|
156 | unpairable1 = unpairable2 = 1; |
---|
157 | else |
---|
158 | usage (); |
---|
159 | break; |
---|
160 | case 'e': /* Replace empty fields with this */ |
---|
161 | argc--; |
---|
162 | argv++; |
---|
163 | emptyfield = *argv; |
---|
164 | break; |
---|
165 | case 'j': /* Specify field to join on */ |
---|
166 | if (argv[0][2] == '1') |
---|
167 | join1field = atoi (argv[1]) - 1; |
---|
168 | else if (argv[0][2] == '2') |
---|
169 | join2field = atoi (argv[1]) - 1; |
---|
170 | else if (argv[0][2] == '\0') |
---|
171 | join1field = join2field = atoi (argv[1]) - 1; |
---|
172 | else |
---|
173 | usage (); |
---|
174 | argc--; |
---|
175 | argv++; |
---|
176 | break; |
---|
177 | case 'o': /* Specify output list */ |
---|
178 | /* |
---|
179 | * We will assume that all remaining switch arguments |
---|
180 | * are used to describe the output list. This will |
---|
181 | * occasionally result in malloc'ing a few too many |
---|
182 | * elements, but no real harm will be done. |
---|
183 | */ |
---|
184 | outlist = |
---|
185 | (outlist_t *) malloc ((argc - 3) * sizeof (outlist_t)); |
---|
186 | if (outlist == NULL) |
---|
187 | { |
---|
188 | (void) fprintf (stderr, "ijoin: out of memory!\n"); |
---|
189 | return 1; |
---|
190 | } |
---|
191 | for (outlistsize = 0, argc--, argv++; |
---|
192 | argc > 2 && (argv[0][0] == '1' || argv[0][0] == '2') |
---|
193 | && argv[0][1] == '.'; |
---|
194 | argc--, argv++, outlistsize++) |
---|
195 | { |
---|
196 | outlist[outlistsize].file = argv[0][0] - '0'; |
---|
197 | outlist[outlistsize].field = atoi (&argv[0][2]) - 1; |
---|
198 | if (maxf[outlist[outlistsize].file - 1] |
---|
199 | <= outlist[outlistsize].field) |
---|
200 | maxf[outlist[outlistsize].file - 1] = |
---|
201 | outlist[outlistsize].field + 1; |
---|
202 | } |
---|
203 | argc++; /* Un-do arg that stopped us */ |
---|
204 | argv--; |
---|
205 | break; |
---|
206 | case 't': |
---|
207 | tabchar = &argv[0][2]; |
---|
208 | runs &= ~FLD_RUNS; |
---|
209 | break; |
---|
210 | case 's': |
---|
211 | compare = strscmp; |
---|
212 | break; |
---|
213 | case 'u': |
---|
214 | compare = strucmp; |
---|
215 | break; |
---|
216 | default: |
---|
217 | usage (); |
---|
218 | break; |
---|
219 | } |
---|
220 | } |
---|
221 | if (argc != 3) |
---|
222 | usage (); |
---|
223 | if (strcmp (argv[1], "-") == 0) |
---|
224 | file1 = stdin; |
---|
225 | else |
---|
226 | { |
---|
227 | file1 = fopen (argv[1], "r"); |
---|
228 | if (file1 == NULL) |
---|
229 | perror (argv[1]); |
---|
230 | } |
---|
231 | file2 = fopen (argv[2], "r"); |
---|
232 | if (file2 == NULL) |
---|
233 | perror (argv[2]); |
---|
234 | if (file1 == NULL || file2 == NULL) |
---|
235 | return 1; |
---|
236 | dojoin (); |
---|
237 | return 0; |
---|
238 | } |
---|
239 | |
---|
/*
 * Issue a usage message on the standard error stream and exit with
 * status 1.  This routine does not return.
 */
static void usage ()
{
    static char synopsis[] =
      "Usage: ijoin [-an] [-e s] [-jn m] [-o n.m ...] [-tc] file1 file2\n";

    (void) fputs (synopsis, stderr);
    exit (1);
}
---|
247 | |
---|
248 | static void dojoin () /* Actually perform the join */ |
---|
249 | { |
---|
250 | int comparison; /* Result of comparing the lines */ |
---|
251 | long file2pos; /* Position file 2 started at */ |
---|
252 | register field_t * line1; /* Line from file 1 */ |
---|
253 | register field_t * line2; /* Line from file 2 */ |
---|
254 | int pairable; /* NZ if lines can be paired */ |
---|
255 | int skip2; /* No. of "unpairable" 2's to skip */ |
---|
256 | |
---|
257 | runs |= FLD_NOSHRINK; /* Don't do excessive reallocations */ |
---|
258 | field_line_inc = BUFSIZ; /* Allocate line bfr in huge chunks */ |
---|
259 | line1 = fieldread (file1, tabchar, runs, maxf[0]); |
---|
260 | file2pos = ftell (file2); |
---|
261 | skip2 = 0; |
---|
262 | if (file2pos == -1) |
---|
263 | { |
---|
264 | (void) fprintf (stderr, "ijoin: Can't seek file "); |
---|
265 | perror ("2"); |
---|
266 | exit (1); |
---|
267 | } |
---|
268 | line2 = fieldread (file2, tabchar, runs, maxf[1]); |
---|
269 | while (line1 != NULL || line2 != NULL) |
---|
270 | { |
---|
271 | /* |
---|
272 | * Do a little work to reduce the number of calls to realloc, at |
---|
273 | * the expense of slightly-increased memory usage. |
---|
274 | */ |
---|
275 | if (line1 != NULL && line1->nfields >= field_field_inc) |
---|
276 | field_field_inc = line1->nfields + 1; |
---|
277 | if (line2 != NULL && line2->nfields >= field_field_inc) |
---|
278 | field_field_inc = line2->nfields + 1; |
---|
279 | /* |
---|
280 | * Determine if the lines can be paired. |
---|
281 | */ |
---|
282 | pairable = 1; |
---|
283 | if (line1 == NULL) |
---|
284 | { |
---|
285 | pairable = 0; |
---|
286 | comparison = 1; /* This causes file 2 to advance */ |
---|
287 | } |
---|
288 | else if (join1field >= line1->nfields) |
---|
289 | { |
---|
290 | pairable = 0; |
---|
291 | comparison = -1; /* This causes file 1 to advance */ |
---|
292 | } |
---|
293 | if (line2 == NULL) |
---|
294 | { |
---|
295 | pairable = 0; |
---|
296 | comparison = -1; /* This causes file 1 to advance */ |
---|
297 | } |
---|
298 | else if (join2field >= line2->nfields) |
---|
299 | { |
---|
300 | pairable = 0; |
---|
301 | comparison = 1; /* This causes file 2 to advance */ |
---|
302 | } |
---|
303 | if (pairable) |
---|
304 | { |
---|
305 | comparison = (*compare) (line1->fields[join1field], |
---|
306 | line2->fields[join2field]); |
---|
307 | pairable = (comparison == 0); |
---|
308 | } |
---|
309 | if (pairable) |
---|
310 | { |
---|
311 | /* |
---|
312 | * The two lines can be paired. Produce output. |
---|
313 | */ |
---|
314 | if (outlist == NULL) |
---|
315 | full_output (line1, line2); |
---|
316 | else |
---|
317 | selected_output (line1, line2); |
---|
318 | } |
---|
319 | /* |
---|
320 | * Advance through the files |
---|
321 | */ |
---|
322 | if (comparison < 0) |
---|
323 | { |
---|
324 | if (unpairable1) |
---|
325 | { |
---|
326 | if (outlist == NULL) |
---|
327 | (void) fieldwrite (stdout, line1, tabchar[0]); |
---|
328 | else |
---|
329 | selected_output (line1, (field_t *) NULL); |
---|
330 | } |
---|
331 | fieldfree (line1); |
---|
332 | line1 = fieldread (file1, tabchar, runs, maxf[0]); |
---|
333 | } |
---|
334 | else if (comparison > 0) |
---|
335 | { |
---|
336 | if (skip2 > 0) |
---|
337 | skip2--; |
---|
338 | else if (unpairable2) |
---|
339 | { |
---|
340 | if (outlist == NULL) |
---|
341 | (void) fieldwrite (stdout, line2, tabchar[0]); |
---|
342 | else |
---|
343 | selected_output ((field_t *) NULL, line2); |
---|
344 | } |
---|
345 | fieldfree (line2); |
---|
346 | file2pos = ftell (file2); |
---|
347 | line2 = fieldread (file2, tabchar, runs, maxf[1]); |
---|
348 | } |
---|
349 | else |
---|
350 | { |
---|
351 | /* |
---|
352 | * Here's the tricky part. We have to advance file 2 |
---|
353 | * until comparisons fail, and then back it up and advance |
---|
354 | * file 1. |
---|
355 | */ |
---|
356 | skip2++; |
---|
357 | fieldfree (line2); |
---|
358 | line2 = fieldread (file2, tabchar, runs, maxf[1]); |
---|
359 | if (line2 == NULL |
---|
360 | || join2field >= line2->nfields |
---|
361 | || (*compare) (line1->fields[join1field], |
---|
362 | line2->fields[join2field]) |
---|
363 | != 0) |
---|
364 | { |
---|
365 | (void) fseek (file2, file2pos, 0); |
---|
366 | fieldfree (line2); |
---|
367 | line2 = fieldread (file2, tabchar, runs, maxf[1]); |
---|
368 | fieldfree (line1); |
---|
369 | line1 = fieldread (file1, tabchar, runs, maxf[0]); |
---|
370 | if (line1 != NULL && line2 != NULL |
---|
371 | && join1field < line1->nfields |
---|
372 | && join2field < line2->nfields |
---|
373 | && (*compare) (line1->fields[join1field], |
---|
374 | line2->fields[join2field]) |
---|
375 | == 0) |
---|
376 | skip2 = 0; |
---|
377 | } |
---|
378 | } |
---|
379 | } |
---|
380 | } |
---|
381 | |
---|
382 | static void full_output (line1, line2) /* Output everything from both lines */ |
---|
383 | register field_t * line1; /* Line from file 1 */ |
---|
384 | register field_t * line2; /* Line from file 2 */ |
---|
385 | { |
---|
386 | register int fieldno; /* Number of field being handled */ |
---|
387 | |
---|
388 | (void) fputs (line1->fields[join1field], stdout); |
---|
389 | for (fieldno = 0; fieldno < line1->nfields; fieldno++) |
---|
390 | { |
---|
391 | if (fieldno == join1field) |
---|
392 | continue; |
---|
393 | (void) putchar (tabchar[0]); |
---|
394 | if (line1->fields[fieldno][0] == '\0') |
---|
395 | (void) fputs (emptyfield, stdout); |
---|
396 | else |
---|
397 | (void) fputs (line1->fields[fieldno], stdout); |
---|
398 | } |
---|
399 | for (fieldno = 0; fieldno < line2->nfields; fieldno++) |
---|
400 | { |
---|
401 | if (fieldno == join2field) |
---|
402 | continue; |
---|
403 | (void) putchar (tabchar[0]); |
---|
404 | if (line2->fields[fieldno][0] == '\0') |
---|
405 | (void) fputs (emptyfield, stdout); |
---|
406 | else |
---|
407 | (void) fputs (line2->fields[fieldno], stdout); |
---|
408 | } |
---|
409 | (void) putchar ('\n'); |
---|
410 | } |
---|
411 | |
---|
412 | static void selected_output (line1, line2) /* Output selected fields */ |
---|
413 | field_t * line1; /* Line from file 1 */ |
---|
414 | field_t * line2; /* Line from file 2 */ |
---|
415 | { |
---|
416 | register field_t * cline; /* Current line being handled */ |
---|
417 | register int listno; /* Number of output list entry */ |
---|
418 | |
---|
419 | for (listno = 0; listno < outlistsize; listno++) |
---|
420 | { |
---|
421 | if (listno != 0) |
---|
422 | (void) putchar (tabchar[0]); |
---|
423 | if (outlist[listno].file == 1) |
---|
424 | cline = line1; |
---|
425 | else |
---|
426 | cline = line2; |
---|
427 | if (cline == NULL |
---|
428 | || outlist[listno].field >= cline->nfields |
---|
429 | || cline->fields[outlist[listno].field][0] == '\0') |
---|
430 | (void) fputs (emptyfield, stdout); |
---|
431 | else |
---|
432 | (void) fputs (cline->fields[outlist[listno].field], stdout); |
---|
433 | } |
---|
434 | (void) putchar ('\n'); |
---|
435 | } |
---|
436 | |
---|
437 | static int strscmp (a, b) /* Compare signed strings */ |
---|
438 | register SIGNED char * a; /* First string to compare */ |
---|
439 | register SIGNED char * b; /* Second string to compare */ |
---|
440 | { |
---|
441 | |
---|
442 | while (*a != '\0') |
---|
443 | { |
---|
444 | if (*a++ != *b++) |
---|
445 | return *--a - *--b; |
---|
446 | } |
---|
447 | return *a - *b; |
---|
448 | } |
---|
449 | |
---|
/*
 * Compare two strings, reading the characters as unsigned char.
 *
 * Returns zero if the strings are equal.  Otherwise returns the value
 * of the first differing character of "a" minus the corresponding
 * character of "b", which is negative if "a" sorts first.
 */
static int strucmp (a, b)
    register unsigned char * a; /* First string to compare */
    register unsigned char * b; /* Second string to compare */
{
    /* Walk both strings while "a" has not ended and they still agree */
    for (  ;  *a != '\0';  a++, b++)
    {
        if (*a != *b)
            break;
    }
    return *a - *b;
}
---|